diff options
Diffstat (limited to 'llvm/lib/CodeGen')
228 files changed, 24595 insertions, 9898 deletions
diff --git a/llvm/lib/CodeGen/AllocationOrder.cpp b/llvm/lib/CodeGen/AllocationOrder.cpp index c99800659bfd..2aef1234ac0e 100644 --- a/llvm/lib/CodeGen/AllocationOrder.cpp +++ b/llvm/lib/CodeGen/AllocationOrder.cpp @@ -26,17 +26,15 @@ using namespace llvm; #define DEBUG_TYPE "regalloc" // Compare VirtRegMap::getRegAllocPref(). -AllocationOrder::AllocationOrder(unsigned VirtReg, - const VirtRegMap &VRM, - const RegisterClassInfo &RegClassInfo, - const LiveRegMatrix *Matrix) - : Pos(0), HardHints(false) { +AllocationOrder AllocationOrder::create(unsigned VirtReg, const VirtRegMap &VRM, + const RegisterClassInfo &RegClassInfo, + const LiveRegMatrix *Matrix) { const MachineFunction &MF = VRM.getMachineFunction(); const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo(); - Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg)); - if (TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM, Matrix)) - HardHints = true; - rewind(); + auto Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg)); + SmallVector<MCPhysReg, 16> Hints; + bool HardHints = + TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM, Matrix); LLVM_DEBUG({ if (!Hints.empty()) { @@ -51,4 +49,5 @@ AllocationOrder::AllocationOrder(unsigned VirtReg, assert(is_contained(Order, Hints[I]) && "Target hint is outside allocation order."); #endif + return AllocationOrder(std::move(Hints), Order, HardHints); } diff --git a/llvm/lib/CodeGen/AllocationOrder.h b/llvm/lib/CodeGen/AllocationOrder.h index fa0690ab4ea5..0701e6810100 100644 --- a/llvm/lib/CodeGen/AllocationOrder.h +++ b/llvm/lib/CodeGen/AllocationOrder.h @@ -17,9 +17,9 @@ #define LLVM_LIB_CODEGEN_ALLOCATIONORDER_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/MC/MCRegister.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/Register.h" namespace llvm { @@ -28,67 +28,95 @@ class VirtRegMap; class LiveRegMatrix; class LLVM_LIBRARY_VISIBILITY 
AllocationOrder { - SmallVector<MCPhysReg, 16> Hints; + const SmallVector<MCPhysReg, 16> Hints; ArrayRef<MCPhysReg> Order; - int Pos; - - // If HardHints is true, *only* Hints will be returned. - bool HardHints; + // How far into the Order we can iterate. This is 0 if the AllocationOrder is + // constructed with HardHints = true, Order.size() otherwise. While + // technically a size_t, it will participate in comparisons with the + // Iterator's Pos, which must be signed, so it's typed here as signed, too, to + // avoid warnings and under the assumption that the size of Order is + // relatively small. + // IterationLimit defines an invalid iterator position. + const int IterationLimit; public: + /// Forward iterator for an AllocationOrder. + class Iterator final { + const AllocationOrder &AO; + int Pos = 0; + + public: + Iterator(const AllocationOrder &AO, int Pos) : AO(AO), Pos(Pos) {} + + /// Return true if the curent position is that of a preferred register. + bool isHint() const { return Pos < 0; } + + /// Return the next physical register in the allocation order. + MCRegister operator*() const { + if (Pos < 0) + return AO.Hints.end()[Pos]; + assert(Pos < AO.IterationLimit); + return AO.Order[Pos]; + } + + /// Advance the iterator to the next position. If that's past the Hints + /// list, advance to the first value that's not also in the Hints list. + Iterator &operator++() { + if (Pos < AO.IterationLimit) + ++Pos; + while (Pos >= 0 && Pos < AO.IterationLimit && AO.isHint(AO.Order[Pos])) + ++Pos; + return *this; + } + + bool operator==(const Iterator &Other) const { + assert(&AO == &Other.AO); + return Pos == Other.Pos; + } + + bool operator!=(const Iterator &Other) const { return !(*this == Other); } + }; /// Create a new AllocationOrder for VirtReg. /// @param VirtReg Virtual register to allocate for. /// @param VRM Virtual register map for function. /// @param RegClassInfo Information about reserved and allocatable registers. 
- AllocationOrder(unsigned VirtReg, - const VirtRegMap &VRM, - const RegisterClassInfo &RegClassInfo, - const LiveRegMatrix *Matrix); - - /// Get the allocation order without reordered hints. - ArrayRef<MCPhysReg> getOrder() const { return Order; } - - /// Return the next physical register in the allocation order, or 0. - /// It is safe to call next() again after it returned 0, it will keep - /// returning 0 until rewind() is called. - unsigned next(unsigned Limit = 0) { - if (Pos < 0) - return Hints.end()[Pos++]; - if (HardHints) - return 0; - if (!Limit) - Limit = Order.size(); - while (Pos < int(Limit)) { - unsigned Reg = Order[Pos++]; - if (!isHint(Reg)) - return Reg; - } - return 0; + static AllocationOrder create(unsigned VirtReg, const VirtRegMap &VRM, + const RegisterClassInfo &RegClassInfo, + const LiveRegMatrix *Matrix); + + /// Create an AllocationOrder given the Hits, Order, and HardHits values. + /// Use the create method above - the ctor is for unittests. + AllocationOrder(SmallVector<MCPhysReg, 16> &&Hints, ArrayRef<MCPhysReg> Order, + bool HardHints) + : Hints(std::move(Hints)), Order(Order), + IterationLimit(HardHints ? 0 : static_cast<int>(Order.size())) {} + + Iterator begin() const { + return Iterator(*this, -(static_cast<int>(Hints.size()))); } - /// As next(), but allow duplicates to be returned, and stop before the - /// Limit'th register in the RegisterClassInfo allocation order. - /// - /// This can produce more than Limit registers if there are hints. - unsigned nextWithDups(unsigned Limit) { - if (Pos < 0) - return Hints.end()[Pos++]; - if (HardHints) - return 0; - if (Pos < int(Limit)) - return Order[Pos++]; - return 0; - } + Iterator end() const { return Iterator(*this, IterationLimit); } - /// Start over from the beginning. 
- void rewind() { Pos = -int(Hints.size()); } + Iterator getOrderLimitEnd(unsigned OrderLimit) const { + assert(OrderLimit <= Order.size()); + if (OrderLimit == 0) + return end(); + Iterator Ret(*this, + std::min(static_cast<int>(OrderLimit) - 1, IterationLimit)); + return ++Ret; + } - /// Return true if the last register returned from next() was a preferred register. - bool isHint() const { return Pos <= 0; } + /// Get the allocation order without reordered hints. + ArrayRef<MCPhysReg> getOrder() const { return Order; } - /// Return true if PhysReg is a preferred register. - bool isHint(unsigned PhysReg) const { return is_contained(Hints, PhysReg); } + /// Return true if Reg is a preferred physical register. + bool isHint(Register Reg) const { + assert(!Reg.isPhysical() || + Reg.id() < + static_cast<uint32_t>(std::numeric_limits<MCPhysReg>::max())); + return Reg.isPhysical() && is_contained(Hints, Reg.id()); + } }; } // end namespace llvm diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp index 7da28ffec85c..ebeff1fec30b 100644 --- a/llvm/lib/CodeGen/Analysis.cpp +++ b/llvm/lib/CodeGen/Analysis.cpp @@ -88,19 +88,25 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, uint64_t StartingOffset) { // Given a struct type, recursively traverse the elements. if (StructType *STy = dyn_cast<StructType>(Ty)) { - const StructLayout *SL = DL.getStructLayout(STy); + // If the Offsets aren't needed, don't query the struct layout. This allows + // us to support structs with scalable vectors for operations that don't + // need offsets. + const StructLayout *SL = Offsets ? DL.getStructLayout(STy) : nullptr; for (StructType::element_iterator EB = STy->element_begin(), EI = EB, EE = STy->element_end(); - EI != EE; ++EI) + EI != EE; ++EI) { + // Don't compute the element offset if we didn't get a StructLayout above. + uint64_t EltOffset = SL ? 
SL->getElementOffset(EI - EB) : 0; ComputeValueVTs(TLI, DL, *EI, ValueVTs, MemVTs, Offsets, - StartingOffset + SL->getElementOffset(EI - EB)); + StartingOffset + EltOffset); + } return; } // Given an array type, recursively traverse the elements. if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { Type *EltTy = ATy->getElementType(); - uint64_t EltSize = DL.getTypeAllocSize(EltTy); + uint64_t EltSize = DL.getTypeAllocSize(EltTy).getFixedValue(); for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) ComputeValueVTs(TLI, DL, EltTy, ValueVTs, MemVTs, Offsets, StartingOffset + i * EltSize); @@ -131,16 +137,21 @@ void llvm::computeValueLLTs(const DataLayout &DL, Type &Ty, uint64_t StartingOffset) { // Given a struct type, recursively traverse the elements. if (StructType *STy = dyn_cast<StructType>(&Ty)) { - const StructLayout *SL = DL.getStructLayout(STy); - for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I) + // If the Offsets aren't needed, don't query the struct layout. This allows + // us to support structs with scalable vectors for operations that don't + // need offsets. + const StructLayout *SL = Offsets ? DL.getStructLayout(STy) : nullptr; + for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I) { + uint64_t EltOffset = SL ? SL->getElementOffset(I) : 0; computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets, - StartingOffset + SL->getElementOffset(I)); + StartingOffset + EltOffset); + } return; } // Given an array type, recursively traverse the elements. 
if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) { Type *EltTy = ATy->getElementType(); - uint64_t EltSize = DL.getTypeAllocSize(EltTy); + uint64_t EltSize = DL.getTypeAllocSize(EltTy).getFixedValue(); for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) computeValueLLTs(DL, *EltTy, ValueTys, Offsets, StartingOffset + i * EltSize); @@ -174,27 +185,6 @@ GlobalValue *llvm::ExtractTypeInfo(Value *V) { return GV; } -/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being -/// processed uses a memory 'm' constraint. -bool -llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos, - const TargetLowering &TLI) { - for (unsigned i = 0, e = CInfos.size(); i != e; ++i) { - InlineAsm::ConstraintInfo &CI = CInfos[i]; - for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) { - TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]); - if (CType == TargetLowering::C_Memory) - return true; - } - - // Indirect operand accesses access memory. - if (CI.isIndirect) - return true; - } - - return false; -} - /// getFCmpCondCode - Return the ISD condition code corresponding to /// the given LLVM IR floating-point condition code. This includes /// consideration of global floating-point math flags. @@ -537,11 +527,15 @@ bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM) { // Debug info intrinsics do not get in the way of tail call optimization. if (isa<DbgInfoIntrinsic>(BBI)) continue; - // A lifetime end or assume intrinsic should not stop tail call - // optimization. + // Pseudo probe intrinsics do not block tail call optimization either. + if (isa<PseudoProbeInst>(BBI)) + continue; + // A lifetime end, assume or noalias.decl intrinsic should not stop tail + // call optimization. 
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI)) if (II->getIntrinsicID() == Intrinsic::lifetime_end || - II->getIntrinsicID() == Intrinsic::assume) + II->getIntrinsicID() == Intrinsic::assume || + II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl) continue; if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || !isSafeToSpeculativelyExecute(&*BBI)) @@ -739,8 +733,7 @@ static void collectEHScopeMembers( if (Visiting->isEHScopeReturnBlock()) continue; - for (const MachineBasicBlock *Succ : Visiting->successors()) - Worklist.push_back(Succ); + append_range(Worklist, Visiting->successors()); } } diff --git a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp new file mode 100644 index 000000000000..95d878e65be4 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp @@ -0,0 +1,79 @@ +//===-- CodeGen/AsmPrinter/AIXException.cpp - AIX Exception Impl ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing AIX exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#include "DwarfException.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/MC/MCSectionXCOFF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +AIXException::AIXException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {} + +void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA, + const MCSymbol *PerSym) { + // Generate EH Info Table. 
+ // The EH Info Table, aka, 'compat unwind section' on AIX, have the following + // format: struct eh_info_t { + // unsigned version; /* EH info verion 0 */ + // #if defined(__64BIT__) + // char _pad[4]; /* padding */ + // #endif + // unsigned long lsda; /* Pointer to LSDA */ + // unsigned long personality; /* Pointer to the personality routine */ + // } + + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getCompactUnwindSection()); + MCSymbol *EHInfoLabel = + TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(Asm->MF); + Asm->OutStreamer->emitLabel(EHInfoLabel); + + // Version number. + Asm->emitInt32(0); + + const DataLayout &DL = MMI->getModule()->getDataLayout(); + const unsigned PointerSize = DL.getPointerSize(); + + // Add necessary paddings in 64 bit mode. + Asm->OutStreamer->emitValueToAlignment(PointerSize); + + // LSDA location. + Asm->OutStreamer->emitValue(MCSymbolRefExpr::create(LSDA, Asm->OutContext), + PointerSize); + + // Personality routine. + Asm->OutStreamer->emitValue(MCSymbolRefExpr::create(PerSym, Asm->OutContext), + PointerSize); +} + +void AIXException::endFunction(const MachineFunction *MF) { + if (!TargetLoweringObjectFileXCOFF::ShouldEmitEHBlock(MF)) + return; + + const MCSymbol *LSDALabel = emitExceptionTable(); + + const Function &F = MF->getFunction(); + assert(F.hasPersonalityFn() && + "Landingpads are presented, but no personality routine is found."); + const Function *Per = + dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()); + const MCSymbol *PerSym = Asm->TM.getSymbol(Per); + + emitExceptionInfoTable(LSDALabel, PerSym); +} + +} // End of namespace llvm diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp index dea0227f7578..4e45a0ffc60f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -190,7 +190,6 @@ public: template <typename DataT> class Dwarf5AccelTableWriter : public AccelTableWriter { struct 
Header { - uint32_t UnitLength = 0; uint16_t Version = 5; uint16_t Padding = 0; uint32_t CompUnitCount; @@ -271,7 +270,7 @@ void AccelTableWriter::emitOffsets(const MCSymbol *Base) const { continue; PrevHash = HashValue; Asm->OutStreamer->AddComment("Offset in Bucket " + Twine(i)); - Asm->emitLabelDifference(Hash->Sym, Base, sizeof(uint32_t)); + Asm->emitLabelDifference(Hash->Sym, Base, Asm->getDwarfOffsetByteSize()); } } } @@ -367,9 +366,8 @@ void Dwarf5AccelTableWriter<DataT>::Header::emit( assert(CompUnitCount > 0 && "Index must have at least one CU."); AsmPrinter *Asm = Ctx.Asm; - Asm->OutStreamer->AddComment("Header: unit length"); - Asm->emitLabelDifference(Ctx.ContributionEnd, Ctx.ContributionStart, - sizeof(uint32_t)); + Asm->emitDwarfUnitLength(Ctx.ContributionEnd, Ctx.ContributionStart, + "Header: unit length"); Asm->OutStreamer->emitLabel(Ctx.ContributionStart); Asm->OutStreamer->AddComment("Header: version"); Asm->emitInt16(Version); @@ -506,7 +504,7 @@ template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emitData() const { for (const auto *Value : Hash->Values) emitEntry(*static_cast<const DataT *>(Value)); Asm->OutStreamer->AddComment("End of list: " + Hash->Name.getString()); - Asm->emitInt32(0); + Asm->emitInt8(0); } } } @@ -593,10 +591,14 @@ void llvm::emitDWARF5AccelTable( } void AppleAccelTableOffsetData::emit(AsmPrinter *Asm) const { + assert(Die.getDebugSectionOffset() <= UINT32_MAX && + "The section offset exceeds the limit."); Asm->emitInt32(Die.getDebugSectionOffset()); } void AppleAccelTableTypeData::emit(AsmPrinter *Asm) const { + assert(Die.getDebugSectionOffset() <= UINT32_MAX && + "The section offset exceeds the limit."); Asm->emitInt32(Die.getDebugSectionOffset()); Asm->emitInt16(Die.getTag()); Asm->emitInt8(0); diff --git a/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp index 883aaf5aefc4..3df8e35accc4 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp +++ 
b/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -29,9 +29,7 @@ MCSymbol *AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) { MCSymbol *BeginLabel = Asm.createTempSymbol(Prefix + "start"); MCSymbol *EndLabel = Asm.createTempSymbol(Prefix + "end"); - Asm.OutStreamer->AddComment("Length of contribution"); - Asm.emitLabelDifference(EndLabel, BeginLabel, - 4); // TODO: Support DWARF64 format. + Asm.emitDwarfUnitLength(EndLabel, BeginLabel, "Length of contribution"); Asm.OutStreamer->emitLabel(BeginLabel); Asm.OutStreamer->AddComment("DWARF version number"); Asm.emitInt16(Asm.getDwarfVersion()); diff --git a/llvm/lib/CodeGen/AsmPrinter/AddressPool.h b/llvm/lib/CodeGen/AsmPrinter/AddressPool.h index f92cf72093ca..f1edc6c330d5 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AddressPool.h +++ b/llvm/lib/CodeGen/AsmPrinter/AddressPool.h @@ -48,7 +48,7 @@ public: bool hasBeenUsed() const { return HasBeenUsed; } - void resetUsedFlag() { HasBeenUsed = false; } + void resetUsedFlag(bool HasBeenUsed = false) { this->HasBeenUsed = HasBeenUsed; } MCSymbol *getLabel() { return AddressTableBaseSym; } void setLabel(MCSymbol *Sym) { AddressTableBaseSym = Sym; } diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index f8f7b74baf91..85754bf29d0c 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -14,6 +14,7 @@ #include "CodeViewDebug.h" #include "DwarfDebug.h" #include "DwarfException.h" +#include "PseudoProbePrinter.h" #include "WasmException.h" #include "WinCFGuard.h" #include "WinException.h" @@ -30,6 +31,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/BinaryFormat/Dwarf.h" @@ -77,6 +79,7 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include 
"llvm/IR/Operator.h" +#include "llvm/IR/PseudoProbe.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/MC/MCAsmInfo.h" @@ -131,17 +134,25 @@ using namespace llvm; #define DEBUG_TYPE "asm-printer" -static const char *const DWARFGroupName = "dwarf"; -static const char *const DWARFGroupDescription = "DWARF Emission"; -static const char *const DbgTimerName = "emit"; -static const char *const DbgTimerDescription = "Debug Info Emission"; -static const char *const EHTimerName = "write_exception"; -static const char *const EHTimerDescription = "DWARF Exception Writer"; -static const char *const CFGuardName = "Control Flow Guard"; -static const char *const CFGuardDescription = "Control Flow Guard"; -static const char *const CodeViewLineTablesGroupName = "linetables"; -static const char *const CodeViewLineTablesGroupDescription = - "CodeView Line Tables"; +// FIXME: this option currently only applies to DWARF, and not CodeView, tables +static cl::opt<bool> + DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, + cl::desc("Disable debug info printing")); + +const char DWARFGroupName[] = "dwarf"; +const char DWARFGroupDescription[] = "DWARF Emission"; +const char DbgTimerName[] = "emit"; +const char DbgTimerDescription[] = "Debug Info Emission"; +const char EHTimerName[] = "write_exception"; +const char EHTimerDescription[] = "DWARF Exception Writer"; +const char CFGuardName[] = "Control Flow Guard"; +const char CFGuardDescription[] = "Control Flow Guard"; +const char CodeViewLineTablesGroupName[] = "linetables"; +const char CodeViewLineTablesGroupDescription[] = "CodeView Line Tables"; +const char PPTimerName[] = "emit"; +const char PPTimerDescription[] = "Pseudo Probe Emission"; +const char PPGroupName[] = "pseudo probe"; +const char PPGroupDescription[] = "Pseudo Probe Emission"; STATISTIC(EmittedInsts, "Number of machine instrs printed"); @@ -188,7 +199,8 @@ AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer) } 
AsmPrinter::~AsmPrinter() { - assert(!DD && Handlers.empty() && "Debug/EH info didn't get finalized"); + assert(!DD && Handlers.size() == NumUserHandlers && + "Debug/EH info didn't get finalized"); if (GCMetadataPrinters) { gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); @@ -231,9 +243,11 @@ void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) { } void AsmPrinter::emitInitialRawDwarfLocDirective(const MachineFunction &MF) { - assert(DD && "Dwarf debug file is not defined."); - assert(OutStreamer->hasRawTextSupport() && "Expected assembly output mode."); - (void)DD->emitInitialLocDirective(MF, /*CUID=*/0); + if (DD) { + assert(OutStreamer->hasRawTextSupport() && + "Expected assembly output mode."); + (void)DD->emitInitialLocDirective(MF, /*CUID=*/0); + } } /// getCurrentSection() - Return the current section we are emitting to. @@ -261,6 +275,9 @@ bool AsmPrinter::doInitialization(Module &M) { OutStreamer->InitSections(false); + if (DisableDebugInfoPrinting) + MMI->setDebugInfoAvailability(false); + // Emit the version-min deployment target directive if needed. 
// // FIXME: If we end up with a collection of these sorts of Darwin-specific @@ -296,6 +313,7 @@ bool AsmPrinter::doInitialization(Module &M) { std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo( TM.getTargetTriple().str(), TM.getTargetCPU(), TM.getTargetFeatureString())); + assert(STI && "Unable to create subtarget info"); OutStreamer->AddComment("Start of file scope inline assembly"); OutStreamer->AddBlankLine(); emitInlineAsm(M.getModuleInlineAsm() + "\n", @@ -313,14 +331,21 @@ bool AsmPrinter::doInitialization(Module &M) { CodeViewLineTablesGroupDescription); } if (!EmitCodeView || M.getDwarfVersion()) { - DD = new DwarfDebug(this, &M); - DD->beginModule(); - Handlers.emplace_back(std::unique_ptr<DwarfDebug>(DD), DbgTimerName, - DbgTimerDescription, DWARFGroupName, - DWARFGroupDescription); + if (!DisableDebugInfoPrinting) { + DD = new DwarfDebug(this); + Handlers.emplace_back(std::unique_ptr<DwarfDebug>(DD), DbgTimerName, + DbgTimerDescription, DWARFGroupName, + DWARFGroupDescription); + } } } + if (M.getNamedMetadata(PseudoProbeDescMetadataName)) { + PP = new PseudoProbeHandler(this, &M); + Handlers.emplace_back(std::unique_ptr<PseudoProbeHandler>(PP), PPTimerName, + PPTimerDescription, PPGroupName, PPGroupDescription); + } + switch (MAI->getExceptionHandlingType()) { case ExceptionHandling::SjLj: case ExceptionHandling::DwarfCFI: @@ -368,6 +393,9 @@ bool AsmPrinter::doInitialization(Module &M) { case ExceptionHandling::Wasm: ES = new WasmException(this); break; + case ExceptionHandling::AIX: + ES = new AIXException(this); + break; } if (ES) Handlers.emplace_back(std::unique_ptr<EHStreamer>(ES), EHTimerName, @@ -379,6 +407,13 @@ bool AsmPrinter::doInitialization(Module &M) { Handlers.emplace_back(std::make_unique<WinCFGuard>(this), CFGuardName, CFGuardDescription, DWARFGroupName, DWARFGroupDescription); + + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + 
HI.TimerGroupDescription, TimePassesIsEnabled); + HI.Handler->beginModule(&M); + } + return false; } @@ -449,10 +484,8 @@ MCSymbol *AsmPrinter::getSymbolPreferLocal(const GlobalValue &GV) const { if (TM.getTargetTriple().isOSBinFormatELF() && GV.canBenefitFromLocalAlias()) { const Module &M = *GV.getParent(); if (TM.getRelocationModel() != Reloc::Static && - M.getPIELevel() == PIELevel::Default) - if (GV.isDSOLocal() || (TM.getTargetTriple().isX86() && - GV.getParent()->noSemanticInterposition())) - return getSymbolWithGlobalValueBase(&GV, "$local"); + M.getPIELevel() == PIELevel::Default && GV.isDSOLocal()) + return getSymbolWithGlobalValueBase(&GV, "$local"); } return TM.getSymbol(&GV); } @@ -500,8 +533,8 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { GVSym->redefineIfPossible(); if (GVSym->isDefined() || GVSym->isVariable()) - report_fatal_error("symbol '" + Twine(GVSym->getName()) + - "' is already defined"); + OutContext.reportError(SMLoc(), "symbol '" + Twine(GVSym->getName()) + + "' is already defined"); if (MAI->hasDotTypeDotSizeDirective()) OutStreamer->emitSymbolAttribute(EmittedSym, MCSA_ELF_TypeObject); @@ -812,13 +845,21 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { if ((Size = MI.getRestoreSize(TII))) { CommentOS << *Size << "-byte Reload\n"; } else if ((Size = MI.getFoldedRestoreSize(TII))) { - if (*Size) - CommentOS << *Size << "-byte Folded Reload\n"; + if (*Size) { + if (*Size == unsigned(MemoryLocation::UnknownSize)) + CommentOS << "Unknown-size Folded Reload\n"; + else + CommentOS << *Size << "-byte Folded Reload\n"; + } } else if ((Size = MI.getSpillSize(TII))) { CommentOS << *Size << "-byte Spill\n"; } else if ((Size = MI.getFoldedSpillSize(TII))) { - if (*Size) - CommentOS << *Size << "-byte Folded Spill\n"; + if (*Size) { + if (*Size == unsigned(MemoryLocation::UnknownSize)) + CommentOS << "Unknown-size Folded Spill\n"; + else + CommentOS << *Size << "-byte Folded Spill\n"; + } } // Check 
for spill-induced copies @@ -877,7 +918,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { // The second operand is only an offset if it's an immediate. bool MemLoc = MI->isIndirectDebugValue(); - int64_t Offset = MemLoc ? MI->getOperand(1).getImm() : 0; + auto Offset = StackOffset::getFixed(MemLoc ? MI->getOperand(1).getImm() : 0); const DIExpression *Expr = MI->getDebugExpression(); if (Expr->getNumElements()) { OS << '['; @@ -916,6 +957,8 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { } else if (MI->getDebugOperand(0).isTargetIndex()) { auto Op = MI->getDebugOperand(0); OS << "!target-index(" << Op.getIndex() << "," << Op.getOffset() << ")"; + // NOTE: Want this comment at start of line, don't emit with AddComment. + AP.OutStreamer->emitRawComment(OS.str()); return true; } else { Register Reg; @@ -941,7 +984,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { } if (MemLoc) - OS << '+' << Offset << ']'; + OS << '+' << Offset.getFixed() << ']'; // NOTE: Want this comment at start of line, don't emit with AddComment. AP.OutStreamer->emitRawComment(OS.str()); @@ -1023,6 +1066,56 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) { MCConstantExpr::create(FrameOffset, OutContext)); } +/// Returns the BB metadata to be emitted in the .llvm_bb_addr_map section for a +/// given basic block. This can be used to capture more precise profile +/// information. We use the last 3 bits (LSBs) to ecnode the following +/// information: +/// * (1): set if return block (ret or tail call). +/// * (2): set if ends with a tail call. +/// * (3): set if exception handling (EH) landing pad. +/// The remaining bits are zero. 
+static unsigned getBBAddrMapMetadata(const MachineBasicBlock &MBB) { + const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo(); + return ((unsigned)MBB.isReturnBlock()) | + ((!MBB.empty() && TII->isTailCall(MBB.back())) << 1) | + (MBB.isEHPad() << 2); +} + +void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { + MCSection *BBAddrMapSection = + getObjFileLowering().getBBAddrMapSection(*MF.getSection()); + assert(BBAddrMapSection && ".llvm_bb_addr_map section is not initialized."); + + const MCSymbol *FunctionSymbol = getFunctionBegin(); + + OutStreamer->PushSection(); + OutStreamer->SwitchSection(BBAddrMapSection); + OutStreamer->emitSymbolValue(FunctionSymbol, getPointerSize()); + // Emit the total number of basic blocks in this function. + OutStreamer->emitULEB128IntValue(MF.size()); + // Emit BB Information for each basic block in the funciton. + for (const MachineBasicBlock &MBB : MF) { + const MCSymbol *MBBSymbol = + MBB.isEntryBlock() ? FunctionSymbol : MBB.getSymbol(); + // Emit the basic block offset. + emitLabelDifferenceAsULEB128(MBBSymbol, FunctionSymbol); + // Emit the basic block size. When BBs have alignments, their size cannot + // always be computed from their offsets. + emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), MBBSymbol); + OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB)); + } + OutStreamer->PopSection(); +} + +void AsmPrinter::emitPseudoProbe(const MachineInstr &MI) { + auto GUID = MI.getOperand(0).getImm(); + auto Index = MI.getOperand(1).getImm(); + auto Type = MI.getOperand(2).getImm(); + auto Attr = MI.getOperand(3).getImm(); + DILocation *DebugLoc = MI.getDebugLoc(); + PP->emitPseudoProbe(GUID, Index, Type, Attr, DebugLoc); +} + void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) { if (!MF.getTarget().Options.EmitStackSizeSection) return; @@ -1069,8 +1162,6 @@ void AsmPrinter::emitFunctionBody() { // Emit target-specific gunk before the function body. 
emitFunctionBodyStart(); - bool ShouldPrintDebugScopes = MMI->hasDebugInfo(); - if (isVerbose()) { // Get MachineDominatorTree or compute it on the fly if it's unavailable MDT = getAnalysisIfAvailable<MachineDominatorTree>(); @@ -1093,9 +1184,11 @@ void AsmPrinter::emitFunctionBody() { bool HasAnyRealCode = false; int NumInstsInFunction = 0; + bool CanDoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); for (auto &MBB : *MF) { // Print a label for the basic block. emitBasicBlockStart(MBB); + DenseMap<StringRef, unsigned> MnemonicCounts; for (auto &MI : MBB) { // Print the assembly for the instruction. if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() && @@ -1108,13 +1201,10 @@ void AsmPrinter::emitFunctionBody() { if (MCSymbol *S = MI.getPreInstrSymbol()) OutStreamer->emitLabel(S); - if (ShouldPrintDebugScopes) { - for (const HandlerInfo &HI : Handlers) { - NamedRegionTimer T(HI.TimerName, HI.TimerDescription, - HI.TimerGroupName, HI.TimerGroupDescription, - TimePassesIsEnabled); - HI.Handler->beginInstruction(&MI); - } + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + HI.TimerGroupDescription, TimePassesIsEnabled); + HI.Handler->beginInstruction(&MI); } if (isVerbose()) @@ -1142,6 +1232,11 @@ void AsmPrinter::emitFunctionBody() { emitInstruction(&MI); } break; + case TargetOpcode::DBG_INSTR_REF: + // This instruction reference will have been resolved to a machine + // location, and a nearby DBG_VALUE created. We can safely ignore + // the instruction reference. 
+ break; case TargetOpcode::DBG_LABEL: if (isVerbose()) { if (!emitDebugLabelComment(&MI, *this)) @@ -1154,8 +1249,18 @@ void AsmPrinter::emitFunctionBody() { case TargetOpcode::KILL: if (isVerbose()) emitKill(&MI, *this); break; + case TargetOpcode::PSEUDO_PROBE: + emitPseudoProbe(MI); + break; default: emitInstruction(&MI); + if (CanDoExtraAnalysis) { + MCInst MCI; + MCI.setOpcode(MI.getOpcode()); + auto Name = OutStreamer->getMnemonic(MCI); + auto I = MnemonicCounts.insert({Name, 0u}); + I.first->second++; + } break; } @@ -1163,54 +1268,69 @@ void AsmPrinter::emitFunctionBody() { if (MCSymbol *S = MI.getPostInstrSymbol()) OutStreamer->emitLabel(S); - if (ShouldPrintDebugScopes) { - for (const HandlerInfo &HI : Handlers) { - NamedRegionTimer T(HI.TimerName, HI.TimerDescription, - HI.TimerGroupName, HI.TimerGroupDescription, - TimePassesIsEnabled); - HI.Handler->endInstruction(); - } + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + HI.TimerGroupDescription, TimePassesIsEnabled); + HI.Handler->endInstruction(); } } - // We need a temporary symbol for the end of this basic block, if either we - // have BBLabels enabled and we want to emit size directive for the BBs, or - // if this basic blocks marks the end of a section (except the section - // containing the entry basic block as the end symbol for that section is - // CurrentFnEnd). - MCSymbol *CurrentBBEnd = nullptr; - if ((MAI->hasDotTypeDotSizeDirective() && MF->hasBBLabels()) || - (MBB.isEndSection() && !MBB.sameSection(&MF->front()))) { - CurrentBBEnd = OutContext.createTempSymbol(); - OutStreamer->emitLabel(CurrentBBEnd); - } + // We must emit temporary symbol for the end of this basic block, if either + // we have BBLabels enabled or if this basic blocks marks the end of a + // section (except the section containing the entry basic block as the end + // symbol for that section is CurrentFnEnd). 
+ if (MF->hasBBLabels() || + (MAI->hasDotTypeDotSizeDirective() && MBB.isEndSection() && + !MBB.sameSection(&MF->front()))) + OutStreamer->emitLabel(MBB.getEndSymbol()); - // Helper for emitting the size directive associated with a basic block - // symbol. - auto emitELFSizeDirective = [&](MCSymbol *SymForSize) { - assert(CurrentBBEnd && "Basicblock end symbol not set!"); - const MCExpr *SizeExp = MCBinaryExpr::createSub( - MCSymbolRefExpr::create(CurrentBBEnd, OutContext), - MCSymbolRefExpr::create(SymForSize, OutContext), OutContext); - OutStreamer->emitELFSize(SymForSize, SizeExp); - }; - - // Emit size directive for the size of each basic block, if BBLabels is - // enabled. - if (MAI->hasDotTypeDotSizeDirective() && MF->hasBBLabels()) - emitELFSizeDirective(MBB.getSymbol()); - - // Emit size directive for the size of each basic block section once we - // get to the end of that section. if (MBB.isEndSection()) { + // The size directive for the section containing the entry block is + // handled separately by the function section. if (!MBB.sameSection(&MF->front())) { - if (MAI->hasDotTypeDotSizeDirective()) - emitELFSizeDirective(CurrentSectionBeginSym); + if (MAI->hasDotTypeDotSizeDirective()) { + // Emit the size directive for the basic block section. + const MCExpr *SizeExp = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(MBB.getEndSymbol(), OutContext), + MCSymbolRefExpr::create(CurrentSectionBeginSym, OutContext), + OutContext); + OutStreamer->emitELFSize(CurrentSectionBeginSym, SizeExp); + } MBBSectionRanges[MBB.getSectionIDNum()] = - MBBSectionRange{CurrentSectionBeginSym, CurrentBBEnd}; + MBBSectionRange{CurrentSectionBeginSym, MBB.getEndSymbol()}; } } emitBasicBlockEnd(MBB); + + if (CanDoExtraAnalysis) { + // Skip empty blocks. + if (MBB.empty()) + continue; + + MachineOptimizationRemarkAnalysis R(DEBUG_TYPE, "InstructionMix", + MBB.begin()->getDebugLoc(), &MBB); + + // Generate instruction mix remark. 
First, sort counts in descending order + // by count and name. + SmallVector<std::pair<StringRef, unsigned>, 128> MnemonicVec; + for (auto &KV : MnemonicCounts) + MnemonicVec.emplace_back(KV.first, KV.second); + + sort(MnemonicVec, [](const std::pair<StringRef, unsigned> &A, + const std::pair<StringRef, unsigned> &B) { + if (A.second > B.second) + return true; + if (A.second == B.second) + return StringRef(A.first) < StringRef(B.first); + return false; + }); + R << "BasicBlock: " << ore::NV("BasicBlock", MBB.getName()) << "\n"; + for (auto &KV : MnemonicVec) { + auto Name = (Twine("INST_") + KV.first.trim()).str(); + R << KV.first << ": " << ore::NV(Name, KV.second) << "\n"; + } + ORE->emit(R); + } } EmittedInsts += NumInstsInFunction; @@ -1297,6 +1417,11 @@ void AsmPrinter::emitFunctionBody() { HI.Handler->endFunction(MF); } + // Emit section containing BB address offsets and their metadata, when + // BB labels are requested for this function. + if (MF->hasBBLabels()) + emitBBAddrMapSection(*MF); + // Emit section containing stack size metadata. emitStackSizeSection(*MF); @@ -1390,16 +1515,7 @@ void AsmPrinter::emitGlobalGOTEquivs() { void AsmPrinter::emitGlobalIndirectSymbol(Module &M, const GlobalIndirectSymbol& GIS) { MCSymbol *Name = getSymbol(&GIS); - - if (GIS.hasExternalLinkage() || !MAI->getWeakRefDirective()) - OutStreamer->emitSymbolAttribute(Name, MCSA_Global); - else if (GIS.hasWeakLinkage() || GIS.hasLinkOnceLinkage()) - OutStreamer->emitSymbolAttribute(Name, MCSA_WeakReference); - else - assert(GIS.hasLocalLinkage() && "Invalid alias or ifunc linkage"); - bool IsFunction = GIS.getValueType()->isFunctionTy(); - // Treat bitcasts of functions as functions also. This is important at least // on WebAssembly where object and function addresses can't alias each other. 
if (!IsFunction) @@ -1408,6 +1524,30 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M, IsFunction = CE->getOperand(0)->getType()->getPointerElementType()->isFunctionTy(); + // AIX's assembly directive `.set` is not usable for aliasing purpose, + // so AIX has to use the extra-label-at-definition strategy. At this + // point, all the extra label is emitted, we just have to emit linkage for + // those labels. + if (TM.getTargetTriple().isOSBinFormatXCOFF()) { + assert(!isa<GlobalIFunc>(GIS) && "IFunc is not supported on AIX."); + assert(MAI->hasVisibilityOnlyWithLinkage() && + "Visibility should be handled with emitLinkage() on AIX."); + emitLinkage(&GIS, Name); + // If it's a function, also emit linkage for aliases of function entry + // point. + if (IsFunction) + emitLinkage(&GIS, + getObjFileLowering().getFunctionEntryPointSymbol(&GIS, TM)); + return; + } + + if (GIS.hasExternalLinkage() || !MAI->getWeakRefDirective()) + OutStreamer->emitSymbolAttribute(Name, MCSA_Global); + else if (GIS.hasWeakLinkage() || GIS.hasLinkOnceLinkage()) + OutStreamer->emitSymbolAttribute(Name, MCSA_WeakReference); + else + assert(GIS.hasLocalLinkage() && "Invalid alias or ifunc linkage"); + // Set the symbol type to function if the alias has a function type. // This affects codegen when the aliasee is not a function. if (IsFunction) @@ -1517,9 +1657,8 @@ bool AsmPrinter::doFinalization(Module &M) { // Variable `Name` is the function descriptor symbol (see above). Get the // function entry point symbol. MCSymbol *FnEntryPointSym = TLOF.getFunctionEntryPointSymbol(&F, TM); - if (cast<MCSymbolXCOFF>(FnEntryPointSym)->hasRepresentedCsectSet()) - // Emit linkage for the function entry point. - emitLinkage(&F, FnEntryPointSym); + // Emit linkage for the function entry point. + emitLinkage(&F, FnEntryPointSym); // Emit linkage for the function descriptor. 
emitLinkage(&F, Name); @@ -1584,7 +1723,11 @@ bool AsmPrinter::doFinalization(Module &M) { HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->endModule(); } - Handlers.clear(); + + // This deletes all the ephemeral handlers that AsmPrinter added, while + // keeping all the user-added handlers alive until the AsmPrinter is + // destroyed. + Handlers.erase(Handlers.begin() + NumUserHandlers, Handlers.end()); DD = nullptr; // If the target wants to know about weak references, print them all. @@ -1668,51 +1811,6 @@ bool AsmPrinter::doFinalization(Module &M) { if (MCSection *S = MAI->getNonexecutableStackSection(OutContext)) OutStreamer->SwitchSection(S); - if (TM.getTargetTriple().isOSBinFormatCOFF()) { - // Emit /EXPORT: flags for each exported global as necessary. - const auto &TLOF = getObjFileLowering(); - std::string Flags; - - for (const GlobalValue &GV : M.global_values()) { - raw_string_ostream OS(Flags); - TLOF.emitLinkerFlagsForGlobal(OS, &GV); - OS.flush(); - if (!Flags.empty()) { - OutStreamer->SwitchSection(TLOF.getDrectveSection()); - OutStreamer->emitBytes(Flags); - } - Flags.clear(); - } - - // Emit /INCLUDE: flags for each used global as necessary. - if (const auto *LU = M.getNamedGlobal("llvm.used")) { - assert(LU->hasInitializer() && - "expected llvm.used to have an initializer"); - assert(isa<ArrayType>(LU->getValueType()) && - "expected llvm.used to be an array type"); - if (const auto *A = cast<ConstantArray>(LU->getInitializer())) { - for (const Value *Op : A->operands()) { - const auto *GV = cast<GlobalValue>(Op->stripPointerCasts()); - // Global symbols with internal or private linkage are not visible to - // the linker, and thus would cause an error when the linker tried to - // preserve the symbol due to the `/include:` directive. 
- if (GV->hasLocalLinkage()) - continue; - - raw_string_ostream OS(Flags); - TLOF.emitLinkerFlagsForUsed(OS, GV); - OS.flush(); - - if (!Flags.empty()) { - OutStreamer->SwitchSection(TLOF.getDrectveSection()); - OutStreamer->emitBytes(Flags); - } - Flags.clear(); - } - } - } - } - if (TM.Options.EmitAddrsig) { // Emit address-significance attributes for all globals. OutStreamer->emitAddrsig(); @@ -1756,10 +1854,11 @@ bool AsmPrinter::doFinalization(Module &M) { return false; } -MCSymbol *AsmPrinter::getCurExceptionSym() { - if (!CurExceptionSym) - CurExceptionSym = createTempSymbol("exception"); - return CurExceptionSym; +MCSymbol *AsmPrinter::getMBBExceptionSym(const MachineBasicBlock &MBB) { + auto Res = MBBSectionExceptionSyms.try_emplace(MBB.getSectionIDNum()); + if (Res.second) + Res.first->second = createTempSymbol("exception"); + return Res.first->second; } void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { @@ -1786,13 +1885,13 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { CurrentFnBegin = nullptr; CurrentSectionBeginSym = nullptr; MBBSectionRanges.clear(); - CurExceptionSym = nullptr; + MBBSectionExceptionSyms.clear(); bool NeedsLocalForSize = MAI->needsLocalForSize(); if (F.hasFnAttribute("patchable-function-entry") || F.hasFnAttribute("function-instrument") || F.hasFnAttribute("xray-instruction-threshold") || needFuncLabelsForEHOrDebugInfo(MF) || NeedsLocalForSize || - MF.getTarget().Options.EmitStackSizeSection) { + MF.getTarget().Options.EmitStackSizeSection || MF.hasBBLabels()) { CurrentFnBegin = createTempSymbol("func_begin"); if (NeedsLocalForSize) CurrentFnSymForSize = CurrentFnBegin; @@ -1882,8 +1981,7 @@ void AsmPrinter::emitConstantPool() { unsigned NewOffset = alignTo(Offset, CPE.getAlign()); OutStreamer->emitZeros(NewOffset - Offset); - Type *Ty = CPE.getType(); - Offset = NewOffset + getDataLayout().getTypeAllocSize(Ty); + Offset = NewOffset + CPE.getSizeInBytes(getDataLayout()); OutStreamer->emitLabel(Sym); if 
(CPE.isMachineConstantPoolEntry()) @@ -2083,47 +2181,50 @@ void AsmPrinter::emitLLVMUsedList(const ConstantArray *InitList) { } } -namespace { - -struct Structor { - int Priority = 0; - Constant *Func = nullptr; - GlobalValue *ComdatKey = nullptr; - - Structor() = default; -}; - -} // end anonymous namespace - -/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init -/// priority. -void AsmPrinter::emitXXStructorList(const DataLayout &DL, const Constant *List, - bool isCtor) { - // Should be an array of '{ i32, void ()*, i8* }' structs. The first value is the - // init priority. - if (!isa<ConstantArray>(List)) return; +void AsmPrinter::preprocessXXStructorList(const DataLayout &DL, + const Constant *List, + SmallVector<Structor, 8> &Structors) { + // Should be an array of '{ i32, void ()*, i8* }' structs. The first value is + // the init priority. + if (!isa<ConstantArray>(List)) + return; // Gather the structors in a form that's convenient for sorting by priority. - SmallVector<Structor, 8> Structors; for (Value *O : cast<ConstantArray>(List)->operands()) { auto *CS = cast<ConstantStruct>(O); if (CS->getOperand(1)->isNullValue()) - break; // Found a null terminator, skip the rest. + break; // Found a null terminator, skip the rest. ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0)); - if (!Priority) continue; // Malformed. + if (!Priority) + continue; // Malformed. 
Structors.push_back(Structor()); Structor &S = Structors.back(); S.Priority = Priority->getLimitedValue(65535); S.Func = CS->getOperand(1); - if (!CS->getOperand(2)->isNullValue()) + if (!CS->getOperand(2)->isNullValue()) { + if (TM.getTargetTriple().isOSAIX()) + llvm::report_fatal_error( + "associated data of XXStructor list is not yet supported on AIX"); S.ComdatKey = dyn_cast<GlobalValue>(CS->getOperand(2)->stripPointerCasts()); + } } // Emit the function pointers in the target-specific order llvm::stable_sort(Structors, [](const Structor &L, const Structor &R) { return L.Priority < R.Priority; }); +} + +/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init +/// priority. +void AsmPrinter::emitXXStructorList(const DataLayout &DL, const Constant *List, + bool IsCtor) { + SmallVector<Structor, 8> Structors; + preprocessXXStructorList(DL, List, Structors); + if (Structors.empty()) + return; + const Align Align = DL.getPointerPrefAlignment(); for (Structor &S : Structors) { const TargetLoweringObjectFile &Obj = getObjFileLowering(); @@ -2139,8 +2240,9 @@ void AsmPrinter::emitXXStructorList(const DataLayout &DL, const Constant *List, KeySym = getSymbol(GV); } + MCSection *OutputSection = - (isCtor ? Obj.getStaticCtorSection(S.Priority, KeySym) + (IsCtor ? 
Obj.getStaticCtorSection(S.Priority, KeySym) : Obj.getStaticDtorSection(S.Priority, KeySym)); OutStreamer->SwitchSection(OutputSection); if (OutStreamer->getCurrentSection() != OutStreamer->getPreviousSection()) @@ -2274,12 +2376,25 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) return MCSymbolRefExpr::create(GetBlockAddressSymbol(BA), Ctx); + if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(CV)) + return getObjFileLowering().lowerDSOLocalEquivalent(Equiv, TM); + const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); if (!CE) { llvm_unreachable("Unknown constant value to lower!"); } switch (CE->getOpcode()) { + case Instruction::AddrSpaceCast: { + const Constant *Op = CE->getOperand(0); + unsigned DstAS = CE->getType()->getPointerAddressSpace(); + unsigned SrcAS = Op->getType()->getPointerAddressSpace(); + if (TM.isNoopAddrSpaceCast(SrcAS, DstAS)) + return lowerConstant(Op); + + // Fallthrough to error. + LLVM_FALLTHROUGH; + } default: { // If the code isn't optimized, there may be outstanding folding // opportunities. Attempt to fold the expression using DataLayout as a @@ -2345,7 +2460,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { // // If the pointer is larger than the resultant integer, then // as with Trunc just depend on the assembler to truncate it. 
- if (DL.getTypeAllocSize(Ty) <= DL.getTypeAllocSize(Op->getType())) + if (DL.getTypeAllocSize(Ty).getFixedSize() <= + DL.getTypeAllocSize(Op->getType()).getFixedSize()) return OpExpr; // Otherwise the pointer is smaller than the resultant integer, mask off @@ -2359,18 +2475,25 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { case Instruction::Sub: { GlobalValue *LHSGV; APInt LHSOffset; + DSOLocalEquivalent *DSOEquiv; if (IsConstantOffsetFromGlobal(CE->getOperand(0), LHSGV, LHSOffset, - getDataLayout())) { + getDataLayout(), &DSOEquiv)) { GlobalValue *RHSGV; APInt RHSOffset; if (IsConstantOffsetFromGlobal(CE->getOperand(1), RHSGV, RHSOffset, getDataLayout())) { const MCExpr *RelocExpr = getObjFileLowering().lowerRelativeReference(LHSGV, RHSGV, TM); - if (!RelocExpr) + if (!RelocExpr) { + const MCExpr *LHSExpr = + MCSymbolRefExpr::create(getSymbol(LHSGV), Ctx); + if (DSOEquiv && + getObjFileLowering().supportDSOLocalEquivalentLowering()) + LHSExpr = + getObjFileLowering().lowerDSOLocalEquivalent(DSOEquiv, TM); RelocExpr = MCBinaryExpr::createSub( - MCSymbolRefExpr::create(getSymbol(LHSGV), Ctx), - MCSymbolRefExpr::create(getSymbol(RHSGV), Ctx), Ctx); + LHSExpr, MCSymbolRefExpr::create(getSymbol(RHSGV), Ctx), Ctx); + } int64_t Addend = (LHSOffset - RHSOffset).getSExtValue(); if (Addend != 0) RelocExpr = MCBinaryExpr::createAdd( @@ -2779,7 +2902,7 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV, if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { const uint64_t StoreSize = DL.getTypeStoreSize(CV->getType()); - if (StoreSize < 8) { + if (StoreSize <= 8) { if (AP.isVerbose()) AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n", CI->getZExtValue()); @@ -3001,7 +3124,7 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, OS.indent(Loop->getLoopDepth()*2-2); OS << "This "; - if (Loop->empty()) + if (Loop->isInnermost()) OS << "Inner "; OS << "Loop Header: Depth=" + Twine(Loop->getLoopDepth()) << 
'\n'; @@ -3025,6 +3148,16 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { if (Alignment != Align(1)) emitAlignment(Alignment); + // Switch to a new section if this basic block must begin a section. The + // entry block is always placed in the function section and is handled + // separately. + if (MBB.isBeginSection() && !MBB.isEntryBlock()) { + OutStreamer->SwitchSection( + getObjFileLowering().getSectionForMachineBasicBlock(MF->getFunction(), + MBB, TM)); + CurrentSectionBeginSym = MBB.getSymbol(); + } + // If the block has its address taken, emit any labels that were used to // reference the block. It is possible that there is more than one label // here, because multiple LLVM BB's may have been RAUW'd to this block after @@ -3055,33 +3188,25 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { emitBasicBlockLoopComments(MBB, MLI, *this); } - if (MBB.pred_empty() || - (!MF->hasBBLabels() && isBlockOnlyReachableByFallthrough(&MBB) && - !MBB.isEHFuncletEntry() && !MBB.hasLabelMustBeEmitted())) { + // Print the main label for the block. + if (shouldEmitLabelForBasicBlock(MBB)) { + if (isVerbose() && MBB.hasLabelMustBeEmitted()) + OutStreamer->AddComment("Label of block must be emitted"); + OutStreamer->emitLabel(MBB.getSymbol()); + } else { if (isVerbose()) { // NOTE: Want this comment at start of line, don't emit with AddComment. OutStreamer->emitRawComment(" %bb." + Twine(MBB.getNumber()) + ":", false); } - } else { - if (isVerbose() && MBB.hasLabelMustBeEmitted()) { - OutStreamer->AddComment("Label of block must be emitted"); - } - auto *BBSymbol = MBB.getSymbol(); - // Switch to a new section if this basic block must begin a section. 
- if (MBB.isBeginSection()) { - OutStreamer->SwitchSection( - getObjFileLowering().getSectionForMachineBasicBlock(MF->getFunction(), - MBB, TM)); - CurrentSectionBeginSym = BBSymbol; - } - OutStreamer->emitLabel(BBSymbol); - // With BB sections, each basic block must handle CFI information on its own - // if it begins a section. - if (MBB.isBeginSection()) - for (const HandlerInfo &HI : Handlers) - HI.Handler->beginBasicBlock(MBB); } + + // With BB sections, each basic block must handle CFI information on its own + // if it begins a section (Entry block is handled separately by + // AsmPrinterHandler::beginFunction). + if (MBB.isBeginSection() && !MBB.isEntryBlock()) + for (const HandlerInfo &HI : Handlers) + HI.Handler->beginBasicBlock(MBB); } void AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) { @@ -3113,15 +3238,26 @@ void AsmPrinter::emitVisibility(MCSymbol *Sym, unsigned Visibility, OutStreamer->emitSymbolAttribute(Sym, Attr); } +bool AsmPrinter::shouldEmitLabelForBasicBlock( + const MachineBasicBlock &MBB) const { + // With `-fbasic-block-sections=`, a label is needed for every non-entry block + // in the labels mode (option `=labels`) and every section beginning in the + // sections mode (`=all` and `=list=`). + if ((MF->hasBBLabels() || MBB.isBeginSection()) && !MBB.isEntryBlock()) + return true; + // A label is needed for any block with at least one predecessor (when that + // predecessor is not the fallthrough predecessor, or if it is an EH funclet + // entry, or if a label is forced). + return !MBB.pred_empty() && + (!isBlockOnlyReachableByFallthrough(&MBB) || MBB.isEHFuncletEntry() || + MBB.hasLabelMustBeEmitted()); +} + /// isBlockOnlyReachableByFallthough - Return true if the basic block has /// exactly one predecessor and the control transfer mechanism between /// the predecessor and this block is a fall-through. 
bool AsmPrinter:: isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { - // With BasicBlock Sections, beginning of the section is not a fallthrough. - if (MBB->isBeginSection()) - return false; - // If this is a landing pad, it isn't a fall through. If it has no preds, // then nothing falls through to it. if (MBB->isEHPad() || MBB->pred_empty()) @@ -3232,14 +3368,10 @@ void AsmPrinter::emitXRayTable() { MCSection *InstMap = nullptr; MCSection *FnSledIndex = nullptr; const Triple &TT = TM.getTargetTriple(); - // Use PC-relative addresses on all targets except MIPS (MIPS64 cannot use - // PC-relative addresses because R_MIPS_PC64 does not exist). - bool PCRel = !TT.isMIPS(); + // Use PC-relative addresses on all targets. if (TT.isOSBinFormatELF()) { auto LinkedToSym = cast<MCSymbolELF>(CurrentFnSym); auto Flags = ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER; - if (!PCRel) - Flags |= ELF::SHF_WRITE; StringRef GroupName; if (F.hasComdat()) { Flags |= ELF::SHF_GROUP; @@ -3273,25 +3405,20 @@ void AsmPrinter::emitXRayTable() { OutStreamer->SwitchSection(InstMap); OutStreamer->emitLabel(SledsStart); for (const auto &Sled : Sleds) { - if (PCRel) { - MCSymbol *Dot = Ctx.createTempSymbol(); - OutStreamer->emitLabel(Dot); - OutStreamer->emitValueImpl( - MCBinaryExpr::createSub(MCSymbolRefExpr::create(Sled.Sled, Ctx), - MCSymbolRefExpr::create(Dot, Ctx), Ctx), - WordSizeBytes); - OutStreamer->emitValueImpl( - MCBinaryExpr::createSub( - MCSymbolRefExpr::create(CurrentFnBegin, Ctx), - MCBinaryExpr::createAdd( - MCSymbolRefExpr::create(Dot, Ctx), - MCConstantExpr::create(WordSizeBytes, Ctx), Ctx), - Ctx), - WordSizeBytes); - } else { - OutStreamer->emitSymbolValue(Sled.Sled, WordSizeBytes); - OutStreamer->emitSymbolValue(CurrentFnSym, WordSizeBytes); - } + MCSymbol *Dot = Ctx.createTempSymbol(); + OutStreamer->emitLabel(Dot); + OutStreamer->emitValueImpl( + MCBinaryExpr::createSub(MCSymbolRefExpr::create(Sled.Sled, Ctx), + MCSymbolRefExpr::create(Dot, Ctx), Ctx), + 
WordSizeBytes); + OutStreamer->emitValueImpl( + MCBinaryExpr::createSub( + MCSymbolRefExpr::create(CurrentFnBegin, Ctx), + MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Dot, Ctx), + MCConstantExpr::create(WordSizeBytes, Ctx), + Ctx), + Ctx), + WordSizeBytes); Sled.emit(WordSizeBytes, OutStreamer.get()); } MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_sleds_end", true); @@ -3366,3 +3493,17 @@ uint16_t AsmPrinter::getDwarfVersion() const { void AsmPrinter::setDwarfVersion(uint16_t Version) { OutStreamer->getContext().setDwarfVersion(Version); } + +bool AsmPrinter::isDwarf64() const { + return OutStreamer->getContext().getDwarfFormat() == dwarf::DWARF64; +} + +unsigned int AsmPrinter::getDwarfOffsetByteSize() const { + return dwarf::getDwarfOffsetByteSize( + OutStreamer->getContext().getDwarfFormat()); +} + +unsigned int AsmPrinter::getUnitLengthFieldByteSize() const { + return dwarf::getUnitLengthFieldByteSize( + OutStreamer->getContext().getDwarfFormat()); +} diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index d81a9be26d39..c6e43445e7d0 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" +#include <cstdint> using namespace llvm; #define DEBUG_TYPE "asm-printer" @@ -97,6 +98,12 @@ static const char *DecodeDWARFEncoding(unsigned Encoding) { case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8 : return "indirect pcrel sdata8"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_datarel | + dwarf::DW_EH_PE_sdata4: + return "indirect datarel sdata4"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_datarel | + dwarf::DW_EH_PE_sdata8: + return "indirect datarel sdata8"; } return "<unknown encoding>"; @@ -137,8 +144,7 @@ unsigned 
AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { } } -void AsmPrinter::emitTTypeReference(const GlobalValue *GV, - unsigned Encoding) const { +void AsmPrinter::emitTTypeReference(const GlobalValue *GV, unsigned Encoding) { if (GV) { const TargetLoweringObjectFile &TLOF = getObjFileLowering(); @@ -154,19 +160,22 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label, if (!ForceOffset) { // On COFF targets, we have to emit the special .secrel32 directive. if (MAI->needsDwarfSectionOffsetDirective()) { + assert(!isDwarf64() && + "emitting DWARF64 is not implemented for COFF targets"); OutStreamer->EmitCOFFSecRel32(Label, /*Offset=*/0); return; } // If the format uses relocations with dwarf, refer to the symbol directly. if (MAI->doesDwarfUseRelocationsAcrossSections()) { - OutStreamer->emitSymbolValue(Label, 4); + OutStreamer->emitSymbolValue(Label, getDwarfOffsetByteSize()); return; } } // Otherwise, emit it as a label difference from the start of the section. - emitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4); + emitLabelDifference(Label, Label->getSection().getBeginSymbol(), + getDwarfOffsetByteSize()); } void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntry S) const { @@ -177,12 +186,38 @@ void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntry S) const { } // Just emit the offset directly; no need for symbol math. 
- emitInt32(S.Offset); + OutStreamer->emitIntValue(S.Offset, getDwarfOffsetByteSize()); } void AsmPrinter::emitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const { - // TODO: Support DWARF64 - emitLabelPlusOffset(Label, Offset, 4); + emitLabelPlusOffset(Label, Offset, getDwarfOffsetByteSize()); +} + +void AsmPrinter::emitDwarfLengthOrOffset(uint64_t Value) const { + assert(isDwarf64() || Value <= UINT32_MAX); + OutStreamer->emitIntValue(Value, getDwarfOffsetByteSize()); +} + +void AsmPrinter::maybeEmitDwarf64Mark() const { + if (!isDwarf64()) + return; + OutStreamer->AddComment("DWARF64 Mark"); + OutStreamer->emitInt32(dwarf::DW_LENGTH_DWARF64); +} + +void AsmPrinter::emitDwarfUnitLength(uint64_t Length, + const Twine &Comment) const { + assert(isDwarf64() || Length <= dwarf::DW_LENGTH_lo_reserved); + maybeEmitDwarf64Mark(); + OutStreamer->AddComment(Comment); + OutStreamer->emitIntValue(Length, getDwarfOffsetByteSize()); +} + +void AsmPrinter::emitDwarfUnitLength(const MCSymbol *Hi, const MCSymbol *Lo, + const Twine &Comment) const { + maybeEmitDwarf64Mark(); + OutStreamer->AddComment(Comment); + OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, getDwarfOffsetByteSize()); } void AsmPrinter::emitCallSiteOffset(const MCSymbol *Hi, const MCSymbol *Lo, @@ -241,6 +276,7 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { OutStreamer->emitCFIGnuArgsSize(Inst.getOffset()); break; case MCCFIInstruction::OpEscape: + OutStreamer->AddComment(Inst.getComment()); OutStreamer->emitCFIEscape(Inst.getValues()); break; case MCCFIInstruction::OpRestore: diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 538107cecd8b..4a67b0bc2c4d 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallString.h" 
+#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -146,6 +147,7 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, // we only need MCInstrInfo for asm parsing. We create one unconditionally // because it's not subtarget dependent. std::unique_ptr<MCInstrInfo> MII(TM.getTarget().createMCInstrInfo()); + assert(MII && "Failed to create instruction info"); std::unique_ptr<MCTargetAsmParser> TAP(TM.getTarget().createMCAsmParser( STI, *Parser, *MII, MCOptions)); if (!TAP) @@ -232,7 +234,8 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, const char *IDStart = LastEmitted; const char *IDEnd = IDStart; - while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd; + while (isDigit(*IDEnd)) + ++IDEnd; unsigned Val; if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val)) @@ -397,7 +400,8 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, const char *IDStart = LastEmitted; const char *IDEnd = IDStart; - while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd; + while (isDigit(*IDEnd)) + ++IDEnd; unsigned Val; if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val)) @@ -547,22 +551,23 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { EmitMSInlineAsmStr(AsmStr, MI, MMI, AP, LocCookie, OS); // Emit warnings if we use reserved registers on the clobber list, as - // that might give surprising results. - std::vector<std::string> RestrRegs; + // that might lead to undefined behaviour. + SmallVector<Register, 8> RestrRegs; + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); // Start with the first operand descriptor, and iterate over them. 
for (unsigned I = InlineAsm::MIOp_FirstOperand, NumOps = MI->getNumOperands(); I < NumOps; ++I) { const MachineOperand &MO = MI->getOperand(I); - if (MO.isImm()) { - unsigned Flags = MO.getImm(); - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - if (InlineAsm::getKind(Flags) == InlineAsm::Kind_Clobber && - !TRI->isAsmClobberable(*MF, MI->getOperand(I + 1).getReg())) { - RestrRegs.push_back(TRI->getName(MI->getOperand(I + 1).getReg())); - } - // Skip to one before the next operand descriptor, if it exists. - I += InlineAsm::getNumOperandRegisters(Flags); + if (!MO.isImm()) + continue; + unsigned Flags = MO.getImm(); + if (InlineAsm::getKind(Flags) == InlineAsm::Kind_Clobber) { + Register Reg = MI->getOperand(I + 1).getReg(); + if (!TRI->isAsmClobberable(*MF, Reg)) + RestrRegs.push_back(Reg); } + // Skip to one before the next operand descriptor, if it exists. + I += InlineAsm::getNumOperandRegisters(Flags); } if (!RestrRegs.empty()) { @@ -572,14 +577,15 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { SrcMgr.getMemoryBuffer(BufNum)->getBuffer().begin()); std::string Msg = "inline asm clobber list contains reserved registers: "; - for (auto I = RestrRegs.begin(), E = RestrRegs.end(); I != E; I++) { + for (auto I = RestrRegs.begin(), E = RestrRegs.end(); I != E; ++I) { if(I != RestrRegs.begin()) Msg += ", "; - Msg += *I; + Msg += TRI->getName(*I); } - std::string Note = "Reserved registers on the clobber list may not be " - "preserved across the asm statement, and clobbering them may " - "lead to undefined behaviour."; + const char *Note = + "Reserved registers on the clobber list may not be " + "preserved across the asm statement, and clobbering them may " + "lead to undefined behaviour."; SrcMgr.PrintMessage(Loc, SourceMgr::DK_Warning, Msg); SrcMgr.PrintMessage(Loc, SourceMgr::DK_Note, Note); } diff --git a/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h index 90929a217368..5e7db1f2f76c 
100644 --- a/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h +++ b/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -29,7 +29,7 @@ class ByteStreamer { public: // For now we're just handling the calls we need for dwarf emission/hashing. - virtual void EmitInt8(uint8_t Byte, const Twine &Comment = "") = 0; + virtual void emitInt8(uint8_t Byte, const Twine &Comment = "") = 0; virtual void emitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0; virtual void emitULEB128(uint64_t DWord, const Twine &Comment = "", unsigned PadTo = 0) = 0; @@ -41,7 +41,7 @@ private: public: APByteStreamer(AsmPrinter &Asm) : AP(Asm) {} - void EmitInt8(uint8_t Byte, const Twine &Comment) override { + void emitInt8(uint8_t Byte, const Twine &Comment) override { AP.OutStreamer->AddComment(Comment); AP.emitInt8(Byte); } @@ -61,7 +61,7 @@ class HashingByteStreamer final : public ByteStreamer { DIEHash &Hash; public: HashingByteStreamer(DIEHash &H) : Hash(H) {} - void EmitInt8(uint8_t Byte, const Twine &Comment) override { + void emitInt8(uint8_t Byte, const Twine &Comment) override { Hash.update(Byte); } void emitSLEB128(uint64_t DWord, const Twine &Comment) override { @@ -88,7 +88,7 @@ public: std::vector<std::string> &Comments, bool GenerateComments) : Buffer(Buffer), Comments(Comments), GenerateComments(GenerateComments) { } - void EmitInt8(uint8_t Byte, const Twine &Comment) override { + void emitInt8(uint8_t Byte, const Twine &Comment) override { Buffer.push_back(Byte); if (GenerateComments) Comments.push_back(Comment.str()); diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 3f053c7a38c7..b15e750aaf85 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -13,15 +13,10 @@ #include "CodeViewDebug.h" #include "DwarfExpression.h" #include "llvm/ADT/APSInt.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include 
"llvm/ADT/MapVector.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/ADT/Triple.h" @@ -40,7 +35,6 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" -#include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h" #include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h" #include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h" @@ -48,14 +42,12 @@ #include "llvm/DebugInfo/CodeView/Line.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h" -#include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeTableCollection.h" #include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" @@ -71,7 +63,6 @@ #include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -85,12 +76,8 @@ #include <cassert> #include <cctype> #include <cstddef> -#include <cstdint> #include <iterator> #include <limits> -#include <string> -#include <utility> -#include <vector> using namespace llvm; using namespace llvm::codeview; @@ -139,7 +126,9 @@ static CPUType mapArchToCVCPUType(Triple::ArchType Type) { case Triple::ArchType::x86_64: return CPUType::X64; case Triple::ArchType::thumb: - return CPUType::Thumb; + // LLVM 
currently doesn't support Windows CE and so thumb + // here is indiscriminately mapped to ARMNT specifically. + return CPUType::ARMNT; case Triple::ArchType::aarch64: return CPUType::ARM64; default: @@ -148,28 +137,7 @@ static CPUType mapArchToCVCPUType(Triple::ArchType Type) { } CodeViewDebug::CodeViewDebug(AsmPrinter *AP) - : DebugHandlerBase(AP), OS(*Asm->OutStreamer), TypeTable(Allocator) { - // If module doesn't have named metadata anchors or COFF debug section - // is not available, skip any debug info related stuff. - if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") || - !AP->getObjFileLowering().getCOFFDebugSymbolsSection()) { - Asm = nullptr; - MMI->setDebugInfoAvailability(false); - return; - } - // Tell MMI that we have debug info. - MMI->setDebugInfoAvailability(true); - - TheCPU = - mapArchToCVCPUType(Triple(MMI->getModule()->getTargetTriple()).getArch()); - - collectGlobalVariableInfo(); - - // Check if we should emit type record hashes. - ConstantInt *GH = mdconst::extract_or_null<ConstantInt>( - MMI->getModule()->getModuleFlag("CodeViewGHash")); - EmitDebugGlobalHashes = GH && !GH->isZero(); -} + : DebugHandlerBase(AP), OS(*Asm->OutStreamer), TypeTable(Allocator) {} StringRef CodeViewDebug::getFullFilepath(const DIFile *File) { std::string &Filepath = FileToFilepathMap[File]; @@ -507,8 +475,7 @@ void CodeViewDebug::recordLocalVariable(LocalVariable &&Var, static void addLocIfNotPresent(SmallVectorImpl<const DILocation *> &Locs, const DILocation *Loc) { - auto B = Locs.begin(), E = Locs.end(); - if (std::find(B, E, Loc) == E) + if (!llvm::is_contained(Locs, Loc)) Locs.push_back(Loc); } @@ -574,12 +541,31 @@ void CodeViewDebug::emitCodeViewMagicVersion() { OS.emitInt32(COFF::DEBUG_SECTION_MAGIC); } +void CodeViewDebug::beginModule(Module *M) { + // If module doesn't have named metadata anchors or COFF debug section + // is not available, skip any debug info related stuff. 
+ if (!M->getNamedMetadata("llvm.dbg.cu") || + !Asm->getObjFileLowering().getCOFFDebugSymbolsSection()) { + Asm = nullptr; + return; + } + // Tell MMI that we have and need debug info. + MMI->setDebugInfoAvailability(true); + + TheCPU = mapArchToCVCPUType(Triple(M->getTargetTriple()).getArch()); + + collectGlobalVariableInfo(); + + // Check if we should emit type record hashes. + ConstantInt *GH = + mdconst::extract_or_null<ConstantInt>(M->getModuleFlag("CodeViewGHash")); + EmitDebugGlobalHashes = GH && !GH->isZero(); +} + void CodeViewDebug::endModule() { if (!Asm || !MMI->hasDebugInfo()) return; - assert(Asm != nullptr); - // The COFF .debug$S section consists of several subsections, each starting // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length // of the payload followed by the payload itself. The subsections are 4-byte @@ -600,13 +586,18 @@ void CodeViewDebug::endModule() { if (!P.first->isDeclarationForLinker()) emitDebugInfoForFunction(P.first, *P.second); - // Emit global variable debug information. - setCurrentSubprogram(nullptr); - emitDebugInfoForGlobals(); + // Get types used by globals without emitting anything. + // This is meant to collect all static const data members so they can be + // emitted as globals. + collectDebugInfoForGlobals(); // Emit retained types. emitDebugInfoForRetainedTypes(); + // Emit global variable debug information. + setCurrentSubprogram(nullptr); + emitDebugInfoForGlobals(); + // Switch back to the generic .debug$S section after potentially processing // comdat symbol sections. switchToDebugSectionForSymbol(nullptr); @@ -1195,12 +1186,15 @@ void CodeViewDebug::collectVariableInfoFromMFTable( // Get the frame register used and the offset. 
Register FrameReg; - int FrameOffset = TFI->getFrameIndexReference(*Asm->MF, VI.Slot, FrameReg); + StackOffset FrameOffset = TFI->getFrameIndexReference(*Asm->MF, VI.Slot, FrameReg); uint16_t CVReg = TRI->getCodeViewRegNum(FrameReg); + assert(!FrameOffset.getScalable() && + "Frame offsets with a scalable component are not supported"); + // Calculate the label ranges. LocalVarDefRange DefRange = - createDefRangeMem(CVReg, FrameOffset + ExprOffset); + createDefRangeMem(CVReg, FrameOffset.getFixed() + ExprOffset); for (const InsnRange &Range : Scope->getRanges()) { const MCSymbol *Begin = getLabelBeforeInsn(Range.first); @@ -1592,11 +1586,16 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) { assert(Element->getTag() == dwarf::DW_TAG_subrange_type); const DISubrange *Subrange = cast<DISubrange>(Element); - assert(!Subrange->getRawLowerBound() && - "codeview doesn't support subranges with lower bounds"); int64_t Count = -1; - if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt*>()) - Count = CI->getSExtValue(); + // Calculate the count if either LowerBound is absent or is zero and + // either of Count or UpperBound are constant. + auto *LI = Subrange->getLowerBound().dyn_cast<ConstantInt *>(); + if (!Subrange->getRawLowerBound() || (LI && (LI->getSExtValue() == 0))) { + if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt*>()) + Count = CI->getSExtValue(); + else if (auto *UI = Subrange->getUpperBound().dyn_cast<ConstantInt*>()) + Count = UI->getSExtValue() + 1; // LowerBound is zero + } // Forward declarations of arrays without a size and VLAs use a count of -1. // Emit a count of zero in these cases to match what MSVC does for arrays @@ -2150,6 +2149,15 @@ void CodeViewDebug::collectMemberInfo(ClassInfo &Info, const DIDerivedType *DDTy) { if (!DDTy->getName().empty()) { Info.Members.push_back({DDTy, 0}); + + // Collect static const data members with values. 
+ if ((DDTy->getFlags() & DINode::FlagStaticMember) == + DINode::FlagStaticMember) { + if (DDTy->getConstant() && (isa<ConstantInt>(DDTy->getConstant()) || + isa<ConstantFP>(DDTy->getConstant()))) + StaticConstMembers.push_back(DDTy); + } + return; } @@ -3052,15 +3060,32 @@ void CodeViewDebug::collectGlobalVariableInfo() { } } +void CodeViewDebug::collectDebugInfoForGlobals() { + for (const CVGlobalVariable &CVGV : GlobalVariables) { + const DIGlobalVariable *DIGV = CVGV.DIGV; + const DIScope *Scope = DIGV->getScope(); + getCompleteTypeIndex(DIGV->getType()); + getFullyQualifiedName(Scope, DIGV->getName()); + } + + for (const CVGlobalVariable &CVGV : ComdatVariables) { + const DIGlobalVariable *DIGV = CVGV.DIGV; + const DIScope *Scope = DIGV->getScope(); + getCompleteTypeIndex(DIGV->getType()); + getFullyQualifiedName(Scope, DIGV->getName()); + } +} + void CodeViewDebug::emitDebugInfoForGlobals() { // First, emit all globals that are not in a comdat in a single symbol // substream. MSVC doesn't like it if the substream is empty, so only open // it if we have at least one global to emit. 
switchToDebugSectionForSymbol(nullptr); - if (!GlobalVariables.empty()) { + if (!GlobalVariables.empty() || !StaticConstMembers.empty()) { OS.AddComment("Symbol subsection for globals"); MCSymbol *EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols); emitGlobalVariableList(GlobalVariables); + emitStaticConstMemberList(); endCVSubsection(EndLabel); } @@ -3099,6 +3124,61 @@ void CodeViewDebug::emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals) { } } +void CodeViewDebug::emitStaticConstMemberList() { + for (const DIDerivedType *DTy : StaticConstMembers) { + const DIScope *Scope = DTy->getScope(); + + APSInt Value; + if (const ConstantInt *CI = + dyn_cast_or_null<ConstantInt>(DTy->getConstant())) + Value = APSInt(CI->getValue(), + DebugHandlerBase::isUnsignedDIType(DTy->getBaseType())); + else if (const ConstantFP *CFP = + dyn_cast_or_null<ConstantFP>(DTy->getConstant())) + Value = APSInt(CFP->getValueAPF().bitcastToAPInt(), true); + else + llvm_unreachable("cannot emit a constant without a value"); + + std::string QualifiedName = getFullyQualifiedName(Scope, DTy->getName()); + + MCSymbol *SConstantEnd = beginSymbolRecord(SymbolKind::S_CONSTANT); + OS.AddComment("Type"); + OS.emitInt32(getTypeIndex(DTy->getBaseType()).getIndex()); + OS.AddComment("Value"); + + // Encoded integers shouldn't need more than 10 bytes. 
+ uint8_t Data[10]; + BinaryStreamWriter Writer(Data, llvm::support::endianness::little); + CodeViewRecordIO IO(Writer); + cantFail(IO.mapEncodedInteger(Value)); + StringRef SRef((char *)Data, Writer.getOffset()); + OS.emitBinaryData(SRef); + + OS.AddComment("Name"); + emitNullTerminatedSymbolName(OS, QualifiedName); + endSymbolRecord(SConstantEnd); + } +} + +static bool isFloatDIType(const DIType *Ty) { + if (isa<DICompositeType>(Ty)) + return false; + + if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) { + dwarf::Tag T = (dwarf::Tag)Ty->getTag(); + if (T == dwarf::DW_TAG_pointer_type || + T == dwarf::DW_TAG_ptr_to_member_type || + T == dwarf::DW_TAG_reference_type || + T == dwarf::DW_TAG_rvalue_reference_type) + return false; + assert(DTy->getBaseType() && "Expected valid base type"); + return isFloatDIType(DTy->getBaseType()); + } + + auto *BTy = cast<DIBasicType>(Ty); + return (BTy->getEncoding() == dwarf::DW_ATE_float); +} + void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) { const DIGlobalVariable *DIGV = CVGV.DIGV; @@ -3134,7 +3214,12 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) { const DIExpression *DIE = CVGV.GVInfo.get<const DIExpression *>(); assert(DIE->isConstant() && "Global constant variables must contain a constant expression."); - uint64_t Val = DIE->getElement(1); + + // Use unsigned for floats. + bool isUnsigned = isFloatDIType(DIGV->getType()) + ? 
true + : DebugHandlerBase::isUnsignedDIType(DIGV->getType()); + APSInt Value(APInt(/*BitWidth=*/64, DIE->getElement(1)), isUnsigned); MCSymbol *SConstantEnd = beginSymbolRecord(SymbolKind::S_CONSTANT); OS.AddComment("Type"); @@ -3145,7 +3230,7 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) { uint8_t data[10]; BinaryStreamWriter Writer(data, llvm::support::endianness::little); CodeViewRecordIO IO(Writer); - cantFail(IO.mapEncodedInteger(Val)); + cantFail(IO.mapEncodedInteger(Value)); StringRef SRef((char *)data, Writer.getOffset()); OS.emitBinaryData(SRef); diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index 82f0293874d0..9eee5492bc81 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -203,6 +203,9 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { // Array of non-COMDAT global variables. SmallVector<CVGlobalVariable, 1> GlobalVariables; + /// List of static const data members to be emitted as S_CONSTANTs. + SmallVector<const DIDerivedType *, 4> StaticConstMembers; + /// The set of comdat .debug$S sections that we've seen so far. Each section /// must start with a magic version number that must only be emitted once. /// This set tracks which sections we've already opened. @@ -227,10 +230,6 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void calculateRanges(LocalVariable &Var, const DbgValueHistoryMap::Entries &Entries); - static void collectInlineSiteChildren(SmallVectorImpl<unsigned> &Children, - const FunctionInfo &FI, - const InlineSite &Site); - /// Remember some debug info about each function. Keep it in a stable order to /// emit at the end of the TU. 
MapVector<const Function *, std::unique_ptr<FunctionInfo>> FnDebugInfo; @@ -313,9 +312,11 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void emitDebugInfoForUDTs( const std::vector<std::pair<std::string, const DIType *>> &UDTs); + void collectDebugInfoForGlobals(); void emitDebugInfoForGlobals(); void emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals); void emitDebugInfoForGlobal(const CVGlobalVariable &CVGV); + void emitStaticConstMemberList(); /// Opens a subsection of the given kind in a .debug$S codeview section. /// Returns an end label for use with endCVSubsection when the subsection is @@ -464,6 +465,8 @@ protected: public: CodeViewDebug(AsmPrinter *AP); + void beginModule(Module *M) override; + void setSymbolSize(const MCSymbol *, uint64_t) override {} /// Emit the COFF section that holds the line table information. diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index edf82fbed650..39b0b027c765 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -194,7 +194,7 @@ DIEAbbrev DIE::generateAbbrev() const { return Abbrev; } -unsigned DIE::getDebugSectionOffset() const { +uint64_t DIE::getDebugSectionOffset() const { const DIEUnit *Unit = getUnit(); assert(Unit && "DIE must be owned by a DIEUnit to get its absolute offset"); return Unit->getDebugSectionOffset() + getOffset(); @@ -313,10 +313,8 @@ unsigned DIE::computeOffsetsAndAbbrevs(const AsmPrinter *AP, //===----------------------------------------------------------------------===// // DIEUnit Implementation //===----------------------------------------------------------------------===// -DIEUnit::DIEUnit(uint16_t V, uint8_t A, dwarf::Tag UnitTag) - : Die(UnitTag), Section(nullptr), Offset(0), Length(0), Version(V), - AddrSize(A) -{ +DIEUnit::DIEUnit(dwarf::Tag UnitTag) + : Die(UnitTag), Section(nullptr), Offset(0) { Die.Owner = this; assert((UnitTag == dwarf::DW_TAG_compile_unit || UnitTag == 
dwarf::DW_TAG_skeleton_unit || @@ -430,10 +428,10 @@ void DIEInteger::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const { /// SizeOf - Determine size of integer value in bytes. /// unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { - dwarf::FormParams Params = {0, 0, dwarf::DWARF32}; - if (AP) - Params = {AP->getDwarfVersion(), uint8_t(AP->getPointerSize()), - AP->OutStreamer->getContext().getDwarfFormat()}; + assert(AP && "AsmPrinter is required to set FormParams"); + dwarf::FormParams Params = {AP->getDwarfVersion(), + uint8_t(AP->getPointerSize()), + AP->OutStreamer->getContext().getDwarfFormat()}; if (Optional<uint8_t> FixedSize = dwarf::getFixedFormByteSize(Form, Params)) return *FixedSize; @@ -472,10 +470,16 @@ void DIEExpr::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { /// SizeOf - Determine size of expression value in bytes. /// unsigned DIEExpr::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { - if (Form == dwarf::DW_FORM_data4) return 4; - if (Form == dwarf::DW_FORM_sec_offset) return 4; - if (Form == dwarf::DW_FORM_strp) return 4; - return AP->getPointerSize(); + switch (Form) { + case dwarf::DW_FORM_data4: + return 4; + case dwarf::DW_FORM_data8: + return 8; + case dwarf::DW_FORM_sec_offset: + return AP->getDwarfOffsetByteSize(); + default: + llvm_unreachable("DIE Value form not supported yet"); + } } LLVM_DUMP_METHOD @@ -488,19 +492,26 @@ void DIEExpr::print(raw_ostream &O) const { O << "Expr: " << *Expr; } /// EmitValue - Emit label value. /// void DIELabel::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { - AP->emitLabelReference( - Label, SizeOf(AP, Form), - Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_sec_offset || - Form == dwarf::DW_FORM_ref_addr || Form == dwarf::DW_FORM_data4); + bool IsSectionRelative = Form != dwarf::DW_FORM_addr; + AP->emitLabelReference(Label, SizeOf(AP, Form), IsSectionRelative); } /// SizeOf - Determine size of label value in bytes. 
/// unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { - if (Form == dwarf::DW_FORM_data4) return 4; - if (Form == dwarf::DW_FORM_sec_offset) return 4; - if (Form == dwarf::DW_FORM_strp) return 4; - return AP->MAI->getCodePointerSize(); + switch (Form) { + case dwarf::DW_FORM_data4: + return 4; + case dwarf::DW_FORM_data8: + return 8; + case dwarf::DW_FORM_sec_offset: + case dwarf::DW_FORM_strp: + return AP->getDwarfOffsetByteSize(); + case dwarf::DW_FORM_addr: + return AP->MAI->getCodePointerSize(); + default: + llvm_unreachable("DIE Value form not supported yet"); + } } LLVM_DUMP_METHOD @@ -536,10 +547,16 @@ void DIEDelta::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { /// SizeOf - Determine size of delta value in bytes. /// unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { - if (Form == dwarf::DW_FORM_data4) return 4; - if (Form == dwarf::DW_FORM_sec_offset) return 4; - if (Form == dwarf::DW_FORM_strp) return 4; - return AP->MAI->getCodePointerSize(); + switch (Form) { + case dwarf::DW_FORM_data4: + return 4; + case dwarf::DW_FORM_data8: + return 8; + case dwarf::DW_FORM_sec_offset: + return AP->getDwarfOffsetByteSize(); + default: + llvm_unreachable("DIE Value form not supported yet"); + } } LLVM_DUMP_METHOD @@ -645,7 +662,7 @@ void DIEEntry::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_ref_addr: { // Get the absolute offset for this DIE within the debug info/types section. 
- unsigned Addr = Entry->getDebugSectionOffset(); + uint64_t Addr = Entry->getDebugSectionOffset(); if (const MCSymbol *SectionSym = Entry->getUnit()->getCrossSectionRelativeBaseAddress()) { AP->emitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true); @@ -802,13 +819,24 @@ void DIEBlock::print(raw_ostream &O) const { //===----------------------------------------------------------------------===// unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { - if (Form == dwarf::DW_FORM_loclistx) + switch (Form) { + case dwarf::DW_FORM_loclistx: return getULEB128Size(Index); - if (Form == dwarf::DW_FORM_data4) - return 4; - if (Form == dwarf::DW_FORM_sec_offset) + case dwarf::DW_FORM_data4: + assert(!AP->isDwarf64() && + "DW_FORM_data4 is not suitable to emit a pointer to a location list " + "in the 64-bit DWARF format"); return 4; - return AP->MAI->getCodePointerSize(); + case dwarf::DW_FORM_data8: + assert(AP->isDwarf64() && + "DW_FORM_data8 is not suitable to emit a pointer to a location list " + "in the 32-bit DWARF format"); + return 8; + case dwarf::DW_FORM_sec_offset: + return AP->getDwarfOffsetByteSize(); + default: + llvm_unreachable("DIE Value form not supported yet"); + } } /// EmitValue - Emit label value. diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index f26ef63eedec..da9997efc01f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -12,6 +12,7 @@ #include "DIEHash.h" #include "ByteStreamer.h" +#include "DwarfCompileUnit.h" #include "DwarfDebug.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" @@ -214,7 +215,15 @@ void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag, // all of the data is going to be added as integers. 
void DIEHash::hashBlockData(const DIE::const_value_range &Values) { for (const auto &V : Values) - Hash.update((uint64_t)V.getDIEInteger().getValue()); + if (V.getType() == DIEValue::isBaseTypeRef) { + const DIE &C = + *CU->ExprRefedBaseTypes[V.getDIEBaseTypeRef().getIndex()].Die; + StringRef Name = getDIEStringAttr(C, dwarf::DW_AT_name); + assert(!Name.empty() && + "Base types referenced from DW_OP_convert should have a name"); + hashNestedType(C, Name); + } else + Hash.update((uint64_t)V.getDIEInteger().getValue()); } // Hash the contents of a loclistptr class. diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/llvm/lib/CodeGen/AsmPrinter/DIEHash.h index 1a69f6772873..29e1da4c5d60 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.h +++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.h @@ -31,7 +31,8 @@ class DIEHash { }; public: - DIEHash(AsmPrinter *A = nullptr) : AP(A) {} + DIEHash(AsmPrinter *A = nullptr, DwarfCompileUnit *CU = nullptr) + : AP(A), CU(CU) {} /// Computes the CU signature. uint64_t computeCUSignature(StringRef DWOName, const DIE &Die); @@ -101,6 +102,7 @@ private: private: MD5 Hash; AsmPrinter *AP; + DwarfCompileUnit *CU; DenseMap<const DIE *, unsigned> Numbering; }; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp index 584b7614915d..1c9131edab83 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -8,9 +8,11 @@ #include "llvm/CodeGen/DbgEntityHistoryCalculator.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" @@ -51,6 +53,37 @@ static Register isDescribedByReg(const MachineInstr &MI) { : Register(); } +void 
InstructionOrdering::initialize(const MachineFunction &MF) { + // We give meta instructions the same ordinal as the preceding instruction + // because this class is written for the task of comparing positions of + // variable location ranges against scope ranges. To reflect what we'll see + // in the binary, when we look at location ranges we must consider all + // DBG_VALUEs between two real instructions at the same position. And a + // scope range which ends on a meta instruction should be considered to end + // at the last seen real instruction. E.g. + // + // 1 instruction p Both the variable location for x and for y start + // 1 DBG_VALUE for "x" after instruction p so we give them all the same + // 1 DBG_VALUE for "y" number. If a scope range ends at DBG_VALUE for "y", + // 2 instruction q we should treat it as ending after instruction p + // because it will be the last real instruction in the + // range. DBG_VALUEs at or after this position for + // variables declared in the scope will have no effect. + clear(); + unsigned Position = 0; + for (const MachineBasicBlock &MBB : MF) + for (const MachineInstr &MI : MBB) + InstNumberMap[&MI] = MI.isMetaInstruction() ? Position : ++Position; +} + +bool InstructionOrdering::isBefore(const MachineInstr *A, + const MachineInstr *B) const { + assert(A->getParent() && B->getParent() && "Operands must have a parent"); + assert(A->getMF() == B->getMF() && + "Operands must be in the same MachineFunction"); + return InstNumberMap.lookup(A) < InstNumberMap.lookup(B); +} + bool DbgValueHistoryMap::startDbgValue(InlinedEntity Var, const MachineInstr &MI, EntryIndex &NewIndex) { @@ -90,6 +123,156 @@ void DbgValueHistoryMap::Entry::endEntry(EntryIndex Index) { EndIndex = Index; } +/// Check if the instruction range [StartMI, EndMI] intersects any instruction +/// range in Ranges. EndMI can be nullptr to indicate that the range is +/// unbounded. Assumes Ranges is ordered and disjoint. 
Returns true and points +/// to the first intersecting scope range if one exists. +static Optional<ArrayRef<InsnRange>::iterator> +intersects(const MachineInstr *StartMI, const MachineInstr *EndMI, + const ArrayRef<InsnRange> &Ranges, + const InstructionOrdering &Ordering) { + for (auto RangesI = Ranges.begin(), RangesE = Ranges.end(); + RangesI != RangesE; ++RangesI) { + if (EndMI && Ordering.isBefore(EndMI, RangesI->first)) + return None; + if (EndMI && !Ordering.isBefore(RangesI->second, EndMI)) + return RangesI; + if (Ordering.isBefore(StartMI, RangesI->second)) + return RangesI; + } + return None; +} + +void DbgValueHistoryMap::trimLocationRanges( + const MachineFunction &MF, LexicalScopes &LScopes, + const InstructionOrdering &Ordering) { + // The indices of the entries we're going to remove for each variable. + SmallVector<EntryIndex, 4> ToRemove; + // Entry reference count for each variable. Clobbers left with no references + // will be removed. + SmallVector<int, 4> ReferenceCount; + // Entries reference other entries by index. Offsets is used to remap these + // references if any entries are removed. + SmallVector<size_t, 4> Offsets; + + for (auto &Record : VarEntries) { + auto &HistoryMapEntries = Record.second; + if (HistoryMapEntries.empty()) + continue; + + InlinedEntity Entity = Record.first; + const DILocalVariable *LocalVar = cast<DILocalVariable>(Entity.first); + + LexicalScope *Scope = nullptr; + if (const DILocation *InlinedAt = Entity.second) { + Scope = LScopes.findInlinedScope(LocalVar->getScope(), InlinedAt); + } else { + Scope = LScopes.findLexicalScope(LocalVar->getScope()); + // Ignore variables for non-inlined function level scopes. The scope + // ranges (from scope->getRanges()) will not include any instructions + // before the first one with a debug-location, which could cause us to + // incorrectly drop a location. 
We could introduce special casing for + // these variables, but it doesn't seem worth it because no out-of-scope + // locations have been observed for variables declared in function level + // scopes. + if (Scope && + (Scope->getScopeNode() == Scope->getScopeNode()->getSubprogram()) && + (Scope->getScopeNode() == LocalVar->getScope())) + continue; + } + + // If there is no scope for the variable then something has probably gone + // wrong. + if (!Scope) + continue; + + ToRemove.clear(); + // Zero the reference counts. + ReferenceCount.assign(HistoryMapEntries.size(), 0); + // Index of the DBG_VALUE which marks the start of the current location + // range. + EntryIndex StartIndex = 0; + ArrayRef<InsnRange> ScopeRanges(Scope->getRanges()); + for (auto EI = HistoryMapEntries.begin(), EE = HistoryMapEntries.end(); + EI != EE; ++EI, ++StartIndex) { + // Only DBG_VALUEs can open location ranges so skip anything else. + if (!EI->isDbgValue()) + continue; + + // Index of the entry which closes this range. + EntryIndex EndIndex = EI->getEndIndex(); + // If this range is closed bump the reference count of the closing entry. + if (EndIndex != NoEntry) + ReferenceCount[EndIndex] += 1; + // Skip this location range if the opening entry is still referenced. It + // may close a location range which intersects a scope range. + // TODO: We could be 'smarter' and trim these kinds of ranges such that + // they do not leak out of the scope ranges if they partially overlap. + if (ReferenceCount[StartIndex] > 0) + continue; + + const MachineInstr *StartMI = EI->getInstr(); + const MachineInstr *EndMI = EndIndex != NoEntry + ? HistoryMapEntries[EndIndex].getInstr() + : nullptr; + // Check if the location range [StartMI, EndMI] intersects with any scope + // range for the variable. + if (auto R = intersects(StartMI, EndMI, ScopeRanges, Ordering)) { + // Adjust ScopeRanges to exclude ranges which subsequent location ranges + // cannot possibly intersect. 
+ ScopeRanges = ArrayRef<InsnRange>(R.getValue(), ScopeRanges.end()); + } else { + // If the location range does not intersect any scope range then the + // DBG_VALUE which opened this location range is usless, mark it for + // removal. + ToRemove.push_back(StartIndex); + // Because we'll be removing this entry we need to update the reference + // count of the closing entry, if one exists. + if (EndIndex != NoEntry) + ReferenceCount[EndIndex] -= 1; + } + } + + // If there is nothing to remove then jump to next variable. + if (ToRemove.empty()) + continue; + + // Mark clobbers that will no longer close any location ranges for removal. + for (size_t i = 0; i < HistoryMapEntries.size(); ++i) + if (ReferenceCount[i] <= 0 && HistoryMapEntries[i].isClobber()) + ToRemove.push_back(i); + + llvm::sort(ToRemove); + + // Build an offset map so we can update the EndIndex of the remaining + // entries. + // Zero the offsets. + Offsets.assign(HistoryMapEntries.size(), 0); + size_t CurOffset = 0; + auto ToRemoveItr = ToRemove.begin(); + for (size_t EntryIdx = *ToRemoveItr; EntryIdx < HistoryMapEntries.size(); + ++EntryIdx) { + // Check if this is an entry which will be removed. + if (ToRemoveItr != ToRemove.end() && *ToRemoveItr == EntryIdx) { + ++ToRemoveItr; + ++CurOffset; + } + Offsets[EntryIdx] = CurOffset; + } + + // Update the EndIndex of the entries to account for those which will be + // removed. + for (auto &Entry : HistoryMapEntries) + if (Entry.isClosed()) + Entry.EndIndex -= Offsets[Entry.EndIndex]; + + // Now actually remove the entries. Iterate backwards so that our remaining + // ToRemove indices are valid after each erase. 
+ for (auto Itr = ToRemove.rbegin(), End = ToRemove.rend(); Itr != End; ++Itr) + HistoryMapEntries.erase(HistoryMapEntries.begin() + *Itr); + } +} + void DbgLabelInstrMap::addInstr(InlinedEntity Label, const MachineInstr &MI) { assert(MI.isDebugLabel() && "not a DBG_LABEL"); LabelInstr[Label] = &MI; @@ -234,7 +417,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF, DbgValueHistoryMap &DbgValues, DbgLabelInstrMap &DbgLabels) { const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); - unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + Register SP = TLI->getStackPointerRegisterToSaveRestore(); Register FrameReg = TRI->getFrameRegister(*MF); RegDescribedVarsMap RegVars; DbgValueEntriesMap LiveEntries; diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 880791a06d93..68a4bfba42a7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -21,11 +21,16 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugInfo.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; #define DEBUG_TYPE "dwarfdebug" +/// If true, we drop variable location ranges which exist entirely outside the +/// variable's lexical scope instruction ranges. +static cl::opt<bool> TrimVarLocs("trim-var-locs", cl::Hidden, cl::init(true)); + Optional<DbgVariableLocation> DbgVariableLocation::extractFromMachineInstruction( const MachineInstr &Instruction) { @@ -86,6 +91,11 @@ DbgVariableLocation::extractFromMachineInstruction( DebugHandlerBase::DebugHandlerBase(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {} +void DebugHandlerBase::beginModule(Module *M) { + if (M->debug_compile_units().empty()) + Asm = nullptr; +} + // Each LexicalScope has first instruction and last instruction to mark // beginning and end of a scope respectively. 
Create an inverse map that list // scopes starts (and ends) with an instruction. One instruction may start (or @@ -153,6 +163,54 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) { return getBaseTypeSize(BaseType); } +bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) { + if (auto *CTy = dyn_cast<DICompositeType>(Ty)) { + // FIXME: Enums without a fixed underlying type have unknown signedness + // here, leading to incorrectly emitted constants. + if (CTy->getTag() == dwarf::DW_TAG_enumeration_type) + return false; + + // (Pieces of) aggregate types that get hacked apart by SROA may be + // represented by a constant. Encode them as unsigned bytes. + return true; + } + + if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) { + dwarf::Tag T = (dwarf::Tag)Ty->getTag(); + // Encode pointer constants as unsigned bytes. This is used at least for + // null pointer constant emission. + // FIXME: reference and rvalue_reference /probably/ shouldn't be allowed + // here, but accept them for now due to a bug in SROA producing bogus + // dbg.values. 
+ if (T == dwarf::DW_TAG_pointer_type || + T == dwarf::DW_TAG_ptr_to_member_type || + T == dwarf::DW_TAG_reference_type || + T == dwarf::DW_TAG_rvalue_reference_type) + return true; + assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type || + T == dwarf::DW_TAG_volatile_type || + T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type); + assert(DTy->getBaseType() && "Expected valid base type"); + return isUnsignedDIType(DTy->getBaseType()); + } + + auto *BTy = cast<DIBasicType>(Ty); + unsigned Encoding = BTy->getEncoding(); + assert((Encoding == dwarf::DW_ATE_unsigned || + Encoding == dwarf::DW_ATE_unsigned_char || + Encoding == dwarf::DW_ATE_signed || + Encoding == dwarf::DW_ATE_signed_char || + Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF || + Encoding == dwarf::DW_ATE_boolean || + (Ty->getTag() == dwarf::DW_TAG_unspecified_type && + Ty->getName() == "decltype(nullptr)")) && + "Unsupported encoding"); + return Encoding == dwarf::DW_ATE_unsigned || + Encoding == dwarf::DW_ATE_unsigned_char || + Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || + Ty->getTag() == dwarf::DW_TAG_unspecified_type; +} + static bool hasDebugInfo(const MachineModuleInfo *MMI, const MachineFunction *MF) { if (!MMI->hasDebugInfo()) @@ -191,6 +249,9 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) { assert(DbgLabels.empty() && "DbgLabels map wasn't cleaned!"); calculateDbgEntityHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(), DbgValues, DbgLabels); + InstOrdering.initialize(*MF); + if (TrimVarLocs) + DbgValues.trimLocationRanges(*MF, LScopes, InstOrdering); LLVM_DEBUG(DbgValues.dump()); // Request labels for the full history. @@ -212,10 +273,16 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) { // doing that violates the ranges that are calculated in the history map. 
// However, we currently do not emit debug values for constant arguments // directly at the start of the function, so this code is still useful. + // FIXME: If the first mention of an argument is in a unique section basic + // block, we cannot always assign the CurrentFnBeginLabel as it lies in a + // different section. Temporarily, we disable generating loc list + // information or DW_AT_const_value when the block is in a different + // section. const DILocalVariable *DIVar = Entries.front().getInstr()->getDebugVariable(); if (DIVar->isParameter() && - getDISubprogram(DIVar->getScope())->describes(&MF->getFunction())) { + getDISubprogram(DIVar->getScope())->describes(&MF->getFunction()) && + Entries.front().getInstr()->getParent()->sameSection(&MF->front())) { if (!IsDescribedByReg(Entries.front().getInstr())) LabelsBeforeInsn[Entries.front().getInstr()] = Asm->getFunctionBegin(); if (Entries.front().getInstr()->getDebugExpression()->isFragment()) { @@ -262,7 +329,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) { } void DebugHandlerBase::beginInstruction(const MachineInstr *MI) { - if (!MMI->hasDebugInfo()) + if (!Asm || !MMI->hasDebugInfo()) return; assert(CurMI == nullptr); @@ -288,7 +355,7 @@ void DebugHandlerBase::beginInstruction(const MachineInstr *MI) { } void DebugHandlerBase::endInstruction() { - if (!MMI->hasDebugInfo()) + if (!Asm || !MMI->hasDebugInfo()) return; assert(CurMI != nullptr); @@ -320,12 +387,13 @@ void DebugHandlerBase::endInstruction() { } void DebugHandlerBase::endFunction(const MachineFunction *MF) { - if (hasDebugInfo(MMI, MF)) + if (Asm && hasDebugInfo(MMI, MF)) endFunctionImpl(MF); DbgValues.clear(); DbgLabels.clear(); LabelsBeforeInsn.clear(); LabelsAfterInsn.clear(); + InstOrdering.clear(); } void DebugHandlerBase::beginBasicBlock(const MachineBasicBlock &MBB) { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 11ed1062f77e..c20ac6040aef 
100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -81,8 +81,9 @@ void DwarfCFIException::endModule() { } } -static MCSymbol *getExceptionSym(AsmPrinter *Asm) { - return Asm->getCurExceptionSym(); +static MCSymbol *getExceptionSym(AsmPrinter *Asm, + const MachineBasicBlock *MBB) { + return Asm->getMBBExceptionSym(*MBB); } void DwarfCFIException::beginFunction(const MachineFunction *MF) { @@ -161,7 +162,7 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB, // Provide LSDA information. if (shouldEmitLSDA) - Asm->OutStreamer->emitCFILsda(ESP(Asm), TLOF.getLSDAEncoding()); + Asm->OutStreamer->emitCFILsda(ESP(Asm, MBB), TLOF.getLSDAEncoding()); } /// endFunction - Gather and emit post-function exception information. diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 296c380ae550..befc4bba19a2 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -12,18 +12,12 @@ #include "DwarfCompileUnit.h" #include "AddressPool.h" -#include "DwarfDebug.h" #include "DwarfExpression.h" -#include "DwarfUnit.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" -#include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" @@ -32,22 +26,16 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolWasm.h" #include 
"llvm/MC/MachineLocation.h" -#include "llvm/Support/Casting.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include <algorithm> -#include <cassert> -#include <cstdint> #include <iterator> -#include <memory> #include <string> #include <utility> @@ -117,7 +105,7 @@ unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) { return Asm->OutStreamer->emitDwarfFileDirective(0, "", "", None, None, CUID); return Asm->OutStreamer->emitDwarfFileDirective( - 0, File->getDirectory(), File->getFilename(), getMD5AsBytes(File), + 0, File->getDirectory(), File->getFilename(), DD->getMD5AsBytes(File), File->getSource(), CUID); } @@ -260,7 +248,9 @@ void DwarfCompileUnit::addLocationAttribute( : dwarf::DW_OP_const8u); // 2) containing the (relocated) offset of the TLS variable // within the module's TLS block. - addExpr(*Loc, dwarf::DW_FORM_udata, + addExpr(*Loc, + PointerSize == 4 ? dwarf::DW_FORM_data4 + : dwarf::DW_FORM_data8, Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); } else { addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); @@ -432,7 +422,10 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { // FIXME: duplicated from Target/WebAssembly/WebAssembly.h // don't want to depend on target specific headers in this code? const unsigned TI_GLOBAL_RELOC = 3; - if (FrameBase.Location.WasmLoc.Kind == TI_GLOBAL_RELOC) { + // FIXME: when writing dwo, we need to avoid relocations. Probably + // the "right" solution is to treat globals the way func and data symbols + // are (with entries in .debug_addr). + if (FrameBase.Location.WasmLoc.Kind == TI_GLOBAL_RELOC && !isDwoUnit()) { // These need to be relocatable. assert(FrameBase.Location.WasmLoc.Index == 0); // Only SP so far. 
auto SPSym = cast<MCSymbolWasm>( @@ -449,8 +442,8 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { true}); DIELoc *Loc = new (DIEValueAllocator) DIELoc; addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_WASM_location); - addSInt(*Loc, dwarf::DW_FORM_sdata, FrameBase.Location.WasmLoc.Kind); - addLabel(*Loc, dwarf::DW_FORM_udata, SPSym); + addSInt(*Loc, dwarf::DW_FORM_sdata, TI_GLOBAL_RELOC); + addLabel(*Loc, dwarf::DW_FORM_data4, SPSym); DD->addArangeLabel(SymbolCU(this, SPSym)); addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc); @@ -565,7 +558,12 @@ void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, void DwarfCompileUnit::attachRangesOrLowHighPC( DIE &Die, SmallVector<RangeSpan, 2> Ranges) { - if (Ranges.size() == 1 || !DD->useRangesSection()) { + assert(!Ranges.empty()); + if (!DD->useRangesSection() || + (Ranges.size() == 1 && + (!DD->alwaysUseRanges() || + DD->getSectionLabel(&Ranges.front().Begin->getSection()) == + Ranges.front().Begin))) { const RangeSpan &Front = Ranges.front(); const RangeSpan &Back = Ranges.back(); attachLowHighPC(Die, Front.Begin, Back.End); @@ -688,9 +686,9 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, // Add variable address. 
- unsigned Offset = DV.getDebugLocListIndex(); - if (Offset != ~0U) { - addLocationList(*VariableDie, dwarf::DW_AT_location, Offset); + unsigned Index = DV.getDebugLocListIndex(); + if (Index != ~0U) { + addLocationList(*VariableDie, dwarf::DW_AT_location, Index); auto TagOffset = DV.getDebugLocListTagOffset(); if (TagOffset) addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1, @@ -722,6 +720,13 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, addConstantFPValue(*VariableDie, DVal->getConstantFP()); } else if (DVal->isConstantInt()) { addConstantValue(*VariableDie, DVal->getConstantInt(), DV.getType()); + } else if (DVal->isTargetIndexLocation()) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + const DIBasicType *BT = dyn_cast<DIBasicType>( + static_cast<const Metadata *>(DV.getVariable()->getType())); + DwarfDebug::emitDebugLocValue(*Asm, BT, *DVal, DwarfExpr); + addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); } return VariableDie; } @@ -737,10 +742,14 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, Register FrameReg; const DIExpression *Expr = Fragment.Expr; const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); - int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg); + StackOffset Offset = + TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg); DwarfExpr.addFragmentOffset(Expr); + + auto *TRI = Asm->MF->getSubtarget().getRegisterInfo(); SmallVector<uint64_t, 8> Ops; - DIExpression::appendOffset(Ops, Offset); + TRI->getOffsetOpcodes(Offset, Ops); + // According to // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf // cuda-gdb requires DW_AT_address_class for all variables to be able to @@ -801,6 +810,10 @@ static SmallVector<const DIVariable *, 2> dependencies(DbgVariable *Var) { return Result; if (auto 
*DLVar = Array->getDataLocation()) Result.push_back(DLVar); + if (auto *AsVar = Array->getAssociated()) + Result.push_back(AsVar); + if (auto *AlVar = Array->getAllocated()) + Result.push_back(AlVar); for (auto *El : Array->getElements()) { if (auto *Subrange = dyn_cast<DISubrange>(El)) { if (auto Count = Subrange->getCount()) @@ -815,6 +828,19 @@ static SmallVector<const DIVariable *, 2> dependencies(DbgVariable *Var) { if (auto ST = Subrange->getStride()) if (auto *Dependency = ST.dyn_cast<DIVariable *>()) Result.push_back(Dependency); + } else if (auto *GenericSubrange = dyn_cast<DIGenericSubrange>(El)) { + if (auto Count = GenericSubrange->getCount()) + if (auto *Dependency = Count.dyn_cast<DIVariable *>()) + Result.push_back(Dependency); + if (auto LB = GenericSubrange->getLowerBound()) + if (auto *Dependency = LB.dyn_cast<DIVariable *>()) + Result.push_back(Dependency); + if (auto UB = GenericSubrange->getUpperBound()) + if (auto *Dependency = UB.dyn_cast<DIVariable *>()) + Result.push_back(Dependency); + if (auto ST = GenericSubrange->getStride()) + if (auto *Dependency = ST.dyn_cast<DIVariable *>()) + Result.push_back(Dependency); } } return Result; @@ -996,7 +1022,7 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( } bool DwarfCompileUnit::useGNUAnalogForDwarf5Feature() const { - return DD->getDwarfVersion() == 4 && DD->tuneForGDB(); + return DD->getDwarfVersion() == 4 && !DD->tuneForLLDB(); } dwarf::Tag DwarfCompileUnit::getDwarf5OrGNUTag(dwarf::Tag Tag) const { @@ -1352,11 +1378,9 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, /// Add a Dwarf loclistptr attribute data and value. void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index) { - dwarf::Form Form = dwarf::DW_FORM_data4; - if (DD->getDwarfVersion() == 4) - Form =dwarf::DW_FORM_sec_offset; - if (DD->getDwarfVersion() >= 5) - Form =dwarf::DW_FORM_loclistx; + dwarf::Form Form = (DD->getDwarfVersion() >= 5) + ? 
dwarf::DW_FORM_loclistx + : DD->getDwarfSectionOffsetForm(); Die.addValue(DIEValueAllocator, Attribute, Form, DIELocList(Index)); } @@ -1417,8 +1441,8 @@ void DwarfCompileUnit::addAddrTableBase() { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); MCSymbol *Label = DD->getAddressPool().getLabel(); addSectionLabel(getUnitDie(), - getDwarfVersion() >= 5 ? dwarf::DW_AT_addr_base - : dwarf::DW_AT_GNU_addr_base, + DD->getDwarfVersion() >= 5 ? dwarf::DW_AT_addr_base + : dwarf::DW_AT_GNU_addr_base, Label, TLOF.getDwarfAddrSection()->getBeginSymbol()); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 4ccd8c96dd0d..6d8186a5ee2b 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -22,7 +22,6 @@ #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/DbgEntityHistoryCalculator.h" -#include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/Support/Casting.h" @@ -34,6 +33,9 @@ namespace llvm { class AsmPrinter; +class DIE; +class DIELoc; +class DIEValueList; class DwarfFile; class GlobalVariable; class MCExpr; @@ -55,7 +57,7 @@ class DwarfCompileUnit final : public DwarfUnit { DwarfCompileUnit *Skeleton = nullptr; /// The start of the unit within its section. - MCSymbol *LabelBegin; + MCSymbol *LabelBegin = nullptr; /// The start of the unit macro info within macro section. 
MCSymbol *MacroLabelBegin; @@ -287,8 +289,8 @@ public: return DwarfUnit::getHeaderSize() + DWOIdSize; } unsigned getLength() { - return sizeof(uint32_t) + // Length field - getHeaderSize() + getUnitDie().getSize(); + return Asm->getUnitLengthFieldByteSize() + // Length field + getHeaderSize() + getUnitDie().getSize(); } void emitHeader(bool UseOffsets) override; @@ -297,7 +299,7 @@ public: void addAddrTableBase(); MCSymbol *getLabelBegin() const { - assert(getSection()); + assert(LabelBegin && "LabelBegin is not initialized"); return LabelBegin; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 45ed5256deb9..462682743c6a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -13,30 +13,18 @@ #include "DwarfDebug.h" #include "ByteStreamer.h" #include "DIEHash.h" -#include "DebugLocEntry.h" -#include "DebugLocStream.h" #include "DwarfCompileUnit.h" #include "DwarfExpression.h" -#include "DwarfFile.h" #include "DwarfUnit.h" #include "llvm/ADT/APInt.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/MapVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" -#include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/CodeGen/AccelTable.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -46,14 +34,11 @@ #include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfoMetadata.h" 
-#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" @@ -71,15 +56,10 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include <algorithm> -#include <cassert> #include <cstddef> -#include <cstdint> #include <iterator> #include <string> -#include <utility> -#include <vector> using namespace llvm; @@ -87,18 +67,10 @@ using namespace llvm; STATISTIC(NumCSParams, "Number of dbg call site params created"); -static cl::opt<bool> -DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, - cl::desc("Disable debug info printing")); - static cl::opt<bool> UseDwarfRangesBaseAddressSpecifier( "use-dwarf-ranges-base-address-specifier", cl::Hidden, cl::desc("Use base address specifiers in debug_ranges"), cl::init(false)); -static cl::opt<bool> EmitDwarfDebugEntryValues( - "emit-debug-entry-values", cl::Hidden, - cl::desc("Emit the debug entry values"), cl::init(false)); - static cl::opt<bool> GenerateARangeSection("generate-arange-section", cl::Hidden, cl::desc("Generate dwarf aranges"), @@ -151,6 +123,18 @@ static cl::opt<DefaultOnOff> DwarfSectionsAsReferences( clEnumVal(Enable, "Enabled"), clEnumVal(Disable, "Disabled")), cl::init(Default)); +static cl::opt<bool> + UseGNUDebugMacro("use-gnu-debug-macro", cl::Hidden, + cl::desc("Emit the GNU .debug_macro format with DWARF <5"), + cl::init(false)); + +static cl::opt<DefaultOnOff> DwarfOpConvert( + "dwarf-op-convert", cl::Hidden, + cl::desc("Enable use of the DWARFv5 DW_OP_convert operator"), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), clEnumVal(Disable, "Disabled")), + cl::init(Default)); + enum 
LinkageNameOption { DefaultLinkageNames, AllLinkageNames, @@ -167,19 +151,23 @@ static cl::opt<LinkageNameOption> "Abstract subprograms")), cl::init(DefaultLinkageNames)); -static cl::opt<unsigned> LocationAnalysisSizeLimit( - "singlevarlocation-input-bb-limit", - cl::desc("Maximum block size to analyze for single-location variables"), - cl::init(30000), cl::Hidden); +static cl::opt<DwarfDebug::MinimizeAddrInV5> MinimizeAddrInV5Option( + "minimize-addr-in-v5", cl::Hidden, + cl::desc("Always use DW_AT_ranges in DWARFv5 whenever it could allow more " + "address pool entry sharing to reduce relocations/object size"), + cl::values(clEnumValN(DwarfDebug::MinimizeAddrInV5::Default, "Default", + "Default address minimization strategy"), + clEnumValN(DwarfDebug::MinimizeAddrInV5::Ranges, "Ranges", + "Use rnglists for contiguous ranges if that allows " + "using a pre-existing base address"), + clEnumValN(DwarfDebug::MinimizeAddrInV5::Disabled, "Disabled", + "Stuff")), + cl::init(DwarfDebug::MinimizeAddrInV5::Default)); -static const char *const DWARFGroupName = "dwarf"; -static const char *const DWARFGroupDescription = "DWARF Emission"; -static const char *const DbgTimerName = "writer"; -static const char *const DbgTimerDescription = "DWARF Debug Writer"; static constexpr unsigned ULEB128PadSize = 4; void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) { - getActiveStreamer().EmitInt8( + getActiveStreamer().emitInt8( Op, Comment ? 
Twine(Comment) + " " + dwarf::OperationEncodingString(Op) : dwarf::OperationEncodingString(Op)); } @@ -193,7 +181,7 @@ void DebugLocDwarfExpression::emitUnsigned(uint64_t Value) { } void DebugLocDwarfExpression::emitData1(uint8_t Value) { - getActiveStreamer().EmitInt8(Value, Twine(Value)); + getActiveStreamer().emitInt8(Value, Twine(Value)); } void DebugLocDwarfExpression::emitBaseTypeRef(uint64_t Idx) { @@ -202,7 +190,7 @@ void DebugLocDwarfExpression::emitBaseTypeRef(uint64_t Idx) { } bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, - unsigned MachineReg) { + llvm::Register MachineReg) { // This information is not available while emitting .debug_loc entries. return false; } @@ -227,7 +215,7 @@ void DebugLocDwarfExpression::commitTemporaryBuffer() { const char *Comment = (Byte.index() < TmpBuf->Comments.size()) ? TmpBuf->Comments[Byte.index()].c_str() : ""; - OutBS.EmitInt8(Byte.value(), Comment); + OutBS.emitInt8(Byte.value(), Comment); } TmpBuf->Bytes.clear(); TmpBuf->Comments.clear(); @@ -242,8 +230,8 @@ static DbgValueLoc getDebugLocValue(const MachineInstr *MI) { const DIExpression *Expr = MI->getDebugExpression(); assert(MI->getNumOperands() == 4); if (MI->getDebugOperand(0).isReg()) { - auto RegOp = MI->getDebugOperand(0); - auto Op1 = MI->getDebugOffset(); + const auto &RegOp = MI->getDebugOperand(0); + const auto &Op1 = MI->getDebugOffset(); // If the second operand is an immediate, this is a // register-indirect address. 
assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset"); @@ -251,7 +239,7 @@ static DbgValueLoc getDebugLocValue(const MachineInstr *MI) { return DbgValueLoc(Expr, MLoc); } if (MI->getDebugOperand(0).isTargetIndex()) { - auto Op = MI->getDebugOperand(0); + const auto &Op = MI->getDebugOperand(0); return DbgValueLoc(Expr, TargetIndexLocation(Op.getIndex(), Op.getOffset())); } @@ -354,7 +342,7 @@ static AccelTableKind computeAccelTableKind(unsigned DwarfVersion, return AccelTableKind::None; } -DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) +DwarfDebug::DwarfDebug(AsmPrinter *A) : DebugHandlerBase(A), DebugLocs(A->OutStreamer->isVerboseAsm()), InfoHolder(A, "info_string", DIEValueAllocator), SkeletonHolder(A, "skel_string", DIEValueAllocator), @@ -397,6 +385,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfVersion = TT.isNVPTX() ? 2 : (DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION); + bool Dwarf64 = Asm->TM.Options.MCOptions.Dwarf64 && + DwarfVersion >= 3 && // DWARF64 was introduced in DWARFv3. + TT.isArch64Bit() && // DWARF64 requires 64-bit relocations. + TT.isOSBinFormatELF(); // Support only ELF for now. + UseRangesSection = !NoDwarfRangesSection && !TT.isNVPTX(); // Use sections as references. Force for NVPTX. @@ -406,8 +399,9 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) UseSectionsAsReferences = DwarfSectionsAsReferences == Enable; // Don't generate type units for unsupported object file formats. 
- GenerateTypeUnits = - A->TM.getTargetTriple().isOSBinFormatELF() && GenerateDwarfTypeUnits; + GenerateTypeUnits = (A->TM.getTargetTriple().isOSBinFormatELF() || + A->TM.getTargetTriple().isOSBinFormatWasm()) && + GenerateDwarfTypeUnits; TheAccelTableKind = computeAccelTableKind( DwarfVersion, GenerateTypeUnits, DebuggerTuning, A->TM.getTargetTriple()); @@ -430,11 +424,31 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) // Emit call-site-param debug info for GDB and LLDB, if the target supports // the debug entry values feature. It can also be enabled explicitly. - EmitDebugEntryValues = (Asm->TM.Options.ShouldEmitDebugEntryValues() && - (tuneForGDB() || tuneForLLDB())) || - EmitDwarfDebugEntryValues; + EmitDebugEntryValues = Asm->TM.Options.ShouldEmitDebugEntryValues(); + + // It is unclear if the GCC .debug_macro extension is well-specified + // for split DWARF. For now, do not allow LLVM to emit it. + UseDebugMacroSection = + DwarfVersion >= 5 || (UseGNUDebugMacro && !useSplitDwarf()); + if (DwarfOpConvert == Default) + EnableOpConvert = !((tuneForGDB() && useSplitDwarf()) || (tuneForLLDB() && !TT.isOSBinFormatMachO())); + else + EnableOpConvert = (DwarfOpConvert == Enable); + + // Split DWARF would benefit object size significantly by trading reductions + // in address pool usage for slightly increased range list encodings. + if (DwarfVersion >= 5) { + MinimizeAddr = MinimizeAddrInV5Option; + // FIXME: In the future, enable this by default for Split DWARF where the + // tradeoff is more pronounced due to being able to offload the range + // lists to the dwo file and shrink object files/reduce relocations there. + if (MinimizeAddr == MinimizeAddrInV5::Default) + MinimizeAddr = MinimizeAddrInV5::Disabled; + } Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion); + Asm->OutStreamer->getContext().setDwarfFormat(Dwarf64 ? dwarf::DWARF64 + : dwarf::DWARF32); } // Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h. 
@@ -583,7 +597,7 @@ static const DIExpression *combineDIExpressions(const DIExpression *Original, std::vector<uint64_t> Elts = Addition->getElements().vec(); // Avoid multiple DW_OP_stack_values. if (Original->isImplicit() && Addition->isImplicit()) - erase_if(Elts, [](uint64_t Op) { return Op == dwarf::DW_OP_stack_value; }); + erase_value(Elts, dwarf::DW_OP_stack_value); const DIExpression *CombinedExpr = (Elts.size() > 0) ? DIExpression::append(Original, Elts) : Original; return CombinedExpr; @@ -709,11 +723,11 @@ static void interpretValues(const MachineInstr *CurMI, ForwardedRegWorklist[ParamFwdReg], Params); } else if (ParamValue->first.isReg()) { Register RegLoc = ParamValue->first.getReg(); - unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); + Register SP = TLI.getStackPointerRegisterToSaveRestore(); Register FP = TRI.getFrameRegister(*MF); bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP); if (TRI.isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) { - MachineLocation MLoc(RegLoc, /*IsIndirect=*/IsSPorFP); + MachineLocation MLoc(RegLoc, /*Indirect=*/IsSPorFP); finishCallSiteParams(MLoc, ParamValue->second, ForwardedRegWorklist[ParamFwdReg], Params); } else { @@ -797,6 +811,11 @@ static void collectCallSiteParameters(const MachineInstr *CallMI, (void)InsertedReg; } + // Do not emit CSInfo for undef forwarding registers. + for (auto &MO : CallMI->uses()) + if (MO.isReg() && MO.isUndef()) + ForwardedRegWorklist.erase(MO.getReg()); + // We erase, from the ForwardedRegWorklist, those forwarding registers for // which we successfully describe a loaded value (by using // the describeLoadedValue()). For those remaining arguments in the working @@ -1071,9 +1090,8 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { // compilation directory. 
if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU) Asm->OutStreamer->emitDwarfFile0Directive( - CompilationDir, DIUnit->getFilename(), - NewCU.getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource(), - NewCU.getUniqueID()); + CompilationDir, DIUnit->getFilename(), getMD5AsBytes(DIUnit->getFile()), + DIUnit->getSource(), NewCU.getUniqueID()); if (useSplitDwarf()) { NewCU.setSkeleton(constructSkeletonCU(NewCU)); @@ -1126,21 +1144,17 @@ sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) { // Emit all Dwarf sections that should come prior to the content. Create // global DIEs and emit initial debug info sections. This is invoked by // the target AsmPrinter. -void DwarfDebug::beginModule() { - NamedRegionTimer T(DbgTimerName, DbgTimerDescription, DWARFGroupName, - DWARFGroupDescription, TimePassesIsEnabled); - if (DisableDebugInfoPrinting) { - MMI->setDebugInfoAvailability(false); - return; - } +void DwarfDebug::beginModule(Module *M) { + DebugHandlerBase::beginModule(M); - const Module *M = MMI->getModule(); + if (!Asm || !MMI->hasDebugInfo()) + return; unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(), M->debug_compile_units_end()); - // Tell MMI whether we have debug info. - assert(MMI->hasDebugInfo() == (NumDebugCUs > 0) && - "DebugInfoAvailabilty initialized unexpectedly"); + assert(NumDebugCUs > 0 && "Asm unexpectedly initialized"); + assert(MMI->hasDebugInfo() && + "DebugInfoAvailabilty unexpectedly not initialized"); SingleCU = NumDebugCUs == 1; DenseMap<DIGlobalVariable *, SmallVector<DwarfCompileUnit::GlobalExpr, 1>> GVMap; @@ -1292,7 +1306,7 @@ void DwarfDebug::finalizeModuleInfo() { Asm->TM.Options.MCOptions.SplitDwarfFile); // Emit a unique identifier for this CU. 
uint64_t ID = - DIEHash(Asm).computeCUSignature(DWOName, TheCU.getUnitDie()); + DIEHash(Asm, &TheCU).computeCUSignature(DWOName, TheCU.getUnitDie()); if (getDwarfVersion() >= 5) { TheCU.setDWOId(ID); SkCU->setDWOId(ID); @@ -1353,15 +1367,18 @@ void DwarfDebug::finalizeModuleInfo() { // If compile Unit has macros, emit "DW_AT_macro_info/DW_AT_macros" // attribute. if (CUNode->getMacros()) { - if (getDwarfVersion() >= 5) { + if (UseDebugMacroSection) { if (useSplitDwarf()) TheCU.addSectionDelta( TheCU.getUnitDie(), dwarf::DW_AT_macros, U.getMacroLabelBegin(), TLOF.getDwarfMacroDWOSection()->getBeginSymbol()); - else - U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macros, - U.getMacroLabelBegin(), + else { + dwarf::Attribute MacrosAttr = getDwarfVersion() >= 5 + ? dwarf::DW_AT_macros + : dwarf::DW_AT_GNU_macros; + U.addSectionLabel(U.getUnitDie(), MacrosAttr, U.getMacroLabelBegin(), TLOF.getDwarfMacroSection()->getBeginSymbol()); + } } else { if (useSplitDwarf()) TheCU.addSectionDelta( @@ -1398,9 +1415,8 @@ void DwarfDebug::endModule() { } // If we aren't actually generating debug info (check beginModule - - // conditionalized on !DisableDebugInfoPrinting and the presence of the - // llvm.dbg.cu metadata node) - if (!MMI->hasDebugInfo()) + // conditionalized on the presence of the llvm.dbg.cu metadata node) + if (!Asm || !MMI->hasDebugInfo()) return; // Finalize the debug info for the module. @@ -1532,7 +1548,8 @@ void DwarfDebug::collectVariableInfoFromMFTable( /// either open or otherwise rolls off the end of the scope. 
static bool validThroughout(LexicalScopes &LScopes, const MachineInstr *DbgValue, - const MachineInstr *RangeEnd) { + const MachineInstr *RangeEnd, + const InstructionOrdering &Ordering) { assert(DbgValue->getDebugLoc() && "DBG_VALUE without a debug location"); auto MBB = DbgValue->getParent(); auto DL = DbgValue->getDebugLoc(); @@ -1544,34 +1561,30 @@ static bool validThroughout(LexicalScopes &LScopes, if (LSRange.size() == 0) return false; - - // Determine if the DBG_VALUE is valid at the beginning of its lexical block. const MachineInstr *LScopeBegin = LSRange.front().first; - // Early exit if the lexical scope begins outside of the current block. - if (LScopeBegin->getParent() != MBB) - return false; - - // If there are instructions belonging to our scope in another block, and - // we're not a constant (see DWARF2 comment below), then we can't be - // validThroughout. - const MachineInstr *LScopeEnd = LSRange.back().second; - if (RangeEnd && LScopeEnd->getParent() != MBB) - return false; - - MachineBasicBlock::const_reverse_iterator Pred(DbgValue); - for (++Pred; Pred != MBB->rend(); ++Pred) { - if (Pred->getFlag(MachineInstr::FrameSetup)) - break; - auto PredDL = Pred->getDebugLoc(); - if (!PredDL || Pred->isMetaInstruction()) - continue; - // Check whether the instruction preceding the DBG_VALUE is in the same - // (sub)scope as the DBG_VALUE. - if (DL->getScope() == PredDL->getScope()) - return false; - auto *PredScope = LScopes.findLexicalScope(PredDL); - if (!PredScope || LScope->dominates(PredScope)) + // If the scope starts before the DBG_VALUE then we may have a negative + // result. Otherwise the location is live coming into the scope and we + // can skip the following checks. + if (!Ordering.isBefore(DbgValue, LScopeBegin)) { + // Exit if the lexical scope begins outside of the current block. 
+ if (LScopeBegin->getParent() != MBB) return false; + + MachineBasicBlock::const_reverse_iterator Pred(DbgValue); + for (++Pred; Pred != MBB->rend(); ++Pred) { + if (Pred->getFlag(MachineInstr::FrameSetup)) + break; + auto PredDL = Pred->getDebugLoc(); + if (!PredDL || Pred->isMetaInstruction()) + continue; + // Check whether the instruction preceding the DBG_VALUE is in the same + // (sub)scope as the DBG_VALUE. + if (DL->getScope() == PredDL->getScope()) + return false; + auto *PredScope = LScopes.findLexicalScope(PredDL); + if (!PredScope || LScope->dominates(PredScope)) + return false; + } } // If the range of the DBG_VALUE is open-ended, report success. @@ -1585,24 +1598,10 @@ static bool validThroughout(LexicalScopes &LScopes, if (DbgValue->getDebugOperand(0).isImm() && MBB->pred_empty()) return true; - // Now check for situations where an "open-ended" DBG_VALUE isn't enough to - // determine eligibility for a single location, e.g. nested scopes, inlined - // functions. - // FIXME: For now we just handle a simple (but common) case where the scope - // is contained in MBB. We could be smarter here. - // - // At this point we know that our scope ends in MBB. So, if RangeEnd exists - // outside of the block we can ignore it; the location is just leaking outside - // its scope. - assert(LScopeEnd->getParent() == MBB && "Scope ends outside MBB"); - if (RangeEnd->getParent() != DbgValue->getParent()) - return true; - - // The location range and variable's enclosing scope are both contained within - // MBB, test if location terminates before end of scope. - for (auto I = RangeEnd->getIterator(); I != MBB->end(); ++I) - if (&*I == LScopeEnd) - return false; + // Test if the location terminates before the end of the scope. + const MachineInstr *LScopeEnd = LSRange.back().second; + if (Ordering.isBefore(RangeEnd, LScopeEnd)) + return false; // There's a single location which starts at the scope start, and ends at or // after the scope end. 
@@ -1642,10 +1641,8 @@ static bool validThroughout(LexicalScopes &LScopes, // [1-3) [(reg0, fragment 0, 32), (reg1, fragment 32, 32)] // [3-4) [(reg1, fragment 32, 32), (123, fragment 64, 32)] // [4-) [(@g, fragment 0, 96)] -bool DwarfDebug::buildLocationList( - SmallVectorImpl<DebugLocEntry> &DebugLoc, - const DbgValueHistoryMap::Entries &Entries, - DenseSet<const MachineBasicBlock *> &VeryLargeBlocks) { +bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, + const DbgValueHistoryMap::Entries &Entries) { using OpenRange = std::pair<DbgValueHistoryMap::EntryIndex, DbgValueLoc>; SmallVector<OpenRange, 4> OpenRanges; @@ -1658,9 +1655,7 @@ bool DwarfDebug::buildLocationList( // Remove all values that are no longer live. size_t Index = std::distance(EB, EI); - auto Last = - remove_if(OpenRanges, [&](OpenRange &R) { return R.first <= Index; }); - OpenRanges.erase(Last, OpenRanges.end()); + erase_if(OpenRanges, [&](OpenRange &R) { return R.first <= Index; }); // If we are dealing with a clobbering entry, this iteration will result in // a location list entry starting after the clobbering instruction. @@ -1741,14 +1736,8 @@ bool DwarfDebug::buildLocationList( DebugLoc.pop_back(); } - // If there's a single entry, safe for a single location, and not part of - // an over-sized basic block, then ask validThroughout whether this - // location can be represented as a single variable location. - if (DebugLoc.size() != 1 || !isSafeForSingleLocation) - return false; - if (VeryLargeBlocks.count(StartDebugMI->getParent())) - return false; - return validThroughout(LScopes, StartDebugMI, EndMI); + return DebugLoc.size() == 1 && isSafeForSingleLocation && + validThroughout(LScopes, StartDebugMI, EndMI, getInstOrdering()); } DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU, @@ -1780,13 +1769,6 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU, // Grab the variable info that was squirreled away in the MMI side-table. 
collectVariableInfoFromMFTable(TheCU, Processed); - // Identify blocks that are unreasonably sized, so that we can later - // skip lexical scope analysis over them. - DenseSet<const MachineBasicBlock *> VeryLargeBlocks; - for (const auto &MBB : *CurFn) - if (MBB.size() > LocationAnalysisSizeLimit) - VeryLargeBlocks.insert(&MBB); - for (const auto &I : DbgValues) { InlinedEntity IV = I.first; if (Processed.count(IV)) @@ -1823,8 +1805,7 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU, if (HistSize == 1 || SingleValueWithClobber) { const auto *End = SingleValueWithClobber ? HistoryMapEntries[1].getInstr() : nullptr; - if (VeryLargeBlocks.count(MInsn->getParent()) == 0 && - validThroughout(LScopes, MInsn, End)) { + if (validThroughout(LScopes, MInsn, End, getInstOrdering())) { RegVar->initializeDbgValue(MInsn); continue; } @@ -1839,8 +1820,7 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU, // Build the location list for this variable. SmallVector<DebugLocEntry, 8> Entries; - bool isValidSingleLocation = - buildLocationList(Entries, HistoryMapEntries, VeryLargeBlocks); + bool isValidSingleLocation = buildLocationList(Entries, HistoryMapEntries); // Check whether buildLocationList managed to merge all locations to one // that is valid throughout the variable's scope. If so, produce single @@ -1945,7 +1925,8 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { } DebugHandlerBase::beginInstruction(MI); - assert(CurMI); + if (!CurMI) + return; if (NoDebug) return; @@ -2382,10 +2363,10 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name, TheU = Skeleton; // Emit the header. 
- Asm->OutStreamer->AddComment("Length of Public " + Name + " Info"); MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin"); MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end"); - Asm->emitLabelDifference(EndLabel, BeginLabel, 4); + Asm->emitDwarfUnitLength(EndLabel, BeginLabel, + "Length of Public " + Name + " Info"); Asm->OutStreamer->emitLabel(BeginLabel); @@ -2396,7 +2377,7 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name, emitSectionReference(*TheU); Asm->OutStreamer->AddComment("Compilation Unit Length"); - Asm->emitInt32(TheU->getLength()); + Asm->emitDwarfLengthOrOffset(TheU->getLength()); // Emit the pubnames for this compilation unit. for (const auto &GI : Globals) { @@ -2404,7 +2385,7 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name, const DIE *Entity = GI.second; Asm->OutStreamer->AddComment("DIE offset"); - Asm->emitInt32(Entity->getOffset()); + Asm->emitDwarfLengthOrOffset(Entity->getOffset()); if (GnuStyle) { dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity); @@ -2419,7 +2400,7 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name, } Asm->OutStreamer->AddComment("End Mark"); - Asm->emitInt32(0); + Asm->emitDwarfLengthOrOffset(0); Asm->OutStreamer->emitLabel(EndLabel); } @@ -2458,7 +2439,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, for (auto &Op : Expr) { assert(Op.getCode() != dwarf::DW_OP_const_type && "3 operand ops not yet supported"); - Streamer.EmitInt8(Op.getCode(), Comment != End ? *(Comment++) : ""); + Streamer.emitInt8(Op.getCode(), Comment != End ? *(Comment++) : ""); Offset++; for (unsigned I = 0; I < 2; ++I) { if (Op.getDescription().Op[I] == Encoding::SizeNA) @@ -2474,7 +2455,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, Comment++; } else { for (uint64_t J = Offset; J < Op.getOperandEndOffset(I); ++J) - Streamer.EmitInt8(Data.getData()[J], Comment != End ? 
*(Comment++) : ""); + Streamer.emitInt8(Data.getData()[J], Comment != End ? *(Comment++) : ""); } Offset = Op.getOperandEndOffset(I); } @@ -2511,10 +2492,26 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, TargetIndexLocation Loc = Value.getTargetIndexLocation(); // TODO TargetIndexLocation is a target-independent. Currently only the WebAssembly-specific // encoding is supported. + assert(AP.TM.getTargetTriple().isWasm()); DwarfExpr.addWasmLocation(Loc.Index, static_cast<uint64_t>(Loc.Offset)); + DwarfExpr.addExpression(std::move(ExprCursor)); + return; } else if (Value.isConstantFP()) { - APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt(); - DwarfExpr.addUnsignedConstant(RawBytes); + if (AP.getDwarfVersion() >= 4 && !AP.getDwarfDebug()->tuneForSCE() && + !ExprCursor) { + DwarfExpr.addConstantFP(Value.getConstantFP()->getValueAPF(), AP); + return; + } + if (Value.getConstantFP()->getValueAPF().bitcastToAPInt().getBitWidth() <= + 64 /*bits*/) + DwarfExpr.addUnsignedConstant( + Value.getConstantFP()->getValueAPF().bitcastToAPInt()); + else + LLVM_DEBUG( + dbgs() + << "Skipped DwarfExpression creation for ConstantFP of size" + << Value.getConstantFP()->getValueAPF().bitcastToAPInt().getBitWidth() + << " bits\n"); } DwarfExpr.addExpression(std::move(ExprCursor)); } @@ -2537,7 +2534,7 @@ void DebugLocEntry::finalize(const AsmPrinter &AP, }) && "all values are expected to be fragments"); assert(llvm::is_sorted(Values) && "fragments are expected to be sorted"); - for (auto Fragment : Values) + for (const auto &Fragment : Values) DwarfDebug::emitDebugLocValue(AP, BT, Fragment, DwarfExpr); } else { @@ -2580,7 +2577,8 @@ static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm, Asm->OutStreamer->emitLabel(Holder.getRnglistsTableBaseSym()); for (const RangeSpanList &List : Holder.getRangeLists()) - Asm->emitLabelDifference(List.Label, Holder.getRnglistsTableBaseSym(), 4); + Asm->emitLabelDifference(List.Label, 
Holder.getRnglistsTableBaseSym(), + Asm->getDwarfOffsetByteSize()); return TableEnd; } @@ -2599,7 +2597,8 @@ static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm, Asm->OutStreamer->emitLabel(DebugLocs.getSym()); for (const auto &List : DebugLocs.getLists()) - Asm->emitLabelDifference(List.Label, DebugLocs.getSym(), 4); + Asm->emitLabelDifference(List.Label, DebugLocs.getSym(), + Asm->getDwarfOffsetByteSize()); return TableEnd; } @@ -2881,23 +2880,23 @@ void DwarfDebug::emitDebugARanges() { // Emit size of content not including length itself. unsigned ContentSize = - sizeof(int16_t) + // DWARF ARange version number - sizeof(int32_t) + // Offset of CU in the .debug_info section - sizeof(int8_t) + // Pointer Size (in bytes) - sizeof(int8_t); // Segment Size (in bytes) + sizeof(int16_t) + // DWARF ARange version number + Asm->getDwarfOffsetByteSize() + // Offset of CU in the .debug_info + // section + sizeof(int8_t) + // Pointer Size (in bytes) + sizeof(int8_t); // Segment Size (in bytes) unsigned TupleSize = PtrSize * 2; // 7.20 in the Dwarf specs requires the table to be aligned to a tuple. - unsigned Padding = - offsetToAlignment(sizeof(int32_t) + ContentSize, Align(TupleSize)); + unsigned Padding = offsetToAlignment( + Asm->getUnitLengthFieldByteSize() + ContentSize, Align(TupleSize)); ContentSize += Padding; ContentSize += (List.size() + 1) * TupleSize; // For each compile unit, write the list of spans it covers. - Asm->OutStreamer->AddComment("Length of ARange Set"); - Asm->emitInt32(ContentSize); + Asm->emitDwarfUnitLength(ContentSize, "Length of ARange Set"); Asm->OutStreamer->AddComment("DWARF Arange version number"); Asm->emitInt16(dwarf::DW_ARANGES_VERSION); Asm->OutStreamer->AddComment("Offset Into Debug Info Section"); @@ -2983,25 +2982,30 @@ void DwarfDebug::emitDebugRangesDWO() { Asm->getObjFileLowering().getDwarfRnglistsDWOSection()); } -/// Emit the header of a DWARF 5 macro section. 
+/// Emit the header of a DWARF 5 macro section, or the GNU extension for +/// DWARF 4. static void emitMacroHeader(AsmPrinter *Asm, const DwarfDebug &DD, - const DwarfCompileUnit &CU) { + const DwarfCompileUnit &CU, uint16_t DwarfVersion) { enum HeaderFlagMask { #define HANDLE_MACRO_FLAG(ID, NAME) MACRO_FLAG_##NAME = ID, #include "llvm/BinaryFormat/Dwarf.def" }; - uint8_t Flags = 0; Asm->OutStreamer->AddComment("Macro information version"); - Asm->emitInt16(5); - // We are setting Offset and line offset flags unconditionally here, - // since we're only supporting DWARF32 and line offset should be mostly - // present. - // FIXME: Add support for DWARF64. - Flags |= MACRO_FLAG_DEBUG_LINE_OFFSET; - Asm->OutStreamer->AddComment("Flags: 32 bit, debug_line_offset present"); - Asm->emitInt8(Flags); + Asm->emitInt16(DwarfVersion >= 5 ? DwarfVersion : 4); + // We emit the line offset flag unconditionally here, since line offset should + // be mostly present. + if (Asm->isDwarf64()) { + Asm->OutStreamer->AddComment("Flags: 64 bit, debug_line_offset present"); + Asm->emitInt8(MACRO_FLAG_OFFSET_SIZE | MACRO_FLAG_DEBUG_LINE_OFFSET); + } else { + Asm->OutStreamer->AddComment("Flags: 32 bit, debug_line_offset present"); + Asm->emitInt8(MACRO_FLAG_DEBUG_LINE_OFFSET); + } Asm->OutStreamer->AddComment("debug_line_offset"); - Asm->OutStreamer->emitSymbolValue(CU.getLineTableStartSym(), /*Size=*/4); + if (DD.useSplitDwarf()) + Asm->emitDwarfLengthOrOffset(0); + else + Asm->emitDwarfSymbolReference(CU.getLineTableStartSym()); } void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) { @@ -3018,55 +3022,63 @@ void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) { void DwarfDebug::emitMacro(DIMacro &M) { StringRef Name = M.getName(); StringRef Value = M.getValue(); - bool UseMacro = getDwarfVersion() >= 5; - - if (UseMacro) { - unsigned Type = M.getMacinfoType() == dwarf::DW_MACINFO_define - ? 
dwarf::DW_MACRO_define_strx - : dwarf::DW_MACRO_undef_strx; - Asm->OutStreamer->AddComment(dwarf::MacroString(Type)); - Asm->emitULEB128(Type); - Asm->OutStreamer->AddComment("Line Number"); - Asm->emitULEB128(M.getLine()); - Asm->OutStreamer->AddComment("Macro String"); - if (!Value.empty()) - Asm->emitULEB128(this->InfoHolder.getStringPool() - .getIndexedEntry(*Asm, (Name + " " + Value).str()) - .getIndex()); - else - // DW_MACRO_undef_strx doesn't have a value, so just emit the macro - // string. - Asm->emitULEB128(this->InfoHolder.getStringPool() - .getIndexedEntry(*Asm, (Name).str()) - .getIndex()); + + // There should be one space between the macro name and the macro value in + // define entries. In undef entries, only the macro name is emitted. + std::string Str = Value.empty() ? Name.str() : (Name + " " + Value).str(); + + if (UseDebugMacroSection) { + if (getDwarfVersion() >= 5) { + unsigned Type = M.getMacinfoType() == dwarf::DW_MACINFO_define + ? dwarf::DW_MACRO_define_strx + : dwarf::DW_MACRO_undef_strx; + Asm->OutStreamer->AddComment(dwarf::MacroString(Type)); + Asm->emitULEB128(Type); + Asm->OutStreamer->AddComment("Line Number"); + Asm->emitULEB128(M.getLine()); + Asm->OutStreamer->AddComment("Macro String"); + Asm->emitULEB128( + InfoHolder.getStringPool().getIndexedEntry(*Asm, Str).getIndex()); + } else { + unsigned Type = M.getMacinfoType() == dwarf::DW_MACINFO_define + ? 
dwarf::DW_MACRO_GNU_define_indirect + : dwarf::DW_MACRO_GNU_undef_indirect; + Asm->OutStreamer->AddComment(dwarf::GnuMacroString(Type)); + Asm->emitULEB128(Type); + Asm->OutStreamer->AddComment("Line Number"); + Asm->emitULEB128(M.getLine()); + Asm->OutStreamer->AddComment("Macro String"); + Asm->emitDwarfSymbolReference( + InfoHolder.getStringPool().getEntry(*Asm, Str).getSymbol()); + } } else { Asm->OutStreamer->AddComment(dwarf::MacinfoString(M.getMacinfoType())); Asm->emitULEB128(M.getMacinfoType()); Asm->OutStreamer->AddComment("Line Number"); Asm->emitULEB128(M.getLine()); Asm->OutStreamer->AddComment("Macro String"); - Asm->OutStreamer->emitBytes(Name); - if (!Value.empty()) { - // There should be one space between macro name and macro value. - Asm->emitInt8(' '); - Asm->OutStreamer->AddComment("Macro Value="); - Asm->OutStreamer->emitBytes(Value); - } + Asm->OutStreamer->emitBytes(Str); Asm->emitInt8('\0'); } } void DwarfDebug::emitMacroFileImpl( - DIMacroFile &F, DwarfCompileUnit &U, unsigned StartFile, unsigned EndFile, + DIMacroFile &MF, DwarfCompileUnit &U, unsigned StartFile, unsigned EndFile, StringRef (*MacroFormToString)(unsigned Form)) { Asm->OutStreamer->AddComment(MacroFormToString(StartFile)); Asm->emitULEB128(StartFile); Asm->OutStreamer->AddComment("Line Number"); - Asm->emitULEB128(F.getLine()); + Asm->emitULEB128(MF.getLine()); Asm->OutStreamer->AddComment("File Number"); - Asm->emitULEB128(U.getOrCreateSourceID(F.getFile())); - handleMacroNodes(F.getElements(), U); + DIFile &F = *MF.getFile(); + if (useSplitDwarf()) + Asm->emitULEB128(getDwoLineTable(U)->getFile( + F.getDirectory(), F.getFilename(), getMD5AsBytes(&F), + Asm->OutContext.getDwarfVersion(), F.getSource())); + else + Asm->emitULEB128(U.getOrCreateSourceID(&F)); + handleMacroNodes(MF.getElements(), U); Asm->OutStreamer->AddComment(MacroFormToString(EndFile)); Asm->emitULEB128(EndFile); } @@ -3075,10 +3087,10 @@ void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) 
{ // DWARFv5 macro and DWARFv4 macinfo share some common encodings, // so for readibility/uniformity, We are explicitly emitting those. assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file); - bool UseMacro = getDwarfVersion() >= 5; - if (UseMacro) - emitMacroFileImpl(F, U, dwarf::DW_MACRO_start_file, - dwarf::DW_MACRO_end_file, dwarf::MacroString); + if (UseDebugMacroSection) + emitMacroFileImpl( + F, U, dwarf::DW_MACRO_start_file, dwarf::DW_MACRO_end_file, + (getDwarfVersion() >= 5) ? dwarf::MacroString : dwarf::GnuMacroString); else emitMacroFileImpl(F, U, dwarf::DW_MACINFO_start_file, dwarf::DW_MACINFO_end_file, dwarf::MacinfoString); @@ -3095,8 +3107,8 @@ void DwarfDebug::emitDebugMacinfoImpl(MCSection *Section) { continue; Asm->OutStreamer->SwitchSection(Section); Asm->OutStreamer->emitLabel(U.getMacroLabelBegin()); - if (getDwarfVersion() >= 5) - emitMacroHeader(Asm, *this, U); + if (UseDebugMacroSection) + emitMacroHeader(Asm, *this, U, getDwarfVersion()); handleMacroNodes(Macros, U); Asm->OutStreamer->AddComment("End Of Macro List Mark"); Asm->emitInt8(0); @@ -3106,14 +3118,14 @@ void DwarfDebug::emitDebugMacinfoImpl(MCSection *Section) { /// Emit macros into a debug macinfo/macro section. void DwarfDebug::emitDebugMacinfo() { auto &ObjLower = Asm->getObjFileLowering(); - emitDebugMacinfoImpl(getDwarfVersion() >= 5 + emitDebugMacinfoImpl(UseDebugMacroSection ? ObjLower.getDwarfMacroSection() : ObjLower.getDwarfMacinfoSection()); } void DwarfDebug::emitDebugMacinfoDWO() { auto &ObjLower = Asm->getObjFileLowering(); - emitDebugMacinfoImpl(getDwarfVersion() >= 5 + emitDebugMacinfoImpl(UseDebugMacroSection ? 
ObjLower.getDwarfMacroDWOSection() : ObjLower.getDwarfMacinfoDWOSection()); } @@ -3200,7 +3212,7 @@ MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) { const DICompileUnit *DIUnit = CU.getCUNode(); SplitTypeUnitFileTable.maybeSetRootFile( DIUnit->getDirectory(), DIUnit->getFilename(), - CU.getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource()); + getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource()); return &SplitTypeUnitFileTable; } @@ -3303,14 +3315,14 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD) : DD(DD), - TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)) { + TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)), AddrPoolUsed(DD->AddrPool.hasBeenUsed()) { DD->TypeUnitsUnderConstruction.clear(); - assert(TypeUnitsUnderConstruction.empty() || !DD->AddrPool.hasBeenUsed()); + DD->AddrPool.resetUsedFlag(); } DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() { DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction); - DD->AddrPool.resetUsedFlag(); + DD->AddrPool.resetUsedFlag(AddrPoolUsed); } DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() { @@ -3375,6 +3387,15 @@ uint16_t DwarfDebug::getDwarfVersion() const { return Asm->OutStreamer->getContext().getDwarfVersion(); } +dwarf::Form DwarfDebug::getDwarfSectionOffsetForm() const { + if (Asm->getDwarfVersion() >= 4) + return dwarf::Form::DW_FORM_sec_offset; + assert((!Asm->isDwarf64() || (Asm->getDwarfVersion() == 3)) && + "DWARF64 is not defined prior DWARFv3"); + return Asm->isDwarf64() ? 
dwarf::Form::DW_FORM_data8 + : dwarf::Form::DW_FORM_data4; +} + const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) { return SectionLabels.find(S)->second; } @@ -3383,3 +3404,20 @@ void DwarfDebug::insertSectionLabel(const MCSymbol *S) { if (useSplitDwarf() || getDwarfVersion() >= 5) AddrPool.getIndex(S); } + +Optional<MD5::MD5Result> DwarfDebug::getMD5AsBytes(const DIFile *File) const { + assert(File); + if (getDwarfVersion() < 5) + return None; + Optional<DIFile::ChecksumInfo<StringRef>> Checksum = File->getChecksum(); + if (!Checksum || Checksum->Kind != DIFile::CSK_MD5) + return None; + + // Convert the string checksum to an MD5Result for the streamer. + // The verifier validates the checksum so we assume it's okay. + // An MD5 checksum is 16 bytes. + std::string ChecksumString = fromHex(Checksum->Value); + MD5::MD5Result CKMem; + std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.Bytes.data()); + return CKMem; +} diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index ad2f2f3edd8e..df19ef458888 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -114,7 +114,7 @@ public: /// /// Variables that have been optimized out use none of these fields. class DbgVariable : public DbgEntity { - /// Offset in DebugLocs. + /// Index of the entry list in DebugLocs. unsigned DebugLocListIndex = ~0u; /// DW_OP_LLVM_tag_offset value from DebugLocs. Optional<uint8_t> DebugLocListTagOffset; @@ -372,6 +372,23 @@ class DwarfDebug : public DebugHandlerBase { /// Generate DWARF v4 type units. bool GenerateTypeUnits; + /// Emit a .debug_macro section instead of .debug_macinfo. + bool UseDebugMacroSection; + + /// Avoid using DW_OP_convert due to consumer incompatibilities. + bool EnableOpConvert; + +public: + enum class MinimizeAddrInV5 { + Default, + Disabled, + Ranges, + }; + +private: + /// Force the use of DW_AT_ranges even for single-entry range lists. 
+ MinimizeAddrInV5 MinimizeAddr = MinimizeAddrInV5::Disabled; + /// DWARF5 Experimental Options /// @{ AccelTableKind TheAccelTableKind; @@ -409,6 +426,9 @@ class DwarfDebug : public DebugHandlerBase { bool SingleCU; bool IsDarwin; + /// Map for tracking Fortran deferred CHARACTER lengths. + DenseMap<const DIStringType *, unsigned> StringTypeLocMap; + AddressPool AddrPool; /// Accelerator tables. @@ -592,10 +612,8 @@ class DwarfDebug : public DebugHandlerBase { /// function that describe the same variable. If the resulting /// list has only one entry that is valid for entire variable's /// scope return true. - bool buildLocationList( - SmallVectorImpl<DebugLocEntry> &DebugLoc, - const DbgValueHistoryMap::Entries &Entries, - DenseSet<const MachineBasicBlock *> &VeryLargeBlocks); + bool buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, + const DbgValueHistoryMap::Entries &Entries); /// Collect variable information from the side table maintained by MF. void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU, @@ -617,13 +635,13 @@ public: //===--------------------------------------------------------------------===// // Main entry points. // - DwarfDebug(AsmPrinter *A, Module *M); + DwarfDebug(AsmPrinter *A); ~DwarfDebug() override; /// Emit all Dwarf sections that should come prior to the /// content. - void beginModule(); + void beginModule(Module *M) override; /// Emit all Dwarf sections that should come after the content. void endModule() override; @@ -645,6 +663,7 @@ public: class NonTypeUnitContext { DwarfDebug *DD; decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction; + bool AddrPoolUsed; friend class DwarfDebug; NonTypeUnitContext(DwarfDebug *DD); public: @@ -681,6 +700,12 @@ public: /// Returns whether ranges section should be emitted. bool useRangesSection() const { return UseRangesSection; } + /// Returns whether range encodings should be used for single entry range + /// lists. 
+ bool alwaysUseRanges() const { + return MinimizeAddr == MinimizeAddrInV5::Ranges; + } + /// Returns whether to use sections as labels rather than temp symbols. bool useSectionsAsReferences() const { return UseSectionsAsReferences; @@ -719,11 +744,21 @@ public: return EmitDebugEntryValues; } + bool useOpConvert() const { + return EnableOpConvert; + } + bool shareAcrossDWOCUs() const; /// Returns the Dwarf Version. uint16_t getDwarfVersion() const; + /// Returns a suitable DWARF form to represent a section offset, i.e. + /// * DW_FORM_sec_offset for DWARF version >= 4; + /// * DW_FORM_data8 for 64-bit DWARFv3; + /// * DW_FORM_data4 for 32-bit DWARFv3 and DWARFv2. + dwarf::Form getDwarfSectionOffsetForm() const; + /// Returns the previous CU that was being updated const DwarfCompileUnit *getPrevCU() const { return PrevCU; } void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; } @@ -768,6 +803,16 @@ public: return CUDieMap.lookup(Die); } + unsigned getStringTypeLoc(const DIStringType *ST) const { + return StringTypeLocMap.lookup(ST); + } + + void addStringTypeLoc(const DIStringType *ST, unsigned Loc) { + assert(ST); + if (Loc) + StringTypeLocMap[ST] = Loc; + } + /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger. /// /// Returns whether we are "tuning" for a given debugger. @@ -777,13 +822,16 @@ public: bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; } /// @} - void addSectionLabel(const MCSymbol *Sym); const MCSymbol *getSectionLabel(const MCSection *S); void insertSectionLabel(const MCSymbol *S); static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, const DbgValueLoc &Value, DwarfExpression &DwarfExpr); + + /// If the \p File has an MD5 checksum, return it as an MD5Result + /// allocated in the MCContext. 
+ Optional<MD5::MD5Result> getMD5AsBytes(const DIFile *File) const; }; } // end namespace llvm diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/llvm/lib/CodeGen/AsmPrinter/DwarfException.h index c2956380438f..b19b4365383f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -92,6 +92,20 @@ public: /// Gather and emit post-function exception information. void endFunction(const MachineFunction *) override; }; + +class LLVM_LIBRARY_VISIBILITY AIXException : public DwarfCFIExceptionBase { + /// This is AIX's compat unwind section, which unwinder would use + /// to find the location of LSDA area and personality rountine. + void emitExceptionInfoTable(const MCSymbol *LSDA, const MCSymbol *PerSym); + +public: + AIXException(AsmPrinter *A); + + void endModule() override {} + void beginFunction(const MachineFunction *MF) override {} + + void endFunction(const MachineFunction *MF) override; +}; } // End of namespace llvm #endif diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index d4762121d105..59ad7646ce1c 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -17,14 +17,14 @@ #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Support/ErrorHandling.h" #include <algorithm> -#include <cassert> -#include <cstdint> using namespace llvm; +#define DEBUG_TYPE "dwarfdebug" + void DwarfExpression::emitConstu(uint64_t Value) { if (Value < 32) emitOp(dwarf::DW_OP_lit0 + Value); @@ -97,7 +97,8 @@ void DwarfExpression::addAnd(unsigned Mask) { } bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, - unsigned MachineReg, unsigned MaxSize) { + llvm::Register MachineReg, + unsigned MaxSize) { if 
(!llvm::Register::isPhysicalRegister(MachineReg)) { if (isFrameRegister(TRI, MachineReg)) { DwarfRegs.push_back(Register::createRegister(-1, nullptr)); @@ -219,9 +220,36 @@ void DwarfExpression::addUnsignedConstant(const APInt &Value) { } } +void DwarfExpression::addConstantFP(const APFloat &APF, const AsmPrinter &AP) { + assert(isImplicitLocation() || isUnknownLocation()); + APInt API = APF.bitcastToAPInt(); + int NumBytes = API.getBitWidth() / 8; + if (NumBytes == 4 /*float*/ || NumBytes == 8 /*double*/) { + // FIXME: Add support for `long double`. + emitOp(dwarf::DW_OP_implicit_value); + emitUnsigned(NumBytes /*Size of the block in bytes*/); + + // The loop below is emitting the value starting at least significant byte, + // so we need to perform a byte-swap to get the byte order correct in case + // of a big-endian target. + if (AP.getDataLayout().isBigEndian()) + API = API.byteSwap(); + + for (int i = 0; i < NumBytes; ++i) { + emitData1(API.getZExtValue() & 0xFF); + API = API.lshr(8); + } + + return; + } + LLVM_DEBUG( + dbgs() << "Skipped DW_OP_implicit_value creation for ConstantFP of size: " + << API.getBitWidth() << " bits\n"); +} + bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, DIExpressionCursor &ExprCursor, - unsigned MachineReg, + llvm::Register MachineReg, unsigned FragmentOffsetInBits) { auto Fragment = ExprCursor.getFragmentInfo(); if (!addMachineReg(TRI, MachineReg, Fragment ? 
Fragment->SizeInBits : ~1U)) { @@ -498,6 +526,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, case dwarf::DW_OP_not: case dwarf::DW_OP_dup: case dwarf::DW_OP_push_object_address: + case dwarf::DW_OP_over: emitOp(OpNum); break; case dwarf::DW_OP_deref: @@ -513,10 +542,15 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, assert(!isRegisterLocation()); emitConstu(Op->getArg(0)); break; + case dwarf::DW_OP_consts: + assert(!isRegisterLocation()); + emitOp(dwarf::DW_OP_consts); + emitSigned(Op->getArg(0)); + break; case dwarf::DW_OP_LLVM_convert: { unsigned BitSize = Op->getArg(0); dwarf::TypeKind Encoding = static_cast<dwarf::TypeKind>(Op->getArg(1)); - if (DwarfVersion >= 5) { + if (DwarfVersion >= 5 && CU.getDwarfDebug().useOpConvert()) { emitOp(dwarf::DW_OP_convert); // If targeting a location-list; simply emit the index into the raw // byte stream as ULEB128, DwarfDebug::emitDebugLocEntry has been diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index 757b17511453..8fca9f5a630b 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -218,7 +218,7 @@ protected: /// Return whether the given machine register is the frame register in the /// current function. virtual bool isFrameRegister(const TargetRegisterInfo &TRI, - unsigned MachineReg) = 0; + llvm::Register MachineReg) = 0; /// Emit a DW_OP_reg operation. Note that this is only legal inside a DWARF /// register location description. @@ -245,7 +245,7 @@ protected: /// multiple subregisters that alias the register. /// /// \return false if no DWARF register exists for MachineReg. - bool addMachineReg(const TargetRegisterInfo &TRI, unsigned MachineReg, + bool addMachineReg(const TargetRegisterInfo &TRI, llvm::Register MachineReg, unsigned MaxSize = ~1U); /// Emit a DW_OP_piece or DW_OP_bit_piece operation for a variable fragment. 
@@ -299,6 +299,9 @@ public: /// Emit an unsigned constant. void addUnsignedConstant(const APInt &Value); + /// Emit an floating point constant. + void addConstantFP(const APFloat &Value, const AsmPrinter &AP); + /// Lock this down to become a memory location description. void setMemoryLocationKind() { assert(isUnknownLocation()); @@ -322,7 +325,8 @@ public: /// \return false if no DWARF register exists /// for MachineReg. bool addMachineRegExpression(const TargetRegisterInfo &TRI, - DIExpressionCursor &Expr, unsigned MachineReg, + DIExpressionCursor &Expr, + llvm::Register MachineReg, unsigned FragmentOffsetInBits = 0); /// Begin emission of an entry value dwarf operation. The entry value's @@ -385,7 +389,7 @@ class DebugLocDwarfExpression final : public DwarfExpression { void commitTemporaryBuffer() override; bool isFrameRegister(const TargetRegisterInfo &TRI, - unsigned MachineReg) override; + llvm::Register MachineReg) override; public: DebugLocDwarfExpression(unsigned DwarfVersion, BufferByteStreamer &BS, @@ -415,7 +419,7 @@ class DIEDwarfExpression final : public DwarfExpression { void commitTemporaryBuffer() override; bool isFrameRegister(const TargetRegisterInfo &TRI, - unsigned MachineReg) override; + llvm::Register MachineReg) override; public: DIEDwarfExpression(const AsmPrinter &AP, DwarfCompileUnit &CU, DIELoc &DIE); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp index 812e6383288f..838e1c9a10be 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -10,10 +10,9 @@ #include "DwarfCompileUnit.h" #include "DwarfDebug.h" #include "DwarfUnit.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/DIE.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Metadata.h" #include "llvm/MC/MCStreamer.h" #include <algorithm> #include <cstdint> @@ -59,7 +58,7 @@ void DwarfFile::emitUnit(DwarfUnit *TheU, bool 
UseOffsets) { // Compute the size and offset for each DIE. void DwarfFile::computeSizeAndOffsets() { // Offset from the first CU in the debug info section is 0 initially. - unsigned SecOffset = 0; + uint64_t SecOffset = 0; // Iterate over each compile unit and set the size and offsets for each // DIE within each compile unit. All offsets are CU relative. @@ -75,12 +74,15 @@ void DwarfFile::computeSizeAndOffsets() { TheU->setDebugSectionOffset(SecOffset); SecOffset += computeSizeAndOffsetsForUnit(TheU.get()); } + if (SecOffset > UINT32_MAX && !Asm->isDwarf64()) + report_fatal_error("The generated debug information is too large " + "for the 32-bit DWARF format."); } unsigned DwarfFile::computeSizeAndOffsetsForUnit(DwarfUnit *TheU) { // CU-relative offset is reset to 0 here. - unsigned Offset = sizeof(int32_t) + // Length of Unit Info - TheU->getHeaderSize(); // Unit-specific headers + unsigned Offset = Asm->getUnitLengthFieldByteSize() + // Length of Unit Info + TheU->getHeaderSize(); // Unit-specific headers // The return value here is CU-relative, after laying out // all of the CU DIE. diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h index cf293d7534d0..79a6ce7801b7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -14,7 +14,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/DIE.h" -#include "llvm/IR/Metadata.h" #include "llvm/Support/Allocator.h" #include <map> #include <memory> @@ -26,10 +25,12 @@ class AsmPrinter; class DbgEntity; class DbgVariable; class DbgLabel; +class DINode; class DwarfCompileUnit; class DwarfUnit; class LexicalScope; class MCSection; +class MDNode; // Data structure to hold a range for range lists. 
struct RangeSpan { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp index a43929d8e8f7..a876f8ccace9 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -8,7 +8,6 @@ #include "DwarfStringPool.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/MC/MCAsmInfo.h" @@ -33,7 +32,6 @@ DwarfStringPool::getEntryImpl(AsmPrinter &Asm, StringRef Str) { Entry.Symbol = ShouldCreateSymbols ? Asm.createTempSymbol(Prefix) : nullptr; NumBytes += Str.size() + 1; - assert(NumBytes > Entry.Offset && "Unexpected overflow"); } return *I.first; } @@ -58,13 +56,13 @@ void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm, if (getNumIndexedStrings() == 0) return; Asm.OutStreamer->SwitchSection(Section); - unsigned EntrySize = 4; - // FIXME: DWARF64 + unsigned EntrySize = Asm.getDwarfOffsetByteSize(); // We are emitting the header for a contribution to the string offsets // table. The header consists of an entry with the contribution's // size (not including the size of the length field), the DWARF version and // 2 bytes of padding. - Asm.emitInt32(getNumIndexedStrings() * EntrySize + 4); + Asm.emitDwarfUnitLength(getNumIndexedStrings() * EntrySize + 4, + "Length of String Offsets Set"); Asm.emitInt16(Asm.getDwarfVersion()); Asm.emitInt16(0); // Define the symbol that marks the start of the contribution. It is @@ -120,7 +118,7 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection, } Asm.OutStreamer->SwitchSection(OffsetSection); - unsigned size = 4; // FIXME: DWARF64 is 8. 
+ unsigned size = Asm.getDwarfOffsetByteSize(); for (const auto &Entry : Entries) if (UseRelativeOffsets) Asm.emitDwarfStringOffset(Entry->getValue()); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h index c5f5637fdae3..79b5df89e338 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h @@ -28,7 +28,7 @@ class DwarfStringPool { StringMap<EntryTy, BumpPtrAllocator &> Pool; StringRef Prefix; - unsigned NumBytes = 0; + uint64_t NumBytes = 0; unsigned NumIndexedStrings = 0; bool ShouldCreateSymbols; diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index e958f38e486b..118b5fcc3bf6 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -13,7 +13,6 @@ #include "DwarfUnit.h" #include "AddressPool.h" #include "DwarfCompileUnit.h" -#include "DwarfDebug.h" #include "DwarfExpression.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" @@ -84,15 +83,14 @@ unsigned DIEDwarfExpression::getTemporaryBufferSize() { void DIEDwarfExpression::commitTemporaryBuffer() { OutDIE.takeValues(TmpDIE); } bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, - unsigned MachineReg) { + llvm::Register MachineReg) { return MachineReg == TRI.getFrameRegister(*AP.MF); } DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU) - : DIEUnit(A->getDwarfVersion(), A->MAI->getCodePointerSize(), UnitTag), - CUNode(Node), Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr) { -} + : DIEUnit(UnitTag), CUNode(Node), Asm(A), DD(DW), DU(DWU), + IndexTyDie(nullptr) {} DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU, @@ -301,27 +299,7 @@ void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) { void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute 
Attribute, uint64_t Integer) { - if (DD->getDwarfVersion() >= 4) - addUInt(Die, Attribute, dwarf::DW_FORM_sec_offset, Integer); - else - addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer); -} - -Optional<MD5::MD5Result> DwarfUnit::getMD5AsBytes(const DIFile *File) const { - assert(File); - if (DD->getDwarfVersion() < 5) - return None; - Optional<DIFile::ChecksumInfo<StringRef>> Checksum = File->getChecksum(); - if (!Checksum || Checksum->Kind != DIFile::CSK_MD5) - return None; - - // Convert the string checksum to an MD5Result for the streamer. - // The verifier validates the checksum so we assume it's okay. - // An MD5 checksum is 16 bytes. - std::string ChecksumString = fromHex(Checksum->Value); - MD5::MD5Result CKMem; - std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.Bytes.data()); - return CKMem; + addUInt(Die, Attribute, DD->getDwarfSectionOffsetForm(), Integer); } unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) { @@ -332,10 +310,9 @@ unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) { // This is a split type unit that needs a line table. 
addSectionOffset(getUnitDie(), dwarf::DW_AT_stmt_list, 0); } - return SplitLineTable->getFile(File->getDirectory(), File->getFilename(), - getMD5AsBytes(File), - Asm->OutContext.getDwarfVersion(), - File->getSource()); + return SplitLineTable->getFile( + File->getDirectory(), File->getFilename(), DD->getMD5AsBytes(File), + Asm->OutContext.getDwarfVersion(), File->getSource()); } void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) { @@ -353,7 +330,7 @@ void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) { } addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - addLabel(Die, dwarf::DW_FORM_udata, Sym); + addLabel(Die, dwarf::DW_FORM_addr, Sym); } void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute, @@ -457,77 +434,6 @@ void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) { addSourceLine(Die, Ty->getLine(), Ty->getFile()); } -/// Return true if type encoding is unsigned. -static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) { - if (auto *CTy = dyn_cast<DICompositeType>(Ty)) { - // FIXME: Enums without a fixed underlying type have unknown signedness - // here, leading to incorrectly emitted constants. - if (CTy->getTag() == dwarf::DW_TAG_enumeration_type) - return false; - - // (Pieces of) aggregate types that get hacked apart by SROA may be - // represented by a constant. Encode them as unsigned bytes. - return true; - } - - if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) { - dwarf::Tag T = (dwarf::Tag)Ty->getTag(); - // Encode pointer constants as unsigned bytes. This is used at least for - // null pointer constant emission. - // FIXME: reference and rvalue_reference /probably/ shouldn't be allowed - // here, but accept them for now due to a bug in SROA producing bogus - // dbg.values. 
- if (T == dwarf::DW_TAG_pointer_type || - T == dwarf::DW_TAG_ptr_to_member_type || - T == dwarf::DW_TAG_reference_type || - T == dwarf::DW_TAG_rvalue_reference_type) - return true; - assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type || - T == dwarf::DW_TAG_volatile_type || - T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type); - assert(DTy->getBaseType() && "Expected valid base type"); - return isUnsignedDIType(DD, DTy->getBaseType()); - } - - auto *BTy = cast<DIBasicType>(Ty); - unsigned Encoding = BTy->getEncoding(); - assert((Encoding == dwarf::DW_ATE_unsigned || - Encoding == dwarf::DW_ATE_unsigned_char || - Encoding == dwarf::DW_ATE_signed || - Encoding == dwarf::DW_ATE_signed_char || - Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF || - Encoding == dwarf::DW_ATE_boolean || - (Ty->getTag() == dwarf::DW_TAG_unspecified_type && - Ty->getName() == "decltype(nullptr)")) && - "Unsupported encoding"); - return Encoding == dwarf::DW_ATE_unsigned || - Encoding == dwarf::DW_ATE_unsigned_char || - Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || - Ty->getTag() == dwarf::DW_TAG_unspecified_type; -} - -void DwarfUnit::addConstantFPValue(DIE &Die, const MachineOperand &MO) { - assert(MO.isFPImm() && "Invalid machine operand!"); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock; - APFloat FPImm = MO.getFPImm()->getValueAPF(); - - // Get the raw data form of the floating point. - const APInt FltVal = FPImm.bitcastToAPInt(); - const char *FltPtr = (const char *)FltVal.getRawData(); - - int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. - bool LittleEndian = Asm->getDataLayout().isLittleEndian(); - int Incr = (LittleEndian ? 1 : -1); - int Start = (LittleEndian ? 0 : NumBytes - 1); - int Stop = (LittleEndian ? NumBytes : -1); - - // Output the constant to DWARF one byte at a time. 
- for (; Start != Stop; Start += Incr) - addUInt(*Block, dwarf::DW_FORM_data1, (unsigned char)0xFF & FltPtr[Start]); - - addBlock(Die, dwarf::DW_AT_const_value, Block); -} - void DwarfUnit::addConstantFPValue(DIE &Die, const ConstantFP *CFP) { // Pass this down to addConstantValue as an unsigned bag of bits. addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true); @@ -538,15 +444,8 @@ void DwarfUnit::addConstantValue(DIE &Die, const ConstantInt *CI, addConstantValue(Die, CI->getValue(), Ty); } -void DwarfUnit::addConstantValue(DIE &Die, const MachineOperand &MO, - const DIType *Ty) { - assert(MO.isImm() && "Invalid machine operand!"); - - addConstantValue(Die, isUnsignedDIType(DD, Ty), MO.getImm()); -} - void DwarfUnit::addConstantValue(DIE &Die, uint64_t Val, const DIType *Ty) { - addConstantValue(Die, isUnsignedDIType(DD, Ty), Val); + addConstantValue(Die, DD->isUnsignedDIType(Ty), Val); } void DwarfUnit::addConstantValue(DIE &Die, bool Unsigned, uint64_t Val) { @@ -557,7 +456,7 @@ void DwarfUnit::addConstantValue(DIE &Die, bool Unsigned, uint64_t Val) { } void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty) { - addConstantValue(Die, Val, isUnsignedDIType(DD, Ty)); + addConstantValue(Die, Val, DD->isUnsignedDIType(Ty)); } void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) { @@ -654,6 +553,8 @@ DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE, if (auto *BT = dyn_cast<DIBasicType>(Ty)) constructTypeDIE(TyDIE, BT); + else if (auto *ST = dyn_cast<DIStringType>(Ty)) + constructTypeDIE(TyDIE, ST); else if (auto *STy = dyn_cast<DISubroutineType>(Ty)) constructTypeDIE(TyDIE, STy); else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) { @@ -772,8 +673,9 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) { if (BTy->getTag() == dwarf::DW_TAG_unspecified_type) return; - addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - BTy->getEncoding()); + if (BTy->getTag() != 
dwarf::DW_TAG_string_type) + addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + BTy->getEncoding()); uint64_t Size = BTy->getSizeInBits() >> 3; addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); @@ -784,6 +686,37 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) { addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_little); } +void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIStringType *STy) { + // Get core information. + StringRef Name = STy->getName(); + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(Buffer, dwarf::DW_AT_name, Name); + + if (DIVariable *Var = STy->getStringLength()) { + if (auto *VarDIE = getDIE(Var)) + addDIEEntry(Buffer, dwarf::DW_AT_string_length, *VarDIE); + } else if (DIExpression *Expr = STy->getStringLengthExp()) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc); + // This is to describe the memory location of the + // length of a Fortran deferred length string, so + // lock it down as such. + DwarfExpr.setMemoryLocationKind(); + DwarfExpr.addExpression(Expr); + addBlock(Buffer, dwarf::DW_AT_string_length, DwarfExpr.finalize()); + } else { + uint64_t Size = STy->getSizeInBits() >> 3; + addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); + } + + if (STy->getEncoding()) { + // For eventual Unicode support. + addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + STy->getEncoding()); + } +} + void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) { // Get core information. StringRef Name = DTy->getName(); @@ -910,6 +843,11 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { } } + // Add template parameters to a class, structure or union types. + if (Tag == dwarf::DW_TAG_class_type || + Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) + addTemplateParams(Buffer, CTy->getTemplateParams()); + // Add elements to structure type. 
DINodeArray Elements = CTy->getElements(); for (const auto *Element : Elements) { @@ -929,7 +867,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { DIE &Variant = createAndAddDIE(dwarf::DW_TAG_variant, Buffer); if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DDTy->getDiscriminantValue())) { - if (isUnsignedDIType(DD, Discriminator->getBaseType())) + if (DD->isUnsignedDIType(Discriminator->getBaseType())) addUInt(Variant, dwarf::DW_AT_discr_value, None, CI->getZExtValue()); else addSInt(Variant, dwarf::DW_AT_discr_value, None, CI->getSExtValue()); @@ -979,12 +917,6 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { if (CTy->isObjcClassComplete()) addFlag(Buffer, dwarf::DW_AT_APPLE_objc_complete_type); - // Add template parameters to a class, structure or union types. - // FIXME: The support isn't in the metadata for this yet. - if (Tag == dwarf::DW_TAG_class_type || - Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) - addTemplateParams(Buffer, CTy->getTemplateParams()); - // Add the type's non-standard calling convention. uint8_t CC = 0; if (CTy->isTypePassByValue()) @@ -1008,8 +940,10 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) { // Add size if non-zero (derived types might be zero-sized.) + // Ignore the size if it's a non-enum forward decl. // TODO: Do we care about size for enum forward declarations? - if (Size) + if (Size && + (!CTy->isForwardDecl() || Tag == dwarf::DW_TAG_enumeration_type)) addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); else if (!CTy->isForwardDecl()) // Add zero size if it is not a forward declaration. 
@@ -1133,6 +1067,8 @@ DIE *DwarfUnit::getOrCreateModule(const DIModule *M) { getOrCreateSourceID(M->getFile())); if (M->getLineNo()) addUInt(MDie, dwarf::DW_AT_decl_line, None, M->getLineNo()); + if (M->getIsDecl()) + addFlag(MDie, dwarf::DW_AT_declaration); return &MDie; } @@ -1354,7 +1290,7 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, if (auto *CI = SR->getCount().dyn_cast<ConstantInt*>()) Count = CI->getSExtValue(); - auto addBoundTypeEntry = [&](dwarf::Attribute Attr, + auto AddBoundTypeEntry = [&](dwarf::Attribute Attr, DISubrange::BoundType Bound) -> void { if (auto *BV = Bound.dyn_cast<DIVariable *>()) { if (auto *VarDIE = getDIE(BV)) @@ -1372,7 +1308,7 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, } }; - addBoundTypeEntry(dwarf::DW_AT_lower_bound, SR->getLowerBound()); + AddBoundTypeEntry(dwarf::DW_AT_lower_bound, SR->getLowerBound()); if (auto *CV = SR->getCount().dyn_cast<DIVariable*>()) { if (auto *CountVarDIE = getDIE(CV)) @@ -1380,9 +1316,45 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, } else if (Count != -1) addUInt(DW_Subrange, dwarf::DW_AT_count, None, Count); - addBoundTypeEntry(dwarf::DW_AT_upper_bound, SR->getUpperBound()); + AddBoundTypeEntry(dwarf::DW_AT_upper_bound, SR->getUpperBound()); - addBoundTypeEntry(dwarf::DW_AT_byte_stride, SR->getStride()); + AddBoundTypeEntry(dwarf::DW_AT_byte_stride, SR->getStride()); +} + +void DwarfUnit::constructGenericSubrangeDIE(DIE &Buffer, + const DIGenericSubrange *GSR, + DIE *IndexTy) { + DIE &DwGenericSubrange = + createAndAddDIE(dwarf::DW_TAG_generic_subrange, Buffer); + addDIEEntry(DwGenericSubrange, dwarf::DW_AT_type, *IndexTy); + + int64_t DefaultLowerBound = getDefaultLowerBound(); + + auto AddBoundTypeEntry = [&](dwarf::Attribute Attr, + DIGenericSubrange::BoundType Bound) -> void { + if (auto *BV = Bound.dyn_cast<DIVariable *>()) { + if (auto *VarDIE = getDIE(BV)) + addDIEEntry(DwGenericSubrange, Attr, 
*VarDIE); + } else if (auto *BE = Bound.dyn_cast<DIExpression *>()) { + if (BE->isSignedConstant()) { + if (Attr != dwarf::DW_AT_lower_bound || DefaultLowerBound == -1 || + static_cast<int64_t>(BE->getElement(1)) != DefaultLowerBound) + addSInt(DwGenericSubrange, Attr, dwarf::DW_FORM_sdata, + BE->getElement(1)); + } else { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc); + DwarfExpr.setMemoryLocationKind(); + DwarfExpr.addExpression(BE); + addBlock(DwGenericSubrange, Attr, DwarfExpr.finalize()); + } + } + }; + + AddBoundTypeEntry(dwarf::DW_AT_lower_bound, GSR->getLowerBound()); + AddBoundTypeEntry(dwarf::DW_AT_count, GSR->getCount()); + AddBoundTypeEntry(dwarf::DW_AT_upper_bound, GSR->getUpperBound()); + AddBoundTypeEntry(dwarf::DW_AT_byte_stride, GSR->getStride()); } DIE *DwarfUnit::getIndexTyDie() { @@ -1417,8 +1389,10 @@ static bool hasVectorBeenPadded(const DICompositeType *CTy) { Elements[0]->getTag() == dwarf::DW_TAG_subrange_type && "Invalid vector element array, expected one element of type subrange"); const auto Subrange = cast<DISubrange>(Elements[0]); - const auto CI = Subrange->getCount().get<ConstantInt *>(); - const int32_t NumVecElements = CI->getSExtValue(); + const auto NumVecElements = + Subrange->getCount() + ? Subrange->getCount().get<ConstantInt *>()->getSExtValue() + : 0; // Ensure we found the element count and that the actual size is wide // enough to contain the requested size. 
@@ -1445,6 +1419,39 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) { addBlock(Buffer, dwarf::DW_AT_data_location, DwarfExpr.finalize()); } + if (DIVariable *Var = CTy->getAssociated()) { + if (auto *VarDIE = getDIE(Var)) + addDIEEntry(Buffer, dwarf::DW_AT_associated, *VarDIE); + } else if (DIExpression *Expr = CTy->getAssociatedExp()) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc); + DwarfExpr.setMemoryLocationKind(); + DwarfExpr.addExpression(Expr); + addBlock(Buffer, dwarf::DW_AT_associated, DwarfExpr.finalize()); + } + + if (DIVariable *Var = CTy->getAllocated()) { + if (auto *VarDIE = getDIE(Var)) + addDIEEntry(Buffer, dwarf::DW_AT_allocated, *VarDIE); + } else if (DIExpression *Expr = CTy->getAllocatedExp()) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc); + DwarfExpr.setMemoryLocationKind(); + DwarfExpr.addExpression(Expr); + addBlock(Buffer, dwarf::DW_AT_allocated, DwarfExpr.finalize()); + } + + if (auto *RankConst = CTy->getRankConst()) { + addSInt(Buffer, dwarf::DW_AT_rank, dwarf::DW_FORM_sdata, + RankConst->getSExtValue()); + } else if (auto *RankExpr = CTy->getRankExp()) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc); + DwarfExpr.setMemoryLocationKind(); + DwarfExpr.addExpression(RankExpr); + addBlock(Buffer, dwarf::DW_AT_rank, DwarfExpr.finalize()); + } + // Emit the element type. addType(Buffer, CTy->getBaseType()); @@ -1457,15 +1464,19 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) { DINodeArray Elements = CTy->getElements(); for (unsigned i = 0, N = Elements.size(); i < N; ++i) { // FIXME: Should this really be such a loose cast? 
- if (auto *Element = dyn_cast_or_null<DINode>(Elements[i])) + if (auto *Element = dyn_cast_or_null<DINode>(Elements[i])) { if (Element->getTag() == dwarf::DW_TAG_subrange_type) constructSubrangeDIE(Buffer, cast<DISubrange>(Element), IdxTy); + else if (Element->getTag() == dwarf::DW_TAG_generic_subrange) + constructGenericSubrangeDIE(Buffer, cast<DIGenericSubrange>(Element), + IdxTy); + } } } void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) { const DIType *DTy = CTy->getBaseType(); - bool IsUnsigned = DTy && isUnsignedDIType(DD, DTy); + bool IsUnsigned = DTy && DD->isUnsignedDIType(DTy); if (DTy) { if (DD->getDwarfVersion() >= 3) addType(Buffer, DTy); @@ -1664,15 +1675,15 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) { void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) { // Emit size of content not including length itself - Asm->OutStreamer->AddComment("Length of Unit"); if (!DD->useSectionsAsReferences()) { StringRef Prefix = isDwoUnit() ? "debug_info_dwo_" : "debug_info_"; MCSymbol *BeginLabel = Asm->createTempSymbol(Prefix + "start"); EndLabel = Asm->createTempSymbol(Prefix + "end"); - Asm->emitLabelDifference(EndLabel, BeginLabel, 4); + Asm->emitDwarfUnitLength(EndLabel, BeginLabel, "Length of Unit"); Asm->OutStreamer->emitLabel(BeginLabel); } else - Asm->emitInt32(getHeaderSize() + getUnitDie().getSize()); + Asm->emitDwarfUnitLength(getHeaderSize() + getUnitDie().getSize(), + "Length of Unit"); Asm->OutStreamer->AddComment("DWARF version number"); unsigned Version = DD->getDwarfVersion(); @@ -1692,7 +1703,7 @@ void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) { Asm->OutStreamer->AddComment("Offset Into Abbrev. 
Section"); const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); if (UseOffsets) - Asm->emitInt32(0); + Asm->emitDwarfLengthOrOffset(0); else Asm->emitDwarfSymbolReference( TLOF.getDwarfAbbrevSection()->getBeginSymbol(), false); @@ -1711,16 +1722,14 @@ void DwarfTypeUnit::emitHeader(bool UseOffsets) { Asm->OutStreamer->emitIntValue(TypeSignature, sizeof(TypeSignature)); Asm->OutStreamer->AddComment("Type DIE Offset"); // In a skeleton type unit there is no type DIE so emit a zero offset. - Asm->OutStreamer->emitIntValue(Ty ? Ty->getOffset() : 0, - sizeof(Ty->getOffset())); + Asm->emitDwarfLengthOrOffset(Ty ? Ty->getOffset() : 0); } DIE::value_iterator DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, const MCSymbol *Lo) { return Die.addValue(DIEValueAllocator, Attribute, - DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset - : dwarf::DW_FORM_data4, + DD->getDwarfSectionOffsetForm(), new (DIEValueAllocator) DIEDelta(Hi, Lo)); } @@ -1728,10 +1737,7 @@ DIE::value_iterator DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Label, const MCSymbol *Sec) { if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - return addLabel(Die, Attribute, - DD->getDwarfVersion() >= 4 ? 
dwarf::DW_FORM_sec_offset - : dwarf::DW_FORM_data4, - Label); + return addLabel(Die, Attribute, DD->getDwarfSectionOffsetForm(), Label); return addSectionDelta(Die, Attribute, Label, Sec); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 34f3a34ed336..5c643760fd56 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -16,22 +16,19 @@ #include "DwarfDebug.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/StringMap.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" -#include "llvm/IR/DIBuilder.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/MC/MCDwarf.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSection.h" +#include <string> namespace llvm { -class MachineOperand; -class ConstantInt; class ConstantFP; +class ConstantInt; class DbgVariable; class DwarfCompileUnit; +class MachineOperand; +class MCDwarfDwoLineTable; +class MCSymbol; //===----------------------------------------------------------------------===// /// This dwarf writer support class manages information associated with a @@ -77,7 +74,6 @@ protected: bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie); - bool shareAcrossDWOCUs() const; bool isShareableAcrossCUs(const DINode *D) const; public: @@ -86,8 +82,7 @@ public: MCSymbol *getEndLabel() const { return EndLabel; } uint16_t getLanguage() const { return CUNode->getSourceLanguage(); } const DICompileUnit *getCUNode() const { return CUNode; } - - uint16_t getDwarfVersion() const { return DD->getDwarfVersion(); } + DwarfDebug &getDwarfDebug() const { return *DD; } /// Return true if this compile unit has something to write out. bool hasContent() const { return getUnitDie().hasChildren(); } @@ -195,7 +190,6 @@ public: void addSourceLine(DIE &Die, const DIObjCProperty *Ty); /// Add constant value entry in variable DIE. 
- void addConstantValue(DIE &Die, const MachineOperand &MO, const DIType *Ty); void addConstantValue(DIE &Die, const ConstantInt *CI, const DIType *Ty); void addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty); void addConstantValue(DIE &Die, const APInt &Val, bool Unsigned); @@ -203,7 +197,6 @@ public: void addConstantValue(DIE &Die, bool Unsigned, uint64_t Val); /// Add constant value entry in variable DIE. - void addConstantFPValue(DIE &Die, const MachineOperand &MO); void addConstantFPValue(DIE &Die, const ConstantFP *CFP); /// Add a linkage name, if it isn't empty. @@ -255,9 +248,9 @@ public: /// Compute the size of a header for this unit, not including the initial /// length field. virtual unsigned getHeaderSize() const { - return sizeof(int16_t) + // DWARF version number - sizeof(int32_t) + // Offset Into Abbrev. Section - sizeof(int8_t) + // Pointer Size (in bytes) + return sizeof(int16_t) + // DWARF version number + Asm->getDwarfOffsetByteSize() + // Offset Into Abbrev. Section + sizeof(int8_t) + // Pointer Size (in bytes) (DD->getDwarfVersion() >= 5 ? sizeof(int8_t) : 0); // DWARF v5 unit type } @@ -284,10 +277,6 @@ public: const MCSymbol *Label, const MCSymbol *Sec); - /// If the \p File has an MD5 checksum, return it as an MD5Result - /// allocated in the MCContext. - Optional<MD5::MD5Result> getMD5AsBytes(const DIFile *File) const; - /// Get context owner's DIE. 
DIE *createTypeDIE(const DICompositeType *Ty); @@ -306,9 +295,12 @@ protected: private: void constructTypeDIE(DIE &Buffer, const DIBasicType *BTy); + void constructTypeDIE(DIE &Buffer, const DIStringType *BTy); void constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy); void constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy); void constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, DIE *IndexTy); + void constructGenericSubrangeDIE(DIE &Buffer, const DIGenericSubrange *SR, + DIE *IndexTy); void constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy); void constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy); DIE &constructMemberDIE(DIE &Buffer, const DIDerivedType *DT); @@ -361,7 +353,7 @@ public: void emitHeader(bool UseOffsets) override; unsigned getHeaderSize() const override { return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature - sizeof(uint32_t); // Type DIE Offset + Asm->getDwarfOffsetByteSize(); // Type DIE Offset } void addGlobalName(StringRef Name, const DIE &Die, const DIScope *Context) override; diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 99ee4567fa58..2ffe8a7b0469 100644 --- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -44,15 +44,9 @@ EHStreamer::~EHStreamer() = default; unsigned EHStreamer::sharedTypeIDs(const LandingPadInfo *L, const LandingPadInfo *R) { const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds; - unsigned LSize = LIds.size(), RSize = RIds.size(); - unsigned MinSize = LSize < RSize ? 
LSize : RSize; - unsigned Count = 0; - - for (; Count != MinSize; ++Count) - if (LIds[Count] != RIds[Count]) - return Count; - - return Count; + return std::mismatch(LIds.begin(), LIds.end(), RIds.begin(), RIds.end()) + .first - + LIds.begin(); } /// Compute the actions table and gather the first action index for each landing @@ -220,15 +214,30 @@ void EHStreamer::computePadMap( /// the landing pad and the action. Calls marked 'nounwind' have no entry and /// must not be contained in the try-range of any entry - they form gaps in the /// table. Entries must be ordered by try-range address. -void EHStreamer:: -computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, - const SmallVectorImpl<const LandingPadInfo *> &LandingPads, - const SmallVectorImpl<unsigned> &FirstActions) { +/// +/// Call-sites are split into one or more call-site ranges associated with +/// different sections of the function. +/// +/// - Without -basic-block-sections, all call-sites are grouped into one +/// call-site-range corresponding to the function section. +/// +/// - With -basic-block-sections, one call-site range is created for each +/// section, with its FragmentBeginLabel and FragmentEndLabel respectively +// set to the beginning and ending of the corresponding section and its +// ExceptionLabel set to the exception symbol dedicated for this section. +// Later, one LSDA header will be emitted for each call-site range with its +// call-sites following. The action table and type info table will be +// shared across all ranges. +void EHStreamer::computeCallSiteTable( + SmallVectorImpl<CallSiteEntry> &CallSites, + SmallVectorImpl<CallSiteRange> &CallSiteRanges, + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + const SmallVectorImpl<unsigned> &FirstActions) { RangeMapType PadMap; computePadMap(LandingPads, PadMap); // The end label of the previous invoke or nounwind try-range. 
- MCSymbol *LastLabel = nullptr; + MCSymbol *LastLabel = Asm->getFunctionBegin(); // Whether there is a potentially throwing instruction (currently this means // an ordinary call) between the end of the previous try-range and now. @@ -241,6 +250,21 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, // Visit all instructions in order of address. for (const auto &MBB : *Asm->MF) { + if (&MBB == &Asm->MF->front() || MBB.isBeginSection()) { + // We start a call-site range upon function entry and at the beginning of + // every basic block section. + CallSiteRanges.push_back( + {Asm->MBBSectionRanges[MBB.getSectionIDNum()].BeginLabel, + Asm->MBBSectionRanges[MBB.getSectionIDNum()].EndLabel, + Asm->getMBBExceptionSym(MBB), CallSites.size()}); + PreviousIsInvoke = false; + SawPotentiallyThrowing = false; + LastLabel = nullptr; + } + + if (MBB.isEHPad()) + CallSiteRanges.back().IsLPRange = true; + for (const auto &MI : MBB) { if (!MI.isEHLabel()) { if (MI.isCall()) @@ -264,13 +288,14 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] && "Inconsistent landing pad map!"); - // For Dwarf exception handling (SjLj handling doesn't use this). If some - // instruction between the previous try-range and this one may throw, - // create a call-site entry with no landing pad for the region between the - // try-ranges. - if (SawPotentiallyThrowing && Asm->MAI->usesCFIForEH()) { - CallSiteEntry Site = { LastLabel, BeginLabel, nullptr, 0 }; - CallSites.push_back(Site); + // For Dwarf and AIX exception handling (SjLj handling doesn't use this). + // If some instruction between the previous try-range and this one may + // throw, create a call-site entry with no landing pad for the region + // between the try-ranges. 
+ if (SawPotentiallyThrowing && + (Asm->MAI->usesCFIForEH() || + Asm->MAI->getExceptionHandlingType() == ExceptionHandling::AIX)) { + CallSites.push_back({LastLabel, BeginLabel, nullptr, 0}); PreviousIsInvoke = false; } @@ -313,14 +338,21 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, PreviousIsInvoke = true; } } - } - // If some instruction between the previous try-range and the end of the - // function may throw, create a call-site entry with no landing pad for the - // region following the try-range. - if (SawPotentiallyThrowing && !IsSJLJ) { - CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 }; - CallSites.push_back(Site); + // We end the call-site range upon function exit and at the end of every + // basic block section. + if (&MBB == &Asm->MF->back() || MBB.isEndSection()) { + // If some instruction between the previous try-range and the end of the + // function may throw, create a call-site entry with no landing pad for + // the region following the try-range. + if (SawPotentiallyThrowing && !IsSJLJ) { + CallSiteEntry Site = {LastLabel, CallSiteRanges.back().FragmentEndLabel, + nullptr, 0}; + CallSites.push_back(Site); + SawPotentiallyThrowing = false; + } + CallSiteRanges.back().CallSiteEndIdx = CallSites.size(); + } } } @@ -371,19 +403,25 @@ MCSymbol *EHStreamer::emitExceptionTable() { SmallVector<unsigned, 64> FirstActions; computeActionsTable(LandingPads, Actions, FirstActions); - // Compute the call-site table. + // Compute the call-site table and call-site ranges. Normally, there is only + // one call-site-range which covers the whole function. With + // -basic-block-sections, there is one call-site-range per basic block + // section.
SmallVector<CallSiteEntry, 64> CallSites; - computeCallSiteTable(CallSites, LandingPads, FirstActions); + SmallVector<CallSiteRange, 4> CallSiteRanges; + computeCallSiteTable(CallSites, CallSiteRanges, LandingPads, FirstActions); bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; bool IsWasm = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Wasm; + bool HasLEB128Directives = Asm->MAI->hasLEB128Directives(); unsigned CallSiteEncoding = IsSJLJ ? static_cast<unsigned>(dwarf::DW_EH_PE_udata4) : Asm->getObjFileLowering().getCallSiteEncoding(); bool HaveTTData = !TypeInfos.empty() || !FilterIds.empty(); // Type infos. - MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection(); + MCSection *LSDASection = + Asm->getObjFileLowering().getSectionForLSDA(MF->getFunction(), Asm->TM); unsigned TTypeEncoding; if (!HaveTTData) { @@ -433,35 +471,122 @@ MCSymbol *EHStreamer::emitExceptionTable() { Asm->OutContext.getOrCreateSymbol(Twine("GCC_except_table")+ Twine(Asm->getFunctionNumber())); Asm->OutStreamer->emitLabel(GCCETSym); - Asm->OutStreamer->emitLabel(Asm->getCurExceptionSym()); - - // Emit the LSDA header. - Asm->emitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); - Asm->emitEncodingByte(TTypeEncoding, "@TType"); + MCSymbol *CstEndLabel = Asm->createTempSymbol( + CallSiteRanges.size() > 1 ? "action_table_base" : "cst_end"); MCSymbol *TTBaseLabel = nullptr; - if (HaveTTData) { - // N.B.: There is a dependency loop between the size of the TTBase uleb128 - // here and the amount of padding before the aligned type table. The - // assembler must sometimes pad this uleb128 or insert extra padding before - // the type table. See PR35809 or GNU as bug 4029. 
- MCSymbol *TTBaseRefLabel = Asm->createTempSymbol("ttbaseref"); + if (HaveTTData) TTBaseLabel = Asm->createTempSymbol("ttbase"); - Asm->emitLabelDifferenceAsULEB128(TTBaseLabel, TTBaseRefLabel); - Asm->OutStreamer->emitLabel(TTBaseRefLabel); - } - bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); + const bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); + + // Helper for emitting references (offsets) for type table and the end of the + // call-site table (which marks the beginning of the action table). + // * For Itanium, these references will be emitted for every callsite range. + // * For SJLJ and Wasm, they will be emitted only once in the LSDA header. + auto EmitTypeTableRefAndCallSiteTableEndRef = [&]() { + Asm->emitEncodingByte(TTypeEncoding, "@TType"); + if (HaveTTData) { + // N.B.: There is a dependency loop between the size of the TTBase uleb128 + // here and the amount of padding before the aligned type table. The + // assembler must sometimes pad this uleb128 or insert extra padding + // before the type table. See PR35809 or GNU as bug 4029. + MCSymbol *TTBaseRefLabel = Asm->createTempSymbol("ttbaseref"); + Asm->emitLabelDifferenceAsULEB128(TTBaseLabel, TTBaseRefLabel); + Asm->OutStreamer->emitLabel(TTBaseRefLabel); + } + + // The Action table follows the call-site table. So we emit the + // label difference from here (start of the call-site table for SJLJ and + // Wasm, and start of a call-site range for Itanium) to the end of the + // whole call-site table (end of the last call-site range for Itanium). + MCSymbol *CstBeginLabel = Asm->createTempSymbol("cst_begin"); + Asm->emitEncodingByte(CallSiteEncoding, "Call site"); + Asm->emitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel); + Asm->OutStreamer->emitLabel(CstBeginLabel); + }; + + // An alternative path to EmitTypeTableRefAndCallSiteTableEndRef. + // For some platforms, the system assembler does not accept the form of + // `.uleb128 label2 - label1`. 
In those situations, we would need to calculate + // the size between label1 and label2 manually. + // In this case, we would need to calculate the LSDA size and the call + // site table size. + auto EmitTypeTableOffsetAndCallSiteTableOffset = [&]() { + assert(CallSiteEncoding == dwarf::DW_EH_PE_udata4 && !HasLEB128Directives && + "Targets supporting .uleb128 do not need to take this path."); + if (CallSiteRanges.size() > 1) + report_fatal_error( + "-fbasic-block-sections is not yet supported on " + "platforms that do not have general LEB128 directive support."); + + uint64_t CallSiteTableSize = 0; + const CallSiteRange &CSRange = CallSiteRanges.back(); + for (size_t CallSiteIdx = CSRange.CallSiteBeginIdx; + CallSiteIdx < CSRange.CallSiteEndIdx; ++CallSiteIdx) { + const CallSiteEntry &S = CallSites[CallSiteIdx]; + // Each call site entry consists of 3 udata4 fields (12 bytes) and + // 1 ULEB128 field. + CallSiteTableSize += 12 + getULEB128Size(S.Action); + assert(isUInt<32>(CallSiteTableSize) && "CallSiteTableSize overflows."); + } + + Asm->emitEncodingByte(TTypeEncoding, "@TType"); + if (HaveTTData) { + const unsigned ByteSizeOfCallSiteOffset = + getULEB128Size(CallSiteTableSize); + uint64_t ActionTableSize = 0; + for (const ActionEntry &Action : Actions) { + // Each action entry consists of two SLEB128 fields. + ActionTableSize += getSLEB128Size(Action.ValueForTypeID) + + getSLEB128Size(Action.NextAction); + assert(isUInt<32>(ActionTableSize) && "ActionTableSize overflows."); + } + + const unsigned TypeInfoSize = + Asm->GetSizeOfEncodedValue(TTypeEncoding) * MF->getTypeInfos().size(); + + const uint64_t LSDASizeBeforeAlign = + 1 // Call site encoding byte. + + ByteSizeOfCallSiteOffset // ULEB128 encoding of CallSiteTableSize. + + CallSiteTableSize // Call site table content. + + ActionTableSize; // Action table content. 
+ + const uint64_t LSDASizeWithoutAlign = LSDASizeBeforeAlign + TypeInfoSize; + const unsigned ByteSizeOfLSDAWithoutAlign = + getULEB128Size(LSDASizeWithoutAlign); + const uint64_t DisplacementBeforeAlign = + 2 // LPStartEncoding and TypeTableEncoding. + + ByteSizeOfLSDAWithoutAlign + LSDASizeBeforeAlign; + + // The type info area starts with 4 byte alignment. + const unsigned NeedAlignVal = (4 - DisplacementBeforeAlign % 4) % 4; + uint64_t LSDASizeWithAlign = LSDASizeWithoutAlign + NeedAlignVal; + const unsigned ByteSizeOfLSDAWithAlign = + getULEB128Size(LSDASizeWithAlign); + + // The LSDASizeWithAlign could use 1 byte less padding for alignment + // when the data we use to represent the LSDA Size "needs" to be 1 byte + // larger than the one previously calculated without alignment. + if (ByteSizeOfLSDAWithAlign > ByteSizeOfLSDAWithoutAlign) + LSDASizeWithAlign -= 1; + + Asm->OutStreamer->emitULEB128IntValue(LSDASizeWithAlign, + ByteSizeOfLSDAWithAlign); + } - // Emit the landing pad call site table. - MCSymbol *CstBeginLabel = Asm->createTempSymbol("cst_begin"); - MCSymbol *CstEndLabel = Asm->createTempSymbol("cst_end"); - Asm->emitEncodingByte(CallSiteEncoding, "Call site"); - Asm->emitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel); - Asm->OutStreamer->emitLabel(CstBeginLabel); + Asm->emitEncodingByte(CallSiteEncoding, "Call site"); + Asm->OutStreamer->emitULEB128IntValue(CallSiteTableSize); + }; // SjLj / Wasm Exception handling if (IsSJLJ || IsWasm) { + Asm->OutStreamer->emitLabel(Asm->getMBBExceptionSym(Asm->MF->front())); + + // emit the LSDA header. 
+ Asm->emitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); + EmitTypeTableRefAndCallSiteTableEndRef(); + unsigned idx = 0; for (SmallVectorImpl<CallSiteEntry>::const_iterator I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) { @@ -486,6 +611,7 @@ MCSymbol *EHStreamer::emitExceptionTable() { } Asm->emitULEB128(S.Action); } + Asm->OutStreamer->emitLabel(CstEndLabel); } else { // Itanium LSDA exception handling @@ -507,57 +633,127 @@ MCSymbol *EHStreamer::emitExceptionTable() { // A missing entry in the call-site table indicates that a call is not // supposed to throw. + assert(CallSiteRanges.size() != 0 && "No call-site ranges!"); + + // There should be only one call-site range which includes all the landing + // pads. Find that call-site range here. + const CallSiteRange *LandingPadRange = nullptr; + for (const CallSiteRange &CSRange : CallSiteRanges) { + if (CSRange.IsLPRange) { + assert(LandingPadRange == nullptr && + "All landing pads must be in a single callsite range."); + LandingPadRange = &CSRange; + } + } + + // The call-site table is split into its call-site ranges, each being + // emitted as: + // [ LPStartEncoding | LPStart ] + // [ TypeTableEncoding | TypeTableOffset ] + // [ CallSiteEncoding | CallSiteTableEndOffset ] + // cst_begin -> { call-site entries contained in this range } + // + // and is followed by the next call-site range. + // + // For each call-site range, CallSiteTableEndOffset is computed as the + // difference between cst_begin of that range and the last call-site-table's + // end label. This offset is used to find the action table. + unsigned Entry = 0; - for (SmallVectorImpl<CallSiteEntry>::const_iterator - I = CallSites.begin(), E = CallSites.end(); I != E; ++I) { - const CallSiteEntry &S = *I; + for (const CallSiteRange &CSRange : CallSiteRanges) { + if (CSRange.CallSiteBeginIdx != 0) { + // Align the call-site range for all ranges except the first. 
The + // first range is already aligned due to the exception table alignment. + Asm->emitAlignment(Align(4)); + } + Asm->OutStreamer->emitLabel(CSRange.ExceptionLabel); + + // Emit the LSDA header. + // If only one call-site range exists, LPStart is omitted as it is the + // same as the function entry. + if (CallSiteRanges.size() == 1) { + Asm->emitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); + } else if (!Asm->isPositionIndependent()) { + // For more than one call-site ranges, LPStart must be explicitly + // specified. + // For non-PIC we can simply use the absolute value. + Asm->emitEncodingByte(dwarf::DW_EH_PE_absptr, "@LPStart"); + Asm->OutStreamer->emitSymbolValue(LandingPadRange->FragmentBeginLabel, + Asm->MAI->getCodePointerSize()); + } else { + // For PIC mode, we Emit a PC-relative address for LPStart. + Asm->emitEncodingByte(dwarf::DW_EH_PE_pcrel, "@LPStart"); + MCContext &Context = Asm->OutStreamer->getContext(); + MCSymbol *Dot = Context.createTempSymbol(); + Asm->OutStreamer->emitLabel(Dot); + Asm->OutStreamer->emitValue( + MCBinaryExpr::createSub( + MCSymbolRefExpr::create(LandingPadRange->FragmentBeginLabel, + Context), + MCSymbolRefExpr::create(Dot, Context), Context), + Asm->MAI->getCodePointerSize()); + } + + if (HasLEB128Directives) + EmitTypeTableRefAndCallSiteTableEndRef(); + else + EmitTypeTableOffsetAndCallSiteTableOffset(); + + for (size_t CallSiteIdx = CSRange.CallSiteBeginIdx; + CallSiteIdx != CSRange.CallSiteEndIdx; ++CallSiteIdx) { + const CallSiteEntry &S = CallSites[CallSiteIdx]; + + MCSymbol *EHFuncBeginSym = CSRange.FragmentBeginLabel; + MCSymbol *EHFuncEndSym = CSRange.FragmentEndLabel; - MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin(); - - MCSymbol *BeginLabel = S.BeginLabel; - if (!BeginLabel) - BeginLabel = EHFuncBeginSym; - MCSymbol *EndLabel = S.EndLabel; - if (!EndLabel) - EndLabel = Asm->getFunctionEnd(); - - // Offset of the call site relative to the start of the procedure. 
- if (VerboseAsm) - Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<"); - Asm->emitCallSiteOffset(BeginLabel, EHFuncBeginSym, CallSiteEncoding); - if (VerboseAsm) - Asm->OutStreamer->AddComment(Twine(" Call between ") + - BeginLabel->getName() + " and " + - EndLabel->getName()); - Asm->emitCallSiteOffset(EndLabel, BeginLabel, CallSiteEncoding); - - // Offset of the landing pad relative to the start of the procedure. - if (!S.LPad) { + MCSymbol *BeginLabel = S.BeginLabel; + if (!BeginLabel) + BeginLabel = EHFuncBeginSym; + MCSymbol *EndLabel = S.EndLabel; + if (!EndLabel) + EndLabel = EHFuncEndSym; + + // Offset of the call site relative to the start of the procedure. if (VerboseAsm) - Asm->OutStreamer->AddComment(" has no landing pad"); - Asm->emitCallSiteValue(0, CallSiteEncoding); - } else { + Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + + " <<"); + Asm->emitCallSiteOffset(BeginLabel, EHFuncBeginSym, CallSiteEncoding); if (VerboseAsm) - Asm->OutStreamer->AddComment(Twine(" jumps to ") + - S.LPad->LandingPadLabel->getName()); - Asm->emitCallSiteOffset(S.LPad->LandingPadLabel, EHFuncBeginSym, - CallSiteEncoding); - } + Asm->OutStreamer->AddComment(Twine(" Call between ") + + BeginLabel->getName() + " and " + + EndLabel->getName()); + Asm->emitCallSiteOffset(EndLabel, BeginLabel, CallSiteEncoding); + + // Offset of the landing pad relative to the start of the landing pad + // fragment. + if (!S.LPad) { + if (VerboseAsm) + Asm->OutStreamer->AddComment(" has no landing pad"); + Asm->emitCallSiteValue(0, CallSiteEncoding); + } else { + if (VerboseAsm) + Asm->OutStreamer->AddComment(Twine(" jumps to ") + + S.LPad->LandingPadLabel->getName()); + Asm->emitCallSiteOffset(S.LPad->LandingPadLabel, + LandingPadRange->FragmentBeginLabel, + CallSiteEncoding); + } - // Offset of the first associated action record, relative to the start of - // the action table. 
This value is biased by 1 (1 indicates the start of - // the action table), and 0 indicates that there are no actions. - if (VerboseAsm) { - if (S.Action == 0) - Asm->OutStreamer->AddComment(" On action: cleanup"); - else - Asm->OutStreamer->AddComment(" On action: " + - Twine((S.Action - 1) / 2 + 1)); + // Offset of the first associated action record, relative to the start + // of the action table. This value is biased by 1 (1 indicates the start + // of the action table), and 0 indicates that there are no actions. + if (VerboseAsm) { + if (S.Action == 0) + Asm->OutStreamer->AddComment(" On action: cleanup"); + else + Asm->OutStreamer->AddComment(" On action: " + + Twine((S.Action - 1) / 2 + 1)); + } + Asm->emitULEB128(S.Action); } - Asm->emitULEB128(S.Action); } + Asm->OutStreamer->emitLabel(CstEndLabel); } - Asm->OutStreamer->emitLabel(CstEndLabel); // Emit the Action Table. int Entry = 0; @@ -587,15 +783,12 @@ MCSymbol *EHStreamer::emitExceptionTable() { Asm->emitSLEB128(Action.ValueForTypeID); // Action Record - // - // Self-relative signed displacement in bytes of the next action record, - // or 0 if there is no next action record. if (VerboseAsm) { - if (Action.NextAction == 0) { + if (Action.Previous == unsigned(-1)) { Asm->OutStreamer->AddComment(" No further actions"); } else { - unsigned NextAction = Entry + (Action.NextAction + 1) / 2; - Asm->OutStreamer->AddComment(" Continue to action "+Twine(NextAction)); + Asm->OutStreamer->AddComment(" Continue to action " + + Twine(Action.Previous + 1)); } } Asm->emitSLEB128(Action.NextAction); @@ -615,7 +808,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) { const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos(); const std::vector<unsigned> &FilterIds = MF->getFilterIds(); - bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); + const bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); int Entry = 0; // Emit the Catch TypeInfos. 
diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h index e62cf17a05d4..234e62506a56 100644 --- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -69,23 +69,48 @@ protected: unsigned Action; }; + /// Structure describing a contiguous range of call-sites which reside + /// in the same procedure fragment. With -fbasic-block-sections, there will + /// be one call site range per basic block section. Otherwise, we will have + /// one call site range containing all the call sites in the function. + struct CallSiteRange { + // Symbol marking the beginning of the precedure fragment. + MCSymbol *FragmentBeginLabel = nullptr; + // Symbol marking the end of the procedure fragment. + MCSymbol *FragmentEndLabel = nullptr; + // LSDA symbol for this call-site range. + MCSymbol *ExceptionLabel = nullptr; + // Index of the first call-site entry in the call-site table which + // belongs to this range. + size_t CallSiteBeginIdx = 0; + // Index just after the last call-site entry in the call-site table which + // belongs to this range. + size_t CallSiteEndIdx = 0; + // Whether this is the call-site range containing all the landing pads. + bool IsLPRange = false; + }; + /// Compute the actions table and gather the first action index for each /// landing pad site. - void computeActionsTable(const SmallVectorImpl<const LandingPadInfo *> &LandingPads, - SmallVectorImpl<ActionEntry> &Actions, - SmallVectorImpl<unsigned> &FirstActions); + void computeActionsTable( + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + SmallVectorImpl<ActionEntry> &Actions, + SmallVectorImpl<unsigned> &FirstActions); void computePadMap(const SmallVectorImpl<const LandingPadInfo *> &LandingPads, RangeMapType &PadMap); - /// Compute the call-site table. The entry for an invoke has a try-range - /// containing the call, a non-zero landing pad and an appropriate action. 
- /// The entry for an ordinary call has a try-range containing the call and - /// zero for the landing pad and the action. Calls marked 'nounwind' have - /// no entry and must not be contained in the try-range of any entry - they - /// form gaps in the table. Entries must be ordered by try-range address. + /// Compute the call-site table and the call-site ranges. The entry for an + /// invoke has a try-range containing the call, a non-zero landing pad and an + /// appropriate action. The entry for an ordinary call has a try-range + /// containing the call and zero for the landing pad and the action. Calls + /// marked 'nounwind' have no entry and must not be contained in the try-range + /// of any entry - they form gaps in the table. Entries must be ordered by + /// try-range address. CallSiteRanges vector is only populated for Itanium + /// exception handling. virtual void computeCallSiteTable( SmallVectorImpl<CallSiteEntry> &CallSites, + SmallVectorImpl<CallSiteRange> &CallSiteRanges, const SmallVectorImpl<const LandingPadInfo *> &LandingPads, const SmallVectorImpl<unsigned> &FirstActions); diff --git a/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 8fa83f515910..354b638b47a2 100644 --- a/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -145,9 +145,10 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, report_fatal_error("Function '" + FI.getFunction().getName() + "' is too large for the ocaml GC! 
" "Frame size " + - Twine(FrameSize) + ">= 65536.\n" - "(" + - Twine(uintptr_t(&FI)) + ")"); + Twine(FrameSize) + + ">= 65536.\n" + "(" + + Twine(reinterpret_cast<uintptr_t>(&FI)) + ")"); } AP.OutStreamer->AddComment("live roots for " + diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp new file mode 100644 index 000000000000..e8636052c54c --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp @@ -0,0 +1,84 @@ +//===- llvm/CodeGen/PseudoProbePrinter.cpp - Pseudo Probe Emission -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing pseudo probe info into asm files. +// +//===----------------------------------------------------------------------===// + +#include "PseudoProbePrinter.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PseudoProbe.h" +#include "llvm/MC/MCPseudoProbe.h" +#include "llvm/MC/MCStreamer.h" + +using namespace llvm; + +#define DEBUG_TYPE "pseudoprobe" + +PseudoProbeHandler::~PseudoProbeHandler() = default; + +PseudoProbeHandler::PseudoProbeHandler(AsmPrinter *A, Module *M) : Asm(A) { + NamedMDNode *FuncInfo = M->getNamedMetadata(PseudoProbeDescMetadataName); + assert(FuncInfo && "Pseudo probe descriptors are missing"); + for (const auto *Operand : FuncInfo->operands()) { + const auto *MD = cast<MDNode>(Operand); + auto GUID = + mdconst::dyn_extract<ConstantInt>(MD->getOperand(0))->getZExtValue(); + auto Name = cast<MDString>(MD->getOperand(2))->getString(); + // We may see pairs with same name but different GUIDs here in LTO mode, due + // to static same-named functions inlined from other 
modules into this + // module. Function profiles with the same name will be merged no matter + // whether they are collected on the same function. Therefore we just pick + // up the last <Name, GUID> pair here to represent the same-named function + // collection and all probes from the collection will be merged into a + // single profile eventually. + Names[Name] = GUID; + } + + LLVM_DEBUG(dump()); +} + +void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index, + uint64_t Type, uint64_t Attr, + const DILocation *DebugLoc) { + // Gather all the inlined-at nodes. + // When it's done ReversedInlineStack looks like ([66, B], [88, A]) + // which means, Function A inlines function B at calliste with a probe id 88, + // and B inlines C at probe 66 where C is represented by Guid. + SmallVector<InlineSite, 8> ReversedInlineStack; + auto *InlinedAt = DebugLoc ? DebugLoc->getInlinedAt() : nullptr; + while (InlinedAt) { + const DISubprogram *SP = InlinedAt->getScope()->getSubprogram(); + // Use linkage name for C++ if possible. 
+ auto Name = SP->getLinkageName(); + if (Name.empty()) + Name = SP->getName(); + assert(Names.count(Name) && "Pseudo probe descriptor missing for function"); + uint64_t CallerGuid = Names[Name]; + uint64_t CallerProbeId = PseudoProbeDwarfDiscriminator::extractProbeIndex( + InlinedAt->getDiscriminator()); + ReversedInlineStack.emplace_back(CallerGuid, CallerProbeId); + InlinedAt = InlinedAt->getInlinedAt(); + } + + SmallVector<InlineSite, 8> InlineStack(ReversedInlineStack.rbegin(), + ReversedInlineStack.rend()); + Asm->OutStreamer->emitPseudoProbe(Guid, Index, Type, Attr, InlineStack); +} + +#ifndef NDEBUG +void PseudoProbeHandler::dump() const { + dbgs() << "\n=============================\n"; + dbgs() << "\nFunction Name to GUID map:\n"; + dbgs() << "\n=============================\n"; + for (const auto &Item : Names) + dbgs() << "Func: " << Item.first << " GUID: " << Item.second << "\n"; +} +#endif diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h new file mode 100644 index 000000000000..bea07ceae9d4 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h @@ -0,0 +1,53 @@ +//===- PseudoProbePrinter.h - Pseudo probe encoding support -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing pseudo probe info into asm files. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_PSEUDOPROBEPRINTER_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_PSEUDOPROBEPRINTER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/AsmPrinterHandler.h" + +namespace llvm { + +class AsmPrinter; +class MCStreamer; +class Module; +class DILocation; + +class PseudoProbeHandler : public AsmPrinterHandler { + // Target of pseudo probe emission. + AsmPrinter *Asm; + // Name to GUID map + DenseMap<StringRef, uint64_t> Names; + +public: + PseudoProbeHandler(AsmPrinter *A, Module *M); + ~PseudoProbeHandler() override; + + void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type, + uint64_t Attr, const DILocation *DebugLoc); + + // Unused. + void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} + void endModule() override {} + void beginFunction(const MachineFunction *MF) override {} + void endFunction(const MachineFunction *MF) override {} + void beginInstruction(const MachineInstr *MI) override {} + void endInstruction() override {} + +#ifndef NDEBUG + void dump() const; +#endif +}; + +} // namespace llvm +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_PSEUDOPROBEPRINTER_H diff --git a/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp b/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp index baef4d2cc849..352a33e8639d 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp @@ -18,11 +18,11 @@ using namespace llvm; void WasmException::endModule() { - // This is the symbol used in 'throw' and 'br_on_exn' instruction to denote - // this is a C++ exception. This symbol has to be emitted somewhere once in - // the module. Check if the symbol has already been created, i.e., we have at - // least one 'throw' or 'br_on_exn' instruction in the module, and emit the - // symbol only if so. + // This is the symbol used in 'throw' and 'catch' instruction to denote this + // is a C++ exception. 
This symbol has to be emitted somewhere once in the + // module. Check if the symbol has already been created, i.e., we have at + // least one 'throw' or 'catch' instruction in the module, and emit the symbol + // only if so. SmallString<60> NameStr; Mangler::getNameWithPrefix(NameStr, "__cpp_exception", Asm->getDataLayout()); if (Asm->OutContext.lookupSymbol(NameStr)) { @@ -76,6 +76,7 @@ void WasmException::endFunction(const MachineFunction *MF) { // information. void WasmException::computeCallSiteTable( SmallVectorImpl<CallSiteEntry> &CallSites, + SmallVectorImpl<CallSiteRange> &CallSiteRanges, const SmallVectorImpl<const LandingPadInfo *> &LandingPads, const SmallVectorImpl<unsigned> &FirstActions) { MachineFunction &MF = *Asm->MF; diff --git a/llvm/lib/CodeGen/AsmPrinter/WasmException.h b/llvm/lib/CodeGen/AsmPrinter/WasmException.h index 1893b6b2df43..f06de786bd76 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WasmException.h +++ b/llvm/lib/CodeGen/AsmPrinter/WasmException.h @@ -32,6 +32,7 @@ protected: // Compute the call site table for wasm EH. void computeCallSiteTable( SmallVectorImpl<CallSiteEntry> &CallSites, + SmallVectorImpl<CallSiteRange> &CallSiteRanges, const SmallVectorImpl<const LandingPadInfo *> &LandingPads, const SmallVectorImpl<unsigned> &FirstActions) override; }; diff --git a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp index 914308d9147e..1e3f33e70715 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // This file contains support for writing the metadata for Windows Control Flow -// Guard, including address-taken functions, and valid longjmp targets. +// Guard, including address-taken functions and valid longjmp targets. 
// //===----------------------------------------------------------------------===// @@ -17,8 +17,8 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCStreamer.h" @@ -38,8 +38,7 @@ void WinCFGuard::endFunction(const MachineFunction *MF) { return; // Copy the function's longjmp targets to a module-level list. - LongjmpTargets.insert(LongjmpTargets.end(), MF->getLongjmpTargets().begin(), - MF->getLongjmpTargets().end()); + llvm::append_range(LongjmpTargets, MF->getLongjmpTargets()); } /// Returns true if this function's address is escaped in a way that might make @@ -78,20 +77,50 @@ static bool isPossibleIndirectCallTarget(const Function *F) { return false; } +MCSymbol *WinCFGuard::lookupImpSymbol(const MCSymbol *Sym) { + if (Sym->getName().startswith("__imp_")) + return nullptr; + return Asm->OutContext.lookupSymbol(Twine("__imp_") + Sym->getName()); +} + void WinCFGuard::endModule() { const Module *M = Asm->MMI->getModule(); - std::vector<const Function *> Functions; - for (const Function &F : *M) - if (isPossibleIndirectCallTarget(&F)) - Functions.push_back(&F); - if (Functions.empty() && LongjmpTargets.empty()) + std::vector<const MCSymbol *> GFIDsEntries; + std::vector<const MCSymbol *> GIATsEntries; + for (const Function &F : *M) { + if (isPossibleIndirectCallTarget(&F)) { + // If F is a dllimport and has an "__imp_" symbol already defined, add the + // "__imp_" symbol to the .giats section. + if (F.hasDLLImportStorageClass()) { + if (MCSymbol *impSym = lookupImpSymbol(Asm->getSymbol(&F))) { + GIATsEntries.push_back(impSym); + } + } + // Add the function's symbol to the .gfids section. 
+ // Note: For dllimport functions, MSVC sometimes does not add this symbol + // to the .gfids section, but only adds the corresponding "__imp_" symbol + // to the .giats section. Here we always add the symbol to the .gfids + // section, since this does not introduce security risks. + GFIDsEntries.push_back(Asm->getSymbol(&F)); + } + } + + if (GFIDsEntries.empty() && GIATsEntries.empty() && LongjmpTargets.empty()) return; + + // Emit the symbol index of each GFIDs entry to form the .gfids section. auto &OS = *Asm->OutStreamer; OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection()); - for (const Function *F : Functions) - OS.EmitCOFFSymbolIndex(Asm->getSymbol(F)); + for (const MCSymbol *S : GFIDsEntries) + OS.EmitCOFFSymbolIndex(S); + + // Emit the symbol index of each GIATs entry to form the .giats section. + OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGIATsSection()); + for (const MCSymbol *S : GIATsEntries) { + OS.EmitCOFFSymbolIndex(S); + } - // Emit the symbol index of each longjmp target. + // Emit the symbol index of each longjmp target to form the .gljmp section. OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGLJMPSection()); for (const MCSymbol *S : LongjmpTargets) { OS.EmitCOFFSymbolIndex(S); diff --git a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h index 494a153b05ba..0e472af52c8f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h +++ b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h @@ -24,6 +24,7 @@ class LLVM_LIBRARY_VISIBILITY WinCFGuard : public AsmPrinterHandler { /// Target of directive emission. 
AsmPrinter *Asm; std::vector<const MCSymbol *> LongjmpTargets; + MCSymbol *lookupImpSymbol(const MCSymbol *Sym); public: WinCFGuard(AsmPrinter *A); diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp index cd8077e7d548..3a9c9df79783 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -137,8 +137,8 @@ void WinException::endFunction(const MachineFunction *MF) { endFuncletImpl(); - // endFunclet will emit the necessary .xdata tables for x64 SEH. - if (Per == EHPersonality::MSVC_Win64SEH && MF->hasEHFunclets()) + // endFunclet will emit the necessary .xdata tables for table-based SEH. + if (Per == EHPersonality::MSVC_TableSEH && MF->hasEHFunclets()) return; if (shouldEmitPersonality || shouldEmitLSDA) { @@ -151,7 +151,7 @@ void WinException::endFunction(const MachineFunction *MF) { // Emit the tables appropriate to the personality function in use. If we // don't recognize the personality, assume it uses an Itanium-style LSDA. - if (Per == EHPersonality::MSVC_Win64SEH) + if (Per == EHPersonality::MSVC_TableSEH) emitCSpecificHandlerTable(MF); else if (Per == EHPersonality::MSVC_X86SEH) emitExceptHandlerTable(MF); @@ -258,31 +258,35 @@ void WinException::endFuncletImpl() { if (F.hasPersonalityFn()) Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts()); - // On funclet exit, we emit a fake "function" end marker, so that the call - // to EmitWinEHHandlerData below can calculate the size of the funclet or - // function. - if (isAArch64) { - MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection( - Asm->OutStreamer->getCurrentSectionOnly()); - Asm->OutStreamer->SwitchSection(XData); - } - - // Emit an UNWIND_INFO struct describing the prologue. 
- Asm->OutStreamer->EmitWinEHHandlerData(); - if (Per == EHPersonality::MSVC_CXX && shouldEmitPersonality && !CurrentFuncletEntry->isCleanupFuncletEntry()) { + // Emit an UNWIND_INFO struct describing the prologue. + Asm->OutStreamer->EmitWinEHHandlerData(); + // If this is a C++ catch funclet (or the parent function), // emit a reference to the LSDA for the parent function. StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName()); MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol( Twine("$cppxdata$", FuncLinkageName)); Asm->OutStreamer->emitValue(create32bitRef(FuncInfoXData), 4); - } else if (Per == EHPersonality::MSVC_Win64SEH && MF->hasEHFunclets() && + } else if (Per == EHPersonality::MSVC_TableSEH && MF->hasEHFunclets() && !CurrentFuncletEntry->isEHFuncletEntry()) { + // Emit an UNWIND_INFO struct describing the prologue. + Asm->OutStreamer->EmitWinEHHandlerData(); + // If this is the parent function in Win64 SEH, emit the LSDA immediately // following .seh_handlerdata. emitCSpecificHandlerTable(MF); + } else if (shouldEmitPersonality || shouldEmitLSDA) { + // Emit an UNWIND_INFO struct describing the prologue. + Asm->OutStreamer->EmitWinEHHandlerData(); + // In these cases, no further info is written to the .xdata section + // right here, but is written by e.g. emitExceptionTable in endFunction() + // above. + } else { + // No need to emit the EH handler data right here if nothing needs + // writing to the .xdata section; it will be emitted for all + // functions that need it in the end anyway. 
} // Switch back to the funclet start .text section now that we are done @@ -339,22 +343,24 @@ int WinException::getFrameIndexOffset(int FrameIndex, const TargetFrameLowering &TFI = *Asm->MF->getSubtarget().getFrameLowering(); Register UnusedReg; if (Asm->MAI->usesWindowsCFI()) { - int Offset = + StackOffset Offset = TFI.getFrameIndexReferencePreferSP(*Asm->MF, FrameIndex, UnusedReg, /*IgnoreSPUpdates*/ true); assert(UnusedReg == Asm->MF->getSubtarget() .getTargetLowering() ->getStackPointerRegisterToSaveRestore()); - return Offset; + return Offset.getFixed(); } // For 32-bit, offsets should be relative to the end of the EH registration // node. For 64-bit, it's relative to SP at the end of the prologue. assert(FuncInfo.EHRegNodeEndOffset != INT_MAX); - int Offset = TFI.getFrameIndexReference(*Asm->MF, FrameIndex, UnusedReg); - Offset += FuncInfo.EHRegNodeEndOffset; - return Offset; + StackOffset Offset = TFI.getFrameIndexReference(*Asm->MF, FrameIndex, UnusedReg); + Offset += StackOffset::getFixed(FuncInfo.EHRegNodeEndOffset); + assert(!Offset.getScalable() && + "Frame offsets with a scalable component are not supported"); + return Offset.getFixed(); } namespace { @@ -951,7 +957,7 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo, int FI = FuncInfo.EHRegNodeFrameIndex; if (FI != INT_MAX) { const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); - Offset = TFI->getNonLocalFrameIndexReference(*Asm->MF, FI); + Offset = TFI->getNonLocalFrameIndexReference(*Asm->MF, FI).getFixed(); } MCContext &Ctx = Asm->OutContext; @@ -1015,7 +1021,8 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { Register UnusedReg; const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); int SSPIdx = MFI.getStackProtectorIndex(); - GSCookieOffset = TFI->getFrameIndexReference(*MF, SSPIdx, UnusedReg); + GSCookieOffset = + TFI->getFrameIndexReference(*MF, SSPIdx, UnusedReg).getFixed(); } // Retrieve the EH 
Guard slot. @@ -1025,7 +1032,8 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { Register UnusedReg; const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); int EHGuardIdx = FuncInfo.EHGuardFrameIndex; - EHCookieOffset = TFI->getFrameIndexReference(*MF, EHGuardIdx, UnusedReg); + EHCookieOffset = + TFI->getFrameIndexReference(*MF, EHGuardIdx, UnusedReg).getFixed(); } AddComment("GSCookieOffset"); diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index a5030305435c..4026022caa07 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -1239,7 +1239,8 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Value *NewValueInsert = insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV); Value *StoreSuccess = - TLI->emitStoreConditional(Builder, NewValueInsert, Addr, MemOpOrder); + TLI->emitStoreConditional(Builder, NewValueInsert, PMV.AlignedAddr, + MemOpOrder); StoreSuccess = Builder.CreateICmpEQ( StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); BasicBlock *RetryBB = HasReleasedLoadBB ? 
ReleasedLoadBB : StartBB; @@ -1506,8 +1507,8 @@ void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) { bool expanded = expandAtomicOpToLibcall( I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); - (void)expanded; - assert(expanded && "expandAtomicOpToLibcall shouldn't fail tor Load"); + if (!expanded) + report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load"); } void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) { @@ -1519,8 +1520,8 @@ void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) { bool expanded = expandAtomicOpToLibcall( I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(), nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); - (void)expanded; - assert(expanded && "expandAtomicOpToLibcall shouldn't fail tor Store"); + if (!expanded) + report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store"); } void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) { @@ -1534,8 +1535,8 @@ void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) { I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(), I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(), Libcalls); - (void)expanded; - assert(expanded && "expandAtomicOpToLibcall shouldn't fail tor CAS"); + if (!expanded) + report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS"); } static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) { @@ -1684,6 +1685,11 @@ bool AtomicExpand::expandAtomicOpToLibcall( return false; } + if (!TLI->getLibcallName(RTLibType)) { + // This target does not implement the requested atomic libcall so give up. + return false; + } + // Build up the function call. There's two kinds. First, the sized // variants. 
These calls are going to be one of the following (with // N=1,2,4,8,16): diff --git a/llvm/lib/CodeGen/BBSectionsPrepare.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index a35c4d813acc..7499ea8b42d4 100644 --- a/llvm/lib/CodeGen/BBSectionsPrepare.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -1,4 +1,4 @@ -//===-- BBSectionsPrepare.cpp ---=========---------------------------------===// +//===-- BasicBlockSections.cpp ---=========--------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// BBSectionsPrepare implementation. +// BasicBlockSections implementation. // // The purpose of this pass is to assign sections to basic blocks when // -fbasic-block-sections= option is used. Further, with profile information @@ -48,19 +48,11 @@ // Basic Block Labels // ================== // -// With -fbasic-block-sections=labels, or when a basic block is placed in a -// unique section, it is labelled with a symbol. This allows easy mapping of -// virtual addresses from PMU profiles back to the corresponding basic blocks. -// Since the number of basic blocks is large, the labeling bloats the symbol -// table sizes and the string table sizes significantly. While the binary size -// does increase, it does not affect performance as the symbol table is not -// loaded in memory during run-time. The string table size bloat is kept very -// minimal using a unary naming scheme that uses string suffix compression. The -// basic blocks for function foo are named "a.BB.foo", "aa.BB.foo", ... This -// turns out to be very good for string table sizes and the bloat in the string -// table size for a very large binary is ~8 %. The naming also allows using -// the --symbol-ordering-file option in LLD to arbitrarily reorder the -// sections. 
+// With -fbasic-block-sections=labels, we emit the offsets of BB addresses of +// every function into the .llvm_bb_addr_map section. Along with the function +// symbols, this allows for mapping of virtual addresses in PMU profiles back to +// the corresponding basic blocks. This logic is implemented in AsmPrinter. This +// pass only assigns the BBSectionType of every function to ``labels``. // //===----------------------------------------------------------------------===// @@ -69,6 +61,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/BasicBlockSectionUtils.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -86,6 +79,15 @@ using llvm::StringMap; using llvm::StringRef; using namespace llvm; +// Placing the cold clusters in a separate section mitigates against poor +// profiles and allows optimizations such as hugepage mapping to be applied at a +// section granularity. Defaults to ".text.split." which is recognized by lld +// via the `-z keep-text-section-prefix` flag. +cl::opt<std::string> llvm::BBSectionsColdTextPrefix( + "bbsections-cold-text-prefix", + cl::desc("The text prefix to use for cold basic block clusters"), + cl::init(".text.split."), cl::Hidden); + namespace { // This struct represents the cluster information for a machine basic block. @@ -100,7 +102,7 @@ struct BBClusterInfo { using ProgramBBClusterInfoMapTy = StringMap<SmallVector<BBClusterInfo, 4>>; -class BBSectionsPrepare : public MachineFunctionPass { +class BasicBlockSections : public MachineFunctionPass { public: static char ID; @@ -119,13 +121,13 @@ public: // name for which we have mapping in ProgramBBClusterInfo. 
StringMap<StringRef> FuncAliasMap; - BBSectionsPrepare(const MemoryBuffer *Buf) + BasicBlockSections(const MemoryBuffer *Buf) : MachineFunctionPass(ID), MBuf(Buf) { - initializeBBSectionsPreparePass(*PassRegistry::getPassRegistry()); + initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry()); }; - BBSectionsPrepare() : MachineFunctionPass(ID) { - initializeBBSectionsPreparePass(*PassRegistry::getPassRegistry()); + BasicBlockSections() : MachineFunctionPass(ID) { + initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry()); } StringRef getPassName() const override { @@ -144,8 +146,8 @@ public: } // end anonymous namespace -char BBSectionsPrepare::ID = 0; -INITIALIZE_PASS(BBSectionsPrepare, "bbsections-prepare", +char BasicBlockSections::ID = 0; +INITIALIZE_PASS(BasicBlockSections, "bbsections-prepare", "Prepares for basic block sections, by splitting functions " "into clusters of basic blocks.", false, false) @@ -226,9 +228,9 @@ static bool getBBClusterInfoForFunction( // and "Cold" succeeding all other clusters. // FuncBBClusterInfo represent the cluster information for basic blocks. If this // is empty, it means unique sections for all basic blocks in the function. -static bool assignSectionsAndSortBasicBlocks( - MachineFunction &MF, - const std::vector<Optional<BBClusterInfo>> &FuncBBClusterInfo) { +static void +assignSections(MachineFunction &MF, + const std::vector<Optional<BBClusterInfo>> &FuncBBClusterInfo) { assert(MF.hasBBSections() && "BB Sections is not set for function."); // This variable stores the section ID of the cluster containing eh_pads (if // all eh_pads are one cluster). 
If more than one cluster contain eh_pads, we @@ -271,12 +273,69 @@ static bool assignSectionsAndSortBasicBlocks( for (auto &MBB : MF) if (MBB.isEHPad()) MBB.setSectionID(EHPadsSectionID.getValue()); +} +void llvm::sortBasicBlocksAndUpdateBranches( + MachineFunction &MF, MachineBasicBlockComparator MBBCmp) { SmallVector<MachineBasicBlock *, 4> PreLayoutFallThroughs( MF.getNumBlockIDs()); for (auto &MBB : MF) PreLayoutFallThroughs[MBB.getNumber()] = MBB.getFallThrough(); + MF.sort(MBBCmp); + + // Set IsBeginSection and IsEndSection according to the assigned section IDs. + MF.assignBeginEndSections(); + + // After reordering basic blocks, we must update basic block branches to + // insert explicit fallthrough branches when required and optimize branches + // when possible. + updateBranches(MF, PreLayoutFallThroughs); +} + +// If the exception section begins with a landing pad, that landing pad will +// assume a zero offset (relative to @LPStart) in the LSDA. However, a value of +// zero implies "no landing pad." This function inserts a NOP just before the EH +// pad label to ensure a nonzero offset. Returns true if padding is not needed. +static bool avoidZeroOffsetLandingPad(MachineFunction &MF) { + for (auto &MBB : MF) { + if (MBB.isBeginSection() && MBB.isEHPad()) { + MachineBasicBlock::iterator MI = MBB.begin(); + while (!MI->isEHLabel()) + ++MI; + MCInst Noop; + MF.getSubtarget().getInstrInfo()->getNoop(Noop); + BuildMI(MBB, MI, DebugLoc(), + MF.getSubtarget().getInstrInfo()->get(Noop.getOpcode())); + return false; + } + } + return true; +} + +bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { + auto BBSectionsType = MF.getTarget().getBBSectionsType(); + assert(BBSectionsType != BasicBlockSection::None && + "BB Sections not enabled!"); + // Renumber blocks before sorting them for basic block sections. This is + // useful during sorting, basic blocks in the same section will retain the + // default order. 
This renumbering should also be done for basic block + // labels to match the profiles with the correct blocks. + MF.RenumberBlocks(); + + if (BBSectionsType == BasicBlockSection::Labels) { + MF.setBBSectionsType(BBSectionsType); + return true; + } + + std::vector<Optional<BBClusterInfo>> FuncBBClusterInfo; + if (BBSectionsType == BasicBlockSection::List && + !getBBClusterInfoForFunction(MF, FuncAliasMap, ProgramBBClusterInfo, + FuncBBClusterInfo)) + return true; + MF.setBBSectionsType(BBSectionsType); + assignSections(MF, FuncBBClusterInfo); + // We make sure that the cluster including the entry basic block precedes all // other clusters. auto EntryBBSectionID = MF.front().getSectionID(); @@ -300,7 +359,8 @@ static bool assignSectionsAndSortBasicBlocks( // contiguous and ordered accordingly. Furthermore, clusters are ordered in // increasing order of their section IDs, with the exception and the // cold section placed at the end of the function. - MF.sort([&](MachineBasicBlock &X, MachineBasicBlock &Y) { + auto Comparator = [&](const MachineBasicBlock &X, + const MachineBasicBlock &Y) { auto XSectionID = X.getSectionID(); auto YSectionID = Y.getSectionID(); if (XSectionID != YSectionID) @@ -311,43 +371,10 @@ static bool assignSectionsAndSortBasicBlocks( return FuncBBClusterInfo[X.getNumber()]->PositionInCluster < FuncBBClusterInfo[Y.getNumber()]->PositionInCluster; return X.getNumber() < Y.getNumber(); - }); - - // Set IsBeginSection and IsEndSection according to the assigned section IDs. - MF.assignBeginEndSections(); - - // After reordering basic blocks, we must update basic block branches to - // insert explicit fallthrough branches when required and optimize branches - // when possible. 
- updateBranches(MF, PreLayoutFallThroughs); - - return true; -} - -bool BBSectionsPrepare::runOnMachineFunction(MachineFunction &MF) { - auto BBSectionsType = MF.getTarget().getBBSectionsType(); - assert(BBSectionsType != BasicBlockSection::None && - "BB Sections not enabled!"); - // Renumber blocks before sorting them for basic block sections. This is - // useful during sorting, basic blocks in the same section will retain the - // default order. This renumbering should also be done for basic block - // labels to match the profiles with the correct blocks. - MF.RenumberBlocks(); - - if (BBSectionsType == BasicBlockSection::Labels) { - MF.setBBSectionsType(BBSectionsType); - MF.createBBLabels(); - return true; - } + }; - std::vector<Optional<BBClusterInfo>> FuncBBClusterInfo; - if (BBSectionsType == BasicBlockSection::List && - !getBBClusterInfoForFunction(MF, FuncAliasMap, ProgramBBClusterInfo, - FuncBBClusterInfo)) - return true; - MF.setBBSectionsType(BBSectionsType); - MF.createBBLabels(); - assignSectionsAndSortBasicBlocks(MF, FuncBBClusterInfo); + sortBasicBlocksAndUpdateBranches(MF, Comparator); + avoidZeroOffsetLandingPad(MF); return true; } @@ -438,7 +465,7 @@ static Error getBBClusterInfo(const MemoryBuffer *MBuf, return Error::success(); } -bool BBSectionsPrepare::doInitialization(Module &M) { +bool BasicBlockSections::doInitialization(Module &M) { if (!MBuf) return false; if (auto Err = getBBClusterInfo(MBuf, ProgramBBClusterInfo, FuncAliasMap)) @@ -446,12 +473,12 @@ bool BBSectionsPrepare::doInitialization(Module &M) { return false; } -void BBSectionsPrepare::getAnalysisUsage(AnalysisUsage &AU) const { +void BasicBlockSections::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } MachineFunctionPass * -llvm::createBBSectionsPreparePass(const MemoryBuffer *Buf) { - return new BBSectionsPrepare(Buf); +llvm::createBasicBlockSectionsPass(const MemoryBuffer *Buf) { + return new 
BasicBlockSections(Buf); } diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index c6d5aa37834f..fd3f465fb390 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -18,16 +18,12 @@ #include "BranchFolding.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/LivePhysRegs.h" -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -64,7 +60,6 @@ #include <cstddef> #include <iterator> #include <numeric> -#include <vector> using namespace llvm; @@ -139,17 +134,18 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { MF.getSubtarget().getRegisterInfo()); } -BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, +BranchFolder::BranchFolder(bool DefaultEnableTailMerge, bool CommonHoist, MBFIWrapper &FreqInfo, const MachineBranchProbabilityInfo &ProbInfo, - ProfileSummaryInfo *PSI, - unsigned MinTailLength) + ProfileSummaryInfo *PSI, unsigned MinTailLength) : EnableHoistCommonCode(CommonHoist), MinCommonTailLength(MinTailLength), MBBFreqInfo(FreqInfo), MBPI(ProbInfo), PSI(PSI) { if (MinCommonTailLength == 0) MinCommonTailLength = TailMergeSize; switch (FlagEnableTailMerge) { - case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break; + case cl::BOU_UNSET: + EnableTailMerge = DefaultEnableTailMerge; + break; case cl::BOU_TRUE: EnableTailMerge = true; break; case cl::BOU_FALSE: EnableTailMerge = false; break; } @@ -1407,7 +1403,7 @@ ReoptimizeBlock: LLVM_DEBUG(dbgs() << "\nMerging into block: " << PrevBB << "From MBB: " << *MBB); // Remove 
redundant DBG_VALUEs first. - if (PrevBB.begin() != PrevBB.end()) { + if (!PrevBB.empty()) { MachineBasicBlock::iterator PrevBBIter = PrevBB.end(); --PrevBBIter; MachineBasicBlock::iterator MBBIter = MBB->begin(); diff --git a/llvm/lib/CodeGen/BranchFolding.h b/llvm/lib/CodeGen/BranchFolding.h index 49c6bcae2db4..2a4ea92a92aa 100644 --- a/llvm/lib/CodeGen/BranchFolding.h +++ b/llvm/lib/CodeGen/BranchFolding.h @@ -32,8 +32,7 @@ class TargetRegisterInfo; class LLVM_LIBRARY_VISIBILITY BranchFolder { public: - explicit BranchFolder(bool defaultEnableTailMerge, - bool CommonHoist, + explicit BranchFolder(bool DefaultEnableTailMerge, bool CommonHoist, MBFIWrapper &FreqInfo, const MachineBranchProbabilityInfo &ProbInfo, ProfileSummaryInfo *PSI, diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp index 5a3ec1a36f96..366c303614d6 100644 --- a/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -507,25 +507,31 @@ bool BranchRelaxation::relaxBranchInstructions() { Next = std::next(J); MachineInstr &MI = *J; - if (MI.isConditionalBranch()) { - MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI); - if (!isBlockInRange(MI, *DestBB)) { - if (Next != MBB.end() && Next->isConditionalBranch()) { - // If there are multiple conditional branches, this isn't an - // analyzable block. Split later terminators into a new block so - // each one will be analyzable. - - splitBlockBeforeInstr(*Next, DestBB); - } else { - fixupConditionalBranch(MI); - ++NumConditionalRelaxed; - } + if (!MI.isConditionalBranch()) + continue; + + if (MI.getOpcode() == TargetOpcode::FAULTING_OP) + // FAULTING_OP's destination is not encoded in the instruction stream + // and thus never needs relaxed. 
+ continue; + + MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI); + if (!isBlockInRange(MI, *DestBB)) { + if (Next != MBB.end() && Next->isConditionalBranch()) { + // If there are multiple conditional branches, this isn't an + // analyzable block. Split later terminators into a new block so + // each one will be analyzable. + + splitBlockBeforeInstr(*Next, DestBB); + } else { + fixupConditionalBranch(MI); + ++NumConditionalRelaxed; + } - Changed = true; + Changed = true; - // This may have modified all of the terminators, so start over. - Next = MBB.getFirstTerminator(); - } + // This may have modified all of the terminators, so start over. + Next = MBB.getFirstTerminator(); } } } diff --git a/llvm/lib/CodeGen/BreakFalseDeps.cpp b/llvm/lib/CodeGen/BreakFalseDeps.cpp index b01a264dd97d..b11db3e65770 100644 --- a/llvm/lib/CodeGen/BreakFalseDeps.cpp +++ b/llvm/lib/CodeGen/BreakFalseDeps.cpp @@ -118,7 +118,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, if (!MO.isRenamable()) return false; - Register OriginalReg = MO.getReg(); + MCRegister OriginalReg = MO.getReg().asMCReg(); // Update only undef operands that have reg units that are mapped to one root. 
for (MCRegUnitIterator Unit(OriginalReg, TRI); Unit.isValid(); ++Unit) { @@ -171,8 +171,8 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, unsigned Pref) { - Register reg = MI->getOperand(OpIdx).getReg(); - unsigned Clearance = RDA->getClearance(MI, reg); + MCRegister Reg = MI->getOperand(OpIdx).getReg().asMCReg(); + unsigned Clearance = RDA->getClearance(MI, Reg); LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); if (Pref > Clearance) { @@ -186,17 +186,24 @@ bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, void BreakFalseDeps::processDefs(MachineInstr *MI) { assert(!MI->isDebugInstr() && "Won't process debug values"); + const MCInstrDesc &MCID = MI->getDesc(); + // Break dependence on undef uses. Do this before updating LiveRegs below. // This can remove a false dependence with no additional instructions. - unsigned OpNum; - unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI); - if (Pref) { - bool HadTrueDependency = pickBestRegisterForUndef(MI, OpNum, Pref); - // We don't need to bother trying to break a dependency if this - // instruction has a true dependency on that register through another - // operand - we'll have to wait for it to be available regardless. 
- if (!HadTrueDependency && shouldBreakDependence(MI, OpNum, Pref)) - UndefReads.push_back(std::make_pair(MI, OpNum)); + for (unsigned i = MCID.getNumDefs(), e = MCID.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.getReg() || !MO.isUse() || !MO.isUndef()) + continue; + + unsigned Pref = TII->getUndefRegClearance(*MI, i, TRI); + if (Pref) { + bool HadTrueDependency = pickBestRegisterForUndef(MI, i, Pref); + // We don't need to bother trying to break a dependency if this + // instruction has a true dependency on that register through another + // operand - we'll have to wait for it to be available regardless. + if (!HadTrueDependency && shouldBreakDependence(MI, i, Pref)) + UndefReads.push_back(std::make_pair(MI, i)); + } } // The code below allows the target to create a new instruction to break the @@ -204,7 +211,6 @@ void BreakFalseDeps::processDefs(MachineInstr *MI) { if (MF->getFunction().hasMinSize()) return; - const MCInstrDesc &MCID = MI->getDesc(); for (unsigned i = 0, e = MI->isVariadic() ? 
MI->getNumOperands() : MCID.getNumDefs(); i != e; ++i) { diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp index 5d6ee09c8438..16f380c1eb62 100644 --- a/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -28,66 +28,59 @@ using namespace llvm; #define DEBUG_TYPE "calcspillweights" -void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS, - MachineFunction &MF, - VirtRegMap *VRM, - const MachineLoopInfo &MLI, - const MachineBlockFrequencyInfo &MBFI, - VirtRegAuxInfo::NormalizingFn norm) { +void VirtRegAuxInfo::calculateSpillWeightsAndHints() { LLVM_DEBUG(dbgs() << "********** Compute Spill Weights **********\n" << "********** Function: " << MF.getName() << '\n'); MachineRegisterInfo &MRI = MF.getRegInfo(); - VirtRegAuxInfo VRAI(MF, LIS, VRM, MLI, MBFI, norm); - for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { - unsigned Reg = Register::index2VirtReg(i); + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { + unsigned Reg = Register::index2VirtReg(I); if (MRI.reg_nodbg_empty(Reg)) continue; - VRAI.calculateSpillWeightAndHint(LIS.getInterval(Reg)); + calculateSpillWeightAndHint(LIS.getInterval(Reg)); } } // Return the preferred allocation register for reg, given a COPY instruction. 
-static Register copyHint(const MachineInstr *mi, unsigned reg, - const TargetRegisterInfo &tri, - const MachineRegisterInfo &mri) { - unsigned sub, hsub; - Register hreg; - if (mi->getOperand(0).getReg() == reg) { - sub = mi->getOperand(0).getSubReg(); - hreg = mi->getOperand(1).getReg(); - hsub = mi->getOperand(1).getSubReg(); +static Register copyHint(const MachineInstr *MI, unsigned Reg, + const TargetRegisterInfo &TRI, + const MachineRegisterInfo &MRI) { + unsigned Sub, HSub; + Register HReg; + if (MI->getOperand(0).getReg() == Reg) { + Sub = MI->getOperand(0).getSubReg(); + HReg = MI->getOperand(1).getReg(); + HSub = MI->getOperand(1).getSubReg(); } else { - sub = mi->getOperand(1).getSubReg(); - hreg = mi->getOperand(0).getReg(); - hsub = mi->getOperand(0).getSubReg(); + Sub = MI->getOperand(1).getSubReg(); + HReg = MI->getOperand(0).getReg(); + HSub = MI->getOperand(0).getSubReg(); } - if (!hreg) + if (!HReg) return 0; - if (Register::isVirtualRegister(hreg)) - return sub == hsub ? hreg : Register(); + if (Register::isVirtualRegister(HReg)) + return Sub == HSub ? HReg : Register(); - const TargetRegisterClass *rc = mri.getRegClass(reg); - Register CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg); + const TargetRegisterClass *rc = MRI.getRegClass(Reg); + MCRegister CopiedPReg = HSub ? TRI.getSubReg(HReg, HSub) : HReg.asMCReg(); if (rc->contains(CopiedPReg)) return CopiedPReg; // Check if reg:sub matches so that a super register could be hinted. - if (sub) - return tri.getMatchingSuperReg(CopiedPReg, sub, rc); + if (Sub) + return TRI.getMatchingSuperReg(CopiedPReg, Sub, rc); return 0; } // Check if all values in LI are rematerializable -static bool isRematerializable(const LiveInterval &LI, - const LiveIntervals &LIS, - VirtRegMap *VRM, +static bool isRematerializable(const LiveInterval &LI, const LiveIntervals &LIS, + const VirtRegMap &VRM, const TargetInstrInfo &TII) { - unsigned Reg = LI.reg; - unsigned Original = VRM ? 
VRM->getOriginal(Reg) : 0; + unsigned Reg = LI.reg(); + unsigned Original = VRM.getOriginal(Reg); for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); I != E; ++I) { const VNInfo *VNI = *I; @@ -102,31 +95,28 @@ static bool isRematerializable(const LiveInterval &LI, // Trace copies introduced by live range splitting. The inline // spiller can rematerialize through these copies, so the spill // weight must reflect this. - if (VRM) { - while (MI->isFullCopy()) { - // The copy destination must match the interval register. - if (MI->getOperand(0).getReg() != Reg) - return false; - - // Get the source register. - Reg = MI->getOperand(1).getReg(); - - // If the original (pre-splitting) registers match this - // copy came from a split. - if (!Register::isVirtualRegister(Reg) || - VRM->getOriginal(Reg) != Original) - return false; - - // Follow the copy live-in value. - const LiveInterval &SrcLI = LIS.getInterval(Reg); - LiveQueryResult SrcQ = SrcLI.Query(VNI->def); - VNI = SrcQ.valueIn(); - assert(VNI && "Copy from non-existing value"); - if (VNI->isPHIDef()) - return false; - MI = LIS.getInstructionFromIndex(VNI->def); - assert(MI && "Dead valno in interval"); - } + while (MI->isFullCopy()) { + // The copy destination must match the interval register. + if (MI->getOperand(0).getReg() != Reg) + return false; + + // Get the source register. + Reg = MI->getOperand(1).getReg(); + + // If the original (pre-splitting) registers match this + // copy came from a split. + if (!Register::isVirtualRegister(Reg) || VRM.getOriginal(Reg) != Original) + return false; + + // Follow the copy live-in value. 
+ const LiveInterval &SrcLI = LIS.getInterval(Reg); + LiveQueryResult SrcQ = SrcLI.Query(VNI->def); + VNI = SrcQ.valueIn(); + assert(VNI && "Copy from non-existing value"); + if (VNI->isPHIDef()) + return false; + MI = LIS.getInstructionFromIndex(VNI->def); + assert(MI && "Dead valno in interval"); } if (!TII.isTriviallyReMaterializable(*MI, LIS.getAliasAnalysis())) @@ -135,43 +125,55 @@ static bool isRematerializable(const LiveInterval &LI, return true; } -void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { - float weight = weightCalcHelper(li); +void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &LI) { + float Weight = weightCalcHelper(LI); // Check if unspillable. - if (weight < 0) + if (Weight < 0) return; - li.weight = weight; + LI.setWeight(Weight); } -float VirtRegAuxInfo::futureWeight(LiveInterval &li, SlotIndex start, - SlotIndex end) { - return weightCalcHelper(li, &start, &end); +float VirtRegAuxInfo::futureWeight(LiveInterval &LI, SlotIndex Start, + SlotIndex End) { + return weightCalcHelper(LI, &Start, &End); } -float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, - SlotIndex *end) { - MachineRegisterInfo &mri = MF.getRegInfo(); - const TargetRegisterInfo &tri = *MF.getSubtarget().getRegisterInfo(); - MachineBasicBlock *mbb = nullptr; - MachineLoop *loop = nullptr; - bool isExiting = false; - float totalWeight = 0; - unsigned numInstr = 0; // Number of instructions using li - SmallPtrSet<MachineInstr*, 8> visited; - - std::pair<unsigned, unsigned> TargetHint = mri.getRegAllocationHint(li.reg); +float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, + SlotIndex *End) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + MachineBasicBlock *MBB = nullptr; + MachineLoop *Loop = nullptr; + bool IsExiting = false; + float TotalWeight = 0; + unsigned NumInstr 
= 0; // Number of instructions using LI + SmallPtrSet<MachineInstr *, 8> Visited; + + std::pair<Register, Register> TargetHint = MRI.getRegAllocationHint(LI.reg()); + + if (LI.isSpillable()) { + Register Reg = LI.reg(); + Register Original = VRM.getOriginal(Reg); + const LiveInterval &OrigInt = LIS.getInterval(Original); + // li comes from a split of OrigInt. If OrigInt was marked + // as not spillable, make sure the new interval is marked + // as not spillable as well. + if (!OrigInt.isSpillable()) + LI.markNotSpillable(); + } // Don't recompute spill weight for an unspillable register. - bool Spillable = li.isSpillable(); + bool IsSpillable = LI.isSpillable(); - bool localSplitArtifact = start && end; + bool IsLocalSplitArtifact = Start && End; // Do not update future local split artifacts. - bool updateLI = !localSplitArtifact; + bool ShouldUpdateLI = !IsLocalSplitArtifact; - if (localSplitArtifact) { - MachineBasicBlock *localMBB = LIS.getMBBFromIndex(*end); - assert(localMBB == LIS.getMBBFromIndex(*start) && + if (IsLocalSplitArtifact) { + MachineBasicBlock *localMBB = LIS.getMBBFromIndex(*End); + assert(localMBB == LIS.getMBBFromIndex(*Start) && "start and end are expected to be in the same basic block"); // Local split artifact will have 2 additional copy instructions and they @@ -179,116 +181,119 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, // localLI = COPY other // ... // other = COPY localLI - totalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, localMBB); - totalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, localMBB); + TotalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, localMBB); + TotalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, localMBB); - numInstr += 2; + NumInstr += 2; } // CopyHint is a sortable hint derived from a COPY instruction. 
struct CopyHint { - unsigned Reg; - float Weight; - bool IsPhys; - CopyHint(unsigned R, float W, bool P) : - Reg(R), Weight(W), IsPhys(P) {} - bool operator<(const CopyHint &rhs) const { + const Register Reg; + const float Weight; + CopyHint(Register R, float W) : Reg(R), Weight(W) {} + bool operator<(const CopyHint &Rhs) const { // Always prefer any physreg hint. - if (IsPhys != rhs.IsPhys) - return (IsPhys && !rhs.IsPhys); - if (Weight != rhs.Weight) - return (Weight > rhs.Weight); - return Reg < rhs.Reg; // Tie-breaker. + if (Reg.isPhysical() != Rhs.Reg.isPhysical()) + return Reg.isPhysical(); + if (Weight != Rhs.Weight) + return (Weight > Rhs.Weight); + return Reg.id() < Rhs.Reg.id(); // Tie-breaker. } }; - std::set<CopyHint> CopyHints; + std::set<CopyHint> CopyHints; + DenseMap<unsigned, float> Hint; for (MachineRegisterInfo::reg_instr_nodbg_iterator - I = mri.reg_instr_nodbg_begin(li.reg), - E = mri.reg_instr_nodbg_end(); + I = MRI.reg_instr_nodbg_begin(LI.reg()), + E = MRI.reg_instr_nodbg_end(); I != E;) { - MachineInstr *mi = &*(I++); + MachineInstr *MI = &*(I++); // For local split artifacts, we are interested only in instructions between // the expected start and end of the range. - SlotIndex si = LIS.getInstructionIndex(*mi); - if (localSplitArtifact && ((si < *start) || (si > *end))) + SlotIndex SI = LIS.getInstructionIndex(*MI); + if (IsLocalSplitArtifact && ((SI < *Start) || (SI > *End))) continue; - numInstr++; - if (mi->isIdentityCopy() || mi->isImplicitDef()) + NumInstr++; + if (MI->isIdentityCopy() || MI->isImplicitDef()) continue; - if (!visited.insert(mi).second) + if (!Visited.insert(MI).second) continue; - float weight = 1.0f; - if (Spillable) { + // For terminators that produce values, ask the backend if the register is + // not spillable. + if (TII.isUnspillableTerminator(MI) && MI->definesRegister(LI.reg())) { + LI.markNotSpillable(); + return -1.0f; + } + + float Weight = 1.0f; + if (IsSpillable) { // Get loop info for mi. 
- if (mi->getParent() != mbb) { - mbb = mi->getParent(); - loop = Loops.getLoopFor(mbb); - isExiting = loop ? loop->isLoopExiting(mbb) : false; + if (MI->getParent() != MBB) { + MBB = MI->getParent(); + Loop = Loops.getLoopFor(MBB); + IsExiting = Loop ? Loop->isLoopExiting(MBB) : false; } // Calculate instr weight. - bool reads, writes; - std::tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg); - weight = LiveIntervals::getSpillWeight(writes, reads, &MBFI, *mi); + bool Reads, Writes; + std::tie(Reads, Writes) = MI->readsWritesVirtualRegister(LI.reg()); + Weight = LiveIntervals::getSpillWeight(Writes, Reads, &MBFI, *MI); // Give extra weight to what looks like a loop induction variable update. - if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb)) - weight *= 3; + if (Writes && IsExiting && LIS.isLiveOutOfMBB(LI, MBB)) + Weight *= 3; - totalWeight += weight; + TotalWeight += Weight; } // Get allocation hints from copies. - if (!mi->isCopy()) + if (!MI->isCopy()) continue; - Register hint = copyHint(mi, li.reg, tri, mri); - if (!hint) + Register HintReg = copyHint(MI, LI.reg(), TRI, MRI); + if (!HintReg) continue; // Force hweight onto the stack so that x86 doesn't add hidden precision, // making the comparison incorrectly pass (i.e., 1 > 1 == true??). // // FIXME: we probably shouldn't use floats at all. - volatile float hweight = Hint[hint] += weight; - if (Register::isVirtualRegister(hint) || mri.isAllocatable(hint)) - CopyHints.insert( - CopyHint(hint, hweight, Register::isPhysicalRegister(hint))); + volatile float HWeight = Hint[HintReg] += Weight; + if (HintReg.isVirtual() || MRI.isAllocatable(HintReg)) + CopyHints.insert(CopyHint(HintReg, HWeight)); } - Hint.clear(); - // Pass all the sorted copy hints to mri. - if (updateLI && CopyHints.size()) { + if (ShouldUpdateLI && CopyHints.size()) { // Remove a generic hint if previously added by target. 
if (TargetHint.first == 0 && TargetHint.second) - mri.clearSimpleHint(li.reg); + MRI.clearSimpleHint(LI.reg()); - std::set<unsigned> HintedRegs; + std::set<Register> HintedRegs; for (auto &Hint : CopyHints) { if (!HintedRegs.insert(Hint.Reg).second || (TargetHint.first != 0 && Hint.Reg == TargetHint.second)) // Don't add the same reg twice or the target-type hint again. continue; - mri.addRegAllocationHint(li.reg, Hint.Reg); + MRI.addRegAllocationHint(LI.reg(), Hint.Reg); } // Weakly boost the spill weight of hinted registers. - totalWeight *= 1.01F; + TotalWeight *= 1.01F; } // If the live interval was already unspillable, leave it that way. - if (!Spillable) + if (!IsSpillable) return -1.0; // Mark li as unspillable if all live ranges are tiny and the interval // is not live at any reg mask. If the interval is live at a reg mask // spilling may be required. - if (updateLI && li.isZeroLength(LIS.getSlotIndexes()) && - !li.isLiveAtIndexes(LIS.getRegMaskSlots())) { - li.markNotSpillable(); + if (ShouldUpdateLI && LI.isZeroLength(LIS.getSlotIndexes()) && + !LI.isLiveAtIndexes(LIS.getRegMaskSlots())) { + LI.markNotSpillable(); return -1.0; } @@ -296,10 +301,10 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, // it is a preferred candidate for spilling. // FIXME: this gets much more complicated once we support non-trivial // re-materialization. 
- if (isRematerializable(li, LIS, VRM, *MF.getSubtarget().getInstrInfo())) - totalWeight *= 0.5F; + if (isRematerializable(LI, LIS, VRM, *MF.getSubtarget().getInstrInfo())) + TotalWeight *= 0.5F; - if (localSplitArtifact) - return normalize(totalWeight, start->distance(*end), numInstr); - return normalize(totalWeight, li.getSize(), numInstr); + if (IsLocalSplitArtifact) + return normalize(TotalWeight, Start->distance(*End), NumInstr); + return normalize(TotalWeight, LI.getSize(), NumInstr); } diff --git a/llvm/lib/CodeGen/CallingConvLower.cpp b/llvm/lib/CodeGen/CallingConvLower.cpp index 3d8c2c8b00aa..c9246f6e8754 100644 --- a/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/llvm/lib/CodeGen/CallingConvLower.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -62,6 +63,11 @@ void CCState::MarkAllocated(MCPhysReg Reg) { UsedRegs[*AI / 32] |= 1 << (*AI & 31); } +void CCState::MarkUnallocated(MCPhysReg Reg) { + for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI) + UsedRegs[*AI / 32] &= ~(1 << (*AI & 31)); +} + bool CCState::IsShadowAllocatedReg(MCRegister Reg) const { if (!isAllocated(Reg)) return false; @@ -184,14 +190,17 @@ void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) { } } +void CCState::ensureMaxAlignment(Align Alignment) { + if (!AnalyzingMustTailForwardedRegs) + MF.getFrameInfo().ensureMaxAlignment(Alignment); +} + static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) { if (VT.isVector()) return true; // Assume -msse-regparm might be in effect. 
if (!VT.isInteger()) return false; - if (CC == CallingConv::X86_VectorCall || CC == CallingConv::X86_FastCall) - return true; - return false; + return (CC == CallingConv::X86_VectorCall || CC == CallingConv::X86_FastCall); } void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs, @@ -207,8 +216,8 @@ void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs, // Allocate something of this value type repeatedly until we get assigned a // location in memory. - bool HaveRegParm = true; - while (HaveRegParm) { + bool HaveRegParm; + do { if (Fn(0, VT, VT, CCValAssign::Full, Flags, *this)) { #ifndef NDEBUG dbgs() << "Call has unhandled type " << EVT(VT).getEVTString() @@ -217,7 +226,7 @@ void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs, llvm_unreachable(nullptr); } HaveRegParm = Locs.back().isRegLoc(); - } + } while (HaveRegParm); // Copy all the registers from the value locations we added. assert(NumLocs < Locs.size() && "CC assignment failed to add location"); @@ -248,7 +257,7 @@ void CCState::analyzeMustTailForwardedRegisters( const TargetLowering *TL = MF.getSubtarget().getTargetLowering(); const TargetRegisterClass *RC = TL->getRegClassFor(RegVT); for (MCPhysReg PReg : RemainingRegs) { - unsigned VReg = MF.addLiveIn(PReg, RC); + Register VReg = MF.addLiveIn(PReg, RC); Forwards.push_back(ForwardedRegister(VReg, PReg, RegVT)); } } diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 7a8c022c82da..d2400d0371e3 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -20,16 +20,17 @@ using namespace llvm; /// initializeCodeGen - Initialize all passes linked into the CodeGen library. 
void llvm::initializeCodeGen(PassRegistry &Registry) { initializeAtomicExpandPass(Registry); - initializeBBSectionsPreparePass(Registry); + initializeBasicBlockSectionsPass(Registry); initializeBranchFolderPassPass(Registry); initializeBranchRelaxationPass(Registry); initializeCFGuardLongjmpPass(Registry); initializeCFIInstrInserterPass(Registry); + initializeCheckDebugMachineModulePass(Registry); initializeCodeGenPreparePass(Registry); initializeDeadMachineInstructionElimPass(Registry); initializeDebugifyMachineModulePass(Registry); initializeDetectDeadLanesPass(Registry); - initializeDwarfEHPreparePass(Registry); + initializeDwarfEHPrepareLegacyPassPass(Registry); initializeEarlyIfConverterPass(Registry); initializeEarlyIfPredicatorPass(Registry); initializeEarlyMachineLICMPass(Registry); @@ -98,7 +99,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeRegisterCoalescerPass(Registry); initializeRenameIndependentSubregsPass(Registry); initializeSafeStackLegacyPassPass(Registry); - initializeScalarizeMaskedMemIntrinPass(Registry); initializeShrinkWrapPass(Registry); initializeSjLjEHPreparePass(Registry); initializeSlotIndexesPass(Registry); diff --git a/llvm/lib/CodeGen/CodeGenPassBuilder.cpp b/llvm/lib/CodeGen/CodeGenPassBuilder.cpp new file mode 100644 index 000000000000..7f37f2069a3b --- /dev/null +++ b/llvm/lib/CodeGen/CodeGenPassBuilder.cpp @@ -0,0 +1,25 @@ +//===--- CodeGenPassBuilder.cpp --------------------------------------- ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines interfaces to access the target independent code +// generation passes provided by the LLVM backend. 
+// +//===---------------------------------------------------------------------===// + +#include "llvm/CodeGen/CodeGenPassBuilder.h" + +using namespace llvm; + +namespace llvm { +#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ + AnalysisKey PASS_NAME::Key; +#include "llvm/CodeGen/MachinePassRegistry.def" +#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ + AnalysisKey PASS_NAME::Key; +#include "llvm/CodeGen/MachinePassRegistry.def" +} // namespace llvm diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index e8b8e6c93cf0..b2bc75c19709 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -376,6 +376,7 @@ class TypePromotionTransaction; return *DT; } + void removeAllAssertingVHReferences(Value *V); bool eliminateFallThrough(Function &F); bool eliminateMostlyEmptyBlocks(Function &F); BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB); @@ -383,6 +384,7 @@ class TypePromotionTransaction; void eliminateMostlyEmptyBlock(BasicBlock *BB); bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB, bool isPreheader); + bool makeBitReverse(Instruction &I); bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT); bool optimizeInst(Instruction *I, bool &ModifiedDT); bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, @@ -437,7 +439,11 @@ char CodeGenPrepare::ID = 0; INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE, "Optimize for code generation", false, false) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE, "Optimize for code generation", false, false) @@ -466,13 +472,21 @@ bool CodeGenPrepare::runOnFunction(Function &F) { PSI = 
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); OptSize = F.hasOptSize(); if (ProfileGuidedSectionPrefix) { - if (PSI->isFunctionHotInCallGraph(&F, *BFI)) - F.setSectionPrefix(".hot"); - else if (PSI->isFunctionColdInCallGraph(&F, *BFI)) - F.setSectionPrefix(".unlikely"); + // The hot attribute overwrites profile count based hotness while profile + // counts based hotness overwrite the cold attribute. + // This is a conservative behabvior. + if (F.hasFnAttribute(Attribute::Hot) || + PSI->isFunctionHotInCallGraph(&F, *BFI)) + F.setSectionPrefix("hot"); + // If PSI shows this function is not hot, we will placed the function + // into unlikely section if (1) PSI shows this is a cold function, or + // (2) the function has a attribute of cold. + else if (PSI->isFunctionColdInCallGraph(&F, *BFI) || + F.hasFnAttribute(Attribute::Cold)) + F.setSectionPrefix("unlikely"); else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() && PSI->isFunctionHotnessUnknown(F)) - F.setSectionPrefix(".unknown"); + F.setSectionPrefix("unknown"); } /// This optimization identifies DIV instructions that can be @@ -538,6 +552,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { LargeOffsetGEPID.clear(); } + NewGEPBases.clear(); SunkAddrs.clear(); if (!DisableBranchOpts) { @@ -547,13 +562,13 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // are removed. 
SmallSetVector<BasicBlock*, 8> WorkList; for (BasicBlock &BB : F) { - SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB)); + SmallVector<BasicBlock *, 2> Successors(successors(&BB)); MadeChange |= ConstantFoldTerminator(&BB, true); if (!MadeChange) continue; for (SmallVectorImpl<BasicBlock*>::iterator II = Successors.begin(), IE = Successors.end(); II != IE; ++II) - if (pred_begin(*II) == pred_end(*II)) + if (pred_empty(*II)) WorkList.insert(*II); } @@ -561,13 +576,13 @@ bool CodeGenPrepare::runOnFunction(Function &F) { MadeChange |= !WorkList.empty(); while (!WorkList.empty()) { BasicBlock *BB = WorkList.pop_back_val(); - SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB)); + SmallVector<BasicBlock*, 2> Successors(successors(BB)); DeleteDeadBlock(BB); for (SmallVectorImpl<BasicBlock*>::iterator II = Successors.begin(), IE = Successors.end(); II != IE; ++II) - if (pred_begin(*II) == pred_end(*II)) + if (pred_empty(*II)) WorkList.insert(*II); } @@ -601,6 +616,33 @@ bool CodeGenPrepare::runOnFunction(Function &F) { return EverMadeChange; } +/// An instruction is about to be deleted, so remove all references to it in our +/// GEP-tracking data strcutures. +void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) { + LargeOffsetGEPMap.erase(V); + NewGEPBases.erase(V); + + auto GEP = dyn_cast<GetElementPtrInst>(V); + if (!GEP) + return; + + LargeOffsetGEPID.erase(GEP); + + auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand()); + if (VecI == LargeOffsetGEPMap.end()) + return; + + auto &GEPVector = VecI->second; + const auto &I = + llvm::find_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; }); + if (I == GEPVector.end()) + return; + + GEPVector.erase(I); + if (GEPVector.empty()) + LargeOffsetGEPMap.erase(VecI); +} + // Verify BFI has been updated correctly by recomputing BFI and comparing them. 
void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) { DominatorTree NewDT(F); @@ -619,9 +661,10 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) { // Use a temporary array to avoid iterator being invalidated when // deleting blocks. SmallVector<WeakTrackingVH, 16> Blocks; - for (auto &Block : llvm::make_range(std::next(F.begin()), F.end())) + for (auto &Block : llvm::drop_begin(F)) Blocks.push_back(&Block); + SmallSet<WeakTrackingVH, 16> Preds; for (auto &Block : Blocks) { auto *BB = cast_or_null<BasicBlock>(Block); if (!BB) @@ -640,8 +683,16 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) { // Merge BB into SinglePred and delete it. MergeBlockIntoPredecessor(BB); + Preds.insert(SinglePred); } } + + // (Repeatedly) merging blocks into their predecessors can create redundant + // debug intrinsics. + for (auto &Pred : Preds) + if (auto *BB = cast_or_null<BasicBlock>(Pred)) + RemoveRedundantDbgInstrs(BB); + return Changed; } @@ -686,7 +737,7 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) { SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end()); while (!LoopList.empty()) { Loop *L = LoopList.pop_back_val(); - LoopList.insert(LoopList.end(), L->begin(), L->end()); + llvm::append_range(LoopList, *L); if (BasicBlock *Preheader = L->getLoopPreheader()) Preheaders.insert(Preheader); } @@ -696,7 +747,7 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) { // as we remove them. // Note that this intentionally skips the entry block. SmallVector<WeakTrackingVH, 16> Blocks; - for (auto &Block : llvm::make_range(std::next(F.begin()), F.end())) + for (auto &Block : llvm::drop_begin(F)) Blocks.push_back(&Block); for (auto &Block : Blocks) { @@ -2011,7 +2062,14 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::assume: { + Value *Operand = II->getOperand(0); II->eraseFromParent(); + // Prune the operand, it's most likely dead. 
+ resetIteratorIfInvalidatedWhileCalling(BB, [&]() { + RecursivelyDeleteTriviallyDeadInstructions( + Operand, TLInfo, nullptr, + [&](Value *V) { removeAllAssertingVHReferences(V); }); + }); return true; } @@ -2172,8 +2230,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT EVI = dyn_cast<ExtractValueInst>(V); if (EVI) { V = EVI->getOperand(0); - if (!std::all_of(EVI->idx_begin(), EVI->idx_end(), - [](unsigned idx) { return idx == 0; })) + if (!llvm::all_of(EVI->indices(), [](unsigned idx) { return idx == 0; })) return false; } @@ -2192,13 +2249,12 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT // Skip over debug and the bitcast. do { ++BI; - } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI || &*BI == EVI); + } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI || &*BI == EVI || + isa<PseudoProbeInst>(BI)); if (&*BI != RetI) return false; } else { - BasicBlock::iterator BI = BB->begin(); - while (isa<DbgInfoIntrinsic>(BI)) ++BI; - if (&*BI != RetI) + if (BB->getFirstNonPHIOrDbg(true) != RetI) return false; } @@ -2223,18 +2279,12 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { if (!VisitedBBs.insert(*PI).second) continue; - - BasicBlock::InstListType &InstList = (*PI)->getInstList(); - BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); - BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); - do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI)); - if (RI == RE) - continue; - - CallInst *CI = dyn_cast<CallInst>(&*RI); - if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) && - attributesPermitTailCall(F, CI, RetI, *TLI)) - TailCallBBs.push_back(*PI); + if (Instruction *I = (*PI)->rbegin()->getPrevNonDebugInstruction(true)) { + CallInst *CI = dyn_cast<CallInst>(I); + if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) && + 
attributesPermitTailCall(F, CI, RetI, *TLI)) + TailCallBBs.push_back(*PI); + } } } @@ -2258,7 +2308,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT } // If we eliminated all predecessors of the block, delete the block now. - if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) + if (Changed && !BB->hasAddressTaken() && pred_empty(BB)) BB->eraseFromParent(); return Changed; @@ -3109,9 +3159,7 @@ public: /// \returns whether the element is actually removed, i.e. was in the /// collection before the operation. bool erase(PHINode *Ptr) { - auto it = NodeMap.find(Ptr); - if (it != NodeMap.end()) { - NodeMap.erase(Ptr); + if (NodeMap.erase(Ptr)) { SkipRemovedElements(FirstValidElement); return true; } @@ -3666,8 +3714,7 @@ private: PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi); Map[Current] = PHI; ST.insertNewPhi(PHI); - for (Value *P : CurrentPhi->incoming_values()) - Worklist.push_back(P); + append_range(Worklist, CurrentPhi->incoming_values()); } } } @@ -4289,7 +4336,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned SrcAS = AddrInst->getOperand(0)->getType()->getPointerAddressSpace(); unsigned DestAS = AddrInst->getType()->getPointerAddressSpace(); - if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) + if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS)) return matchAddr(AddrInst->getOperand(0), Depth); return false; } @@ -4921,8 +4968,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // For a PHI node, push all of its incoming values. 
if (PHINode *P = dyn_cast<PHINode>(V)) { - for (Value *IncValue : P->incoming_values()) - worklist.push_back(IncValue); + append_range(worklist, P->incoming_values()); PhiOrSelectSeen = true; continue; } @@ -5236,20 +5282,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // If we have no uses, recursively delete the value and all dead instructions // using it. if (Repl->use_empty()) { - // This can cause recursive deletion, which can invalidate our iterator. - // Use a WeakTrackingVH to hold onto it in case this happens. - Value *CurValue = &*CurInstIterator; - WeakTrackingVH IterHandle(CurValue); - BasicBlock *BB = CurInstIterator->getParent(); - - RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); - - if (IterHandle != CurValue) { - // If the iterator instruction was recursively deleted, start over at the - // start of the block. - CurInstIterator = BB->begin(); - SunkAddrs.clear(); - } + resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() { + RecursivelyDeleteTriviallyDeadInstructions( + Repl, TLInfo, nullptr, + [&](Value *V) { removeAllAssertingVHReferences(V); }); + }); } ++NumMemoryInsts; return true; @@ -5270,92 +5307,112 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, /// /// If the final index isn't a vector or is a splat, we can emit a scalar GEP /// followed by a GEP with an all zeroes vector index. This will enable -/// SelectionDAGBuilder to use a the scalar GEP as the uniform base and have a +/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a /// zero index. bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr) { - const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); - if (!GEP || !GEP->hasIndices()) - return false; + Value *NewAddr; - // If the GEP and the gather/scatter aren't in the same BB, don't optimize. - // FIXME: We should support this by sinking the GEP. 
- if (MemoryInst->getParent() != GEP->getParent()) - return false; + if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { + // Don't optimize GEPs that don't have indices. + if (!GEP->hasIndices()) + return false; - SmallVector<Value *, 2> Ops(GEP->op_begin(), GEP->op_end()); + // If the GEP and the gather/scatter aren't in the same BB, don't optimize. + // FIXME: We should support this by sinking the GEP. + if (MemoryInst->getParent() != GEP->getParent()) + return false; - bool RewriteGEP = false; + SmallVector<Value *, 2> Ops(GEP->operands()); - if (Ops[0]->getType()->isVectorTy()) { - Ops[0] = const_cast<Value *>(getSplatValue(Ops[0])); - if (!Ops[0]) - return false; - RewriteGEP = true; - } + bool RewriteGEP = false; - unsigned FinalIndex = Ops.size() - 1; + if (Ops[0]->getType()->isVectorTy()) { + Ops[0] = getSplatValue(Ops[0]); + if (!Ops[0]) + return false; + RewriteGEP = true; + } - // Ensure all but the last index is 0. - // FIXME: This isn't strictly required. All that's required is that they are - // all scalars or splats. - for (unsigned i = 1; i < FinalIndex; ++i) { - auto *C = dyn_cast<Constant>(Ops[i]); - if (!C) - return false; - if (isa<VectorType>(C->getType())) - C = C->getSplatValue(); - auto *CI = dyn_cast_or_null<ConstantInt>(C); - if (!CI || !CI->isZero()) - return false; - // Scalarize the index if needed. - Ops[i] = CI; - } - - // Try to scalarize the final index. - if (Ops[FinalIndex]->getType()->isVectorTy()) { - if (Value *V = const_cast<Value *>(getSplatValue(Ops[FinalIndex]))) { - auto *C = dyn_cast<ConstantInt>(V); - // Don't scalarize all zeros vector. - if (!C || !C->isZero()) { - Ops[FinalIndex] = V; - RewriteGEP = true; + unsigned FinalIndex = Ops.size() - 1; + + // Ensure all but the last index is 0. + // FIXME: This isn't strictly required. All that's required is that they are + // all scalars or splats. 
+ for (unsigned i = 1; i < FinalIndex; ++i) { + auto *C = dyn_cast<Constant>(Ops[i]); + if (!C) + return false; + if (isa<VectorType>(C->getType())) + C = C->getSplatValue(); + auto *CI = dyn_cast_or_null<ConstantInt>(C); + if (!CI || !CI->isZero()) + return false; + // Scalarize the index if needed. + Ops[i] = CI; + } + + // Try to scalarize the final index. + if (Ops[FinalIndex]->getType()->isVectorTy()) { + if (Value *V = getSplatValue(Ops[FinalIndex])) { + auto *C = dyn_cast<ConstantInt>(V); + // Don't scalarize all zeros vector. + if (!C || !C->isZero()) { + Ops[FinalIndex] = V; + RewriteGEP = true; + } } } - } - // If we made any changes or the we have extra operands, we need to generate - // new instructions. - if (!RewriteGEP && Ops.size() == 2) - return false; - - unsigned NumElts = cast<FixedVectorType>(Ptr->getType())->getNumElements(); + // If we made any changes or the we have extra operands, we need to generate + // new instructions. + if (!RewriteGEP && Ops.size() == 2) + return false; - IRBuilder<> Builder(MemoryInst); + auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount(); - Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType()); + IRBuilder<> Builder(MemoryInst); - Value *NewAddr; + Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType()); - // If the final index isn't a vector, emit a scalar GEP containing all ops - // and a vector GEP with all zeroes final index. - if (!Ops[FinalIndex]->getType()->isVectorTy()) { - NewAddr = Builder.CreateGEP(Ops[0], makeArrayRef(Ops).drop_front()); - auto *IndexTy = FixedVectorType::get(ScalarIndexTy, NumElts); - NewAddr = Builder.CreateGEP(NewAddr, Constant::getNullValue(IndexTy)); - } else { - Value *Base = Ops[0]; - Value *Index = Ops[FinalIndex]; + // If the final index isn't a vector, emit a scalar GEP containing all ops + // and a vector GEP with all zeroes final index. 
+ if (!Ops[FinalIndex]->getType()->isVectorTy()) { + NewAddr = Builder.CreateGEP(Ops[0], makeArrayRef(Ops).drop_front()); + auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts); + NewAddr = Builder.CreateGEP(NewAddr, Constant::getNullValue(IndexTy)); + } else { + Value *Base = Ops[0]; + Value *Index = Ops[FinalIndex]; + + // Create a scalar GEP if there are more than 2 operands. + if (Ops.size() != 2) { + // Replace the last index with 0. + Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy); + Base = Builder.CreateGEP(Base, makeArrayRef(Ops).drop_front()); + } - // Create a scalar GEP if there are more than 2 operands. - if (Ops.size() != 2) { - // Replace the last index with 0. - Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy); - Base = Builder.CreateGEP(Base, makeArrayRef(Ops).drop_front()); + // Now create the GEP with scalar pointer and vector index. + NewAddr = Builder.CreateGEP(Base, Index); } + } else if (!isa<Constant>(Ptr)) { + // Not a GEP, maybe its a splat and we can create a GEP to enable + // SelectionDAGBuilder to use it as a uniform base. + Value *V = getSplatValue(Ptr); + if (!V) + return false; + + auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount(); - // Now create the GEP with scalar pointer and vector index. - NewAddr = Builder.CreateGEP(Base, Index); + IRBuilder<> Builder(MemoryInst); + + // Emit a vector GEP with a scalar pointer and all 0s vector index. + Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType()); + auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts); + NewAddr = Builder.CreateGEP(V, Constant::getNullValue(IndexTy)); + } else { + // Constant, SelectionDAGBuilder knows to check if its a splat. + return false; } MemoryInst->replaceUsesOfWith(Ptr, NewAddr); @@ -5363,7 +5420,9 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst, // If we have no uses, recursively delete the value and all dead instructions // using it. 
if (Ptr->use_empty()) - RecursivelyDeleteTriviallyDeadInstructions(Ptr, TLInfo); + RecursivelyDeleteTriviallyDeadInstructions( + Ptr, TLInfo, nullptr, + [&](Value *V) { removeAllAssertingVHReferences(V); }); return true; } @@ -5752,6 +5811,12 @@ bool CodeGenPrepare::optimizePhiType( Visited.insert(I); SmallPtrSet<Instruction *, 4> Defs; SmallPtrSet<Instruction *, 4> Uses; + // This works by adding extra bitcasts between load/stores and removing + // existing bicasts. If we have a phi(bitcast(load)) or a store(bitcast(phi)) + // we can get in the situation where we remove a bitcast in one iteration + // just to add it again in the next. We need to ensure that at least one + // bitcast we remove are anchored to something that will not change back. + bool AnyAnchored = false; while (!Worklist.empty()) { Instruction *II = Worklist.pop_back_val(); @@ -5768,6 +5833,8 @@ bool CodeGenPrepare::optimizePhiType( Worklist.push_back(OpPhi); } } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) { + if (!OpLoad->isSimple()) + return false; if (!Defs.count(OpLoad)) { Defs.insert(OpLoad); Worklist.push_back(OpLoad); @@ -5785,9 +5852,12 @@ bool CodeGenPrepare::optimizePhiType( if (!Defs.count(OpBC)) { Defs.insert(OpBC); Worklist.push_back(OpBC); + AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) && + !isa<ExtractElementInst>(OpBC->getOperand(0)); } - } else if (!isa<UndefValue>(V)) + } else if (!isa<UndefValue>(V)) { return false; + } } } @@ -5802,7 +5872,7 @@ bool CodeGenPrepare::optimizePhiType( Worklist.push_back(OpPhi); } } else if (auto *OpStore = dyn_cast<StoreInst>(V)) { - if (OpStore->getOperand(0) != II) + if (!OpStore->isSimple() || OpStore->getOperand(0) != II) return false; Uses.insert(OpStore); } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) { @@ -5811,12 +5881,15 @@ bool CodeGenPrepare::optimizePhiType( if (OpBC->getType() != ConvertTy) return false; Uses.insert(OpBC); - } else + AnyAnchored |= + any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); }); + } 
else { return false; + } } } - if (!ConvertTy || !TLI->shouldConvertPhiType(PhiTy, ConvertTy)) + if (!ConvertTy || !AnyAnchored || !TLI->shouldConvertPhiType(PhiTy, ConvertTy)) return false; LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to " @@ -5827,11 +5900,13 @@ bool CodeGenPrepare::optimizePhiType( ValueToValueMap ValMap; ValMap[UndefValue::get(PhiTy)] = UndefValue::get(ConvertTy); for (Instruction *D : Defs) { - if (isa<BitCastInst>(D)) + if (isa<BitCastInst>(D)) { ValMap[D] = D->getOperand(0); - else + DeletedInstrs.insert(D); + } else { ValMap[D] = new BitCastInst(D, ConvertTy, D->getName() + ".bc", D->getNextNode()); + } } for (PHINode *Phi : PhiNodes) ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(), @@ -5842,15 +5917,17 @@ bool CodeGenPrepare::optimizePhiType( for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++) NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)], Phi->getIncomingBlock(i)); + Visited.insert(NewPhi); } // And finally pipe up the stores and bitcasts for (Instruction *U : Uses) { if (isa<BitCastInst>(U)) { DeletedInstrs.insert(U); U->replaceAllUsesWith(ValMap[U->getOperand(0)]); - } else + } else { U->setOperand(0, new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc", U)); + } } // Save the removed phis to be deleted later. @@ -6445,9 +6522,7 @@ bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) { /// If we have a SelectInst that will likely profit from branch prediction, /// turn it into a branch. bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { - // If branch conversion isn't desirable, exit early. - if (DisableSelectToBranch || OptSize || - llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())) + if (DisableSelectToBranch) return false; // Find all consecutive select instructions that share the same condition. 
@@ -6483,7 +6558,8 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { SelectKind = TargetLowering::ScalarValSelect; if (TLI->isSelectSupported(SelectKind) && - !isFormingBranchFromSelectProfitable(TTI, TLI, SI)) + (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) || OptSize || + llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get()))) return false; // The DominatorTree needs to be rebuilt by any consumers after this @@ -6621,6 +6697,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { /// in MVE takes a GPR (integer) register, and the instruction that incorporate /// a VDUP (such as a VADD qd, qm, rm) also require a gpr register. bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { + // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()), m_Undef(), m_ZeroMask()))) return false; @@ -6640,14 +6717,12 @@ bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { Builder.SetInsertPoint(SVI); Value *BC1 = Builder.CreateBitCast( cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType); - Value *Insert = Builder.CreateInsertElement(UndefValue::get(NewVecType), BC1, - (uint64_t)0); - Value *Shuffle = Builder.CreateShuffleVector( - Insert, UndefValue::get(NewVecType), SVI->getShuffleMask()); + Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1); Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType); SVI->replaceAllUsesWith(BC2); - RecursivelyDeleteTriviallyDeadInstructions(SVI); + RecursivelyDeleteTriviallyDeadInstructions( + SVI, TLInfo, nullptr, [&](Value *V) { removeAllAssertingVHReferences(V); }); // Also hoist the bitcast up to its operand if it they are not in the same // block. 
@@ -6920,10 +6995,10 @@ class VectorPromoteHelper { if (UseSplat) return ConstantVector::getSplat(EC, Val); - if (!EC.Scalable) { + if (!EC.isScalable()) { SmallVector<Constant *, 4> ConstVec; UndefValue *UndefVal = UndefValue::get(Val->getType()); - for (unsigned Idx = 0; Idx != EC.Min; ++Idx) { + for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) { if (Idx == ExtractIdx) ConstVec.push_back(Val); else @@ -7604,11 +7679,10 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { /// Given an OR instruction, check to see if this is a bitreverse /// idiom. If so, insert the new intrinsic and return true. -static bool makeBitReverse(Instruction &I, const DataLayout &DL, - const TargetLowering &TLI) { +bool CodeGenPrepare::makeBitReverse(Instruction &I) { if (!I.getType()->isIntegerTy() || - !TLI.isOperationLegalOrCustom(ISD::BITREVERSE, - TLI.getValueType(DL, I.getType(), true))) + !TLI->isOperationLegalOrCustom(ISD::BITREVERSE, + TLI->getValueType(*DL, I.getType(), true))) return false; SmallVector<Instruction*, 4> Insts; @@ -7616,7 +7690,8 @@ static bool makeBitReverse(Instruction &I, const DataLayout &DL, return false; Instruction *LastInst = Insts.back(); I.replaceAllUsesWith(LastInst); - RecursivelyDeleteTriviallyDeadInstructions(&I); + RecursivelyDeleteTriviallyDeadInstructions( + &I, TLInfo, nullptr, [&](Value *V) { removeAllAssertingVHReferences(V); }); return true; } @@ -7638,7 +7713,7 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) { while (MadeBitReverse) { MadeBitReverse = false; for (auto &I : reverse(BB)) { - if (makeBitReverse(I, *DL, *TLI)) { + if (makeBitReverse(I)) { MadeBitReverse = MadeChange = true; break; } @@ -7757,9 +7832,10 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) { // %cond2 = icmp|fcmp|binary instruction ... 
// %cond.or = or|and i1 %cond1, cond2 // br i1 %cond.or label %dest1, label %dest2" - BinaryOperator *LogicOp; + Instruction *LogicOp; BasicBlock *TBB, *FBB; - if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB))) + if (!match(BB.getTerminator(), + m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB))) continue; auto *Br1 = cast<BranchInst>(BB.getTerminator()); @@ -7772,17 +7848,22 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) { unsigned Opc; Value *Cond1, *Cond2; - if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)), - m_OneUse(m_Value(Cond2))))) + if (match(LogicOp, + m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2))))) Opc = Instruction::And; - else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)), - m_OneUse(m_Value(Cond2))))) + else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)), + m_OneUse(m_Value(Cond2))))) Opc = Instruction::Or; else continue; - if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) || - !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp())) ) + auto IsGoodCond = [](Value *Cond) { + return match( + Cond, + m_CombineOr(m_Cmp(), m_CombineOr(m_LogicalAnd(m_Value(), m_Value()), + m_LogicalOr(m_Value(), m_Value())))); + }; + if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2)) continue; LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump()); diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp index 12dadf97e02c..97c110afdda4 100644 --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -58,6 +58,7 @@ CGOPT(bool, EnableNoInfsFPMath) CGOPT(bool, EnableNoNaNsFPMath) CGOPT(bool, EnableNoSignedZerosFPMath) CGOPT(bool, EnableNoTrappingFPMath) +CGOPT(bool, EnableAIXExtendedAltivecABI) CGOPT(DenormalMode::DenormalModeKind, DenormalFPMath) CGOPT(DenormalMode::DenormalModeKind, DenormalFP32Math) CGOPT(bool, EnableHonorSignDependentRoundingFPMath) @@ -74,7 +75,12 @@ CGOPT(bool, UseCtors) CGOPT(bool, RelaxELFRelocations) 
CGOPT_EXP(bool, DataSections) CGOPT_EXP(bool, FunctionSections) +CGOPT(bool, IgnoreXCOFFVisibility) +CGOPT(bool, XCOFFTracebackTable) CGOPT(std::string, BBSections) +CGOPT(std::string, StackProtectorGuard) +CGOPT(unsigned, StackProtectorGuardOffset) +CGOPT(std::string, StackProtectorGuardReg) CGOPT(unsigned, TLSSize) CGOPT(bool, EmulatedTLS) CGOPT(bool, UniqueSectionNames) @@ -84,7 +90,10 @@ CGOPT(DebuggerKind, DebuggerTuningOpt) CGOPT(bool, EnableStackSizeSection) CGOPT(bool, EnableAddrsig) CGOPT(bool, EmitCallSiteInfo) +CGOPT(bool, EnableMachineFunctionSplitter) CGOPT(bool, EnableDebugEntryValues) +CGOPT(bool, PseudoProbeForProfiling) +CGOPT(bool, ValueTrackingVariableLocations) CGOPT(bool, ForceDwarfFrameSection) CGOPT(bool, XRayOmitFunctionIndex) @@ -276,6 +285,11 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { cl::init(false)); CGBINDOPT(DontPlaceZerosInBSS); + static cl::opt<bool> EnableAIXExtendedAltivecABI( + "vec-extabi", cl::desc("Enable the AIX Extended Altivec ABI."), + cl::init(false)); + CGBINDOPT(EnableAIXExtendedAltivecABI); + static cl::opt<bool> EnableGuaranteedTailCallOpt( "tailcallopt", cl::desc( @@ -331,13 +345,40 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { cl::init(false)); CGBINDOPT(FunctionSections); + static cl::opt<bool> IgnoreXCOFFVisibility( + "ignore-xcoff-visibility", + cl::desc("Not emit the visibility attribute for asm in AIX OS or give " + "all symbols 'unspecified' visibility in XCOFF object file"), + cl::init(false)); + CGBINDOPT(IgnoreXCOFFVisibility); + + static cl::opt<bool> XCOFFTracebackTable( + "xcoff-traceback-table", cl::desc("Emit the XCOFF traceback table"), + cl::init(true)); + CGBINDOPT(XCOFFTracebackTable); + static cl::opt<std::string> BBSections( - "basicblock-sections", + "basic-block-sections", cl::desc("Emit basic blocks into separate sections"), cl::value_desc("all | <function list (file)> | labels | none"), cl::init("none")); CGBINDOPT(BBSections); + static cl::opt<std::string> 
StackProtectorGuard( + "stack-protector-guard", cl::desc("Stack protector guard mode"), + cl::init("none")); + CGBINDOPT(StackProtectorGuard); + + static cl::opt<std::string> StackProtectorGuardReg( + "stack-protector-guard-reg", cl::desc("Stack protector guard register"), + cl::init("none")); + CGBINDOPT(StackProtectorGuardReg); + + static cl::opt<unsigned> StackProtectorGuardOffset( + "stack-protector-guard-offset", cl::desc("Stack protector guard offset"), + cl::init((unsigned)-1)); + CGBINDOPT(StackProtectorGuardOffset); + static cl::opt<unsigned> TLSSize( "tls-size", cl::desc("Bit size of immediate TLS offsets"), cl::init(0)); CGBINDOPT(TLSSize); @@ -352,7 +393,7 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { CGBINDOPT(UniqueSectionNames); static cl::opt<bool> UniqueBasicBlockSectionNames( - "unique-bb-section-names", + "unique-basic-block-section-names", cl::desc("Give unique names to every basic block section"), cl::init(false)); CGBINDOPT(UniqueBasicBlockSectionNames); @@ -400,6 +441,24 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { cl::init(false)); CGBINDOPT(EnableDebugEntryValues); + static cl::opt<bool> PseudoProbeForProfiling( + "pseudo-probe-for-profiling", cl::desc("Emit pseudo probes for AutoFDO"), + cl::init(false)); + CGBINDOPT(PseudoProbeForProfiling); + + static cl::opt<bool> ValueTrackingVariableLocations( + "experimental-debug-variable-locations", + cl::desc("Use experimental new value-tracking variable locations"), + cl::init(false)); + CGBINDOPT(ValueTrackingVariableLocations); + + static cl::opt<bool> EnableMachineFunctionSplitter( + "split-machine-functions", + cl::desc("Split out cold basic blocks from machine functions based on " + "profile information"), + cl::init(false)); + CGBINDOPT(EnableMachineFunctionSplitter); + static cl::opt<bool> ForceDwarfFrameSection( "force-dwarf-frame-section", cl::desc("Always emit a debug frame section."), cl::init(false)); @@ -436,9 +495,28 @@ 
codegen::getBBSectionsMode(llvm::TargetOptions &Options) { } } +llvm::StackProtectorGuards +codegen::getStackProtectorGuardMode(llvm::TargetOptions &Options) { + if (getStackProtectorGuard() == "tls") + return StackProtectorGuards::TLS; + if (getStackProtectorGuard() == "global") + return StackProtectorGuards::Global; + if (getStackProtectorGuard() != "none") { + ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = + MemoryBuffer::getFile(getStackProtectorGuard()); + if (!MBOrErr) + errs() << "error illegal stack protector guard mode: " + << MBOrErr.getError().message() << "\n"; + else + Options.BBSectionsFuncListBuf = std::move(*MBOrErr); + } + return StackProtectorGuards::None; +} + // Common utility function tightly tied to the options listed here. Initializes // a TargetOptions object with CodeGen flags and returns it. -TargetOptions codegen::InitTargetOptionsFromCodeGenFlags() { +TargetOptions +codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) { TargetOptions Options; Options.AllowFPOpFusion = getFuseFPOps(); Options.UnsafeFPMath = getEnableUnsafeFPMath(); @@ -456,25 +534,35 @@ TargetOptions codegen::InitTargetOptionsFromCodeGenFlags() { getEnableHonorSignDependentRoundingFPMath(); if (getFloatABIForCalls() != FloatABI::Default) Options.FloatABIType = getFloatABIForCalls(); + Options.EnableAIXExtendedAltivecABI = getEnableAIXExtendedAltivecABI(); Options.NoZerosInBSS = getDontPlaceZerosInBSS(); Options.GuaranteedTailCallOpt = getEnableGuaranteedTailCallOpt(); Options.StackAlignmentOverride = getOverrideStackAlignment(); Options.StackSymbolOrdering = getStackSymbolOrdering(); Options.UseInitArray = !getUseCtors(); Options.RelaxELFRelocations = getRelaxELFRelocations(); - Options.DataSections = getDataSections(); + Options.DataSections = + getExplicitDataSections().getValueOr(TheTriple.hasDefaultDataSections()); Options.FunctionSections = getFunctionSections(); + Options.IgnoreXCOFFVisibility = getIgnoreXCOFFVisibility(); + 
Options.XCOFFTracebackTable = getXCOFFTracebackTable(); Options.BBSections = getBBSectionsMode(Options); Options.UniqueSectionNames = getUniqueSectionNames(); Options.UniqueBasicBlockSectionNames = getUniqueBasicBlockSectionNames(); + Options.StackProtectorGuard = getStackProtectorGuardMode(Options); + Options.StackProtectorGuardOffset = getStackProtectorGuardOffset(); + Options.StackProtectorGuardReg = getStackProtectorGuardReg(); Options.TLSSize = getTLSSize(); Options.EmulatedTLS = getEmulatedTLS(); Options.ExplicitEmulatedTLS = EmulatedTLSView->getNumOccurrences() > 0; Options.ExceptionModel = getExceptionModel(); Options.EmitStackSizeSection = getEnableStackSizeSection(); + Options.EnableMachineFunctionSplitter = getEnableMachineFunctionSplitter(); Options.EmitAddrsig = getEnableAddrsig(); Options.EmitCallSiteInfo = getEmitCallSiteInfo(); Options.EnableDebugEntryValues = getEnableDebugEntryValues(); + Options.PseudoProbeForProfiling = getPseudoProbeForProfiling(); + Options.ValueTrackingVariableLocations = getValueTrackingVariableLocations(); Options.ForceDwarfFrameSection = getForceDwarfFrameSection(); Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex(); diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index d1529b08f708..93467e9d09b8 100644 --- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -48,6 +49,8 @@ namespace { private: bool isDead(const MachineInstr *MI) const; + + bool eliminateDeadMI(MachineFunction &MF); }; } char DeadMachineInstructionElim::ID = 0; @@ -107,7 +110,13 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { bool 
DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; + bool AnyChanges = eliminateDeadMI(MF); + while (AnyChanges && eliminateDeadMI(MF)) + ; + return AnyChanges; +} +bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) { bool AnyChanges = false; MRI = &MF.getRegInfo(); TRI = MF.getSubtarget().getRegisterInfo(); @@ -116,22 +125,24 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { // Loop over all instructions in all blocks, from bottom to top, so that it's // more likely that chains of dependent but ultimately dead instructions will // be cleaned up. - for (MachineBasicBlock &MBB : make_range(MF.rbegin(), MF.rend())) { + for (MachineBasicBlock *MBB : post_order(&MF)) { // Start out assuming that reserved registers are live out of this block. LivePhysRegs = MRI->getReservedRegs(); // Add live-ins from successors to LivePhysRegs. Normally, physregs are not // live across blocks, but some targets (x86) can have flags live out of a // block. - for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(), - E = MBB.succ_end(); S != E; S++) + for (MachineBasicBlock::succ_iterator S = MBB->succ_begin(), + E = MBB->succ_end(); + S != E; S++) for (const auto &LI : (*S)->liveins()) LivePhysRegs.set(LI.PhysReg); // Now scan the instructions and delete dead ones, tracking physreg // liveness as we go. - for (MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), - MIE = MBB.rend(); MII != MIE; ) { + for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(), + MIE = MBB->rend(); + MII != MIE;) { MachineInstr *MI = &*MII++; // If the instruction is dead, delete it! 
diff --git a/llvm/lib/CodeGen/DetectDeadLanes.cpp b/llvm/lib/CodeGen/DetectDeadLanes.cpp index 6d5306c1dc0c..03fe5f155291 100644 --- a/llvm/lib/CodeGen/DetectDeadLanes.cpp +++ b/llvm/lib/CodeGen/DetectDeadLanes.cpp @@ -25,11 +25,7 @@ // //===----------------------------------------------------------------------===// -#include <deque> -#include <vector> - #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -40,6 +36,7 @@ #include "llvm/PassRegistry.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include <deque> using namespace llvm; diff --git a/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/llvm/lib/CodeGen/DwarfEHPrepare.cpp index c75c957bff8a..97e0162f35a1 100644 --- a/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/RuntimeLibcalls.h" @@ -44,67 +45,44 @@ STATISTIC(NumResumesLowered, "Number of resume calls lowered"); namespace { - class DwarfEHPrepare : public FunctionPass { - // RewindFunction - _Unwind_Resume or the target equivalent. - FunctionCallee RewindFunction = nullptr; +class DwarfEHPrepare { + CodeGenOpt::Level OptLevel; - CodeGenOpt::Level OptLevel; - DominatorTree *DT = nullptr; - const TargetLowering *TLI = nullptr; - - bool InsertUnwindResumeCalls(Function &Fn); - Value *GetExceptionObject(ResumeInst *RI); - size_t - pruneUnreachableResumes(Function &Fn, - SmallVectorImpl<ResumeInst *> &Resumes, - SmallVectorImpl<LandingPadInst *> &CleanupLPads); + // RewindFunction - _Unwind_Resume or the target equivalent. 
+ FunctionCallee &RewindFunction; - public: - static char ID; // Pass identification, replacement for typeid. - - DwarfEHPrepare(CodeGenOpt::Level OptLevel = CodeGenOpt::Default) - : FunctionPass(ID), OptLevel(OptLevel) {} + Function &F; + const TargetLowering &TLI; + DomTreeUpdater *DTU; + const TargetTransformInfo *TTI; - bool runOnFunction(Function &Fn) override; + /// Return the exception object from the value passed into + /// the 'resume' instruction (typically an aggregate). Clean up any dead + /// instructions, including the 'resume' instruction. + Value *GetExceptionObject(ResumeInst *RI); - bool doFinalization(Module &M) override { - RewindFunction = nullptr; - return false; - } + /// Replace resumes that are not reachable from a cleanup landing pad with + /// unreachable and then simplify those blocks. + size_t + pruneUnreachableResumes(SmallVectorImpl<ResumeInst *> &Resumes, + SmallVectorImpl<LandingPadInst *> &CleanupLPads); - void getAnalysisUsage(AnalysisUsage &AU) const override; + /// Convert the ResumeInsts that are still present + /// into calls to the appropriate _Unwind_Resume function. 
+ bool InsertUnwindResumeCalls(); - StringRef getPassName() const override { - return "Exception handling preparation"; - } - }; +public: + DwarfEHPrepare(CodeGenOpt::Level OptLevel_, FunctionCallee &RewindFunction_, + Function &F_, const TargetLowering &TLI_, DomTreeUpdater *DTU_, + const TargetTransformInfo *TTI_) + : OptLevel(OptLevel_), RewindFunction(RewindFunction_), F(F_), TLI(TLI_), + DTU(DTU_), TTI(TTI_) {} -} // end anonymous namespace + bool run(); +}; -char DwarfEHPrepare::ID = 0; +} // namespace -INITIALIZE_PASS_BEGIN(DwarfEHPrepare, DEBUG_TYPE, - "Prepare DWARF exceptions", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_END(DwarfEHPrepare, DEBUG_TYPE, - "Prepare DWARF exceptions", false, false) - -FunctionPass *llvm::createDwarfEHPass(CodeGenOpt::Level OptLevel) { - return new DwarfEHPrepare(OptLevel); -} - -void DwarfEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetPassConfig>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); - if (OptLevel != CodeGenOpt::None) - AU.addRequired<DominatorTreeWrapperPass>(); -} - -/// GetExceptionObject - Return the exception object from the value passed into -/// the 'resume' instruction (typically an aggregate). Clean up any dead -/// instructions, including the 'resume' instruction. Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { Value *V = RI->getOperand(0); Value *ExnObj = nullptr; @@ -142,16 +120,16 @@ Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { return ExnObj; } -/// Replace resumes that are not reachable from a cleanup landing pad with -/// unreachable and then simplify those blocks. 
size_t DwarfEHPrepare::pruneUnreachableResumes( - Function &Fn, SmallVectorImpl<ResumeInst *> &Resumes, + SmallVectorImpl<ResumeInst *> &Resumes, SmallVectorImpl<LandingPadInst *> &CleanupLPads) { + assert(DTU && "Should have DomTreeUpdater here."); + BitVector ResumeReachable(Resumes.size()); size_t ResumeIndex = 0; for (auto *RI : Resumes) { for (auto *LP : CleanupLPads) { - if (isPotentiallyReachable(LP, RI, nullptr, DT)) { + if (isPotentiallyReachable(LP, RI, nullptr, &DTU->getDomTree())) { ResumeReachable.set(ResumeIndex); break; } @@ -163,9 +141,7 @@ size_t DwarfEHPrepare::pruneUnreachableResumes( if (ResumeReachable.all()) return Resumes.size(); - const TargetTransformInfo &TTI = - getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn); - LLVMContext &Ctx = Fn.getContext(); + LLVMContext &Ctx = F.getContext(); // Otherwise, insert unreachable instructions and call simplifycfg. size_t ResumesLeft = 0; @@ -177,19 +153,17 @@ size_t DwarfEHPrepare::pruneUnreachableResumes( BasicBlock *BB = RI->getParent(); new UnreachableInst(Ctx, RI); RI->eraseFromParent(); - simplifyCFG(BB, TTI); + simplifyCFG(BB, *TTI, RequireAndPreserveDomTree ? DTU : nullptr); } } Resumes.resize(ResumesLeft); return ResumesLeft; } -/// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present -/// into calls to the appropriate _Unwind_Resume function. -bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { - SmallVector<ResumeInst*, 16> Resumes; - SmallVector<LandingPadInst*, 16> CleanupLPads; - for (BasicBlock &BB : Fn) { +bool DwarfEHPrepare::InsertUnwindResumeCalls() { + SmallVector<ResumeInst *, 16> Resumes; + SmallVector<LandingPadInst *, 16> CleanupLPads; + for (BasicBlock &BB : F) { if (auto *RI = dyn_cast<ResumeInst>(BB.getTerminator())) Resumes.push_back(RI); if (auto *LP = BB.getLandingPadInst()) @@ -201,25 +175,25 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { return false; // Check the personality, don't do anything if it's scope-based. 
- EHPersonality Pers = classifyEHPersonality(Fn.getPersonalityFn()); + EHPersonality Pers = classifyEHPersonality(F.getPersonalityFn()); if (isScopedEHPersonality(Pers)) return false; - LLVMContext &Ctx = Fn.getContext(); + LLVMContext &Ctx = F.getContext(); size_t ResumesLeft = Resumes.size(); if (OptLevel != CodeGenOpt::None) - ResumesLeft = pruneUnreachableResumes(Fn, Resumes, CleanupLPads); + ResumesLeft = pruneUnreachableResumes(Resumes, CleanupLPads); if (ResumesLeft == 0) return true; // We pruned them all. // Find the rewind function if we didn't already. if (!RewindFunction) { - FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), - Type::getInt8PtrTy(Ctx), false); - const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); - RewindFunction = Fn.getParent()->getOrInsertFunction(RewindName, FTy); + FunctionType *FTy = + FunctionType::get(Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), false); + const char *RewindName = TLI.getLibcallName(RTLIB::UNWIND_RESUME); + RewindFunction = F.getParent()->getOrInsertFunction(RewindName, FTy); } // Create the basic block where the _Unwind_Resume call will live. @@ -232,22 +206,27 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { // Call the _Unwind_Resume function. CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB); - CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); + CI->setCallingConv(TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME)); // We never expect _Unwind_Resume to return. 
+ CI->setDoesNotReturn(); new UnreachableInst(Ctx, UnwindBB); return true; } - BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn); - PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesLeft, - "exn.obj", UnwindBB); + std::vector<DominatorTree::UpdateType> Updates; + Updates.reserve(Resumes.size()); + + BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &F); + PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesLeft, "exn.obj", + UnwindBB); // Extract the exception object from the ResumeInst and add it to the PHI node // that feeds the _Unwind_Resume call. for (ResumeInst *RI : Resumes) { BasicBlock *Parent = RI->getParent(); BranchInst::Create(UnwindBB, Parent); + Updates.push_back({DominatorTree::Insert, Parent, UnwindBB}); Value *ExnObj = GetExceptionObject(RI); PN->addIncoming(ExnObj, Parent); @@ -257,21 +236,100 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { // Call the function. CallInst *CI = CallInst::Create(RewindFunction, PN, "", UnwindBB); - CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); + CI->setCallingConv(TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME)); // We never expect _Unwind_Resume to return. + CI->setDoesNotReturn(); new UnreachableInst(Ctx, UnwindBB); + + if (DTU && RequireAndPreserveDomTree) + DTU->applyUpdates(Updates); + return true; } -bool DwarfEHPrepare::runOnFunction(Function &Fn) { - const TargetMachine &TM = - getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); - DT = OptLevel != CodeGenOpt::None - ? 
&getAnalysis<DominatorTreeWrapperPass>().getDomTree() : nullptr; - TLI = TM.getSubtargetImpl(Fn)->getTargetLowering(); - bool Changed = InsertUnwindResumeCalls(Fn); - DT = nullptr; - TLI = nullptr; +bool DwarfEHPrepare::run() { + assert(((OptLevel == CodeGenOpt::None || !RequireAndPreserveDomTree) || + (DTU && + DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) && + "Original domtree is invalid?"); + + bool Changed = InsertUnwindResumeCalls(); + + assert(((OptLevel == CodeGenOpt::None || !RequireAndPreserveDomTree) || + (DTU && + DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) && + "Original domtree is invalid?"); + return Changed; } + +static bool prepareDwarfEH(CodeGenOpt::Level OptLevel, + FunctionCallee &RewindFunction, Function &F, + const TargetLowering &TLI, DominatorTree *DT, + const TargetTransformInfo *TTI) { + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + + return DwarfEHPrepare(OptLevel, RewindFunction, F, TLI, DT ? &DTU : nullptr, + TTI) + .run(); +} + +namespace { + +class DwarfEHPrepareLegacyPass : public FunctionPass { + // RewindFunction - _Unwind_Resume or the target equivalent. + FunctionCallee RewindFunction = nullptr; + + CodeGenOpt::Level OptLevel; + +public: + static char ID; // Pass identification, replacement for typeid. 
+ + DwarfEHPrepareLegacyPass(CodeGenOpt::Level OptLevel = CodeGenOpt::Default) + : FunctionPass(ID), OptLevel(OptLevel) {} + + bool runOnFunction(Function &F) override { + const TargetMachine &TM = + getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); + const TargetLowering &TLI = *TM.getSubtargetImpl(F)->getTargetLowering(); + DominatorTree *DT = nullptr; + const TargetTransformInfo *TTI = nullptr; + if (OptLevel != CodeGenOpt::None) { + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + } + return prepareDwarfEH(OptLevel, RewindFunction, F, TLI, DT, TTI); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetPassConfig>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); + if (OptLevel != CodeGenOpt::None) { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); + if (RequireAndPreserveDomTree) + AU.addPreserved<DominatorTreeWrapperPass>(); + } + } + + StringRef getPassName() const override { + return "Exception handling preparation"; + } +}; + +} // end anonymous namespace + +char DwarfEHPrepareLegacyPass::ID = 0; + +INITIALIZE_PASS_BEGIN(DwarfEHPrepareLegacyPass, DEBUG_TYPE, + "Prepare DWARF exceptions", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(DwarfEHPrepareLegacyPass, DEBUG_TYPE, + "Prepare DWARF exceptions", false, false) + +FunctionPass *llvm::createDwarfEHPass(CodeGenOpt::Level OptLevel) { + return new DwarfEHPrepareLegacyPass(OptLevel); +} diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp index 96d4efb856c1..cf7d93d6a33a 100644 --- a/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include 
"llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/CodeGen/Passes.h" @@ -264,7 +265,8 @@ bool SSAIfConv::InstrDependenciesAllowIfConv(MachineInstr *I) { // Remember clobbered regunits. if (MO.isDef() && Register::isPhysicalRegister(Reg)) - for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid(); + ++Units) ClobberedRegUnits.set(*Units); if (!MO.readsReg() || !Register::isVirtualRegister(Reg)) @@ -363,7 +365,7 @@ bool SSAIfConv::findInsertionPoint() { // Keep track of live regunits before the current position. // Only track RegUnits that are also in ClobberedRegUnits. LiveRegUnits.clear(); - SmallVector<unsigned, 8> Reads; + SmallVector<MCRegister, 8> Reads; MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); MachineBasicBlock::iterator I = Head->end(); MachineBasicBlock::iterator B = Head->begin(); @@ -385,11 +387,12 @@ bool SSAIfConv::findInsertionPoint() { continue; // I clobbers Reg, so it isn't live before I. if (MO.isDef()) - for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid(); + ++Units) LiveRegUnits.erase(*Units); // Unless I reads Reg. if (MO.readsReg()) - Reads.push_back(Reg); + Reads.push_back(Reg.asMCReg()); } // Anything read by I is live before I. while (!Reads.empty()) @@ -794,6 +797,17 @@ static unsigned adjCycles(unsigned Cyc, int Delta) { return Cyc + Delta; } +namespace { +/// Helper class to simplify emission of cycle counts into optimization remarks. +struct Cycles { + const char *Key; + unsigned Value; +}; +template <typename Remark> Remark &operator<<(Remark &R, Cycles C) { + return R << ore::NV(C.Key, C.Value) << (C.Value == 1 ? 
" cycle" : " cycles"); +} +} // anonymous namespace + /// Apply cost model and heuristics to the if-conversion in IfConv. /// Return true if the conversion is a good idea. /// @@ -814,6 +828,9 @@ bool EarlyIfConverter::shouldConvertIf() { // Set a somewhat arbitrary limit on the critical path extension we accept. unsigned CritLimit = SchedModel.MispredictPenalty/2; + MachineBasicBlock &MBB = *IfConv.Head; + MachineOptimizationRemarkEmitter MORE(*MBB.getParent(), nullptr); + // If-conversion only makes sense when there is unexploited ILP. Compute the // maximum-ILP resource length of the trace after if-conversion. Compare it // to the shortest critical path. @@ -825,6 +842,17 @@ bool EarlyIfConverter::shouldConvertIf() { << ", minimal critical path " << MinCrit << '\n'); if (ResLength > MinCrit + CritLimit) { LLVM_DEBUG(dbgs() << "Not enough available ILP.\n"); + MORE.emit([&]() { + MachineOptimizationRemarkMissed R(DEBUG_TYPE, "IfConversion", + MBB.findDebugLoc(MBB.back()), &MBB); + R << "did not if-convert branch: the resulting critical path (" + << Cycles{"ResLength", ResLength} + << ") would extend the shorter leg's critical path (" + << Cycles{"MinCrit", MinCrit} << ") by more than the threshold of " + << Cycles{"CritLimit", CritLimit} + << ", which cannot be hidden by available ILP."; + return R; + }); return false; } @@ -839,6 +867,14 @@ bool EarlyIfConverter::shouldConvertIf() { // Look at all the tail phis, and compute the critical path extension caused // by inserting select instructions. MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail); + struct CriticalPathInfo { + unsigned Extra; // Count of extra cycles that the component adds. + unsigned Depth; // Absolute depth of the component in cycles. 
+ }; + CriticalPathInfo Cond{}; + CriticalPathInfo TBlock{}; + CriticalPathInfo FBlock{}; + bool ShouldConvert = true; for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) { SSAIfConv::PHIInfo &PI = IfConv.PHIs[i]; unsigned Slack = TailTrace.getInstrSlack(*PI.PHI); @@ -850,9 +886,11 @@ bool EarlyIfConverter::shouldConvertIf() { if (CondDepth > MaxDepth) { unsigned Extra = CondDepth - MaxDepth; LLVM_DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n"); + if (Extra > Cond.Extra) + Cond = {Extra, CondDepth}; if (Extra > CritLimit) { LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); - return false; + ShouldConvert = false; } } @@ -861,9 +899,11 @@ bool EarlyIfConverter::shouldConvertIf() { if (TDepth > MaxDepth) { unsigned Extra = TDepth - MaxDepth; LLVM_DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n"); + if (Extra > TBlock.Extra) + TBlock = {Extra, TDepth}; if (Extra > CritLimit) { LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); - return false; + ShouldConvert = false; } } @@ -872,13 +912,63 @@ bool EarlyIfConverter::shouldConvertIf() { if (FDepth > MaxDepth) { unsigned Extra = FDepth - MaxDepth; LLVM_DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n"); + if (Extra > FBlock.Extra) + FBlock = {Extra, FDepth}; if (Extra > CritLimit) { LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); - return false; + ShouldConvert = false; } } } - return true; + + // Organize by "short" and "long" legs, since the diagnostics get confusing + // when referring to the "true" and "false" sides of the branch, given that + // those don't always correlate with what the user wrote in source-terms. + const CriticalPathInfo Short = TBlock.Extra > FBlock.Extra ? FBlock : TBlock; + const CriticalPathInfo Long = TBlock.Extra > FBlock.Extra ? 
TBlock : FBlock; + + if (ShouldConvert) { + MORE.emit([&]() { + MachineOptimizationRemark R(DEBUG_TYPE, "IfConversion", + MBB.back().getDebugLoc(), &MBB); + R << "performing if-conversion on branch: the condition adds " + << Cycles{"CondCycles", Cond.Extra} << " to the critical path"; + if (Short.Extra > 0) + R << ", and the short leg adds another " + << Cycles{"ShortCycles", Short.Extra}; + if (Long.Extra > 0) + R << ", and the long leg adds another " + << Cycles{"LongCycles", Long.Extra}; + R << ", each staying under the threshold of " + << Cycles{"CritLimit", CritLimit} << "."; + return R; + }); + } else { + MORE.emit([&]() { + MachineOptimizationRemarkMissed R(DEBUG_TYPE, "IfConversion", + MBB.back().getDebugLoc(), &MBB); + R << "did not if-convert branch: the condition would add " + << Cycles{"CondCycles", Cond.Extra} << " to the critical path"; + if (Cond.Extra > CritLimit) + R << " exceeding the limit of " << Cycles{"CritLimit", CritLimit}; + if (Short.Extra > 0) { + R << ", and the short leg would add another " + << Cycles{"ShortCycles", Short.Extra}; + if (Short.Extra > CritLimit) + R << " exceeding the limit of " << Cycles{"CritLimit", CritLimit}; + } + if (Long.Extra > 0) { + R << ", and the long leg would add another " + << Cycles{"LongCycles", Long.Extra}; + if (Long.Extra > CritLimit) + R << " exceeding the limit of " << Cycles{"CritLimit", CritLimit}; + } + R << "."; + return R; + }); + } + + return ShouldConvert; } /// Attempt repeated if-conversion on MBB, return true if successful. diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp index 45f21c1085dd..a4c9f02dc64d 100644 --- a/llvm/lib/CodeGen/ExpandReductions.cpp +++ b/llvm/lib/CodeGen/ExpandReductions.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // This pass implements IR expansion for reduction intrinsics, allowing targets -// to enable the experimental intrinsics until just before codegen. 
+// to enable the intrinsics until just before codegen. // //===----------------------------------------------------------------------===// @@ -30,49 +30,49 @@ namespace { unsigned getOpcode(Intrinsic::ID ID) { switch (ID) { - case Intrinsic::experimental_vector_reduce_v2_fadd: + case Intrinsic::vector_reduce_fadd: return Instruction::FAdd; - case Intrinsic::experimental_vector_reduce_v2_fmul: + case Intrinsic::vector_reduce_fmul: return Instruction::FMul; - case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::vector_reduce_add: return Instruction::Add; - case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::vector_reduce_mul: return Instruction::Mul; - case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::vector_reduce_and: return Instruction::And; - case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::vector_reduce_or: return Instruction::Or; - case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::vector_reduce_xor: return Instruction::Xor; - case Intrinsic::experimental_vector_reduce_smax: - case Intrinsic::experimental_vector_reduce_smin: - case Intrinsic::experimental_vector_reduce_umax: - case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_umax: + case Intrinsic::vector_reduce_umin: return Instruction::ICmp; - case Intrinsic::experimental_vector_reduce_fmax: - case Intrinsic::experimental_vector_reduce_fmin: + case Intrinsic::vector_reduce_fmax: + case Intrinsic::vector_reduce_fmin: return Instruction::FCmp; default: llvm_unreachable("Unexpected ID"); } } -RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) { +RecurKind getRK(Intrinsic::ID ID) { switch (ID) { - case Intrinsic::experimental_vector_reduce_smax: - return RecurrenceDescriptor::MRK_SIntMax; - case Intrinsic::experimental_vector_reduce_smin: - return RecurrenceDescriptor::MRK_SIntMin; - case 
Intrinsic::experimental_vector_reduce_umax: - return RecurrenceDescriptor::MRK_UIntMax; - case Intrinsic::experimental_vector_reduce_umin: - return RecurrenceDescriptor::MRK_UIntMin; - case Intrinsic::experimental_vector_reduce_fmax: - return RecurrenceDescriptor::MRK_FloatMax; - case Intrinsic::experimental_vector_reduce_fmin: - return RecurrenceDescriptor::MRK_FloatMin; + case Intrinsic::vector_reduce_smax: + return RecurKind::SMax; + case Intrinsic::vector_reduce_smin: + return RecurKind::SMin; + case Intrinsic::vector_reduce_umax: + return RecurKind::UMax; + case Intrinsic::vector_reduce_umin: + return RecurKind::UMin; + case Intrinsic::vector_reduce_fmax: + return RecurKind::FMax; + case Intrinsic::vector_reduce_fmin: + return RecurKind::FMin; default: - return RecurrenceDescriptor::MRK_Invalid; + return RecurKind::None; } } @@ -83,19 +83,19 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { if (auto *II = dyn_cast<IntrinsicInst>(&I)) { switch (II->getIntrinsicID()) { default: break; - case Intrinsic::experimental_vector_reduce_v2_fadd: - case Intrinsic::experimental_vector_reduce_v2_fmul: - case Intrinsic::experimental_vector_reduce_add: - case Intrinsic::experimental_vector_reduce_mul: - case Intrinsic::experimental_vector_reduce_and: - case Intrinsic::experimental_vector_reduce_or: - case Intrinsic::experimental_vector_reduce_xor: - case Intrinsic::experimental_vector_reduce_smax: - case Intrinsic::experimental_vector_reduce_smin: - case Intrinsic::experimental_vector_reduce_umax: - case Intrinsic::experimental_vector_reduce_umin: - case Intrinsic::experimental_vector_reduce_fmax: - case Intrinsic::experimental_vector_reduce_fmin: + case Intrinsic::vector_reduce_fadd: + case Intrinsic::vector_reduce_fmul: + case Intrinsic::vector_reduce_add: + case Intrinsic::vector_reduce_mul: + case Intrinsic::vector_reduce_and: + case Intrinsic::vector_reduce_or: + case Intrinsic::vector_reduce_xor: + case Intrinsic::vector_reduce_smax: + case 
Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_umax: + case Intrinsic::vector_reduce_umin: + case Intrinsic::vector_reduce_fmax: + case Intrinsic::vector_reduce_fmin: if (TTI->shouldExpandReduction(II)) Worklist.push_back(II); @@ -108,7 +108,7 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { FastMathFlags FMF = isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{}; Intrinsic::ID ID = II->getIntrinsicID(); - RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID); + RecurKind RK = getRK(ID); Value *Rdx = nullptr; IRBuilder<> Builder(II); @@ -116,42 +116,54 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { Builder.setFastMathFlags(FMF); switch (ID) { default: llvm_unreachable("Unexpected intrinsic!"); - case Intrinsic::experimental_vector_reduce_v2_fadd: - case Intrinsic::experimental_vector_reduce_v2_fmul: { + case Intrinsic::vector_reduce_fadd: + case Intrinsic::vector_reduce_fmul: { // FMFs must be attached to the call, otherwise it's an ordered reduction // and it can't be handled by generating a shuffle sequence. 
Value *Acc = II->getArgOperand(0); Value *Vec = II->getArgOperand(1); if (!FMF.allowReassoc()) - Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK); + Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), RK); else { if (!isPowerOf2_32( cast<FixedVectorType>(Vec->getType())->getNumElements())) continue; - Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); + Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK); Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID), Acc, Rdx, "bin.rdx"); } break; } - case Intrinsic::experimental_vector_reduce_add: - case Intrinsic::experimental_vector_reduce_mul: - case Intrinsic::experimental_vector_reduce_and: - case Intrinsic::experimental_vector_reduce_or: - case Intrinsic::experimental_vector_reduce_xor: - case Intrinsic::experimental_vector_reduce_smax: - case Intrinsic::experimental_vector_reduce_smin: - case Intrinsic::experimental_vector_reduce_umax: - case Intrinsic::experimental_vector_reduce_umin: - case Intrinsic::experimental_vector_reduce_fmax: - case Intrinsic::experimental_vector_reduce_fmin: { + case Intrinsic::vector_reduce_add: + case Intrinsic::vector_reduce_mul: + case Intrinsic::vector_reduce_and: + case Intrinsic::vector_reduce_or: + case Intrinsic::vector_reduce_xor: + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_umax: + case Intrinsic::vector_reduce_umin: { Value *Vec = II->getArgOperand(0); if (!isPowerOf2_32( cast<FixedVectorType>(Vec->getType())->getNumElements())) continue; - Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); + Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK); + break; + } + case Intrinsic::vector_reduce_fmax: + case Intrinsic::vector_reduce_fmin: { + // FIXME: We only expand 'fast' reductions here because the underlying + // code in createMinMaxOp() assumes that comparisons use 'fast' + // semantics. 
+ Value *Vec = II->getArgOperand(0); + if (!isPowerOf2_32( + cast<FixedVectorType>(Vec->getType())->getNumElements()) || + !FMF.isFast()) + continue; + + Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK); break; } } diff --git a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp index 27319804049d..f8f99b7e87f2 100644 --- a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp +++ b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp @@ -46,6 +46,20 @@ static cl::opt<bool> FixupSCSExtendSlotSize( cl::desc("Allow spill in spill slot of greater size than register size"), cl::Hidden); +static cl::opt<bool> PassGCPtrInCSR( + "fixup-allow-gcptr-in-csr", cl::Hidden, cl::init(false), + cl::desc("Allow passing GC Pointer arguments in callee saved registers")); + +static cl::opt<bool> EnableCopyProp( + "fixup-scs-enable-copy-propagation", cl::Hidden, cl::init(true), + cl::desc("Enable simple copy propagation during register reloading")); + +// This is purely debugging option. +// It may be handy for investigating statepoint spilling issues. +static cl::opt<unsigned> MaxStatepointsWithRegs( + "fixup-max-csr-statepoints", cl::Hidden, + cl::desc("Max number of statepoints allowed to pass GC Ptrs in registers")); + namespace { class FixupStatepointCallerSaved : public MachineFunctionPass { @@ -67,6 +81,7 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; }; + } // End anonymous namespace. char FixupStatepointCallerSaved::ID = 0; @@ -83,7 +98,101 @@ static unsigned getRegisterSize(const TargetRegisterInfo &TRI, Register Reg) { return TRI.getSpillSize(*RC); } +// Try to eliminate redundant copy to register which we're going to +// spill, i.e. try to change: +// X = COPY Y +// SPILL X +// to +// SPILL Y +// If there are no uses of X between copy and STATEPOINT, that COPY +// may be eliminated. +// Reg - register we're about to spill +// RI - On entry points to statepoint. 
+// On successful copy propagation set to new spill point. +// IsKill - set to true if COPY is Kill (there are no uses of Y) +// Returns either found source copy register or original one. +static Register performCopyPropagation(Register Reg, + MachineBasicBlock::iterator &RI, + bool &IsKill, const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI) { + // First check if statepoint itself uses Reg in non-meta operands. + int Idx = RI->findRegisterUseOperandIdx(Reg, false, &TRI); + if (Idx >= 0 && (unsigned)Idx < StatepointOpers(&*RI).getNumDeoptArgsIdx()) { + IsKill = false; + return Reg; + } + + if (!EnableCopyProp) + return Reg; + + MachineBasicBlock *MBB = RI->getParent(); + MachineBasicBlock::reverse_iterator E = MBB->rend(); + MachineInstr *Def = nullptr, *Use = nullptr; + for (auto It = ++(RI.getReverse()); It != E; ++It) { + if (It->readsRegister(Reg, &TRI) && !Use) + Use = &*It; + if (It->modifiesRegister(Reg, &TRI)) { + Def = &*It; + break; + } + } + + if (!Def) + return Reg; + + auto DestSrc = TII.isCopyInstr(*Def); + if (!DestSrc || DestSrc->Destination->getReg() != Reg) + return Reg; + + Register SrcReg = DestSrc->Source->getReg(); + + if (getRegisterSize(TRI, Reg) != getRegisterSize(TRI, SrcReg)) + return Reg; + + LLVM_DEBUG(dbgs() << "spillRegisters: perform copy propagation " + << printReg(Reg, &TRI) << " -> " << printReg(SrcReg, &TRI) + << "\n"); + + // Insert spill immediately after Def + RI = ++MachineBasicBlock::iterator(Def); + IsKill = DestSrc->Source->isKill(); + + // There are no uses of original register between COPY and STATEPOINT. + // There can't be any after STATEPOINT, so we can eliminate Def. + if (!Use) { + LLVM_DEBUG(dbgs() << "spillRegisters: removing dead copy " << *Def); + Def->eraseFromParent(); + } + return SrcReg; +} + namespace { +// Pair {Register, FrameIndex} +using RegSlotPair = std::pair<Register, int>; + +// Keeps track of what reloads were inserted in MBB. 
+class RegReloadCache { + using ReloadSet = SmallSet<RegSlotPair, 8>; + DenseMap<const MachineBasicBlock *, ReloadSet> Reloads; + +public: + RegReloadCache() = default; + + // Record reload of Reg from FI in block MBB + void recordReload(Register Reg, int FI, const MachineBasicBlock *MBB) { + RegSlotPair RSP(Reg, FI); + auto Res = Reloads[MBB].insert(RSP); + (void)Res; + assert(Res.second && "reload already exists"); + } + + // Does basic block MBB contains reload of Reg from FI? + bool hasReload(Register Reg, int FI, const MachineBasicBlock *MBB) { + RegSlotPair RSP(Reg, FI); + return Reloads.count(MBB) && Reloads[MBB].count(RSP); + } +}; + // Cache used frame indexes during statepoint re-write to re-use them in // processing next statepoint instruction. // Two strategies. One is to preserve the size of spill slot while another one @@ -105,24 +214,62 @@ private: // size will be increased. DenseMap<unsigned, FrameIndexesPerSize> Cache; + // Keeps track of slots reserved for the shared landing pad processing. + // Initialized from GlobalIndices for the current EHPad. + SmallSet<int, 8> ReservedSlots; + + // Landing pad can be destination of several statepoints. Every register + // defined by such statepoints must be spilled to the same stack slot. + // This map keeps that information. + DenseMap<const MachineBasicBlock *, SmallVector<RegSlotPair, 8>> + GlobalIndices; + + FrameIndexesPerSize &getCacheBucket(unsigned Size) { + // In FixupSCSExtendSlotSize mode the bucket with 0 index is used + // for all sizes. + return Cache[FixupSCSExtendSlotSize ? 0 : Size]; + } + public: FrameIndexesCache(MachineFrameInfo &MFI, const TargetRegisterInfo &TRI) : MFI(MFI), TRI(TRI) {} // Reset the current state of used frame indexes. After invocation of - // this function all frame indexes are available for allocation. - void reset() { + // this function all frame indexes are available for allocation with + // the exception of slots reserved for landing pad processing (if any). 
+ void reset(const MachineBasicBlock *EHPad) { for (auto &It : Cache) It.second.Index = 0; + + ReservedSlots.clear(); + if (EHPad && GlobalIndices.count(EHPad)) + for (auto &RSP : GlobalIndices[EHPad]) + ReservedSlots.insert(RSP.second); } + // Get frame index to spill the register. - int getFrameIndex(Register Reg) { + int getFrameIndex(Register Reg, MachineBasicBlock *EHPad) { + // Check if slot for Reg is already reserved at EHPad. + auto It = GlobalIndices.find(EHPad); + if (It != GlobalIndices.end()) { + auto &Vec = It->second; + auto Idx = llvm::find_if( + Vec, [Reg](RegSlotPair &RSP) { return Reg == RSP.first; }); + if (Idx != Vec.end()) { + int FI = Idx->second; + LLVM_DEBUG(dbgs() << "Found global FI " << FI << " for register " + << printReg(Reg, &TRI) << " at " + << printMBBReference(*EHPad) << "\n"); + assert(ReservedSlots.count(FI) && "using unreserved slot"); + return FI; + } + } + unsigned Size = getRegisterSize(TRI, Reg); - // In FixupSCSExtendSlotSize mode the bucket with 0 index is used - // for all sizes. - unsigned Bucket = FixupSCSExtendSlotSize ? 0 : Size; - FrameIndexesPerSize &Line = Cache[Bucket]; - if (Line.Index < Line.Slots.size()) { + FrameIndexesPerSize &Line = getCacheBucket(Size); + while (Line.Index < Line.Slots.size()) { int FI = Line.Slots[Line.Index++]; + if (ReservedSlots.count(FI)) + continue; // If all sizes are kept together we probably need to extend the // spill slot size. if (MFI.getObjectSize(FI) < Size) { @@ -136,15 +283,25 @@ public: NumSpillSlotsAllocated++; Line.Slots.push_back(FI); ++Line.Index; + + // Remember assignment {Reg, FI} for EHPad + if (EHPad) { + GlobalIndices[EHPad].push_back(std::make_pair(Reg, FI)); + LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling reg " + << printReg(Reg, &TRI) << " at landing pad " + << printMBBReference(*EHPad) << "\n"); + } + return FI; } + // Sort all registers to spill in descendent order. In the // FixupSCSExtendSlotSize mode it will minimize the total frame size. 
// In non FixupSCSExtendSlotSize mode we can skip this step. void sortRegisters(SmallVectorImpl<Register> &Regs) { if (!FixupSCSExtendSlotSize) return; - llvm::sort(Regs.begin(), Regs.end(), [&](Register &A, Register &B) { + llvm::sort(Regs, [&](Register &A, Register &B) { return getRegisterSize(TRI, A) > getRegisterSize(TRI, B); }); } @@ -156,6 +313,8 @@ private: // statepoint instruction. MachineInstr &MI; MachineFunction &MF; + // If non-null then statepoint is invoke, and this points to the landing pad. + MachineBasicBlock *EHPad; const TargetRegisterInfo &TRI; const TargetInstrInfo &TII; MachineFrameInfo &MFI; @@ -163,36 +322,77 @@ private: const uint32_t *Mask; // Cache of frame indexes used on previous instruction processing. FrameIndexesCache &CacheFI; + bool AllowGCPtrInCSR; // Operands with physical registers requiring spilling. SmallVector<unsigned, 8> OpsToSpill; // Set of register to spill. SmallVector<Register, 8> RegsToSpill; + // Set of registers to reload after statepoint. + SmallVector<Register, 8> RegsToReload; // Map Register to Frame Slot index. DenseMap<Register, int> RegToSlotIdx; public: StatepointState(MachineInstr &MI, const uint32_t *Mask, - FrameIndexesCache &CacheFI) + FrameIndexesCache &CacheFI, bool AllowGCPtrInCSR) : MI(MI), MF(*MI.getMF()), TRI(*MF.getSubtarget().getRegisterInfo()), TII(*MF.getSubtarget().getInstrInfo()), MFI(MF.getFrameInfo()), - Mask(Mask), CacheFI(CacheFI) {} + Mask(Mask), CacheFI(CacheFI), AllowGCPtrInCSR(AllowGCPtrInCSR) { + + // Find statepoint's landing pad, if any. + EHPad = nullptr; + MachineBasicBlock *MBB = MI.getParent(); + // Invoke statepoint must be last one in block. 
+ bool Last = std::none_of(++MI.getIterator(), MBB->end().getInstrIterator(), + [](MachineInstr &I) { + return I.getOpcode() == TargetOpcode::STATEPOINT; + }); + + if (!Last) + return; + + auto IsEHPad = [](MachineBasicBlock *B) { return B->isEHPad(); }; + + assert(llvm::count_if(MBB->successors(), IsEHPad) < 2 && "multiple EHPads"); + + auto It = llvm::find_if(MBB->successors(), IsEHPad); + if (It != MBB->succ_end()) + EHPad = *It; + } + + MachineBasicBlock *getEHPad() const { return EHPad; } + // Return true if register is callee saved. bool isCalleeSaved(Register Reg) { return (Mask[Reg / 32] >> Reg % 32) & 1; } + // Iterates over statepoint meta args to find caller saver registers. // Also cache the size of found registers. // Returns true if caller save registers found. bool findRegistersToSpill() { + SmallSet<Register, 8> GCRegs; + // All GC pointer operands assigned to registers produce new value. + // Since they're tied to their defs, it is enough to collect def registers. + for (const auto &Def : MI.defs()) + GCRegs.insert(Def.getReg()); + SmallSet<Register, 8> VisitedRegs; for (unsigned Idx = StatepointOpers(&MI).getVarIdx(), EndIdx = MI.getNumOperands(); Idx < EndIdx; ++Idx) { MachineOperand &MO = MI.getOperand(Idx); - if (!MO.isReg() || MO.isImplicit()) + // Leave `undef` operands as is, StackMaps will rewrite them + // into a constant. 
+ if (!MO.isReg() || MO.isImplicit() || MO.isUndef()) continue; Register Reg = MO.getReg(); assert(Reg.isPhysical() && "Only physical regs are expected"); - if (isCalleeSaved(Reg)) + + if (isCalleeSaved(Reg) && (AllowGCPtrInCSR || !is_contained(GCRegs, Reg))) continue; + + LLVM_DEBUG(dbgs() << "Will spill " << printReg(Reg, &TRI) << " at index " + << Idx << "\n"); + if (VisitedRegs.insert(Reg).second) RegsToSpill.push_back(Reg); OpsToSpill.push_back(Idx); @@ -200,30 +400,109 @@ public: CacheFI.sortRegisters(RegsToSpill); return !RegsToSpill.empty(); } + // Spill all caller saved registers right before statepoint instruction. // Remember frame index where register is spilled. void spillRegisters() { for (Register Reg : RegsToSpill) { - int FI = CacheFI.getFrameIndex(Reg); + int FI = CacheFI.getFrameIndex(Reg, EHPad); const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(*MI.getParent(), MI, Reg, true /*is_Kill*/, FI, - RC, &TRI); + NumSpilledRegisters++; RegToSlotIdx[Reg] = FI; + + LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, &TRI) << " to FI " << FI + << "\n"); + + // Perform trivial copy propagation + bool IsKill = true; + MachineBasicBlock::iterator InsertBefore(MI); + Reg = performCopyPropagation(Reg, InsertBefore, IsKill, TII, TRI); + + LLVM_DEBUG(dbgs() << "Insert spill before " << *InsertBefore); + TII.storeRegToStackSlot(*MI.getParent(), InsertBefore, Reg, IsKill, FI, + RC, &TRI); + } + } + + void insertReloadBefore(unsigned Reg, MachineBasicBlock::iterator It, + MachineBasicBlock *MBB) { + const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg); + int FI = RegToSlotIdx[Reg]; + if (It != MBB->end()) { + TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI); + return; } + + // To insert reload at the end of MBB, insert it before last instruction + // and then swap them. 
+ assert(!MBB->empty() && "Empty block"); + --It; + TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI); + MachineInstr *Reload = It->getPrevNode(); + int Dummy = 0; + (void)Dummy; + assert(TII.isLoadFromStackSlot(*Reload, Dummy) == Reg); + assert(Dummy == FI); + MBB->remove(Reload); + MBB->insertAfter(It, Reload); } + + // Insert reloads of (relocated) registers spilled in statepoint. + void insertReloads(MachineInstr *NewStatepoint, RegReloadCache &RC) { + MachineBasicBlock *MBB = NewStatepoint->getParent(); + auto InsertPoint = std::next(NewStatepoint->getIterator()); + + for (auto Reg : RegsToReload) { + insertReloadBefore(Reg, InsertPoint, MBB); + LLVM_DEBUG(dbgs() << "Reloading " << printReg(Reg, &TRI) << " from FI " + << RegToSlotIdx[Reg] << " after statepoint\n"); + + if (EHPad && !RC.hasReload(Reg, RegToSlotIdx[Reg], EHPad)) { + RC.recordReload(Reg, RegToSlotIdx[Reg], EHPad); + auto EHPadInsertPoint = EHPad->SkipPHIsLabelsAndDebug(EHPad->begin()); + insertReloadBefore(Reg, EHPadInsertPoint, EHPad); + LLVM_DEBUG(dbgs() << "...also reload at EHPad " + << printMBBReference(*EHPad) << "\n"); + } + } + } + // Re-write statepoint machine instruction to replace caller saved operands // with indirect memory location (frame index). - void rewriteStatepoint() { + MachineInstr *rewriteStatepoint() { MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(MI.getOpcode()), MI.getDebugLoc(), true); MachineInstrBuilder MIB(MF, NewMI); + unsigned NumOps = MI.getNumOperands(); + + // New indices for the remaining defs. 
+ SmallVector<unsigned, 8> NewIndices; + unsigned NumDefs = MI.getNumDefs(); + for (unsigned I = 0; I < NumDefs; ++I) { + MachineOperand &DefMO = MI.getOperand(I); + assert(DefMO.isReg() && DefMO.isDef() && "Expected Reg Def operand"); + Register Reg = DefMO.getReg(); + if (!AllowGCPtrInCSR) { + assert(is_contained(RegsToSpill, Reg)); + RegsToReload.push_back(Reg); + } else { + if (isCalleeSaved(Reg)) { + NewIndices.push_back(NewMI->getNumOperands()); + MIB.addReg(Reg, RegState::Define); + } else { + NewIndices.push_back(NumOps); + RegsToReload.push_back(Reg); + } + } + } + // Add End marker. OpsToSpill.push_back(MI.getNumOperands()); unsigned CurOpIdx = 0; - for (unsigned I = 0; I < MI.getNumOperands(); ++I) { + for (unsigned I = NumDefs; I < MI.getNumOperands(); ++I) { MachineOperand &MO = MI.getOperand(I); if (I == OpsToSpill[CurOpIdx]) { int FI = RegToSlotIdx[MO.getReg()]; @@ -234,23 +513,38 @@ public: MIB.addFrameIndex(FI); MIB.addImm(0); ++CurOpIdx; - } else + } else { MIB.add(MO); + unsigned OldDef; + if (AllowGCPtrInCSR && MI.isRegTiedToDefOperand(I, &OldDef)) { + assert(OldDef < NumDefs); + assert(NewIndices[OldDef] < NumOps); + MIB->tieOperands(NewIndices[OldDef], MIB->getNumOperands() - 1); + } + } } assert(CurOpIdx == (OpsToSpill.size() - 1) && "Not all operands processed"); // Add mem operands. NewMI->setMemRefs(MF, MI.memoperands()); for (auto It : RegToSlotIdx) { + Register R = It.first; int FrameIndex = It.second; auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); - auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, - getRegisterSize(TRI, It.first), - MFI.getObjectAlign(FrameIndex)); + MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad; + if (is_contained(RegsToReload, R)) + Flags |= MachineMemOperand::MOStore; + auto *MMO = + MF.getMachineMemOperand(PtrInfo, Flags, getRegisterSize(TRI, R), + MFI.getObjectAlign(FrameIndex)); NewMI->addMemOperand(MF, MMO); } + // Insert new statepoint and erase old one. 
MI.getParent()->insert(MI, NewMI); + + LLVM_DEBUG(dbgs() << "rewritten statepoint to : " << *NewMI << "\n"); MI.eraseFromParent(); + return NewMI; } }; @@ -259,28 +553,33 @@ private: MachineFunction &MF; const TargetRegisterInfo &TRI; FrameIndexesCache CacheFI; + RegReloadCache ReloadCache; public: StatepointProcessor(MachineFunction &MF) : MF(MF), TRI(*MF.getSubtarget().getRegisterInfo()), CacheFI(MF.getFrameInfo(), TRI) {} - bool process(MachineInstr &MI) { + bool process(MachineInstr &MI, bool AllowGCPtrInCSR) { StatepointOpers SO(&MI); uint64_t Flags = SO.getFlags(); // Do nothing for LiveIn, it supports all registers. if (Flags & (uint64_t)StatepointFlags::DeoptLiveIn) return false; + LLVM_DEBUG(dbgs() << "\nMBB " << MI.getParent()->getNumber() << " " + << MI.getParent()->getName() << " : process statepoint " + << MI); CallingConv::ID CC = SO.getCallingConv(); const uint32_t *Mask = TRI.getCallPreservedMask(MF, CC); - CacheFI.reset(); - StatepointState SS(MI, Mask, CacheFI); + StatepointState SS(MI, Mask, CacheFI, AllowGCPtrInCSR); + CacheFI.reset(SS.getEHPad()); if (!SS.findRegistersToSpill()) return false; SS.spillRegisters(); - SS.rewriteStatepoint(); + auto *NewStatepoint = SS.rewriteStatepoint(); + SS.insertReloads(NewStatepoint, ReloadCache); return true; } }; @@ -305,7 +604,14 @@ bool FixupStatepointCallerSaved::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; StatepointProcessor SPP(MF); - for (MachineInstr *I : Statepoints) - Changed |= SPP.process(*I); + unsigned NumStatepoints = 0; + bool AllowGCPtrInCSR = PassGCPtrInCSR; + for (MachineInstr *I : Statepoints) { + ++NumStatepoints; + if (MaxStatepointsWithRegs.getNumOccurrences() && + NumStatepoints >= MaxStatepointsWithRegs) + AllowGCPtrInCSR = false; + Changed |= SPP.process(*I, AllowGCPtrInCSR); + } return Changed; } diff --git a/llvm/lib/CodeGen/GCRootLowering.cpp b/llvm/lib/CodeGen/GCRootLowering.cpp index c6730aa6b00d..e2ee0c97f94d 100644 --- 
a/llvm/lib/CodeGen/GCRootLowering.cpp +++ b/llvm/lib/CodeGen/GCRootLowering.cpp @@ -296,7 +296,10 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { } else { Register FrameReg; // FIXME: surely GCRoot ought to store the // register that the offset is from? - RI->StackOffset = TFI->getFrameIndexReference(MF, RI->Num, FrameReg); + auto FrameOffset = TFI->getFrameIndexReference(MF, RI->Num, FrameReg); + assert(!FrameOffset.getScalable() && + "Frame offsets with a scalable component are not supported"); + RI->StackOffset = FrameOffset.getFixed(); ++RI; } } diff --git a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp index c4d8777615d2..2fa208fbfaaf 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -59,6 +59,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) { case TargetOpcode::G_UNMERGE_VALUES: case TargetOpcode::G_TRUNC: case TargetOpcode::G_PTR_ADD: + case TargetOpcode::G_EXTRACT: return true; } return false; @@ -366,23 +367,30 @@ GISelInstProfileBuilder::addNodeIDFlag(unsigned Flag) const { return *this; } +const GISelInstProfileBuilder & +GISelInstProfileBuilder::addNodeIDReg(Register Reg) const { + LLT Ty = MRI.getType(Reg); + if (Ty.isValid()) + addNodeIDRegType(Ty); + + if (const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(Reg)) { + if (const auto *RB = RCOrRB.dyn_cast<const RegisterBank *>()) + addNodeIDRegType(RB); + else if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>()) + addNodeIDRegType(RC); + } + return *this; +} + const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand( const MachineOperand &MO) const { if (MO.isReg()) { Register Reg = MO.getReg(); if (!MO.isDef()) addNodeIDRegNum(Reg); - LLT Ty = MRI.getType(Reg); - if (Ty.isValid()) - addNodeIDRegType(Ty); - - if (const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(Reg)) { - if (const auto *RB = RCOrRB.dyn_cast<const RegisterBank *>()) - 
addNodeIDRegType(RB); - else if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>()) - addNodeIDRegType(RC); - } + // Profile the register properties. + addNodeIDReg(Reg); assert(!MO.isImplicit() && "Unhandled case"); } else if (MO.isImm()) ID.AddInteger(MO.getImm()); diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index 88173dc4d302..2c86f06a602d 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" +#include "llvm/IR/DebugInfoMetadata.h" using namespace llvm; @@ -41,8 +42,14 @@ CSEMIRBuilder::getDominatingInstrForID(FoldingSetNodeID &ID, if (MI) { CSEInfo->countOpcodeHit(MI->getOpcode()); auto CurrPos = getInsertPt(); - if (!dominates(MI, CurrPos)) + auto MII = MachineBasicBlock::iterator(MI); + if (MII == CurrPos) { + // Move the insert point ahead of the instruction so any future uses of + // this builder will have the def ready. + setInsertPt(*CurMBB, std::next(MII)); + } else if (!dominates(MI, CurrPos)) { CurMBB->splice(CurrPos, CurMBB, MI); + } return MachineInstrBuilder(getMF(), MI); } return MachineInstrBuilder(); @@ -61,6 +68,11 @@ void CSEMIRBuilder::profileDstOp(const DstOp &Op, case DstOp::DstType::Ty_RC: B.addNodeIDRegType(Op.getRegClass()); break; + case DstOp::DstType::Ty_Reg: { + // Regs can have LLT&(RB|RC). If those exist, profile them as well. 
+ B.addNodeIDReg(Op.getReg()); + break; + } default: B.addNodeIDRegType(Op.getLLTTy(*getMRI())); break; @@ -70,6 +82,9 @@ void CSEMIRBuilder::profileDstOp(const DstOp &Op, void CSEMIRBuilder::profileSrcOp(const SrcOp &Op, GISelInstProfileBuilder &B) const { switch (Op.getSrcOpKind()) { + case SrcOp::SrcType::Ty_Imm: + B.addNodeIDImmediate(static_cast<int64_t>(Op.getImm())); + break; case SrcOp::SrcType::Ty_Predicate: B.addNodeIDImmediate(static_cast<int64_t>(Op.getPredicate())); break; @@ -115,7 +130,7 @@ bool CSEMIRBuilder::checkCopyToDefsPossible(ArrayRef<DstOp> DstOps) { if (DstOps.size() == 1) return true; // always possible to emit copy to just 1 vreg. - return std::all_of(DstOps.begin(), DstOps.end(), [](const DstOp &Op) { + return llvm::all_of(DstOps, [](const DstOp &Op) { DstOp::DstType DT = Op.getDstOpKind(); return DT == DstOp::DstType::Ty_LLT || DT == DstOp::DstType::Ty_RC; }); @@ -131,6 +146,21 @@ CSEMIRBuilder::generateCopiesIfRequired(ArrayRef<DstOp> DstOps, if (Op.getDstOpKind() == DstOp::DstType::Ty_Reg) return buildCopy(Op.getReg(), MIB.getReg(0)); } + + // If we didn't generate a copy then we're re-using an existing node directly + // instead of emitting any code. Merge the debug location we wanted to emit + // into the instruction we're CSE'ing with. Debug locations arent part of the + // profile so we don't need to recompute it. 
+ if (getDebugLoc()) { + GISelChangeObserver *Observer = getState().Observer; + if (Observer) + Observer->changingInstr(*MIB); + MIB->setDebugLoc( + DILocation::getMergedLocation(MIB->getDebugLoc(), getDebugLoc())); + if (Observer) + Observer->changedInstr(*MIB); + } + return MIB; } diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index a7146515c4c9..803e1527a4f0 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -30,6 +30,51 @@ using namespace llvm; void CallLowering::anchor() {} +/// Helper function which updates \p Flags when \p AttrFn returns true. +static void +addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags, + const std::function<bool(Attribute::AttrKind)> &AttrFn) { + if (AttrFn(Attribute::SExt)) + Flags.setSExt(); + if (AttrFn(Attribute::ZExt)) + Flags.setZExt(); + if (AttrFn(Attribute::InReg)) + Flags.setInReg(); + if (AttrFn(Attribute::StructRet)) + Flags.setSRet(); + if (AttrFn(Attribute::Nest)) + Flags.setNest(); + if (AttrFn(Attribute::ByVal)) + Flags.setByVal(); + if (AttrFn(Attribute::Preallocated)) + Flags.setPreallocated(); + if (AttrFn(Attribute::InAlloca)) + Flags.setInAlloca(); + if (AttrFn(Attribute::Returned)) + Flags.setReturned(); + if (AttrFn(Attribute::SwiftSelf)) + Flags.setSwiftSelf(); + if (AttrFn(Attribute::SwiftError)) + Flags.setSwiftError(); +} + +ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call, + unsigned ArgIdx) const { + ISD::ArgFlagsTy Flags; + addFlagsUsingAttrFn(Flags, [&Call, &ArgIdx](Attribute::AttrKind Attr) { + return Call.paramHasAttr(ArgIdx, Attr); + }); + return Flags; +} + +void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags, + const AttributeList &Attrs, + unsigned OpIdx) const { + addFlagsUsingAttrFn(Flags, [&Attrs, &OpIdx](Attribute::AttrKind Attr) { + return Attrs.hasAttribute(OpIdx, Attr); + }); +} + bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, 
const CallBase &CB, ArrayRef<Register> ResRegs, ArrayRef<ArrayRef<Register>> ArgRegs, @@ -37,6 +82,29 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, std::function<unsigned()> GetCalleeReg) const { CallLoweringInfo Info; const DataLayout &DL = MIRBuilder.getDataLayout(); + MachineFunction &MF = MIRBuilder.getMF(); + bool CanBeTailCalled = CB.isTailCall() && + isInTailCallPosition(CB, MF.getTarget()) && + (MF.getFunction() + .getFnAttribute("disable-tail-calls") + .getValueAsString() != "true"); + + CallingConv::ID CallConv = CB.getCallingConv(); + Type *RetTy = CB.getType(); + bool IsVarArg = CB.getFunctionType()->isVarArg(); + + SmallVector<BaseArgInfo, 4> SplitArgs; + getReturnInfo(CallConv, RetTy, CB.getAttributes(), SplitArgs, DL); + Info.CanLowerReturn = canLowerReturn(MF, CallConv, SplitArgs, IsVarArg); + + if (!Info.CanLowerReturn) { + // Callee requires sret demotion. + insertSRetOutgoingArgument(MIRBuilder, CB, Info); + + // The sret demotion isn't compatible with tail-calls, since the sret + // argument points into the caller's stack frame. + CanBeTailCalled = false; + } // First step is to marshall all the function's parameters into the correct // physregs and memory locations. Gather the sequence of argument types that @@ -44,9 +112,15 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, unsigned i = 0; unsigned NumFixedArgs = CB.getFunctionType()->getNumParams(); for (auto &Arg : CB.args()) { - ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{}, + ArgInfo OrigArg{ArgRegs[i], Arg->getType(), getAttributesForArgIdx(CB, i), i < NumFixedArgs}; setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB); + + // If we have an explicit sret argument that is an Instruction, (i.e., it + // might point to function-local memory), we can't meaningfully tail-call. 
+ if (OrigArg.Flags[0].isSRet() && isa<Instruction>(&Arg)) + CanBeTailCalled = false; + Info.OrigArgs.push_back(OrigArg); ++i; } @@ -59,21 +133,16 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, else Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false); - Info.OrigRet = ArgInfo{ResRegs, CB.getType(), ISD::ArgFlagsTy{}}; + Info.OrigRet = ArgInfo{ResRegs, RetTy, ISD::ArgFlagsTy{}}; if (!Info.OrigRet.Ty->isVoidTy()) setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB); - MachineFunction &MF = MIRBuilder.getMF(); Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees); - Info.CallConv = CB.getCallingConv(); + Info.CallConv = CallConv; Info.SwiftErrorVReg = SwiftErrorVReg; Info.IsMustTailCall = CB.isMustTailCall(); - Info.IsTailCall = - CB.isTailCall() && isInTailCallPosition(CB, MF.getTarget()) && - (MF.getFunction() - .getFnAttribute("disable-tail-calls") - .getValueAsString() != "true"); - Info.IsVarArg = CB.getFunctionType()->isVarArg(); + Info.IsTailCall = CanBeTailCalled; + Info.IsVarArg = IsVarArg; return lowerCall(MIRBuilder, Info); } @@ -83,24 +152,7 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, const FuncInfoTy &FuncInfo) const { auto &Flags = Arg.Flags[0]; const AttributeList &Attrs = FuncInfo.getAttributes(); - if (Attrs.hasAttribute(OpIdx, Attribute::ZExt)) - Flags.setZExt(); - if (Attrs.hasAttribute(OpIdx, Attribute::SExt)) - Flags.setSExt(); - if (Attrs.hasAttribute(OpIdx, Attribute::InReg)) - Flags.setInReg(); - if (Attrs.hasAttribute(OpIdx, Attribute::StructRet)) - Flags.setSRet(); - if (Attrs.hasAttribute(OpIdx, Attribute::SwiftSelf)) - Flags.setSwiftSelf(); - if (Attrs.hasAttribute(OpIdx, Attribute::SwiftError)) - Flags.setSwiftError(); - if (Attrs.hasAttribute(OpIdx, Attribute::ByVal)) - Flags.setByVal(); - if (Attrs.hasAttribute(OpIdx, Attribute::Preallocated)) - Flags.setPreallocated(); - if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca)) - 
Flags.setInAlloca(); + addArgFlagsFromAttributes(Flags, Attrs, OpIdx); if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) { Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType(); @@ -117,8 +169,6 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, FrameAlign = Align(getTLI()->getByValTypeAlignment(ElementTy, DL)); Flags.setByValAlign(FrameAlign); } - if (Attrs.hasAttribute(OpIdx, Attribute::Nest)) - Flags.setNest(); Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty)); } @@ -195,98 +245,96 @@ bool CallLowering::handleAssignments(CCState &CCInfo, unsigned NumArgs = Args.size(); for (unsigned i = 0; i != NumArgs; ++i) { EVT CurVT = EVT::getEVT(Args[i].Ty); - if (!CurVT.isSimple() || - Handler.assignArg(i, CurVT.getSimpleVT(), CurVT.getSimpleVT(), - CCValAssign::Full, Args[i], Args[i].Flags[0], - CCInfo)) { - MVT NewVT = TLI->getRegisterTypeForCallingConv( - F.getContext(), F.getCallingConv(), EVT(CurVT)); - - // If we need to split the type over multiple regs, check it's a scenario - // we currently support. - unsigned NumParts = TLI->getNumRegistersForCallingConv( - F.getContext(), F.getCallingConv(), CurVT); - if (NumParts > 1) { - // For now only handle exact splits. - if (NewVT.getSizeInBits() * NumParts != CurVT.getSizeInBits()) - return false; - } + if (CurVT.isSimple() && + !Handler.assignArg(i, CurVT.getSimpleVT(), CurVT.getSimpleVT(), + CCValAssign::Full, Args[i], Args[i].Flags[0], + CCInfo)) + continue; + + MVT NewVT = TLI->getRegisterTypeForCallingConv( + F.getContext(), F.getCallingConv(), EVT(CurVT)); + + // If we need to split the type over multiple regs, check it's a scenario + // we currently support. + unsigned NumParts = TLI->getNumRegistersForCallingConv( + F.getContext(), F.getCallingConv(), CurVT); + + if (NumParts == 1) { + // Try to use the register type if we couldn't assign the VT. 
+ if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], + Args[i].Flags[0], CCInfo)) + return false; + continue; + } - // For incoming arguments (physregs to vregs), we could have values in - // physregs (or memlocs) which we want to extract and copy to vregs. - // During this, we might have to deal with the LLT being split across - // multiple regs, so we have to record this information for later. - // - // If we have outgoing args, then we have the opposite case. We have a - // vreg with an LLT which we want to assign to a physical location, and - // we might have to record that the value has to be split later. - if (Handler.isIncomingArgumentHandler()) { - if (NumParts == 1) { - // Try to use the register type if we couldn't assign the VT. - if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], - Args[i].Flags[0], CCInfo)) - return false; + assert(NumParts > 1); + // For now only handle exact splits. + if (NewVT.getSizeInBits() * NumParts != CurVT.getSizeInBits()) + return false; + + // For incoming arguments (physregs to vregs), we could have values in + // physregs (or memlocs) which we want to extract and copy to vregs. + // During this, we might have to deal with the LLT being split across + // multiple regs, so we have to record this information for later. + // + // If we have outgoing args, then we have the opposite case. We have a + // vreg with an LLT which we want to assign to a physical location, and + // we might have to record that the value has to be split later. + if (Handler.isIncomingArgumentHandler()) { + // We're handling an incoming arg which is split over multiple regs. + // E.g. passing an s128 on AArch64. + ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; + Args[i].OrigRegs.push_back(Args[i].Regs[0]); + Args[i].Regs.clear(); + Args[i].Flags.clear(); + LLT NewLLT = getLLTForMVT(NewVT); + // For each split register, create and assign a vreg that will store + // the incoming component of the larger value. 
These will later be + // merged to form the final vreg. + for (unsigned Part = 0; Part < NumParts; ++Part) { + Register Reg = + MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT); + ISD::ArgFlagsTy Flags = OrigFlags; + if (Part == 0) { + Flags.setSplit(); } else { - // We're handling an incoming arg which is split over multiple regs. - // E.g. passing an s128 on AArch64. - ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; - Args[i].OrigRegs.push_back(Args[i].Regs[0]); - Args[i].Regs.clear(); - Args[i].Flags.clear(); - LLT NewLLT = getLLTForMVT(NewVT); - // For each split register, create and assign a vreg that will store - // the incoming component of the larger value. These will later be - // merged to form the final vreg. - for (unsigned Part = 0; Part < NumParts; ++Part) { - Register Reg = - MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT); - ISD::ArgFlagsTy Flags = OrigFlags; - if (Part == 0) { - Flags.setSplit(); - } else { - Flags.setOrigAlign(Align(1)); - if (Part == NumParts - 1) - Flags.setSplitEnd(); - } - Args[i].Regs.push_back(Reg); - Args[i].Flags.push_back(Flags); - if (Handler.assignArg(i + Part, NewVT, NewVT, CCValAssign::Full, - Args[i], Args[i].Flags[Part], CCInfo)) { - // Still couldn't assign this smaller part type for some reason. - return false; - } - } + Flags.setOrigAlign(Align(1)); + if (Part == NumParts - 1) + Flags.setSplitEnd(); } - } else { - // Handling an outgoing arg that might need to be split. - if (NumParts < 2) - return false; // Don't know how to deal with this type combination. - - // This type is passed via multiple registers in the calling convention. - // We need to extract the individual parts. - Register LargeReg = Args[i].Regs[0]; - LLT SmallTy = LLT::scalar(NewVT.getSizeInBits()); - auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg); - assert(Unmerge->getNumOperands() == NumParts + 1); - ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; - // We're going to replace the regs and flags with the split ones. 
- Args[i].Regs.clear(); - Args[i].Flags.clear(); - for (unsigned PartIdx = 0; PartIdx < NumParts; ++PartIdx) { - ISD::ArgFlagsTy Flags = OrigFlags; - if (PartIdx == 0) { - Flags.setSplit(); - } else { - Flags.setOrigAlign(Align(1)); - if (PartIdx == NumParts - 1) - Flags.setSplitEnd(); - } - Args[i].Regs.push_back(Unmerge.getReg(PartIdx)); - Args[i].Flags.push_back(Flags); - if (Handler.assignArg(i + PartIdx, NewVT, NewVT, CCValAssign::Full, - Args[i], Args[i].Flags[PartIdx], CCInfo)) - return false; + Args[i].Regs.push_back(Reg); + Args[i].Flags.push_back(Flags); + if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], + Args[i].Flags[Part], CCInfo)) { + // Still couldn't assign this smaller part type for some reason. + return false; + } + } + } else { + // This type is passed via multiple registers in the calling convention. + // We need to extract the individual parts. + Register LargeReg = Args[i].Regs[0]; + LLT SmallTy = LLT::scalar(NewVT.getSizeInBits()); + auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg); + assert(Unmerge->getNumOperands() == NumParts + 1); + ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; + // We're going to replace the regs and flags with the split ones. 
+ Args[i].Regs.clear(); + Args[i].Flags.clear(); + for (unsigned PartIdx = 0; PartIdx < NumParts; ++PartIdx) { + ISD::ArgFlagsTy Flags = OrigFlags; + if (PartIdx == 0) { + Flags.setSplit(); + } else { + Flags.setOrigAlign(Align(1)); + if (PartIdx == NumParts - 1) + Flags.setSplitEnd(); } + Args[i].Regs.push_back(Unmerge.getReg(PartIdx)); + Args[i].Flags.push_back(Flags); + if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, + Args[i], Args[i].Flags[PartIdx], CCInfo)) + return false; } } } @@ -313,83 +361,239 @@ bool CallLowering::handleAssignments(CCState &CCInfo, EVT VAVT = VA.getValVT(); const LLT OrigTy = getLLTForType(*Args[i].Ty, DL); - if (VA.isRegLoc()) { - if (Handler.isIncomingArgumentHandler() && VAVT != OrigVT) { - if (VAVT.getSizeInBits() < OrigVT.getSizeInBits()) { - // Expected to be multiple regs for a single incoming arg. - unsigned NumArgRegs = Args[i].Regs.size(); - if (NumArgRegs < 2) - return false; - - assert((j + (NumArgRegs - 1)) < ArgLocs.size() && - "Too many regs for number of args"); - for (unsigned Part = 0; Part < NumArgRegs; ++Part) { - // There should be Regs.size() ArgLocs per argument. - VA = ArgLocs[j + Part]; - Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA); - } - j += NumArgRegs - 1; - // Merge the split registers into the expected larger result vreg - // of the original call. - MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs); - continue; - } - const LLT VATy(VAVT.getSimpleVT()); - Register NewReg = - MIRBuilder.getMRI()->createGenericVirtualRegister(VATy); - Handler.assignValueToReg(NewReg, VA.getLocReg(), VA); - // If it's a vector type, we either need to truncate the elements - // or do an unmerge to get the lower block of elements. - if (VATy.isVector() && - VATy.getNumElements() > OrigVT.getVectorNumElements()) { - // Just handle the case where the VA type is 2 * original type. 
- if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) { - LLVM_DEBUG(dbgs() - << "Incoming promoted vector arg has too many elts"); - return false; - } - auto Unmerge = MIRBuilder.buildUnmerge({OrigTy, OrigTy}, {NewReg}); - MIRBuilder.buildCopy(ArgReg, Unmerge.getReg(0)); - } else { - MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0); + // Expected to be multiple regs for a single incoming arg. + // There should be Regs.size() ArgLocs per argument. + unsigned NumArgRegs = Args[i].Regs.size(); + + assert((j + (NumArgRegs - 1)) < ArgLocs.size() && + "Too many regs for number of args"); + for (unsigned Part = 0; Part < NumArgRegs; ++Part) { + // There should be Regs.size() ArgLocs per argument. + VA = ArgLocs[j + Part]; + if (VA.isMemLoc()) { + // Don't currently support loading/storing a type that needs to be split + // to the stack. Should be easy, just not implemented yet. + if (NumArgRegs > 1) { + LLVM_DEBUG( + dbgs() + << "Load/store a split arg to/from the stack not implemented yet\n"); + return false; } - } else if (!Handler.isIncomingArgumentHandler()) { - assert((j + (Args[i].Regs.size() - 1)) < ArgLocs.size() && - "Too many regs for number of args"); - // This is an outgoing argument that might have been split. - for (unsigned Part = 0; Part < Args[i].Regs.size(); ++Part) { - // There should be Regs.size() ArgLocs per argument. - VA = ArgLocs[j + Part]; - Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA); + + // FIXME: Use correct address space for pointer size + EVT LocVT = VA.getValVT(); + unsigned MemSize = LocVT == MVT::iPTR ? 
DL.getPointerSize() + : LocVT.getStoreSize(); + unsigned Offset = VA.getLocMemOffset(); + MachinePointerInfo MPO; + Register StackAddr = Handler.getStackAddress(MemSize, Offset, MPO); + Handler.assignValueToAddress(Args[i], StackAddr, + MemSize, MPO, VA); + continue; + } + + assert(VA.isRegLoc() && "custom loc should have been handled already"); + + // GlobalISel does not currently work for scalable vectors. + if (OrigVT.getFixedSizeInBits() >= VAVT.getFixedSizeInBits() || + !Handler.isIncomingArgumentHandler()) { + // This is an argument that might have been split. There should be + // Regs.size() ArgLocs per argument. + + // Insert the argument copies. If VAVT < OrigVT, we'll insert the merge + // to the original register after handling all of the parts. + Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA); + continue; + } + + // This ArgLoc covers multiple pieces, so we need to split it. + const LLT VATy(VAVT.getSimpleVT()); + Register NewReg = + MIRBuilder.getMRI()->createGenericVirtualRegister(VATy); + Handler.assignValueToReg(NewReg, VA.getLocReg(), VA); + // If it's a vector type, we either need to truncate the elements + // or do an unmerge to get the lower block of elements. + if (VATy.isVector() && + VATy.getNumElements() > OrigVT.getVectorNumElements()) { + // Just handle the case where the VA type is 2 * original type. + if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) { + LLVM_DEBUG(dbgs() + << "Incoming promoted vector arg has too many elts"); + return false; } - j += Args[i].Regs.size() - 1; + auto Unmerge = MIRBuilder.buildUnmerge({OrigTy, OrigTy}, {NewReg}); + MIRBuilder.buildCopy(ArgReg, Unmerge.getReg(0)); } else { - Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA); + MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0); } - } else if (VA.isMemLoc()) { - // Don't currently support loading/storing a type that needs to be split - // to the stack. Should be easy, just not implemented yet. 
- if (Args[i].Regs.size() > 1) { - LLVM_DEBUG( - dbgs() - << "Load/store a split arg to/from the stack not implemented yet"); - return false; + } + + // Now that all pieces have been handled, re-pack any arguments into any + // wider, original registers. + if (Handler.isIncomingArgumentHandler()) { + if (VAVT.getFixedSizeInBits() < OrigVT.getFixedSizeInBits()) { + assert(NumArgRegs >= 2); + + // Merge the split registers into the expected larger result vreg + // of the original call. + MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs); } - MVT VT = MVT::getVT(Args[i].Ty); - unsigned Size = VT == MVT::iPTR ? DL.getPointerSize() - : alignTo(VT.getSizeInBits(), 8) / 8; - unsigned Offset = VA.getLocMemOffset(); - MachinePointerInfo MPO; - Register StackAddr = Handler.getStackAddress(Size, Offset, MPO); - Handler.assignValueToAddress(Args[i], StackAddr, Size, MPO, VA); - } else { - // FIXME: Support byvals and other weirdness - return false; } + + j += NumArgRegs - 1; } + return true; } +void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy, + ArrayRef<Register> VRegs, Register DemoteReg, + int FI) const { + MachineFunction &MF = MIRBuilder.getMF(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const DataLayout &DL = MF.getDataLayout(); + + SmallVector<EVT, 4> SplitVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0); + + assert(VRegs.size() == SplitVTs.size()); + + unsigned NumValues = SplitVTs.size(); + Align BaseAlign = DL.getPrefTypeAlign(RetTy); + Type *RetPtrTy = RetTy->getPointerTo(DL.getAllocaAddrSpace()); + LLT OffsetLLTy = getLLTForType(*DL.getIntPtrType(RetPtrTy), DL); + + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); + + for (unsigned I = 0; I < NumValues; ++I) { + Register Addr; + MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]); + auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, + 
MRI.getType(VRegs[I]).getSizeInBytes(), + commonAlignment(BaseAlign, Offsets[I])); + MIRBuilder.buildLoad(VRegs[I], Addr, *MMO); + } +} + +void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy, + ArrayRef<Register> VRegs, + Register DemoteReg) const { + MachineFunction &MF = MIRBuilder.getMF(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const DataLayout &DL = MF.getDataLayout(); + + SmallVector<EVT, 4> SplitVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0); + + assert(VRegs.size() == SplitVTs.size()); + + unsigned NumValues = SplitVTs.size(); + Align BaseAlign = DL.getPrefTypeAlign(RetTy); + unsigned AS = DL.getAllocaAddrSpace(); + LLT OffsetLLTy = + getLLTForType(*DL.getIntPtrType(RetTy->getPointerTo(AS)), DL); + + MachinePointerInfo PtrInfo(AS); + + for (unsigned I = 0; I < NumValues; ++I) { + Register Addr; + MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]); + auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, + MRI.getType(VRegs[I]).getSizeInBytes(), + commonAlignment(BaseAlign, Offsets[I])); + MIRBuilder.buildStore(VRegs[I], Addr, *MMO); + } +} + +void CallLowering::insertSRetIncomingArgument( + const Function &F, SmallVectorImpl<ArgInfo> &SplitArgs, Register &DemoteReg, + MachineRegisterInfo &MRI, const DataLayout &DL) const { + unsigned AS = DL.getAllocaAddrSpace(); + DemoteReg = MRI.createGenericVirtualRegister( + LLT::pointer(AS, DL.getPointerSizeInBits(AS))); + + Type *PtrTy = PointerType::get(F.getReturnType(), AS); + + SmallVector<EVT, 1> ValueVTs; + ComputeValueVTs(*TLI, DL, PtrTy, ValueVTs); + + // NOTE: Assume that a pointer won't get split into more than one VT. 
+ assert(ValueVTs.size() == 1); + + ArgInfo DemoteArg(DemoteReg, ValueVTs[0].getTypeForEVT(PtrTy->getContext())); + setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, F); + DemoteArg.Flags[0].setSRet(); + SplitArgs.insert(SplitArgs.begin(), DemoteArg); +} + +void CallLowering::insertSRetOutgoingArgument(MachineIRBuilder &MIRBuilder, + const CallBase &CB, + CallLoweringInfo &Info) const { + const DataLayout &DL = MIRBuilder.getDataLayout(); + Type *RetTy = CB.getType(); + unsigned AS = DL.getAllocaAddrSpace(); + LLT FramePtrTy = LLT::pointer(AS, DL.getPointerSizeInBits(AS)); + + int FI = MIRBuilder.getMF().getFrameInfo().CreateStackObject( + DL.getTypeAllocSize(RetTy), DL.getPrefTypeAlign(RetTy), false); + + Register DemoteReg = MIRBuilder.buildFrameIndex(FramePtrTy, FI).getReg(0); + ArgInfo DemoteArg(DemoteReg, PointerType::get(RetTy, AS)); + setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, CB); + DemoteArg.Flags[0].setSRet(); + + Info.OrigArgs.insert(Info.OrigArgs.begin(), DemoteArg); + Info.DemoteStackIndex = FI; + Info.DemoteRegister = DemoteReg; +} + +bool CallLowering::checkReturn(CCState &CCInfo, + SmallVectorImpl<BaseArgInfo> &Outs, + CCAssignFn *Fn) const { + for (unsigned I = 0, E = Outs.size(); I < E; ++I) { + MVT VT = MVT::getVT(Outs[I].Ty); + if (Fn(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], CCInfo)) + return false; + } + return true; +} + +void CallLowering::getReturnInfo(CallingConv::ID CallConv, Type *RetTy, + AttributeList Attrs, + SmallVectorImpl<BaseArgInfo> &Outs, + const DataLayout &DL) const { + LLVMContext &Context = RetTy->getContext(); + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + + SmallVector<EVT, 4> SplitVTs; + ComputeValueVTs(*TLI, DL, RetTy, SplitVTs); + addArgFlagsFromAttributes(Flags, Attrs, AttributeList::ReturnIndex); + + for (EVT VT : SplitVTs) { + unsigned NumParts = + TLI->getNumRegistersForCallingConv(Context, CallConv, VT); + MVT RegVT = TLI->getRegisterTypeForCallingConv(Context, CallConv, VT); + Type *PartTy = 
EVT(RegVT).getTypeForEVT(Context); + + for (unsigned I = 0; I < NumParts; ++I) { + Outs.emplace_back(PartTy, Flags); + } + } +} + +bool CallLowering::checkReturnTypeForCallConv(MachineFunction &MF) const { + const auto &F = MF.getFunction(); + Type *ReturnType = F.getReturnType(); + CallingConv::ID CallConv = F.getCallingConv(); + + SmallVector<BaseArgInfo, 4> SplitArgs; + getReturnInfo(CallConv, ReturnType, F.getAttributes(), SplitArgs, + MF.getDataLayout()); + return canLowerReturn(MF, CallConv, SplitArgs, F.isVarArg()); +} + bool CallLowering::analyzeArgInfo(CCState &CCState, SmallVectorImpl<ArgInfo> &Args, CCAssignFn &AssignFnFixed, @@ -407,6 +611,58 @@ bool CallLowering::analyzeArgInfo(CCState &CCState, return true; } +bool CallLowering::parametersInCSRMatch( + const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, + const SmallVectorImpl<CCValAssign> &OutLocs, + const SmallVectorImpl<ArgInfo> &OutArgs) const { + for (unsigned i = 0; i < OutLocs.size(); ++i) { + auto &ArgLoc = OutLocs[i]; + // If it's not a register, it's fine. + if (!ArgLoc.isRegLoc()) + continue; + + MCRegister PhysReg = ArgLoc.getLocReg(); + + // Only look at callee-saved registers. + if (MachineOperand::clobbersPhysReg(CallerPreservedMask, PhysReg)) + continue; + + LLVM_DEBUG( + dbgs() + << "... Call has an argument passed in a callee-saved register.\n"); + + // Check if it was copied from. + const ArgInfo &OutInfo = OutArgs[i]; + + if (OutInfo.Regs.size() > 1) { + LLVM_DEBUG( + dbgs() << "... Cannot handle arguments in multiple registers.\n"); + return false; + } + + // Check if we copy the register, walking through copies from virtual + // registers. Note that getDefIgnoringCopies does not ignore copies from + // physical registers. + MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI); + if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) { + LLVM_DEBUG( + dbgs() + << "... 
Parameter was not copied into a VReg, cannot tail call.\n"); + return false; + } + + // Got a copy. Verify that it's the same as the register we want. + Register CopyRHS = RegDef->getOperand(1).getReg(); + if (CopyRHS != PhysReg) { + LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into " + "VReg, cannot tail call.\n"); + return false; + } + } + + return true; +} + bool CallLowering::resultsCompatible(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl<ArgInfo> &InArgs, diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp index b4562a5c6601..f1071d96e5a3 100644 --- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp @@ -130,8 +130,6 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF, WrapperObserver.addObserver(CSEInfo); RAIIDelegateInstaller DelInstall(MF, &WrapperObserver); for (MachineBasicBlock *MBB : post_order(&MF)) { - if (MBB->empty()) - continue; for (auto MII = MBB->rbegin(), MIE = MBB->rend(); MII != MIE;) { MachineInstr *CurMI = &*MII; ++MII; @@ -155,5 +153,8 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF, MFChanged |= Changed; } while (Changed); + assert(!CSEInfo || (!errorToBool(CSEInfo->verify()) && + "CSEInfo is not consistent. 
Likely missing calls to " + "observer on mutations")); return MFChanged; } diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 194961ae3b21..df0219fcfa64 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -16,6 +16,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -34,7 +35,6 @@ static cl::opt<bool> cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner")); - CombinerHelper::CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, GISelKnownBits *KB, MachineDominatorTree *MDT, @@ -44,6 +44,75 @@ CombinerHelper::CombinerHelper(GISelChangeObserver &Observer, (void)this->KB; } +const TargetLowering &CombinerHelper::getTargetLowering() const { + return *Builder.getMF().getSubtarget().getTargetLowering(); +} + +/// \returns The little endian in-memory byte position of byte \p I in a +/// \p ByteWidth bytes wide type. +/// +/// E.g. Given a 4-byte type x, x[0] -> byte 0 +static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) { + assert(I < ByteWidth && "I must be in [0, ByteWidth)"); + return I; +} + +/// \returns The big endian in-memory byte position of byte \p I in a +/// \p ByteWidth bytes wide type. +/// +/// E.g. Given a 4-byte type x, x[0] -> byte 3 +static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) { + assert(I < ByteWidth && "I must be in [0, ByteWidth)"); + return ByteWidth - I - 1; +} + +/// Given a map from byte offsets in memory to indices in a load/store, +/// determine if that map corresponds to a little or big endian byte pattern. 
+/// +/// \param MemOffset2Idx maps memory offsets to address offsets. +/// \param LowestIdx is the lowest index in \p MemOffset2Idx. +/// +/// \returns true if the map corresponds to a big endian byte pattern, false +/// if it corresponds to a little endian byte pattern, and None otherwise. +/// +/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns +/// are as follows: +/// +/// AddrOffset Little endian Big endian +/// 0 0 3 +/// 1 1 2 +/// 2 2 1 +/// 3 3 0 +static Optional<bool> +isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, + int64_t LowestIdx) { + // Need at least two byte positions to decide on endianness. + unsigned Width = MemOffset2Idx.size(); + if (Width < 2) + return None; + bool BigEndian = true, LittleEndian = true; + for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) { + auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset); + if (MemOffsetAndIdx == MemOffset2Idx.end()) + return None; + const int64_t Idx = MemOffsetAndIdx->second - LowestIdx; + assert(Idx >= 0 && "Expected non-negative byte offset?"); + LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset); + BigEndian &= Idx == bigEndianByteAt(Width, MemOffset); + if (!BigEndian && !LittleEndian) + return None; + } + + assert((BigEndian != LittleEndian) && + "Pattern cannot be both big and little endian!"); + return BigEndian; +} + +bool CombinerHelper::isLegalOrBeforeLegalizer( + const LegalityQuery &Query) const { + return !LI || LI->getAction(Query).Action == LegalizeActions::Legal; +} + void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const { Observer.changingAllUsesOfReg(MRI, FromReg); @@ -555,13 +624,13 @@ bool CombinerHelper::isPredecessor(const MachineInstr &DefMI, assert(DefMI.getParent() == UseMI.getParent()); if (&DefMI == &UseMI) return false; - - // Loop through the basic block until we find one of the instructions. 
- MachineBasicBlock::const_iterator I = DefMI.getParent()->begin(); - for (; &*I != &DefMI && &*I != &UseMI; ++I) - return &*I == &DefMI; - - llvm_unreachable("Block must contain instructions"); + const MachineBasicBlock &MBB = *DefMI.getParent(); + auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) { + return &MI == &DefMI || &MI == &UseMI; + }); + if (DefOrUse == MBB.end()) + llvm_unreachable("Block must contain both DefMI and UseMI!"); + return &*DefOrUse == &DefMI; } bool CombinerHelper::dominates(const MachineInstr &DefMI, @@ -576,20 +645,97 @@ bool CombinerHelper::dominates(const MachineInstr &DefMI, return isPredecessor(DefMI, UseMI); } -bool CombinerHelper::matchSextAlreadyExtended(MachineInstr &MI) { +bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); Register SrcReg = MI.getOperand(1).getReg(); - unsigned SrcSignBits = KB->computeNumSignBits(SrcReg); - unsigned NumSextBits = - MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() - - MI.getOperand(2).getImm(); - return SrcSignBits >= NumSextBits; + Register LoadUser = SrcReg; + + if (MRI.getType(SrcReg).isVector()) + return false; + + Register TruncSrc; + if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) + LoadUser = TruncSrc; + + uint64_t SizeInBits = MI.getOperand(2).getImm(); + // If the source is a G_SEXTLOAD from the same bit width, then we don't + // need any extend at all, just a truncate. + if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) { + const auto &MMO = **LoadMI->memoperands_begin(); + // If truncating more than the original extended value, abort. 
+ if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits()) + return false; + if (MMO.getSizeInBits() == SizeInBits) + return true; + } + return false; } -bool CombinerHelper::applySextAlreadyExtended(MachineInstr &MI) { +bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); - MachineIRBuilder MIB(MI); - MIB.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); + Builder.setInstrAndDebugLoc(MI); + Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchSextInRegOfLoad( + MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); + + // Only supports scalars for now. + if (MRI.getType(MI.getOperand(0).getReg()).isVector()) + return false; + + Register SrcReg = MI.getOperand(1).getReg(); + MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI); + if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg())) + return false; + + // If the sign extend extends from a narrower width than the load's width, + // then we can narrow the load width when we combine to a G_SEXTLOAD. + auto &MMO = **LoadDef->memoperands_begin(); + // Don't do this for non-simple loads. + if (MMO.isAtomic() || MMO.isVolatile()) + return false; + + // Avoid widening the load at all. + unsigned NewSizeBits = + std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits()); + + // Don't generate G_SEXTLOADs with a < 1 byte width. + if (NewSizeBits < 8) + return false; + // Don't bother creating a non-power-2 sextload, it will likely be broken up + // anyway for most targets. 
+ if (!isPowerOf2_32(NewSizeBits)) + return false; + MatchInfo = std::make_tuple(LoadDef->getOperand(0).getReg(), NewSizeBits); + return true; +} + +bool CombinerHelper::applySextInRegOfLoad( + MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); + Register LoadReg; + unsigned ScalarSizeBits; + std::tie(LoadReg, ScalarSizeBits) = MatchInfo; + auto *LoadDef = MRI.getVRegDef(LoadReg); + assert(LoadDef && "Expected a load reg"); + + // If we have the following: + // %ld = G_LOAD %ptr, (load 2) + // %ext = G_SEXT_INREG %ld, 8 + // ==> + // %ld = G_SEXTLOAD %ptr (load 1) + + auto &MMO = **LoadDef->memoperands_begin(); + Builder.setInstrAndDebugLoc(MI); + auto &MF = Builder.getMF(); + auto PtrInfo = MMO.getPointerInfo(); + auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8); + Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(), + LoadDef->getOperand(1).getReg(), *NewMMO); MI.eraseFromParent(); return true; } @@ -611,7 +757,7 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, return false; LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI); - + // FIXME: The following use traversal needs a bail out for patholigical cases. for (auto &Use : MRI.use_nodbg_instructions(Base)) { if (Use.getOpcode() != TargetOpcode::G_PTR_ADD) continue; @@ -738,6 +884,11 @@ bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadS Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE) return false; + // For now, no targets actually support these opcodes so don't waste time + // running these unless we're forced to for testing. 
+ if (!ForceLegalIndexing) + return false; + MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base, MatchInfo.Offset); if (!MatchInfo.IsPre && @@ -790,14 +941,12 @@ void CombinerHelper::applyCombineIndexedLoadStore( LLVM_DEBUG(dbgs() << " Combinined to indexed operation"); } -bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) { +bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) { if (MI.getOpcode() != TargetOpcode::G_BR) return false; // Try to match the following: // bb1: - // %c(s32) = G_ICMP pred, %a, %b - // %c1(s1) = G_TRUNC %c(s32) // G_BRCOND %c1, %bb2 // G_BR %bb3 // bb2: @@ -807,7 +956,7 @@ bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) { // The above pattern does not have a fall through to the successor bb2, always // resulting in a branch no matter which path is taken. Here we try to find // and replace that pattern with conditional branch to bb3 and otherwise - // fallthrough to bb2. + // fallthrough to bb2. This is generally better for branch predictors. MachineBasicBlock *MBB = MI.getParent(); MachineBasicBlock::iterator BrIt(MI); @@ -822,40 +971,34 @@ bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) { // Check that the next block is the conditional branch target. 
if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB())) return false; - - MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg()); - if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP || - !MRI.hasOneNonDBGUse(CmpMI->getOperand(0).getReg())) - return false; return true; } -bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) { - if (!matchElideBrByInvertingCond(MI)) - return false; - applyElideBrByInvertingCond(MI); - return true; -} - -void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) { +void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) { MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB(); MachineBasicBlock::iterator BrIt(MI); MachineInstr *BrCond = &*std::prev(BrIt); - MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg()); - CmpInst::Predicate InversePred = CmpInst::getInversePredicate( - (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate()); + Builder.setInstrAndDebugLoc(*BrCond); + LLT Ty = MRI.getType(BrCond->getOperand(0).getReg()); + // FIXME: Does int/fp matter for this? If so, we might need to restrict + // this to i1 only since we might not know for sure what kind of + // compare generated the condition value. + auto True = Builder.buildConstant( + Ty, getICmpTrueVal(getTargetLowering(), false, false)); + auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True); - // Invert the G_ICMP condition. - Observer.changingInstr(*CmpMI); - CmpMI->getOperand(1).setPredicate(InversePred); - Observer.changedInstr(*CmpMI); + auto *FallthroughBB = BrCond->getOperand(1).getMBB(); + Observer.changingInstr(MI); + MI.getOperand(0).setMBB(FallthroughBB); + Observer.changedInstr(MI); - // Change the conditional branch target. + // Change the conditional branch to use the inverted condition and + // new target block. 
Observer.changingInstr(*BrCond); + BrCond->getOperand(0).setReg(Xor.getReg(0)); BrCond->getOperand(1).setMBB(BrTarget); Observer.changedInstr(*BrCond); - MI.eraseFromParent(); } static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { @@ -946,8 +1089,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { unsigned NumBits = Ty.getScalarSizeInBits(); auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); if (!Ty.isVector() && ValVRegAndVal) { - unsigned KnownVal = ValVRegAndVal->Value; - APInt Scalar = APInt(8, KnownVal); + APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8); APInt SplatVal = APInt::getSplat(NumBits, Scalar); return MIB.buildConstant(Ty, SplatVal).getReg(0); } @@ -1299,13 +1441,11 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, } bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { + const unsigned Opc = MI.getOpcode(); // This combine is fairly complex so it's not written with a separate // matcher function. 
- assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); - Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID(); - assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove || - ID == Intrinsic::memset) && - "Expected a memcpy like intrinsic"); + assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE || + Opc == TargetOpcode::G_MEMSET) && "Expected memcpy like instruction"); auto MMOIt = MI.memoperands_begin(); const MachineMemOperand *MemOp = *MMOIt; @@ -1316,11 +1456,11 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { Align DstAlign = MemOp->getBaseAlign(); Align SrcAlign; - Register Dst = MI.getOperand(1).getReg(); - Register Src = MI.getOperand(2).getReg(); - Register Len = MI.getOperand(3).getReg(); + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + Register Len = MI.getOperand(2).getReg(); - if (ID != Intrinsic::memset) { + if (Opc != TargetOpcode::G_MEMSET) { assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI"); MemOp = *(++MMOIt); SrcAlign = MemOp->getBaseAlign(); @@ -1330,7 +1470,7 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI); if (!LenVRegAndVal) return false; // Leave it to the legalizer to lower it to a libcall. 
- unsigned KnownLen = LenVRegAndVal->Value; + unsigned KnownLen = LenVRegAndVal->Value.getZExtValue(); if (KnownLen == 0) { MI.eraseFromParent(); @@ -1340,15 +1480,78 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { if (MaxLen && KnownLen > MaxLen) return false; - if (ID == Intrinsic::memcpy) + if (Opc == TargetOpcode::G_MEMCPY) return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); - if (ID == Intrinsic::memmove) + if (Opc == TargetOpcode::G_MEMMOVE) return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); - if (ID == Intrinsic::memset) + if (Opc == TargetOpcode::G_MEMSET) return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile); return false; } +static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy, + const Register Op, + const MachineRegisterInfo &MRI) { + const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI); + if (!MaybeCst) + return None; + + APFloat V = MaybeCst->getValueAPF(); + switch (Opcode) { + default: + llvm_unreachable("Unexpected opcode!"); + case TargetOpcode::G_FNEG: { + V.changeSign(); + return V; + } + case TargetOpcode::G_FABS: { + V.clearSign(); + return V; + } + case TargetOpcode::G_FPTRUNC: + break; + case TargetOpcode::G_FSQRT: { + bool Unused; + V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused); + V = APFloat(sqrt(V.convertToDouble())); + break; + } + case TargetOpcode::G_FLOG2: { + bool Unused; + V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused); + V = APFloat(log2(V.convertToDouble())); + break; + } + } + // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise, + // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`, + // and `G_FLOG2` reach here. 
+ bool Unused; + V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused); + return V; +} + +bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI, + Optional<APFloat> &Cst) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI); + return Cst.hasValue(); +} + +bool CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI, + Optional<APFloat> &Cst) { + assert(Cst.hasValue() && "Optional is unexpectedly empty!"); + Builder.setInstrAndDebugLoc(MI); + MachineFunction &MF = Builder.getMF(); + auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst); + Register DstReg = MI.getOperand(0).getReg(); + Builder.buildFConstant(DstReg, *FPVal); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) { // We're trying to match the following pattern: @@ -1377,7 +1580,7 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI, return false; // Pass the combined immediate to the apply function. 
- MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value; + MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue(); MatchInfo.Base = Base; return true; } @@ -1395,15 +1598,211 @@ bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI, return true; } +bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI, + RegisterImmPair &MatchInfo) { + // We're trying to match the following pattern with any of + // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions: + // %t1 = SHIFT %base, G_CONSTANT imm1 + // %root = SHIFT %t1, G_CONSTANT imm2 + // --> + // %root = SHIFT %base, G_CONSTANT (imm1 + imm2) + + unsigned Opcode = MI.getOpcode(); + assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || + Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT || + Opcode == TargetOpcode::G_USHLSAT) && + "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT"); + + Register Shl2 = MI.getOperand(1).getReg(); + Register Imm1 = MI.getOperand(2).getReg(); + auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI); + if (!MaybeImmVal) + return false; + + MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2); + if (Shl2Def->getOpcode() != Opcode) + return false; + + Register Base = Shl2Def->getOperand(1).getReg(); + Register Imm2 = Shl2Def->getOperand(2).getReg(); + auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI); + if (!MaybeImm2Val) + return false; + + // Pass the combined immediate to the apply function. + MatchInfo.Imm = + (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue(); + MatchInfo.Reg = Base; + + // There is no simple replacement for a saturating unsigned left shift that + // exceeds the scalar size. 
+ if (Opcode == TargetOpcode::G_USHLSAT && + MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits()) + return false; + + return true; +} + +bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI, + RegisterImmPair &MatchInfo) { + unsigned Opcode = MI.getOpcode(); + assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || + Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT || + Opcode == TargetOpcode::G_USHLSAT) && + "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT"); + + Builder.setInstrAndDebugLoc(MI); + LLT Ty = MRI.getType(MI.getOperand(1).getReg()); + unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits(); + auto Imm = MatchInfo.Imm; + + if (Imm >= ScalarSizeInBits) { + // Any logical shift that exceeds scalar size will produce zero. + if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) { + Builder.buildConstant(MI.getOperand(0), 0); + MI.eraseFromParent(); + return true; + } + // Arithmetic shift and saturating signed left shift have no effect beyond + // scalar size. + Imm = ScalarSizeInBits - 1; + } + + LLT ImmTy = MRI.getType(MI.getOperand(2).getReg()); + Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(MatchInfo.Reg); + MI.getOperand(2).setReg(NewImm); + Observer.changedInstr(MI); + return true; +} + +bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI, + ShiftOfShiftedLogic &MatchInfo) { + // We're trying to match the following pattern with any of + // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination + // with any of G_AND/G_OR/G_XOR logic instructions. 
+ // %t1 = SHIFT %X, G_CONSTANT C0 + // %t2 = LOGIC %t1, %Y + // %root = SHIFT %t2, G_CONSTANT C1 + // --> + // %t3 = SHIFT %X, G_CONSTANT (C0+C1) + // %t4 = SHIFT %Y, G_CONSTANT C1 + // %root = LOGIC %t3, %t4 + unsigned ShiftOpcode = MI.getOpcode(); + assert((ShiftOpcode == TargetOpcode::G_SHL || + ShiftOpcode == TargetOpcode::G_ASHR || + ShiftOpcode == TargetOpcode::G_LSHR || + ShiftOpcode == TargetOpcode::G_USHLSAT || + ShiftOpcode == TargetOpcode::G_SSHLSAT) && + "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT"); + + // Match a one-use bitwise logic op. + Register LogicDest = MI.getOperand(1).getReg(); + if (!MRI.hasOneNonDBGUse(LogicDest)) + return false; + + MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest); + unsigned LogicOpcode = LogicMI->getOpcode(); + if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR && + LogicOpcode != TargetOpcode::G_XOR) + return false; + + // Find a matching one-use shift by constant. + const Register C1 = MI.getOperand(2).getReg(); + auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI); + if (!MaybeImmVal) + return false; + + const uint64_t C1Val = MaybeImmVal->Value.getZExtValue(); + + auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) { + // Shift should match previous one and should be a one-use. + if (MI->getOpcode() != ShiftOpcode || + !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) + return false; + + // Must be a constant. + auto MaybeImmVal = + getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI); + if (!MaybeImmVal) + return false; + + ShiftVal = MaybeImmVal->Value.getSExtValue(); + return true; + }; + + // Logic ops are commutative, so check each operand for a match. 
+ Register LogicMIReg1 = LogicMI->getOperand(1).getReg(); + MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1); + Register LogicMIReg2 = LogicMI->getOperand(2).getReg(); + MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2); + uint64_t C0Val; + + if (matchFirstShift(LogicMIOp1, C0Val)) { + MatchInfo.LogicNonShiftReg = LogicMIReg2; + MatchInfo.Shift2 = LogicMIOp1; + } else if (matchFirstShift(LogicMIOp2, C0Val)) { + MatchInfo.LogicNonShiftReg = LogicMIReg1; + MatchInfo.Shift2 = LogicMIOp2; + } else + return false; + + MatchInfo.ValSum = C0Val + C1Val; + + // The fold is not valid if the sum of the shift values exceeds bitwidth. + if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits()) + return false; + + MatchInfo.Logic = LogicMI; + return true; +} + +bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI, + ShiftOfShiftedLogic &MatchInfo) { + unsigned Opcode = MI.getOpcode(); + assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || + Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT || + Opcode == TargetOpcode::G_SSHLSAT) && + "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT"); + + LLT ShlType = MRI.getType(MI.getOperand(2).getReg()); + LLT DestType = MRI.getType(MI.getOperand(0).getReg()); + Builder.setInstrAndDebugLoc(MI); + + Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0); + + Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg(); + Register Shift1 = + Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0); + + Register Shift2Const = MI.getOperand(2).getReg(); + Register Shift2 = Builder + .buildInstr(Opcode, {DestType}, + {MatchInfo.LogicNonShiftReg, Shift2Const}) + .getReg(0); + + Register Dest = MI.getOperand(0).getReg(); + Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2}); + + // These were one use so it's safe to remove them. 
+ MatchInfo.Shift2->eraseFromParent(); + MatchInfo.Logic->eraseFromParent(); + + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) { assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); auto MaybeImmVal = getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); - if (!MaybeImmVal || !isPowerOf2_64(MaybeImmVal->Value)) + if (!MaybeImmVal) return false; - ShiftVal = Log2_64(MaybeImmVal->Value); - return true; + + ShiftVal = MaybeImmVal->Value.exactLogBase2(); + return (static_cast<int32_t>(ShiftVal) != -1); } bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI, @@ -1419,6 +1818,254 @@ bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI, return true; } +// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source +bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI, + RegisterImmPair &MatchData) { + assert(MI.getOpcode() == TargetOpcode::G_SHL && KB); + + Register LHS = MI.getOperand(1).getReg(); + + Register ExtSrc; + if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) && + !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) && + !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc)))) + return false; + + // TODO: Should handle vector splat. + Register RHS = MI.getOperand(2).getReg(); + auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI); + if (!MaybeShiftAmtVal) + return false; + + if (LI) { + LLT SrcTy = MRI.getType(ExtSrc); + + // We only really care about the legality with the shifted value. We can + // pick any type the constant shift amount, so ask the target what to + // use. Otherwise we would have to guess and hope it is reported as legal. 
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy); + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}})) + return false; + } + + int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue(); + MatchData.Reg = ExtSrc; + MatchData.Imm = ShiftAmt; + + unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes(); + return MinLeadingZeros >= ShiftAmt; +} + +bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI, + const RegisterImmPair &MatchData) { + Register ExtSrcReg = MatchData.Reg; + int64_t ShiftAmtVal = MatchData.Imm; + + LLT ExtSrcTy = MRI.getType(ExtSrcReg); + Builder.setInstrAndDebugLoc(MI); + auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal); + auto NarrowShift = + Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags()); + Builder.buildZExt(MI.getOperand(0), NarrowShift); + MI.eraseFromParent(); + return true; +} + +static Register peekThroughBitcast(Register Reg, + const MachineRegisterInfo &MRI) { + while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg)))) + ; + + return Reg; +} + +bool CombinerHelper::matchCombineUnmergeMergeToPlainValues( + MachineInstr &MI, SmallVectorImpl<Register> &Operands) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + Register SrcReg = + peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI); + + MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg); + if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES && + SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR && + SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS) + return false; + + // Check the source type of the merge. + LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg()); + LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg()); + bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits(); + if (SrcMergeTy != Dst0Ty && !SameSize) + return false; + // They are the same now (modulo a bitcast). 
+ // We can collect all the src registers. + for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx; + ++Idx) + Operands.push_back(SrcInstr->getOperand(Idx).getReg()); + return true; +} + +bool CombinerHelper::applyCombineUnmergeMergeToPlainValues( + MachineInstr &MI, SmallVectorImpl<Register> &Operands) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + assert((MI.getNumOperands() - 1 == Operands.size()) && + "Not enough operands to replace all defs"); + unsigned NumElems = MI.getNumOperands() - 1; + + LLT SrcTy = MRI.getType(Operands[0]); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + bool CanReuseInputDirectly = DstTy == SrcTy; + Builder.setInstrAndDebugLoc(MI); + for (unsigned Idx = 0; Idx < NumElems; ++Idx) { + Register DstReg = MI.getOperand(Idx).getReg(); + Register SrcReg = Operands[Idx]; + if (CanReuseInputDirectly) + replaceRegWith(MRI, DstReg, SrcReg); + else + Builder.buildCast(DstReg, SrcReg); + } + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI, + SmallVectorImpl<APInt> &Csts) { + unsigned SrcIdx = MI.getNumOperands() - 1; + Register SrcReg = MI.getOperand(SrcIdx).getReg(); + MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg); + if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT && + SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT) + return false; + // Break down the big constant in smaller ones. + const MachineOperand &CstVal = SrcInstr->getOperand(1); + APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT + ? CstVal.getCImm()->getValue() + : CstVal.getFPImm()->getValueAPF().bitcastToAPInt(); + + LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg()); + unsigned ShiftAmt = Dst0Ty.getSizeInBits(); + // Unmerge a constant. 
+ for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) { + Csts.emplace_back(Val.trunc(ShiftAmt)); + Val = Val.lshr(ShiftAmt); + } + + return true; +} + +bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI, + SmallVectorImpl<APInt> &Csts) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + assert((MI.getNumOperands() - 1 == Csts.size()) && + "Not enough operands to replace all defs"); + unsigned NumElems = MI.getNumOperands() - 1; + Builder.setInstrAndDebugLoc(MI); + for (unsigned Idx = 0; Idx < NumElems; ++Idx) { + Register DstReg = MI.getOperand(Idx).getReg(); + Builder.buildConstant(DstReg, Csts[Idx]); + } + + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + // Check that all the lanes are dead except the first one. + for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) { + if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg())) + return false; + } + return true; +} + +bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { + Builder.setInstrAndDebugLoc(MI); + Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg(); + // Truncating a vector is going to truncate every single lane, + // whereas we want the full lowbits. + // Do the operation on a scalar instead. 
+ LLT SrcTy = MRI.getType(SrcReg); + if (SrcTy.isVector()) + SrcReg = + Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0); + + Register Dst0Reg = MI.getOperand(0).getReg(); + LLT Dst0Ty = MRI.getType(Dst0Reg); + if (Dst0Ty.isVector()) { + auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg); + Builder.buildCast(Dst0Reg, MIB); + } else + Builder.buildTrunc(Dst0Reg, SrcReg); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + Register Dst0Reg = MI.getOperand(0).getReg(); + LLT Dst0Ty = MRI.getType(Dst0Reg); + // G_ZEXT on vector applies to each lane, so it will + // affect all destinations. Therefore we won't be able + // to simplify the unmerge to just the first definition. + if (Dst0Ty.isVector()) + return false; + Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + if (SrcTy.isVector()) + return false; + + Register ZExtSrcReg; + if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg)))) + return false; + + // Finally we can replace the first definition with + // a zext of the source if the definition is big enough to hold + // all of ZExtSrc bits. 
+ LLT ZExtSrcTy = MRI.getType(ZExtSrcReg); + return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits(); +} + +bool CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + + Register Dst0Reg = MI.getOperand(0).getReg(); + + MachineInstr *ZExtInstr = + MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg()); + assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT && + "Expecting a G_ZEXT"); + + Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg(); + LLT Dst0Ty = MRI.getType(Dst0Reg); + LLT ZExtSrcTy = MRI.getType(ZExtSrcReg); + + Builder.setInstrAndDebugLoc(MI); + + if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) { + Builder.buildZExt(Dst0Reg, ZExtSrcReg); + } else { + assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() && + "ZExt src doesn't fit in destination"); + replaceRegWith(MRI, Dst0Reg, ZExtSrcReg); + } + + Register ZeroReg; + for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) { + if (!ZeroReg) + ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0); + replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg); + } + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal) { @@ -1440,7 +2087,7 @@ bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, if (!MaybeImmVal) return false; - ShiftVal = MaybeImmVal->Value; + ShiftVal = MaybeImmVal->Value.getSExtValue(); return ShiftVal >= Size / 2 && ShiftVal < Size; } @@ -1529,6 +2176,296 @@ bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI, return false; } +bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR"); + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + Register SrcReg = MI.getOperand(1).getReg(); + return 
mi_match(SrcReg, MRI, + m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg)))); +} + +bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR"); + Register DstReg = MI.getOperand(0).getReg(); + Builder.setInstr(MI); + Builder.buildCopy(DstReg, Reg); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT"); + Register SrcReg = MI.getOperand(1).getReg(); + return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg))); +} + +bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT"); + Register DstReg = MI.getOperand(0).getReg(); + Builder.setInstr(MI); + Builder.buildZExtOrTrunc(DstReg, Reg); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineAddP2IToPtrAdd( + MachineInstr &MI, std::pair<Register, bool> &PtrReg) { + assert(MI.getOpcode() == TargetOpcode::G_ADD); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + LLT IntTy = MRI.getType(LHS); + + // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the + // instruction. + PtrReg.second = false; + for (Register SrcReg : {LHS, RHS}) { + if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) { + // Don't handle cases where the integer is implicitly converted to the + // pointer width. 
+ LLT PtrTy = MRI.getType(PtrReg.first); + if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits()) + return true; + } + + PtrReg.second = true; + } + + return false; +} + +bool CombinerHelper::applyCombineAddP2IToPtrAdd( + MachineInstr &MI, std::pair<Register, bool> &PtrReg) { + Register Dst = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + + const bool DoCommute = PtrReg.second; + if (DoCommute) + std::swap(LHS, RHS); + LHS = PtrReg.first; + + LLT PtrTy = MRI.getType(LHS); + + Builder.setInstrAndDebugLoc(MI); + auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS); + Builder.buildPtrToInt(Dst, PtrAdd); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI, + int64_t &NewCst) { + assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD"); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + MachineRegisterInfo &MRI = Builder.getMF().getRegInfo(); + + if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) { + int64_t Cst; + if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) { + NewCst = Cst + *RHSCst; + return true; + } + } + + return false; +} + +bool CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI, + int64_t &NewCst) { + assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD"); + Register Dst = MI.getOperand(0).getReg(); + + Builder.setInstrAndDebugLoc(MI); + Builder.buildConstant(Dst, NewCst); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT"); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + return mi_match(SrcReg, MRI, + m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))); +} + +bool 
CombinerHelper::applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT"); + Register DstReg = MI.getOperand(0).getReg(); + MI.eraseFromParent(); + replaceRegWith(MRI, DstReg, Reg); + return true; +} + +bool CombinerHelper::matchCombineExtOfExt( + MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { + assert((MI.getOpcode() == TargetOpcode::G_ANYEXT || + MI.getOpcode() == TargetOpcode::G_SEXT || + MI.getOpcode() == TargetOpcode::G_ZEXT) && + "Expected a G_[ASZ]EXT"); + Register SrcReg = MI.getOperand(1).getReg(); + MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); + // Match exts with the same opcode, anyext([sz]ext) and sext(zext). + unsigned Opc = MI.getOpcode(); + unsigned SrcOpc = SrcMI->getOpcode(); + if (Opc == SrcOpc || + (Opc == TargetOpcode::G_ANYEXT && + (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) || + (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) { + MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc); + return true; + } + return false; +} + +bool CombinerHelper::applyCombineExtOfExt( + MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { + assert((MI.getOpcode() == TargetOpcode::G_ANYEXT || + MI.getOpcode() == TargetOpcode::G_SEXT || + MI.getOpcode() == TargetOpcode::G_ZEXT) && + "Expected a G_[ASZ]EXT"); + + Register Reg = std::get<0>(MatchInfo); + unsigned SrcExtOp = std::get<1>(MatchInfo); + + // Combine exts with the same opcode. 
+ if (MI.getOpcode() == SrcExtOp) { + Observer.changingInstr(MI); + MI.getOperand(1).setReg(Reg); + Observer.changedInstr(MI); + return true; + } + + // Combine: + // - anyext([sz]ext x) to [sz]ext x + // - sext(zext x) to zext x + if (MI.getOpcode() == TargetOpcode::G_ANYEXT || + (MI.getOpcode() == TargetOpcode::G_SEXT && + SrcExtOp == TargetOpcode::G_ZEXT)) { + Register DstReg = MI.getOperand(0).getReg(); + Builder.setInstrAndDebugLoc(MI); + Builder.buildInstr(SrcExtOp, {DstReg}, {Reg}); + MI.eraseFromParent(); + return true; + } + + return false; +} + +bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + + Builder.setInstrAndDebugLoc(MI); + Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg, + MI.getFlags()); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG"); + Register SrcReg = MI.getOperand(1).getReg(); + return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg))); +} + +bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) { + assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS"); + Src = MI.getOperand(1).getReg(); + Register AbsSrc; + return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc))); +} + +bool CombinerHelper::applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) { + assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS"); + Register Dst = MI.getOperand(0).getReg(); + MI.eraseFromParent(); + replaceRegWith(MRI, Dst, Src); + return true; +} + +bool CombinerHelper::matchCombineTruncOfExt( + MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); + Register SrcReg 
= MI.getOperand(1).getReg(); + MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); + unsigned SrcOpc = SrcMI->getOpcode(); + if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT || + SrcOpc == TargetOpcode::G_ZEXT) { + MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc); + return true; + } + return false; +} + +bool CombinerHelper::applyCombineTruncOfExt( + MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); + Register SrcReg = MatchInfo.first; + unsigned SrcExtOp = MatchInfo.second; + Register DstReg = MI.getOperand(0).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + LLT DstTy = MRI.getType(DstReg); + if (SrcTy == DstTy) { + MI.eraseFromParent(); + replaceRegWith(MRI, DstReg, SrcReg); + return true; + } + Builder.setInstrAndDebugLoc(MI); + if (SrcTy.getSizeInBits() < DstTy.getSizeInBits()) + Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg}); + else + Builder.buildTrunc(DstReg, SrcReg); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineTruncOfShl( + MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + Register ShiftSrc; + Register ShiftAmt; + + if (MRI.hasOneNonDBGUse(SrcReg) && + mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) && + isLegalOrBeforeLegalizer( + {TargetOpcode::G_SHL, + {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) { + KnownBits Known = KB->getKnownBits(ShiftAmt); + unsigned Size = DstTy.getSizeInBits(); + if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) { + MatchInfo = std::make_pair(ShiftSrc, ShiftAmt); + return true; + } + } + return false; +} + +bool CombinerHelper::applyCombineTruncOfShl( + MachineInstr &MI, std::pair<Register, Register> 
&MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); + + Register ShiftSrc = MatchInfo.first; + Register ShiftAmt = MatchInfo.second; + Builder.setInstrAndDebugLoc(MI); + auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc); + Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags()); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) { return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) { return MO.isReg() && @@ -1555,6 +2492,22 @@ bool CombinerHelper::matchUndefStore(MachineInstr &MI) { MRI); } +bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_SELECT); + return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(), + MRI); +} + +bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) { + assert(MI.getOpcode() == TargetOpcode::G_SELECT); + if (auto MaybeCstCmp = + getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) { + OpIdx = MaybeCstCmp->Value.isNullValue() ? 
3 : 2; + return true; + } + return false; +} + bool CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); return true; @@ -1651,6 +2604,16 @@ bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI, return true; } +bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI, + Register Replacement) { + assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?"); + Register OldReg = MI.getOperand(0).getReg(); + assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?"); + MI.eraseFromParent(); + replaceRegWith(MRI, OldReg, Replacement); + return true; +} + bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_SELECT); // Match (cond ? x : x) @@ -1671,6 +2634,18 @@ bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) { MRI); } +bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) { + MachineOperand &MO = MI.getOperand(OpIdx); + return MO.isReg() && + getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI); +} + +bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, + unsigned OpIdx) { + MachineOperand &MO = MI.getOperand(OpIdx); + return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB); +} + bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) { assert(MI.getNumDefs() == 1 && "Expected only one def?"); Builder.setInstr(MI); @@ -1706,9 +2681,7 @@ bool CombinerHelper::matchSimplifyAddToSub( // ((0-A) + B) -> B - A // (A + (0-B)) -> A - B auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) { - int64_t Cst; - if (!mi_match(MaybeSub, MRI, m_GSub(m_ICst(Cst), m_Reg(NewRHS))) || - Cst != 0) + if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS)))) return false; NewLHS = MaybeNewLHS; return true; @@ -1717,6 +2690,67 @@ bool CombinerHelper::matchSimplifyAddToSub( return CheckFold(LHS, RHS) || CheckFold(RHS, LHS); } +bool CombinerHelper::matchCombineInsertVecElts( + MachineInstr 
&MI, SmallVectorImpl<Register> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT && + "Invalid opcode"); + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?"); + unsigned NumElts = DstTy.getNumElements(); + // If this MI is part of a sequence of insert_vec_elts, then + // don't do the combine in the middle of the sequence. + if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() == + TargetOpcode::G_INSERT_VECTOR_ELT) + return false; + MachineInstr *CurrInst = &MI; + MachineInstr *TmpInst; + int64_t IntImm; + Register TmpReg; + MatchInfo.resize(NumElts); + while (mi_match( + CurrInst->getOperand(0).getReg(), MRI, + m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) { + if (IntImm >= NumElts) + return false; + if (!MatchInfo[IntImm]) + MatchInfo[IntImm] = TmpReg; + CurrInst = TmpInst; + } + // Variable index. + if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT) + return false; + if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) { + for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) { + if (!MatchInfo[I - 1].isValid()) + MatchInfo[I - 1] = TmpInst->getOperand(I).getReg(); + } + return true; + } + // If we didn't end in a G_IMPLICIT_DEF, bail out. 
+ return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF; +} + +bool CombinerHelper::applyCombineInsertVecElts( + MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) { + Builder.setInstr(MI); + Register UndefReg; + auto GetUndef = [&]() { + if (UndefReg) + return UndefReg; + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0); + return UndefReg; + }; + for (unsigned I = 0; I < MatchInfo.size(); ++I) { + if (!MatchInfo[I]) + MatchInfo[I] = GetUndef(); + } + Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::applySimplifyAddToSub( MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) { Builder.setInstr(MI); @@ -1727,6 +2761,812 @@ bool CombinerHelper::applySimplifyAddToSub( return true; } +bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands( + MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) { + // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ... + // + // Creates the new hand + logic instruction (but does not insert them.) + // + // On success, MatchInfo is populated with the new instructions. These are + // inserted in applyHoistLogicOpWithSameOpcodeHands. + unsigned LogicOpcode = MI.getOpcode(); + assert(LogicOpcode == TargetOpcode::G_AND || + LogicOpcode == TargetOpcode::G_OR || + LogicOpcode == TargetOpcode::G_XOR); + MachineIRBuilder MIB(MI); + Register Dst = MI.getOperand(0).getReg(); + Register LHSReg = MI.getOperand(1).getReg(); + Register RHSReg = MI.getOperand(2).getReg(); + + // Don't recompute anything. + if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg)) + return false; + + // Make sure we have (hand x, ...), (hand y, ...) 
+ MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI); + MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI); + if (!LeftHandInst || !RightHandInst) + return false; + unsigned HandOpcode = LeftHandInst->getOpcode(); + if (HandOpcode != RightHandInst->getOpcode()) + return false; + if (!LeftHandInst->getOperand(1).isReg() || + !RightHandInst->getOperand(1).isReg()) + return false; + + // Make sure the types match up, and if we're doing this post-legalization, + // we end up with legal types. + Register X = LeftHandInst->getOperand(1).getReg(); + Register Y = RightHandInst->getOperand(1).getReg(); + LLT XTy = MRI.getType(X); + LLT YTy = MRI.getType(Y); + if (XTy != YTy) + return false; + if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}})) + return false; + + // Optional extra source register. + Register ExtraHandOpSrcReg; + switch (HandOpcode) { + default: + return false; + case TargetOpcode::G_ANYEXT: + case TargetOpcode::G_SEXT: + case TargetOpcode::G_ZEXT: { + // Match: logic (ext X), (ext Y) --> ext (logic X, Y) + break; + } + case TargetOpcode::G_AND: + case TargetOpcode::G_ASHR: + case TargetOpcode::G_LSHR: + case TargetOpcode::G_SHL: { + // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z + MachineOperand &ZOp = LeftHandInst->getOperand(2); + if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2))) + return false; + ExtraHandOpSrcReg = ZOp.getReg(); + break; + } + } + + // Record the steps to build the new instructions. 
+ // + // Steps to build (logic x, y) + auto NewLogicDst = MRI.createGenericVirtualRegister(XTy); + OperandBuildSteps LogicBuildSteps = { + [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); }, + [=](MachineInstrBuilder &MIB) { MIB.addReg(X); }, + [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }}; + InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps); + + // Steps to build hand (logic x, y), ...z + OperandBuildSteps HandBuildSteps = { + [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); }, + [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }}; + if (ExtraHandOpSrcReg.isValid()) + HandBuildSteps.push_back( + [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); }); + InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps); + + MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps}); + return true; +} + +bool CombinerHelper::applyBuildInstructionSteps( + MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) { + assert(MatchInfo.InstrsToBuild.size() && + "Expected at least one instr to build?"); + Builder.setInstr(MI); + for (auto &InstrToBuild : MatchInfo.InstrsToBuild) { + assert(InstrToBuild.Opcode && "Expected a valid opcode?"); + assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?"); + MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode); + for (auto &OperandFn : InstrToBuild.OperandFns) + OperandFn(Instr); + } + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchAshrShlToSextInreg( + MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_ASHR); + int64_t ShlCst, AshrCst; + Register Src; + // FIXME: detect splat constant vectors. 
+ if (!mi_match(MI.getOperand(0).getReg(), MRI, + m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst)))) + return false; + if (ShlCst != AshrCst) + return false; + if (!isLegalOrBeforeLegalizer( + {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}})) + return false; + MatchInfo = std::make_tuple(Src, ShlCst); + return true; +} +bool CombinerHelper::applyAshShlToSextInreg( + MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_ASHR); + Register Src; + int64_t ShiftAmt; + std::tie(Src, ShiftAmt) = MatchInfo; + unsigned Size = MRI.getType(Src).getScalarSizeInBits(); + Builder.setInstrAndDebugLoc(MI); + Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchRedundantAnd(MachineInstr &MI, + Register &Replacement) { + // Given + // + // %y:_(sN) = G_SOMETHING + // %x:_(sN) = G_SOMETHING + // %res:_(sN) = G_AND %x, %y + // + // Eliminate the G_AND when it is known that x & y == x or x & y == y. + // + // Patterns like this can appear as a result of legalization. E.g. + // + // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y + // %one:_(s32) = G_CONSTANT i32 1 + // %and:_(s32) = G_AND %cmp, %one + // + // In this case, G_ICMP only produces a single bit, so x & 1 == x. + assert(MI.getOpcode() == TargetOpcode::G_AND); + if (!KB) + return false; + + Register AndDst = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(AndDst); + + // FIXME: This should be removed once GISelKnownBits supports vectors. + if (DstTy.isVector()) + return false; + + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + KnownBits LHSBits = KB->getKnownBits(LHS); + KnownBits RHSBits = KB->getKnownBits(RHS); + + // Check that x & Mask == x. + // x & 1 == x, always + // x & 0 == x, only if x is also 0 + // Meaning Mask has no effect if every bit is either one in Mask or zero in x. 
+ // + // Check if we can replace AndDst with the LHS of the G_AND + if (canReplaceReg(AndDst, LHS, MRI) && + (LHSBits.Zero | RHSBits.One).isAllOnesValue()) { + Replacement = LHS; + return true; + } + + // Check if we can replace AndDst with the RHS of the G_AND + if (canReplaceReg(AndDst, RHS, MRI) && + (LHSBits.One | RHSBits.Zero).isAllOnesValue()) { + Replacement = RHS; + return true; + } + + return false; +} + +bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) { + // Given + // + // %y:_(sN) = G_SOMETHING + // %x:_(sN) = G_SOMETHING + // %res:_(sN) = G_OR %x, %y + // + // Eliminate the G_OR when it is known that x | y == x or x | y == y. + assert(MI.getOpcode() == TargetOpcode::G_OR); + if (!KB) + return false; + + Register OrDst = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(OrDst); + + // FIXME: This should be removed once GISelKnownBits supports vectors. + if (DstTy.isVector()) + return false; + + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + KnownBits LHSBits = KB->getKnownBits(LHS); + KnownBits RHSBits = KB->getKnownBits(RHS); + + // Check that x | Mask == x. + // x | 0 == x, always + // x | 1 == x, only if x is also 1 + // Meaning Mask has no effect if every bit is either zero in Mask or one in x. + // + // Check if we can replace OrDst with the LHS of the G_OR + if (canReplaceReg(OrDst, LHS, MRI) && + (LHSBits.One | RHSBits.Zero).isAllOnesValue()) { + Replacement = LHS; + return true; + } + + // Check if we can replace OrDst with the RHS of the G_OR + if (canReplaceReg(OrDst, RHS, MRI) && + (LHSBits.Zero | RHSBits.One).isAllOnesValue()) { + Replacement = RHS; + return true; + } + + return false; +} + +bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) { + // If the input is already sign extended, just drop the extension. 
+  Register Src = MI.getOperand(1).getReg();
+  unsigned ExtBits = MI.getOperand(2).getImm();
+  unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
+  return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
+}
+
+static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
+                             int64_t Cst, bool IsVector, bool IsFP) {
+  // For i1, Cst will always be -1 regardless of boolean contents.
+  return (ScalarSizeBits == 1 && Cst == -1) ||
+         isConstTrueVal(TLI, Cst, IsVector, IsFP);
+}
+
+bool CombinerHelper::matchNotCmp(MachineInstr &MI,
+                                 SmallVectorImpl<Register> &RegsToNegate) {
+  assert(MI.getOpcode() == TargetOpcode::G_XOR);
+  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+  const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
+  Register XorSrc;
+  Register CstReg;
+  // We match xor(src, true) here.
+  if (!mi_match(MI.getOperand(0).getReg(), MRI,
+                m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
+    return false;
+
+  if (!MRI.hasOneNonDBGUse(XorSrc))
+    return false;
+
+  // Check that XorSrc is the root of a tree of comparisons combined with ANDs
+  // and ORs. The suffix of RegsToNegate starting from index I is used as a work
+  // list of tree nodes to visit.
+  RegsToNegate.push_back(XorSrc);
+  // Remember whether the comparisons are all integer or all floating point.
+  bool IsInt = false;
+  bool IsFP = false;
+  for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
+    Register Reg = RegsToNegate[I];
+    if (!MRI.hasOneNonDBGUse(Reg))
+      return false;
+    MachineInstr *Def = MRI.getVRegDef(Reg);
+    switch (Def->getOpcode()) {
+    default:
+      // Don't match if the tree contains anything other than ANDs, ORs and
+      // comparisons.
+      return false;
+    case TargetOpcode::G_ICMP:
+      if (IsFP)
+        return false;
+      IsInt = true;
+      // When we apply the combine we will invert the predicate.
+      break;
+    case TargetOpcode::G_FCMP:
+      if (IsInt)
+        return false;
+      IsFP = true;
+      // When we apply the combine we will invert the predicate.
+ break; + case TargetOpcode::G_AND: + case TargetOpcode::G_OR: + // Implement De Morgan's laws: + // ~(x & y) -> ~x | ~y + // ~(x | y) -> ~x & ~y + // When we apply the combine we will change the opcode and recursively + // negate the operands. + RegsToNegate.push_back(Def->getOperand(1).getReg()); + RegsToNegate.push_back(Def->getOperand(2).getReg()); + break; + } + } + + // Now we know whether the comparisons are integer or floating point, check + // the constant in the xor. + int64_t Cst; + if (Ty.isVector()) { + MachineInstr *CstDef = MRI.getVRegDef(CstReg); + auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI); + if (!MaybeCst) + return false; + if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP)) + return false; + } else { + if (!mi_match(CstReg, MRI, m_ICst(Cst))) + return false; + if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP)) + return false; + } + + return true; +} + +bool CombinerHelper::applyNotCmp(MachineInstr &MI, + SmallVectorImpl<Register> &RegsToNegate) { + for (Register Reg : RegsToNegate) { + MachineInstr *Def = MRI.getVRegDef(Reg); + Observer.changingInstr(*Def); + // For each comparison, invert the opcode. For each AND and OR, change the + // opcode. 
+ switch (Def->getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case TargetOpcode::G_ICMP: + case TargetOpcode::G_FCMP: { + MachineOperand &PredOp = Def->getOperand(1); + CmpInst::Predicate NewP = CmpInst::getInversePredicate( + (CmpInst::Predicate)PredOp.getPredicate()); + PredOp.setPredicate(NewP); + break; + } + case TargetOpcode::G_AND: + Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR)); + break; + case TargetOpcode::G_OR: + Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND)); + break; + } + Observer.changedInstr(*Def); + } + + replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchXorOfAndWithSameReg( + MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { + // Match (xor (and x, y), y) (or any of its commuted cases) + assert(MI.getOpcode() == TargetOpcode::G_XOR); + Register &X = MatchInfo.first; + Register &Y = MatchInfo.second; + Register AndReg = MI.getOperand(1).getReg(); + Register SharedReg = MI.getOperand(2).getReg(); + + // Find a G_AND on either side of the G_XOR. + // Look for one of + // + // (xor (and x, y), SharedReg) + // (xor SharedReg, (and x, y)) + if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) { + std::swap(AndReg, SharedReg); + if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) + return false; + } + + // Only do this if we'll eliminate the G_AND. + if (!MRI.hasOneNonDBGUse(AndReg)) + return false; + + // We can combine if SharedReg is the same as either the LHS or RHS of the + // G_AND. 
+ if (Y != SharedReg) + std::swap(X, Y); + return Y == SharedReg; +} + +bool CombinerHelper::applyXorOfAndWithSameReg( + MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { + // Fold (xor (and x, y), y) -> (and (not x), y) + Builder.setInstrAndDebugLoc(MI); + Register X, Y; + std::tie(X, Y) = MatchInfo; + auto Not = Builder.buildNot(MRI.getType(X), X); + Observer.changingInstr(MI); + MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND)); + MI.getOperand(1).setReg(Not->getOperand(0).getReg()); + MI.getOperand(2).setReg(Y); + Observer.changedInstr(MI); + return true; +} + +bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) { + Register DstReg = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(DstReg); + const DataLayout &DL = Builder.getMF().getDataLayout(); + + if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace())) + return false; + + if (Ty.isPointer()) { + auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI); + return ConstVal && *ConstVal == 0; + } + + assert(Ty.isVector() && "Expecting a vector type"); + const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg()); + return isBuildVectorAllZeros(*VecMI, MRI); +} + +bool CombinerHelper::applyPtrAddZero(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD); + Builder.setInstrAndDebugLoc(MI); + Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2)); + MI.eraseFromParent(); + return true; +} + +/// The second source operand is known to be a power of 2. 
+bool CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) { + Register DstReg = MI.getOperand(0).getReg(); + Register Src0 = MI.getOperand(1).getReg(); + Register Pow2Src1 = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(DstReg); + Builder.setInstrAndDebugLoc(MI); + + // Fold (urem x, pow2) -> (and x, pow2-1) + auto NegOne = Builder.buildConstant(Ty, -1); + auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne); + Builder.buildAnd(DstReg, Src0, Add); + MI.eraseFromParent(); + return true; +} + +Optional<SmallVector<Register, 8>> +CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const { + assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!"); + // We want to detect if Root is part of a tree which represents a bunch + // of loads being merged into a larger load. We'll try to recognize patterns + // like, for example: + // + // Reg Reg + // \ / + // OR_1 Reg + // \ / + // OR_2 + // \ Reg + // .. / + // Root + // + // Reg Reg Reg Reg + // \ / \ / + // OR_1 OR_2 + // \ / + // \ / + // ... + // Root + // + // Each "Reg" may have been produced by a load + some arithmetic. This + // function will save each of them. + SmallVector<Register, 8> RegsToVisit; + SmallVector<const MachineInstr *, 7> Ors = {Root}; + + // In the "worst" case, we're dealing with a load for each byte. So, there + // are at most #bytes - 1 ORs. + const unsigned MaxIter = + MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1; + for (unsigned Iter = 0; Iter < MaxIter; ++Iter) { + if (Ors.empty()) + break; + const MachineInstr *Curr = Ors.pop_back_val(); + Register OrLHS = Curr->getOperand(1).getReg(); + Register OrRHS = Curr->getOperand(2).getReg(); + + // In the combine, we want to elimate the entire tree. + if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS)) + return None; + + // If it's a G_OR, save it and continue to walk. If it's not, then it's + // something that may be a load + arithmetic. 
+ if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI)) + Ors.push_back(Or); + else + RegsToVisit.push_back(OrLHS); + if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI)) + Ors.push_back(Or); + else + RegsToVisit.push_back(OrRHS); + } + + // We're going to try and merge each register into a wider power-of-2 type, + // so we ought to have an even number of registers. + if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0) + return None; + return RegsToVisit; +} + +/// Helper function for findLoadOffsetsForLoadOrCombine. +/// +/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value, +/// and then moving that value into a specific byte offset. +/// +/// e.g. x[i] << 24 +/// +/// \returns The load instruction and the byte offset it is moved into. +static Optional<std::pair<MachineInstr *, int64_t>> +matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, + const MachineRegisterInfo &MRI) { + assert(MRI.hasOneNonDBGUse(Reg) && + "Expected Reg to only have one non-debug use?"); + Register MaybeLoad; + int64_t Shift; + if (!mi_match(Reg, MRI, + m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) { + Shift = 0; + MaybeLoad = Reg; + } + + if (Shift % MemSizeInBits != 0) + return None; + + // TODO: Handle other types of loads. + auto *Load = getOpcodeDef(TargetOpcode::G_ZEXTLOAD, MaybeLoad, MRI); + if (!Load) + return None; + + const auto &MMO = **Load->memoperands_begin(); + if (!MMO.isUnordered() || MMO.getSizeInBits() != MemSizeInBits) + return None; + + return std::make_pair(Load, Shift / MemSizeInBits); +} + +Optional<std::pair<MachineInstr *, int64_t>> +CombinerHelper::findLoadOffsetsForLoadOrCombine( + SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, + const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) { + + // Each load found for the pattern. There should be one for each RegsToVisit. 
+ SmallSetVector<const MachineInstr *, 8> Loads; + + // The lowest index used in any load. (The lowest "i" for each x[i].) + int64_t LowestIdx = INT64_MAX; + + // The load which uses the lowest index. + MachineInstr *LowestIdxLoad = nullptr; + + // Keeps track of the load indices we see. We shouldn't see any indices twice. + SmallSet<int64_t, 8> SeenIdx; + + // Ensure each load is in the same MBB. + // TODO: Support multiple MachineBasicBlocks. + MachineBasicBlock *MBB = nullptr; + const MachineMemOperand *MMO = nullptr; + + // Earliest instruction-order load in the pattern. + MachineInstr *EarliestLoad = nullptr; + + // Latest instruction-order load in the pattern. + MachineInstr *LatestLoad = nullptr; + + // Base pointer which every load should share. + Register BasePtr; + + // We want to find a load for each register. Each load should have some + // appropriate bit twiddling arithmetic. During this loop, we will also keep + // track of the load which uses the lowest index. Later, we will check if we + // can use its pointer in the final, combined load. + for (auto Reg : RegsToVisit) { + // Find the load, and find the position that it will end up in (e.g. a + // shifted) value. + auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI); + if (!LoadAndPos) + return None; + MachineInstr *Load; + int64_t DstPos; + std::tie(Load, DstPos) = *LoadAndPos; + + // TODO: Handle multiple MachineBasicBlocks. Currently not handled because + // it is difficult to check for stores/calls/etc between loads. + MachineBasicBlock *LoadMBB = Load->getParent(); + if (!MBB) + MBB = LoadMBB; + if (LoadMBB != MBB) + return None; + + // Make sure that the MachineMemOperands of every seen load are compatible. + const MachineMemOperand *LoadMMO = *Load->memoperands_begin(); + if (!MMO) + MMO = LoadMMO; + if (MMO->getAddrSpace() != LoadMMO->getAddrSpace()) + return None; + + // Find out what the base pointer and index for the load is. 
+ Register LoadPtr; + int64_t Idx; + if (!mi_match(Load->getOperand(1).getReg(), MRI, + m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) { + LoadPtr = Load->getOperand(1).getReg(); + Idx = 0; + } + + // Don't combine things like a[i], a[i] -> a bigger load. + if (!SeenIdx.insert(Idx).second) + return None; + + // Every load must share the same base pointer; don't combine things like: + // + // a[i], b[i + 1] -> a bigger load. + if (!BasePtr.isValid()) + BasePtr = LoadPtr; + if (BasePtr != LoadPtr) + return None; + + if (Idx < LowestIdx) { + LowestIdx = Idx; + LowestIdxLoad = Load; + } + + // Keep track of the byte offset that this load ends up at. If we have seen + // the byte offset, then stop here. We do not want to combine: + // + // a[i] << 16, a[i + k] << 16 -> a bigger load. + if (!MemOffset2Idx.try_emplace(DstPos, Idx).second) + return None; + Loads.insert(Load); + + // Keep track of the position of the earliest/latest loads in the pattern. + // We will check that there are no load fold barriers between them later + // on. + // + // FIXME: Is there a better way to check for load fold barriers? + if (!EarliestLoad || dominates(*Load, *EarliestLoad)) + EarliestLoad = Load; + if (!LatestLoad || dominates(*LatestLoad, *Load)) + LatestLoad = Load; + } + + // We found a load for each register. Let's check if each load satisfies the + // pattern. + assert(Loads.size() == RegsToVisit.size() && + "Expected to find a load for each register?"); + assert(EarliestLoad != LatestLoad && EarliestLoad && + LatestLoad && "Expected at least two loads?"); + + // Check if there are any stores, calls, etc. between any of the loads. If + // there are, then we can't safely perform the combine. + // + // MaxIter is chosen based off the (worst case) number of iterations it + // typically takes to succeed in the LLVM test suite plus some padding. + // + // FIXME: Is there a better way to check for load fold barriers? 
+ const unsigned MaxIter = 20; + unsigned Iter = 0; + for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(), + LatestLoad->getIterator())) { + if (Loads.count(&MI)) + continue; + if (MI.isLoadFoldBarrier()) + return None; + if (Iter++ == MaxIter) + return None; + } + + return std::make_pair(LowestIdxLoad, LowestIdx); +} + +bool CombinerHelper::matchLoadOrCombine( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_OR); + MachineFunction &MF = *MI.getMF(); + // Assuming a little-endian target, transform: + // s8 *a = ... + // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24) + // => + // s32 val = *((i32)a) + // + // s8 *a = ... + // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3] + // => + // s32 val = BSWAP(*((s32)a)) + Register Dst = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Dst); + if (Ty.isVector()) + return false; + + // We need to combine at least two loads into this type. Since the smallest + // possible load is into a byte, we need at least a 16-bit wide type. + const unsigned WideMemSizeInBits = Ty.getSizeInBits(); + if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0) + return false; + + // Match a collection of non-OR instructions in the pattern. + auto RegsToVisit = findCandidatesForLoadOrCombine(&MI); + if (!RegsToVisit) + return false; + + // We have a collection of non-OR instructions. Figure out how wide each of + // the small loads should be based off of the number of potential loads we + // found. + const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size(); + if (NarrowMemSizeInBits % 8 != 0) + return false; + + // Check if each register feeding into each OR is a load from the same + // base pointer + some arithmetic. + // + // e.g. a[0], a[1] << 8, a[2] << 16, etc. + // + // Also verify that each of these ends up putting a[i] into the same memory + // offset as a load into a wide type would. 
+ SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx; + MachineInstr *LowestIdxLoad; + int64_t LowestIdx; + auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine( + MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits); + if (!MaybeLoadInfo) + return false; + std::tie(LowestIdxLoad, LowestIdx) = *MaybeLoadInfo; + + // We have a bunch of loads being OR'd together. Using the addresses + offsets + // we found before, check if this corresponds to a big or little endian byte + // pattern. If it does, then we can represent it using a load + possibly a + // BSWAP. + bool IsBigEndianTarget = MF.getDataLayout().isBigEndian(); + Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx); + if (!IsBigEndian.hasValue()) + return false; + bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian; + if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}})) + return false; + + // Make sure that the load from the lowest index produces offset 0 in the + // final value. + // + // This ensures that we won't combine something like this: + // + // load x[i] -> byte 2 + // load x[i+1] -> byte 0 ---> wide_load x[i] + // load x[i+2] -> byte 1 + const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits; + const unsigned ZeroByteOffset = + *IsBigEndian + ? bigEndianByteAt(NumLoadsInTy, 0) + : littleEndianByteAt(NumLoadsInTy, 0); + auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset); + if (ZeroOffsetIdx == MemOffset2Idx.end() || + ZeroOffsetIdx->second != LowestIdx) + return false; + + // We wil reuse the pointer from the load which ends up at byte offset 0. It + // may not use index 0. 
+ Register Ptr = LowestIdxLoad->getOperand(1).getReg(); + const MachineMemOperand &MMO = **LowestIdxLoad->memoperands_begin(); + LegalityQuery::MemDesc MMDesc; + MMDesc.SizeInBits = WideMemSizeInBits; + MMDesc.AlignInBits = MMO.getAlign().value() * 8; + MMDesc.Ordering = MMO.getOrdering(); + if (!isLegalOrBeforeLegalizer( + {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}})) + return false; + auto PtrInfo = MMO.getPointerInfo(); + auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8); + + // Load must be allowed and fast on the target. + LLVMContext &C = MF.getFunction().getContext(); + auto &DL = MF.getDataLayout(); + bool Fast = false; + if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) || + !Fast) + return false; + + MatchInfo = [=](MachineIRBuilder &MIB) { + Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst; + MIB.buildLoad(LoadDst, Ptr, *NewMMO); + if (NeedsBSwap) + MIB.buildBSwap(Dst, LoadDst); + }; + return true; +} + +bool CombinerHelper::applyLoadOrCombine( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + Builder.setInstrAndDebugLoc(MI); + MatchInfo(Builder); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::tryCombine(MachineInstr &MI) { if (tryCombineCopy(MI)) return true; diff --git a/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp b/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp index bdaa6378e901..59f4d60a41d8 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp @@ -16,7 +16,7 @@ using namespace llvm; void GISelChangeObserver::changingAllUsesOfReg( - const MachineRegisterInfo &MRI, unsigned Reg) { + const MachineRegisterInfo &MRI, Register Reg) { for (auto &ChangingMI : MRI.use_instructions(Reg)) { changingInstr(ChangingMI); ChangingAllUsesOfReg.insert(&ChangingMI); diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp 
b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index 0e9c6e4fab9f..2de20489e1d1 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -11,7 +11,6 @@ // //===------------------ #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -95,6 +94,25 @@ dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) { << "\n"; } +/// Compute known bits for the intersection of \p Src0 and \p Src1 +void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1, + KnownBits &Known, + const APInt &DemandedElts, + unsigned Depth) { + // Test src1 first, since we canonicalize simpler expressions to the RHS. + computeKnownBitsImpl(Src1, Known, DemandedElts, Depth); + + // If we don't know any bits, early out. + if (Known.isUnknown()) + return; + + KnownBits Known2; + computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth); + + // Only known if known in both the LHS and RHS. + Known = KnownBits::commonBits(Known, Known2); +} + void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, const APInt &DemandedElts, unsigned Depth) { @@ -182,8 +200,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, // For COPYs we don't do anything, don't increase the depth. computeKnownBitsImpl(SrcReg, Known2, DemandedElts, Depth + (Opcode != TargetOpcode::COPY)); - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; + Known = KnownBits::commonBits(Known, Known2); // If we reach a point where we don't know anything // just stop looking through the operands. 
if (Known.One == 0 && Known.Zero == 0) @@ -200,8 +217,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, auto CstVal = getConstantVRegVal(R, MRI); if (!CstVal) break; - Known.One = *CstVal; - Known.Zero = ~Known.One; + Known = KnownBits::makeConstant(*CstVal); break; } case TargetOpcode::G_FRAME_INDEX: { @@ -268,33 +284,50 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, Depth + 1); computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, Depth + 1); - // If low bits are zero in either operand, output low known-0 bits. - // Also compute a conservative estimate for high known-0 bits. - // More trickiness is possible, but this is sufficient for the - // interesting case of alignment computation. - unsigned TrailZ = - Known.countMinTrailingZeros() + Known2.countMinTrailingZeros(); - unsigned LeadZ = - std::max(Known.countMinLeadingZeros() + Known2.countMinLeadingZeros(), - BitWidth) - - BitWidth; - - Known.resetAll(); - Known.Zero.setLowBits(std::min(TrailZ, BitWidth)); - Known.Zero.setHighBits(std::min(LeadZ, BitWidth)); + Known = KnownBits::computeForMul(Known, Known2); break; } case TargetOpcode::G_SELECT: { - computeKnownBitsImpl(MI.getOperand(3).getReg(), Known, DemandedElts, + computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(), + Known, DemandedElts, Depth + 1); + break; + } + case TargetOpcode::G_SMIN: { + // TODO: Handle clamp pattern with number of sign bits + KnownBits KnownRHS; + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, Depth + 1); - // If we don't know any bits, early out. - if (Known.isUnknown()) - break; - computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts, + computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, Depth + 1); - // Only known if known in both the LHS and RHS. 
- Known.One &= Known2.One; - Known.Zero &= Known2.Zero; + Known = KnownBits::smin(Known, KnownRHS); + break; + } + case TargetOpcode::G_SMAX: { + // TODO: Handle clamp pattern with number of sign bits + KnownBits KnownRHS; + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, + Depth + 1); + Known = KnownBits::smax(Known, KnownRHS); + break; + } + case TargetOpcode::G_UMIN: { + KnownBits KnownRHS; + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, + DemandedElts, Depth + 1); + computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, + DemandedElts, Depth + 1); + Known = KnownBits::umin(Known, KnownRHS); + break; + } + case TargetOpcode::G_UMAX: { + KnownBits KnownRHS; + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, + DemandedElts, Depth + 1); + computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, + DemandedElts, Depth + 1); + Known = KnownBits::umax(Known, KnownRHS); break; } case TargetOpcode::G_FCMP: @@ -314,61 +347,56 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, Known = Known.sext(BitWidth); break; } + case TargetOpcode::G_SEXT_INREG: { + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); + Known = Known.sextInReg(MI.getOperand(2).getImm()); + break; + } case TargetOpcode::G_ANYEXT: { computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, Depth + 1); - Known = Known.zext(BitWidth); + Known = Known.anyext(BitWidth); break; } case TargetOpcode::G_LOAD: { - if (MI.hasOneMemOperand()) { - const MachineMemOperand *MMO = *MI.memoperands_begin(); - if (const MDNode *Ranges = MMO->getRanges()) { - computeKnownBitsFromRangeMetadata(*Ranges, Known); - } + const MachineMemOperand *MMO = *MI.memoperands_begin(); + if (const MDNode *Ranges = MMO->getRanges()) { + computeKnownBitsFromRangeMetadata(*Ranges, Known); } + break; } case TargetOpcode::G_ZEXTLOAD: { // Everything 
above the retrieved bits is zero - if (MI.hasOneMemOperand()) - Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits()); + Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits()); break; } - case TargetOpcode::G_ASHR: - case TargetOpcode::G_LSHR: - case TargetOpcode::G_SHL: { - KnownBits RHSKnown; + case TargetOpcode::G_ASHR: { + KnownBits LHSKnown, RHSKnown; + computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts, + Depth + 1); computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, Depth + 1); - if (!RHSKnown.isConstant()) { - LLVM_DEBUG( - MachineInstr *RHSMI = MRI.getVRegDef(MI.getOperand(2).getReg()); - dbgs() << '[' << Depth << "] Shift not known constant: " << *RHSMI); - break; - } - uint64_t Shift = RHSKnown.getConstant().getZExtValue(); - LLVM_DEBUG(dbgs() << '[' << Depth << "] Shift is " << Shift << '\n'); - - computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Known = KnownBits::ashr(LHSKnown, RHSKnown); + break; + } + case TargetOpcode::G_LSHR: { + KnownBits LHSKnown, RHSKnown; + computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts, Depth + 1); - - switch (Opcode) { - case TargetOpcode::G_ASHR: - Known.Zero = Known.Zero.ashr(Shift); - Known.One = Known.One.ashr(Shift); - break; - case TargetOpcode::G_LSHR: - Known.Zero = Known.Zero.lshr(Shift); - Known.One = Known.One.lshr(Shift); - Known.Zero.setBitsFrom(Known.Zero.getBitWidth() - Shift); - break; - case TargetOpcode::G_SHL: - Known.Zero = Known.Zero.shl(Shift); - Known.One = Known.One.shl(Shift); - Known.Zero.setBits(0, Shift); - break; - } + computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, + Depth + 1); + Known = KnownBits::lshr(LHSKnown, RHSKnown); + break; + } + case TargetOpcode::G_SHL: { + KnownBits LHSKnown, RHSKnown; + computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, + 
Depth + 1); + Known = KnownBits::shl(LHSKnown, RHSKnown); break; } case TargetOpcode::G_INTTOPTR: @@ -390,6 +418,48 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, Known.Zero.setBitsFrom(SrcBitWidth); break; } + case TargetOpcode::G_MERGE_VALUES: { + unsigned NumOps = MI.getNumOperands(); + unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + + for (unsigned I = 0; I != NumOps - 1; ++I) { + KnownBits SrcOpKnown; + computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown, + DemandedElts, Depth + 1); + Known.insertBits(SrcOpKnown, I * OpSize); + } + break; + } + case TargetOpcode::G_UNMERGE_VALUES: { + unsigned NumOps = MI.getNumOperands(); + Register SrcReg = MI.getOperand(NumOps - 1).getReg(); + if (MRI.getType(SrcReg).isVector()) + return; // TODO: Handle vectors. + + KnownBits SrcOpKnown; + computeKnownBitsImpl(SrcReg, SrcOpKnown, DemandedElts, Depth + 1); + + // Figure out the result operand index + unsigned DstIdx = 0; + for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R; + ++DstIdx) + ; + + Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx); + break; + } + case TargetOpcode::G_BSWAP: { + Register SrcReg = MI.getOperand(1).getReg(); + computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); + Known.byteSwap(); + break; + } + case TargetOpcode::G_BITREVERSE: { + Register SrcReg = MI.getOperand(1).getReg(); + computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); + Known.reverseBits(); + break; + } } assert(!Known.hasConflict() && "Bits known to be one AND zero?"); @@ -399,6 +469,17 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, ComputeKnownBitsCache[R] = Known; } +/// Compute number of sign bits for the intersection of \p Src0 and \p Src1 +unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1, + const APInt &DemandedElts, + unsigned Depth) { + // Test src1 first, since we canonicalize simpler expressions to the RHS. 
+ unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth); + if (Src1SignBits == 1) + return 1; + return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits); +} + unsigned GISelKnownBits::computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth) { @@ -442,15 +523,30 @@ unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits(); return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp; } + case TargetOpcode::G_SEXT_INREG: { + // Max of the input and what this extends. + Register Src = MI.getOperand(1).getReg(); + unsigned SrcBits = MI.getOperand(2).getImm(); + unsigned InRegBits = TyBits - SrcBits + 1; + return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1), InRegBits); + } case TargetOpcode::G_SEXTLOAD: { - Register Dst = MI.getOperand(0).getReg(); - LLT Ty = MRI.getType(Dst); - // TODO: add vector support - if (Ty.isVector()) - break; - if (MI.hasOneMemOperand()) - return Ty.getSizeInBits() - (*MI.memoperands_begin())->getSizeInBits(); - break; + // FIXME: We need an in-memory type representation. + if (DstTy.isVector()) + return 1; + + // e.g. i16->i32 = '17' bits known. + const MachineMemOperand *MMO = *MI.memoperands_begin(); + return TyBits - MMO->getSizeInBits() + 1; + } + case TargetOpcode::G_ZEXTLOAD: { + // FIXME: We need an in-memory type representation. + if (DstTy.isVector()) + return 1; + + // e.g. i16->i32 = '16' bits known. 
+ const MachineMemOperand *MMO = *MI.memoperands_begin(); + return TyBits - MMO->getSizeInBits(); } case TargetOpcode::G_TRUNC: { Register Src = MI.getOperand(1).getReg(); @@ -464,6 +560,11 @@ unsigned GISelKnownBits::computeNumSignBits(Register R, return NumSrcSignBits - (NumSrcBits - DstTyBits); break; } + case TargetOpcode::G_SELECT: { + return computeNumSignBitsMin(MI.getOperand(2).getReg(), + MI.getOperand(3).getReg(), DemandedElts, + Depth + 1); + } case TargetOpcode::G_INTRINSIC: case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: default: { diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 8f6643b2f193..b97c369b832d 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -29,9 +29,11 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/SwitchLoweringUtils.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -48,11 +50,13 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -70,6 +74,7 @@ #include "llvm/Target/TargetMachine.h" #include <algorithm> #include <cassert> +#include <cstddef> #include <cstdint> #include <iterator> #include <string> @@ -90,6 +95,8 @@ INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR 
-> MI", false, false) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(StackProtector) INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI", false, false) @@ -110,7 +117,8 @@ static void reportTranslationError(MachineFunction &MF, ORE.emit(R); } -IRTranslator::IRTranslator() : MachineFunctionPass(ID) { } +IRTranslator::IRTranslator(CodeGenOpt::Level optlevel) + : MachineFunctionPass(ID), OptLevel(optlevel) {} #ifndef NDEBUG namespace { @@ -154,13 +162,17 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<StackProtector>(); AU.addRequired<TargetPassConfig>(); AU.addRequired<GISelCSEAnalysisWrapperPass>(); + if (OptLevel != CodeGenOpt::None) + AU.addRequired<BranchProbabilityInfoWrapperPass>(); getSelectionDAGFallbackAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } IRTranslator::ValueToVRegInfo::VRegListT & IRTranslator::allocateVRegs(const Value &Val) { - assert(!VMap.contains(Val) && "Value already allocated in VMap"); + auto VRegsIt = VMap.findVRegs(Val); + if (VRegsIt != VMap.vregs_end()) + return *VRegsIt->second; auto *Regs = VMap.getVRegs(Val); auto *Offsets = VMap.getOffsets(Val); SmallVector<LLT, 4> SplitTys; @@ -222,8 +234,9 @@ ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) { } int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) { - if (FrameIndices.find(&AI) != FrameIndices.end()) - return FrameIndices[&AI]; + auto MapEntry = FrameIndices.find(&AI); + if (MapEntry != FrameIndices.end()) + return MapEntry->second; uint64_t ElementSize = DL->getTypeAllocSize(AI.getAllocatedType()); uint64_t Size = @@ -293,25 +306,8 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U, return true; } -bool IRTranslator::translateFSub(const User &U, MachineIRBuilder &MIRBuilder) { - // -0.0 - X --> G_FNEG - if 
(isa<Constant>(U.getOperand(0)) && - U.getOperand(0) == ConstantFP::getZeroValueForNegation(U.getType())) { - Register Op1 = getOrCreateVReg(*U.getOperand(1)); - Register Res = getOrCreateVReg(U); - uint16_t Flags = 0; - if (isa<Instruction>(U)) { - const Instruction &I = cast<Instruction>(U); - Flags = MachineInstr::copyFlagsFromInstruction(I); - } - // Negate the last operand of the FSUB - MIRBuilder.buildFNeg(Res, Op1, Flags); - return true; - } - return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder); -} - -bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) { +bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U, + MachineIRBuilder &MIRBuilder) { Register Op0 = getOrCreateVReg(*U.getOperand(0)); Register Res = getOrCreateVReg(U); uint16_t Flags = 0; @@ -319,10 +315,14 @@ bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) { const Instruction &I = cast<Instruction>(U); Flags = MachineInstr::copyFlagsFromInstruction(I); } - MIRBuilder.buildFNeg(Res, Op0, Flags); + MIRBuilder.buildInstr(Opcode, {Res}, {Op0}, Flags); return true; } +bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) { + return translateUnaryOp(TargetOpcode::G_FNEG, U, MIRBuilder); +} + bool IRTranslator::translateCompare(const User &U, MachineIRBuilder &MIRBuilder) { auto *CI = dyn_cast<CmpInst>(&U); @@ -368,31 +368,289 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) { // The target may mess up with the insertion point, but // this is not important as a return is the last instruction // of the block anyway. 
- return CLI->lowerReturn(MIRBuilder, Ret, VRegs, SwiftErrorVReg); + return CLI->lowerReturn(MIRBuilder, Ret, VRegs, FuncInfo, SwiftErrorVReg); +} + +void IRTranslator::emitBranchForMergedCondition( + const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, + BranchProbability TProb, BranchProbability FProb, bool InvertCond) { + // If the leaf of the tree is a comparison, merge the condition into + // the caseblock. + if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) { + CmpInst::Predicate Condition; + if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { + Condition = InvertCond ? IC->getInversePredicate() : IC->getPredicate(); + } else { + const FCmpInst *FC = cast<FCmpInst>(Cond); + Condition = InvertCond ? FC->getInversePredicate() : FC->getPredicate(); + } + + SwitchCG::CaseBlock CB(Condition, false, BOp->getOperand(0), + BOp->getOperand(1), nullptr, TBB, FBB, CurBB, + CurBuilder->getDebugLoc(), TProb, FProb); + SL->SwitchCases.push_back(CB); + return; + } + + // Create a CaseBlock record representing this branch. + CmpInst::Predicate Pred = InvertCond ? 
CmpInst::ICMP_NE : CmpInst::ICMP_EQ; + SwitchCG::CaseBlock CB( + Pred, false, Cond, ConstantInt::getTrue(MF->getFunction().getContext()), + nullptr, TBB, FBB, CurBB, CurBuilder->getDebugLoc(), TProb, FProb); + SL->SwitchCases.push_back(CB); +} + +static bool isValInBlock(const Value *V, const BasicBlock *BB) { + if (const Instruction *I = dyn_cast<Instruction>(V)) + return I->getParent() == BB; + return true; +} + +void IRTranslator::findMergedConditions( + const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, + Instruction::BinaryOps Opc, BranchProbability TProb, + BranchProbability FProb, bool InvertCond) { + using namespace PatternMatch; + assert((Opc == Instruction::And || Opc == Instruction::Or) && + "Expected Opc to be AND/OR"); + // Skip over not part of the tree and remember to invert op and operands at + // next level. + Value *NotCond; + if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) && + isValInBlock(NotCond, CurBB->getBasicBlock())) { + findMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb, + !InvertCond); + return; + } + + const Instruction *BOp = dyn_cast<Instruction>(Cond); + const Value *BOpOp0, *BOpOp1; + // Compute the effective opcode for Cond, taking into account whether it needs + // to be inverted, e.g. + // and (not (or A, B)), C + // gets lowered as + // and (and (not A, not B), C) + Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0; + if (BOp) { + BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1))) + ? Instruction::And + : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1))) + ? Instruction::Or + : (Instruction::BinaryOps)0); + if (InvertCond) { + if (BOpc == Instruction::And) + BOpc = Instruction::Or; + else if (BOpc == Instruction::Or) + BOpc = Instruction::And; + } + } + + // If this node is not part of the or/and tree, emit it as a branch. + // Note that all nodes in the tree should have same opcode. 
+ bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse(); + if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() || + !isValInBlock(BOpOp0, CurBB->getBasicBlock()) || + !isValInBlock(BOpOp1, CurBB->getBasicBlock())) { + emitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb, + InvertCond); + return; + } + + // Create TmpBB after CurBB. + MachineFunction::iterator BBI(CurBB); + MachineBasicBlock *TmpBB = + MF->CreateMachineBasicBlock(CurBB->getBasicBlock()); + CurBB->getParent()->insert(++BBI, TmpBB); + + if (Opc == Instruction::Or) { + // Codegen X | Y as: + // BB1: + // jmp_if_X TBB + // jmp TmpBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + + // We have flexibility in setting Prob for BB1 and Prob for TmpBB. + // The requirement is that + // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) + // = TrueProb for original BB. + // Assuming the original probabilities are A and B, one choice is to set + // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to + // A/(1+B) and 2B/(1+B). This choice assumes that + // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. + // Another choice is to assume TrueProb for BB1 equals to TrueProb for + // TmpBB, but the math is more complicated. + + auto NewTrueProb = TProb / 2; + auto NewFalseProb = TProb / 2 + FProb; + // Emit the LHS condition. + findMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb, + NewFalseProb, InvertCond); + + // Normalize A/2 and B to get A/(1+B) and 2B/(1+B). + SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb}; + BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); + // Emit the RHS condition into TmpBB. 
+ findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0], + Probs[1], InvertCond); + } else { + assert(Opc == Instruction::And && "Unknown merge op!"); + // Codegen X & Y as: + // BB1: + // jmp_if_X TmpBB + // jmp FBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + // This requires creation of TmpBB after CurBB. + + // We have flexibility in setting Prob for BB1 and Prob for TmpBB. + // The requirement is that + // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) + // = FalseProb for original BB. + // Assuming the original probabilities are A and B, one choice is to set + // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to + // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 == + // TrueProb for BB1 * FalseProb for TmpBB. + + auto NewTrueProb = TProb + FProb / 2; + auto NewFalseProb = FProb / 2; + // Emit the LHS condition. + findMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb, + NewFalseProb, InvertCond); + + // Normalize A and B/2 to get 2A/(1+A) and B/(1+A). + SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2}; + BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); + // Emit the RHS condition into TmpBB. + findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0], + Probs[1], InvertCond); + } +} + +bool IRTranslator::shouldEmitAsBranches( + const std::vector<SwitchCG::CaseBlock> &Cases) { + // For multiple cases, it's better to emit as branches. + if (Cases.size() != 2) + return true; + + // If this is two comparisons of the same values or'd or and'd together, they + // will get folded into a single comparison, so don't emit two blocks. 
+ if ((Cases[0].CmpLHS == Cases[1].CmpLHS && + Cases[0].CmpRHS == Cases[1].CmpRHS) || + (Cases[0].CmpRHS == Cases[1].CmpLHS && + Cases[0].CmpLHS == Cases[1].CmpRHS)) { + return false; + } + + // Handle: (X != null) | (Y != null) --> (X|Y) != 0 + // Handle: (X == null) & (Y == null) --> (X|Y) == 0 + if (Cases[0].CmpRHS == Cases[1].CmpRHS && + Cases[0].PredInfo.Pred == Cases[1].PredInfo.Pred && + isa<Constant>(Cases[0].CmpRHS) && + cast<Constant>(Cases[0].CmpRHS)->isNullValue()) { + if (Cases[0].PredInfo.Pred == CmpInst::ICMP_EQ && + Cases[0].TrueBB == Cases[1].ThisBB) + return false; + if (Cases[0].PredInfo.Pred == CmpInst::ICMP_NE && + Cases[0].FalseBB == Cases[1].ThisBB) + return false; + } + + return true; } bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) { const BranchInst &BrInst = cast<BranchInst>(U); - unsigned Succ = 0; - if (!BrInst.isUnconditional()) { - // We want a G_BRCOND to the true BB followed by an unconditional branch. - Register Tst = getOrCreateVReg(*BrInst.getCondition()); - const BasicBlock &TrueTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ++)); - MachineBasicBlock &TrueBB = getMBB(TrueTgt); - MIRBuilder.buildBrCond(Tst, TrueBB); + auto &CurMBB = MIRBuilder.getMBB(); + auto *Succ0MBB = &getMBB(*BrInst.getSuccessor(0)); + + if (BrInst.isUnconditional()) { + // If the unconditional target is the layout successor, fallthrough. + if (!CurMBB.isLayoutSuccessor(Succ0MBB)) + MIRBuilder.buildBr(*Succ0MBB); + + // Link successors. + for (const BasicBlock *Succ : successors(&BrInst)) + CurMBB.addSuccessor(&getMBB(*Succ)); + return true; } - const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ)); - MachineBasicBlock &TgtBB = getMBB(BrTgt); - MachineBasicBlock &CurBB = MIRBuilder.getMBB(); + // If this condition is one of the special cases we handle, do special stuff + // now. 
+ const Value *CondVal = BrInst.getCondition(); + MachineBasicBlock *Succ1MBB = &getMBB(*BrInst.getSuccessor(1)); - // If the unconditional target is the layout successor, fallthrough. - if (!CurBB.isLayoutSuccessor(&TgtBB)) - MIRBuilder.buildBr(TgtBB); + const auto &TLI = *MF->getSubtarget().getTargetLowering(); - // Link successors. - for (const BasicBlock *Succ : successors(&BrInst)) - CurBB.addSuccessor(&getMBB(*Succ)); + // If this is a series of conditions that are or'd or and'd together, emit + // this as a sequence of branches instead of setcc's with and/or operations. + // As long as jumps are not expensive (exceptions for multi-use logic ops, + // unpredictable branches, and vector extracts because those jumps are likely + // expensive for any target), this should improve performance. + // For example, instead of something like: + // cmp A, B + // C = seteq + // cmp D, E + // F = setle + // or C, F + // jnz foo + // Emit: + // cmp A, B + // je foo + // cmp D, E + // jle foo + using namespace PatternMatch; + const Instruction *CondI = dyn_cast<Instruction>(CondVal); + if (!TLI.isJumpExpensive() && CondI && CondI->hasOneUse() && + !BrInst.hasMetadata(LLVMContext::MD_unpredictable)) { + Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0; + Value *Vec; + const Value *BOp0, *BOp1; + if (match(CondI, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1)))) + Opcode = Instruction::And; + else if (match(CondI, m_LogicalOr(m_Value(BOp0), m_Value(BOp1)))) + Opcode = Instruction::Or; + + if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) && + match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) { + findMergedConditions(CondI, Succ0MBB, Succ1MBB, &CurMBB, &CurMBB, Opcode, + getEdgeProbability(&CurMBB, Succ0MBB), + getEdgeProbability(&CurMBB, Succ1MBB), + /*InvertCond=*/false); + assert(SL->SwitchCases[0].ThisBB == &CurMBB && "Unexpected lowering!"); + + // Allow some cases to be rejected. 
+ if (shouldEmitAsBranches(SL->SwitchCases)) { + // Emit the branch for this block. + emitSwitchCase(SL->SwitchCases[0], &CurMBB, *CurBuilder); + SL->SwitchCases.erase(SL->SwitchCases.begin()); + return true; + } + + // Okay, we decided not to do this, remove any inserted MBB's and clear + // SwitchCases. + for (unsigned I = 1, E = SL->SwitchCases.size(); I != E; ++I) + MF->erase(SL->SwitchCases[I].ThisBB); + + SL->SwitchCases.clear(); + } + } + + // Create a CaseBlock record representing this branch. + SwitchCG::CaseBlock CB(CmpInst::ICMP_EQ, false, CondVal, + ConstantInt::getTrue(MF->getFunction().getContext()), + nullptr, Succ0MBB, Succ1MBB, &CurMBB, + CurBuilder->getDebugLoc()); + + // Use emitSwitchCase to actually insert the fast branch sequence for this + // cond branch. + emitSwitchCase(CB, &CurMBB, *CurBuilder); return true; } @@ -457,6 +715,7 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) { } SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr); + SL->findBitTestClusters(Clusters, &SI); LLVM_DEBUG({ dbgs() << "Case clusters: "; @@ -577,8 +836,23 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, const LLT i1Ty = LLT::scalar(1); // Build the compare. if (!CB.CmpMHS) { - Register CondRHS = getOrCreateVReg(*CB.CmpRHS); - Cond = MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0); + const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS); + // For conditional branch lowering, we might try to do something silly like + // emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so, + // just re-use the existing condition vreg. 
+ if (CI && CI->getZExtValue() == 1 && + MRI->getType(CondLHS).getSizeInBits() == 1 && + CB.PredInfo.Pred == CmpInst::ICMP_EQ) { + Cond = CondLHS; + } else { + Register CondRHS = getOrCreateVReg(*CB.CmpRHS); + if (CmpInst::isFPPredicate(CB.PredInfo.Pred)) + Cond = + MIB.buildFCmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0); + else + Cond = + MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0); + } } else { assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE && "Can only handle SLE ranges"); @@ -611,17 +885,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, addSuccessorWithProb(CB.ThisBB, CB.FalseBB, CB.FalseProb); CB.ThisBB->normalizeSuccProbs(); - // if (SwitchBB->getBasicBlock() != CB.FalseBB->getBasicBlock()) - addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()}, - CB.ThisBB); - - // If the lhs block is the next block, invert the condition so that we can - // fall through to the lhs instead of the rhs block. - if (CB.TrueBB == CB.ThisBB->getNextNode()) { - std::swap(CB.TrueBB, CB.FalseBB); - auto True = MIB.buildConstant(i1Ty, 1); - Cond = MIB.buildXor(i1Ty, Cond, True).getReg(0); - } + addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()}, + CB.ThisBB); MIB.buildBrCond(Cond, *CB.TrueBB); MIB.buildBr(*CB.FalseBB); @@ -734,6 +999,156 @@ bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I, return true; } +void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B, + MachineBasicBlock *SwitchBB) { + MachineIRBuilder &MIB = *CurBuilder; + MIB.setMBB(*SwitchBB); + + // Subtract the minimum value. + Register SwitchOpReg = getOrCreateVReg(*B.SValue); + + LLT SwitchOpTy = MRI->getType(SwitchOpReg); + Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0); + auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg); + + // Ensure that the type will fit the mask value. 
+ LLT MaskTy = SwitchOpTy; + for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) { + if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) { + // Switch table case range are encoded into series of masks. + // Just use pointer type, it's guaranteed to fit. + MaskTy = LLT::scalar(64); + break; + } + } + Register SubReg = RangeSub.getReg(0); + if (SwitchOpTy != MaskTy) + SubReg = MIB.buildZExtOrTrunc(MaskTy, SubReg).getReg(0); + + B.RegVT = getMVTForLLT(MaskTy); + B.Reg = SubReg; + + MachineBasicBlock *MBB = B.Cases[0].ThisBB; + + if (!B.OmitRangeCheck) + addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); + addSuccessorWithProb(SwitchBB, MBB, B.Prob); + + SwitchBB->normalizeSuccProbs(); + + if (!B.OmitRangeCheck) { + // Conditional branch to the default block. + auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range); + auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1), + RangeSub, RangeCst); + MIB.buildBrCond(RangeCmp, *B.Default); + } + + // Avoid emitting unnecessary branches to the next block. + if (MBB != SwitchBB->getNextNode()) + MIB.buildBr(*MBB); +} + +void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB, + MachineBasicBlock *NextMBB, + BranchProbability BranchProbToNext, + Register Reg, SwitchCG::BitTestCase &B, + MachineBasicBlock *SwitchBB) { + MachineIRBuilder &MIB = *CurBuilder; + MIB.setMBB(*SwitchBB); + + LLT SwitchTy = getLLTForMVT(BB.RegVT); + Register Cmp; + unsigned PopCount = countPopulation(B.Mask); + if (PopCount == 1) { + // Testing for a single bit; just compare the shift count with what it + // would need to be to shift a 1 bit in that position. + auto MaskTrailingZeros = + MIB.buildConstant(SwitchTy, countTrailingZeros(B.Mask)); + Cmp = + MIB.buildICmp(ICmpInst::ICMP_EQ, LLT::scalar(1), Reg, MaskTrailingZeros) + .getReg(0); + } else if (PopCount == BB.Range) { + // There is only one zero bit in the range, test for it directly. 
+ auto MaskTrailingOnes = + MIB.buildConstant(SwitchTy, countTrailingOnes(B.Mask)); + Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Reg, MaskTrailingOnes) + .getReg(0); + } else { + // Make desired shift. + auto CstOne = MIB.buildConstant(SwitchTy, 1); + auto SwitchVal = MIB.buildShl(SwitchTy, CstOne, Reg); + + // Emit bit tests and jumps. + auto CstMask = MIB.buildConstant(SwitchTy, B.Mask); + auto AndOp = MIB.buildAnd(SwitchTy, SwitchVal, CstMask); + auto CstZero = MIB.buildConstant(SwitchTy, 0); + Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), AndOp, CstZero) + .getReg(0); + } + + // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb. + addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb); + // The branch probability from SwitchBB to NextMBB is BranchProbToNext. + addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext); + // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is + // one as they are relative probabilities (and thus work more like weights), + // and hence we need to normalize them to let the sum of them become one. + SwitchBB->normalizeSuccProbs(); + + // Record the fact that the IR edge from the header to the bit test target + // will go through our new block. Neeeded for PHIs to have nodes added. + addMachineCFGPred({BB.Parent->getBasicBlock(), B.TargetBB->getBasicBlock()}, + SwitchBB); + + MIB.buildBrCond(Cmp, *B.TargetBB); + + // Avoid emitting unnecessary branches to the next block. 
+ if (NextMBB != SwitchBB->getNextNode()) + MIB.buildBr(*NextMBB); +} + +bool IRTranslator::lowerBitTestWorkItem( + SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB, + MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB, + MachineIRBuilder &MIB, MachineFunction::iterator BBI, + BranchProbability DefaultProb, BranchProbability UnhandledProbs, + SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough, + bool FallthroughUnreachable) { + using namespace SwitchCG; + MachineFunction *CurMF = SwitchMBB->getParent(); + // FIXME: Optimize away range check based on pivot comparisons. + BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex]; + // The bit test blocks haven't been inserted yet; insert them here. + for (BitTestCase &BTC : BTB->Cases) + CurMF->insert(BBI, BTC.ThisBB); + + // Fill in fields of the BitTestBlock. + BTB->Parent = CurMBB; + BTB->Default = Fallthrough; + + BTB->DefaultProb = UnhandledProbs; + // If the cases in bit test don't form a contiguous range, we evenly + // distribute the probability on the edge to Fallthrough to two + // successors of CurMBB. + if (!BTB->ContiguousRange) { + BTB->Prob += DefaultProb / 2; + BTB->DefaultProb -= DefaultProb / 2; + } + + if (FallthroughUnreachable) { + // Skip the range check if the fallthrough block is unreachable. + BTB->OmitRangeCheck = true; + } + + // If we're in the right place, emit the bit test header right now. + if (CurMBB == SwitchMBB) { + emitBitTestHeader(*BTB, SwitchMBB); + BTB->Emitted = true; + } + return true; +} + bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W, Value *Cond, MachineBasicBlock *SwitchMBB, @@ -794,9 +1209,15 @@ bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W, switch (I->Kind) { case CC_BitTests: { - LLVM_DEBUG(dbgs() << "Switch to bit test optimization unimplemented"); - return false; // Bit tests currently unimplemented. 
+ if (!lowerBitTestWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI, + DefaultProb, UnhandledProbs, I, Fallthrough, + FallthroughUnreachable)) { + LLVM_DEBUG(dbgs() << "Failed to lower bit test for switch"); + return false; + } + break; } + case CC_JumpTable: { if (!lowerJumpTableWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI, UnhandledProbs, I, Fallthrough, @@ -1137,16 +1558,33 @@ bool IRTranslator::translateGetElementPtr(const User &U, bool IRTranslator::translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder, - Intrinsic::ID ID) { + unsigned Opcode) { // If the source is undef, then just emit a nop. if (isa<UndefValue>(CI.getArgOperand(1))) return true; - ArrayRef<Register> Res; - auto ICall = MIRBuilder.buildIntrinsic(ID, Res, true); - for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) - ICall.addUse(getOrCreateVReg(**AI)); + SmallVector<Register, 3> SrcRegs; + + unsigned MinPtrSize = UINT_MAX; + for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) { + Register SrcReg = getOrCreateVReg(**AI); + LLT SrcTy = MRI->getType(SrcReg); + if (SrcTy.isPointer()) + MinPtrSize = std::min(SrcTy.getSizeInBits(), MinPtrSize); + SrcRegs.push_back(SrcReg); + } + + LLT SizeTy = LLT::scalar(MinPtrSize); + + // The size operand should be the minimum of the pointer sizes. 
+ Register &SizeOpReg = SrcRegs[SrcRegs.size() - 1]; + if (MRI->getType(SizeOpReg) != SizeTy) + SizeOpReg = MIRBuilder.buildZExtOrTrunc(SizeTy, SizeOpReg).getReg(0); + + auto ICall = MIRBuilder.buildInstr(Opcode); + for (Register SrcReg : SrcRegs) + ICall.addUse(SrcReg); Align DstAlign; Align SrcAlign; @@ -1175,7 +1613,7 @@ bool IRTranslator::translateMemFunc(const CallInst &CI, ICall.addMemOperand(MF->getMachineMemOperand( MachinePointerInfo(CI.getArgOperand(0)), MachineMemOperand::MOStore | VolFlag, 1, DstAlign)); - if (ID != Intrinsic::memset) + if (Opcode != TargetOpcode::G_MEMSET) ICall.addMemOperand(MF->getMachineMemOperand( MachinePointerInfo(CI.getArgOperand(1)), MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign)); @@ -1214,6 +1652,16 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op, return true; } +bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallInst &CI, + MachineIRBuilder &MIRBuilder) { + Register Dst = getOrCreateVReg(CI); + Register Src0 = getOrCreateVReg(*CI.getOperand(0)); + Register Src1 = getOrCreateVReg(*CI.getOperand(1)); + uint64_t Scale = cast<ConstantInt>(CI.getOperand(2))->getZExtValue(); + MIRBuilder.buildInstr(Op, {Dst}, { Src0, Src1, Scale }); + return true; +} + unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { switch (ID) { default: @@ -1264,10 +1712,14 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { return TargetOpcode::G_FNEARBYINT; case Intrinsic::pow: return TargetOpcode::G_FPOW; + case Intrinsic::powi: + return TargetOpcode::G_FPOWI; case Intrinsic::rint: return TargetOpcode::G_FRINT; case Intrinsic::round: return TargetOpcode::G_INTRINSIC_ROUND; + case Intrinsic::roundeven: + return TargetOpcode::G_INTRINSIC_ROUNDEVEN; case Intrinsic::sin: return TargetOpcode::G_FSIN; case Intrinsic::sqrt: @@ -1278,6 +1730,31 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { return TargetOpcode::G_READCYCLECOUNTER; case 
Intrinsic::ptrmask: return TargetOpcode::G_PTRMASK; + case Intrinsic::lrint: + return TargetOpcode::G_INTRINSIC_LRINT; + // FADD/FMUL require checking the FMF, so are handled elsewhere. + case Intrinsic::vector_reduce_fmin: + return TargetOpcode::G_VECREDUCE_FMIN; + case Intrinsic::vector_reduce_fmax: + return TargetOpcode::G_VECREDUCE_FMAX; + case Intrinsic::vector_reduce_add: + return TargetOpcode::G_VECREDUCE_ADD; + case Intrinsic::vector_reduce_mul: + return TargetOpcode::G_VECREDUCE_MUL; + case Intrinsic::vector_reduce_and: + return TargetOpcode::G_VECREDUCE_AND; + case Intrinsic::vector_reduce_or: + return TargetOpcode::G_VECREDUCE_OR; + case Intrinsic::vector_reduce_xor: + return TargetOpcode::G_VECREDUCE_XOR; + case Intrinsic::vector_reduce_smax: + return TargetOpcode::G_VECREDUCE_SMAX; + case Intrinsic::vector_reduce_smin: + return TargetOpcode::G_VECREDUCE_SMIN; + case Intrinsic::vector_reduce_umax: + return TargetOpcode::G_VECREDUCE_UMAX; + case Intrinsic::vector_reduce_umin: + return TargetOpcode::G_VECREDUCE_UMIN; } return Intrinsic::not_intrinsic; } @@ -1370,7 +1847,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, // Get the underlying objects for the location passed on the lifetime // marker. SmallVector<const Value *, 4> Allocas; - GetUnderlyingObjects(CI.getArgOperand(1), Allocas, *DL); + getUnderlyingObjects(CI.getArgOperand(1), Allocas); // Iterate over each underlying object, creating lifetime markers for each // static alloca. Quit if we find a non-static alloca. 
@@ -1484,6 +1961,37 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, return translateBinaryOp(TargetOpcode::G_USUBSAT, CI, MIRBuilder); case Intrinsic::ssub_sat: return translateBinaryOp(TargetOpcode::G_SSUBSAT, CI, MIRBuilder); + case Intrinsic::ushl_sat: + return translateBinaryOp(TargetOpcode::G_USHLSAT, CI, MIRBuilder); + case Intrinsic::sshl_sat: + return translateBinaryOp(TargetOpcode::G_SSHLSAT, CI, MIRBuilder); + case Intrinsic::umin: + return translateBinaryOp(TargetOpcode::G_UMIN, CI, MIRBuilder); + case Intrinsic::umax: + return translateBinaryOp(TargetOpcode::G_UMAX, CI, MIRBuilder); + case Intrinsic::smin: + return translateBinaryOp(TargetOpcode::G_SMIN, CI, MIRBuilder); + case Intrinsic::smax: + return translateBinaryOp(TargetOpcode::G_SMAX, CI, MIRBuilder); + case Intrinsic::abs: + // TODO: Preserve "int min is poison" arg in GMIR? + return translateUnaryOp(TargetOpcode::G_ABS, CI, MIRBuilder); + case Intrinsic::smul_fix: + return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI, MIRBuilder); + case Intrinsic::umul_fix: + return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI, MIRBuilder); + case Intrinsic::smul_fix_sat: + return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI, MIRBuilder); + case Intrinsic::umul_fix_sat: + return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI, MIRBuilder); + case Intrinsic::sdiv_fix: + return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI, MIRBuilder); + case Intrinsic::udiv_fix: + return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI, MIRBuilder); + case Intrinsic::sdiv_fix_sat: + return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI, MIRBuilder); + case Intrinsic::udiv_fix_sat: + return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder); case Intrinsic::fmuladd: { const TargetMachine &TM = MF->getTarget(); const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); @@ 
-1507,10 +2015,24 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, } return true; } + case Intrinsic::convert_from_fp16: + // FIXME: This intrinsic should probably be removed from the IR. + MIRBuilder.buildFPExt(getOrCreateVReg(CI), + getOrCreateVReg(*CI.getArgOperand(0)), + MachineInstr::copyFlagsFromInstruction(CI)); + return true; + case Intrinsic::convert_to_fp16: + // FIXME: This intrinsic should probably be removed from the IR. + MIRBuilder.buildFPTrunc(getOrCreateVReg(CI), + getOrCreateVReg(*CI.getArgOperand(0)), + MachineInstr::copyFlagsFromInstruction(CI)); + return true; case Intrinsic::memcpy: + return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY); case Intrinsic::memmove: + return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMMOVE); case Intrinsic::memset: - return translateMemFunc(CI, MIRBuilder, ID); + return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMSET); case Intrinsic::eh_typeid_for: { GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0)); Register Reg = getOrCreateVReg(CI); @@ -1593,7 +2115,18 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, } case Intrinsic::invariant_end: return true; + case Intrinsic::expect: + case Intrinsic::annotation: + case Intrinsic::ptr_annotation: + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: { + // Drop the intrinsic, but forward the value. + MIRBuilder.buildCopy(getOrCreateVReg(CI), + getOrCreateVReg(*CI.getArgOperand(0))); + return true; + } case Intrinsic::assume: + case Intrinsic::experimental_noalias_scope_decl: case Intrinsic::var_annotation: case Intrinsic::sideeffect: // Discard annotate attributes, assumptions, and artificial side-effects. 
@@ -1613,6 +2146,68 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, .addUse(getOrCreateVReg(*CI.getArgOperand(1))); return true; } + case Intrinsic::localescape: { + MachineBasicBlock &EntryMBB = MF->front(); + StringRef EscapedName = GlobalValue::dropLLVMManglingEscape(MF->getName()); + + // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission + // is the same on all targets. + for (unsigned Idx = 0, E = CI.getNumArgOperands(); Idx < E; ++Idx) { + Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts(); + if (isa<ConstantPointerNull>(Arg)) + continue; // Skip null pointers. They represent a hole in index space. + + int FI = getOrCreateFrameIndex(*cast<AllocaInst>(Arg)); + MCSymbol *FrameAllocSym = + MF->getMMI().getContext().getOrCreateFrameAllocSymbol(EscapedName, + Idx); + + // This should be inserted at the start of the entry block. + auto LocalEscape = + MIRBuilder.buildInstrNoInsert(TargetOpcode::LOCAL_ESCAPE) + .addSym(FrameAllocSym) + .addFrameIndex(FI); + + EntryMBB.insert(EntryMBB.begin(), LocalEscape); + } + + return true; + } + case Intrinsic::vector_reduce_fadd: + case Intrinsic::vector_reduce_fmul: { + // Need to check for the reassoc flag to decide whether we want a + // sequential reduction opcode or not. + Register Dst = getOrCreateVReg(CI); + Register ScalarSrc = getOrCreateVReg(*CI.getArgOperand(0)); + Register VecSrc = getOrCreateVReg(*CI.getArgOperand(1)); + unsigned Opc = 0; + if (!CI.hasAllowReassoc()) { + // The sequential ordering case. + Opc = ID == Intrinsic::vector_reduce_fadd + ? TargetOpcode::G_VECREDUCE_SEQ_FADD + : TargetOpcode::G_VECREDUCE_SEQ_FMUL; + MIRBuilder.buildInstr(Opc, {Dst}, {ScalarSrc, VecSrc}, + MachineInstr::copyFlagsFromInstruction(CI)); + return true; + } + // We split the operation into a separate G_FADD/G_FMUL + the reduce, + // since the associativity doesn't matter. 
+ unsigned ScalarOpc; + if (ID == Intrinsic::vector_reduce_fadd) { + Opc = TargetOpcode::G_VECREDUCE_FADD; + ScalarOpc = TargetOpcode::G_FADD; + } else { + Opc = TargetOpcode::G_VECREDUCE_FMUL; + ScalarOpc = TargetOpcode::G_FMUL; + } + LLT DstTy = MRI->getType(Dst); + auto Rdx = MIRBuilder.buildInstr( + Opc, {DstTy}, {VecSrc}, MachineInstr::copyFlagsFromInstruction(CI)); + MIRBuilder.buildInstr(ScalarOpc, {Dst}, {ScalarSrc, Rdx}, + MachineInstr::copyFlagsFromInstruction(CI)); + + return true; + } #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ case Intrinsic::INTRINSIC: #include "llvm/IR/ConstrainedOps.def" @@ -1722,10 +2317,6 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { MIB->copyIRFlags(CI); for (auto &Arg : enumerate(CI.arg_operands())) { - // Some intrinsics take metadata parameters. Reject them. - if (isa<MetadataAsValue>(Arg.value())) - return false; - // If this is required to be an immediate, don't materialize it in a // register. if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) { @@ -1738,6 +2329,11 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { } else { MIB.addFPImm(cast<ConstantFP>(Arg.value())); } + } else if (auto MD = dyn_cast<MetadataAsValue>(Arg.value())) { + auto *MDN = dyn_cast<MDNode>(MD->getMetadata()); + if (!MDN) // This was probably an MDString. 
+ return false; + MIB.addMetadata(MDN); } else { ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value()); if (VRegs.size() > 1) @@ -1762,6 +2358,62 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { return true; } +bool IRTranslator::findUnwindDestinations( + const BasicBlock *EHPadBB, + BranchProbability Prob, + SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>> + &UnwindDests) { + EHPersonality Personality = classifyEHPersonality( + EHPadBB->getParent()->getFunction().getPersonalityFn()); + bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX; + bool IsCoreCLR = Personality == EHPersonality::CoreCLR; + bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX; + bool IsSEH = isAsynchronousEHPersonality(Personality); + + if (IsWasmCXX) { + // Ignore this for now. + return false; + } + + while (EHPadBB) { + const Instruction *Pad = EHPadBB->getFirstNonPHI(); + BasicBlock *NewEHPadBB = nullptr; + if (isa<LandingPadInst>(Pad)) { + // Stop on landingpads. They are not funclets. + UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob); + break; + } + if (isa<CleanupPadInst>(Pad)) { + // Stop on cleanup pads. Cleanups are always funclet entries for all known + // personalities. + UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob); + UnwindDests.back().first->setIsEHScopeEntry(); + UnwindDests.back().first->setIsEHFuncletEntry(); + break; + } + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { + // Add the catchpad handlers to the possible destinations. + for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { + UnwindDests.emplace_back(&getMBB(*CatchPadBB), Prob); + // For MSVC++ and the CLR, catchblocks are funclets and need prologues. 
+ if (IsMSVCCXX || IsCoreCLR) + UnwindDests.back().first->setIsEHFuncletEntry(); + if (!IsSEH) + UnwindDests.back().first->setIsEHScopeEntry(); + } + NewEHPadBB = CatchSwitch->getUnwindDest(); + } else { + continue; + } + + BranchProbabilityInfo *BPI = FuncInfo.BPI; + if (BPI && NewEHPadBB) + Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB); + EHPadBB = NewEHPadBB; + } + return true; +} + bool IRTranslator::translateInvoke(const User &U, MachineIRBuilder &MIRBuilder) { const InvokeInst &I = cast<InvokeInst>(U); @@ -1787,7 +2439,7 @@ bool IRTranslator::translateInvoke(const User &U, return false; // FIXME: support Windows exception handling. - if (!isa<LandingPadInst>(EHPadBB->front())) + if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI())) return false; // Emit the actual call, bracketed by EH_LABELs so that the MF knows about @@ -1801,14 +2453,28 @@ bool IRTranslator::translateInvoke(const User &U, MCSymbol *EndSymbol = Context.createTempSymbol(); MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol); - // FIXME: track probabilities. + SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests; + BranchProbabilityInfo *BPI = FuncInfo.BPI; + MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB(); + BranchProbability EHPadBBProb = + BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB) + : BranchProbability::getZero(); + + if (!findUnwindDestinations(EHPadBB, EHPadBBProb, UnwindDests)) + return false; + MachineBasicBlock &EHPadMBB = getMBB(*EHPadBB), &ReturnMBB = getMBB(*ReturnBB); + // Update successor info. 
+ addSuccessorWithProb(InvokeMBB, &ReturnMBB); + for (auto &UnwindDest : UnwindDests) { + UnwindDest.first->setIsEHPad(); + addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second); + } + InvokeMBB->normalizeSuccProbs(); + MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol); - MIRBuilder.getMBB().addSuccessor(&ReturnMBB); - MIRBuilder.getMBB().addSuccessor(&EHPadMBB); MIRBuilder.buildBr(ReturnMBB); - return true; } @@ -1846,6 +2512,12 @@ bool IRTranslator::translateLandingPad(const User &U, MIRBuilder.buildInstr(TargetOpcode::EH_LABEL) .addSym(MF->addLandingPad(&MBB)); + // If the unwinder does not preserve all registers, ensure that the + // function marks the clobbered registers as used. + const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); + if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF)) + MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask); + LLT Ty = getLLTForType(*LP.getType(), *DL); Register Undef = MRI->createGenericVirtualRegister(Ty); MIRBuilder.buildUndef(Undef); @@ -2184,8 +2856,8 @@ bool IRTranslator::translate(const Instruction &Inst) { // We only emit constants into the entry block from here. To prevent jumpy // debug behaviour set the line to 0. if (const DebugLoc &DL = Inst.getDebugLoc()) - EntryBuilder->setDebugLoc( - DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt())); + EntryBuilder->setDebugLoc(DILocation::get( + Inst.getContext(), 0, 0, DL.getScope(), DL.getInlinedAt())); else EntryBuilder->setDebugLoc(DebugLoc()); @@ -2263,6 +2935,57 @@ bool IRTranslator::translate(const Constant &C, Register Reg) { } void IRTranslator::finalizeBasicBlock() { + for (auto &BTB : SL->BitTestCases) { + // Emit header first, if it wasn't already emitted. 
+ if (!BTB.Emitted) + emitBitTestHeader(BTB, BTB.Parent); + + BranchProbability UnhandledProb = BTB.Prob; + for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) { + UnhandledProb -= BTB.Cases[j].ExtraProb; + // Set the current basic block to the mbb we wish to insert the code into + MachineBasicBlock *MBB = BTB.Cases[j].ThisBB; + // If all cases cover a contiguous range, it is not necessary to jump to + // the default block after the last bit test fails. This is because the + // range check during bit test header creation has guaranteed that every + // case here doesn't go outside the range. In this case, there is no need + // to perform the last bit test, as it will always be true. Instead, make + // the second-to-last bit-test fall through to the target of the last bit + // test, and delete the last bit test. + + MachineBasicBlock *NextMBB; + if (BTB.ContiguousRange && j + 2 == ej) { + // Second-to-last bit-test with contiguous range: fall through to the + // target of the final bit test. + NextMBB = BTB.Cases[j + 1].TargetBB; + } else if (j + 1 == ej) { + // For the last bit test, fall through to Default. + NextMBB = BTB.Default; + } else { + // Otherwise, fall through to the next bit test. + NextMBB = BTB.Cases[j + 1].ThisBB; + } + + emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB); + + // FIXME delete this block below? + if (BTB.ContiguousRange && j + 2 == ej) { + // Since we're not going to use the final bit test, remove it. + BTB.Cases.pop_back(); + break; + } + } + // This is "default" BB. We have two jumps to it. From "header" BB and from + // last "case" BB, unless the latter was skipped. 
+ CFGEdge HeaderToDefaultEdge = {BTB.Parent->getBasicBlock(), + BTB.Default->getBasicBlock()}; + addMachineCFGPred(HeaderToDefaultEdge, BTB.Parent); + if (!BTB.ContiguousRange) { + addMachineCFGPred(HeaderToDefaultEdge, BTB.Cases.back().ThisBB); + } + } + SL->BitTestCases.clear(); + for (auto &JTCase : SL->JTCases) { // Emit header first, if it wasn't already emitted. if (!JTCase.first.Emitted) @@ -2271,6 +2994,10 @@ void IRTranslator::finalizeBasicBlock() { emitJumpTable(JTCase.second, JTCase.second.MBB); } SL->JTCases.clear(); + + for (auto &SwCase : SL->SwitchCases) + emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder); + SL->SwitchCases.clear(); } void IRTranslator::finalizeFunction() { @@ -2332,14 +3059,23 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { MRI = &MF->getRegInfo(); DL = &F.getParent()->getDataLayout(); ORE = std::make_unique<OptimizationRemarkEmitter>(&F); + const TargetMachine &TM = MF->getTarget(); + TM.resetTargetOptions(F); + EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F); FuncInfo.MF = MF; - FuncInfo.BPI = nullptr; + if (EnableOpts) + FuncInfo.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); + else + FuncInfo.BPI = nullptr; + + FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF); + const auto &TLI = *MF->getSubtarget().getTargetLowering(); - const TargetMachine &TM = MF->getTarget(); + SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo); SL->init(TLI, TM, *DL); - EnableOpts = TM.getOptLevel() != CodeGenOpt::None && !skipFunction(F); + assert(PendingPHIs.empty() && "stale PHIs"); @@ -2407,7 +3143,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { } } - if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs)) { + if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs, FuncInfo)) { OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", F.getSubprogram(), &F.getEntryBlock()); R << "unable to lower arguments: " << 
ore::NV("Prototype", F.getType()); diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp index 2ce1d414e755..bb4d41cfd69f 100644 --- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp @@ -455,19 +455,23 @@ bool InlineAsmLowering::lowerInlineAsm( unsigned DefRegIdx = InstFlagIdx + 1; Register Def = Inst->getOperand(DefRegIdx).getReg(); - // Copy input to new vreg with same reg class as Def - const TargetRegisterClass *RC = MRI->getRegClass(Def); ArrayRef<Register> SrcRegs = GetOrCreateVRegs(*OpInfo.CallOperandVal); assert(SrcRegs.size() == 1 && "Single register is expected here"); - Register Tmp = MRI->createVirtualRegister(RC); - if (!buildAnyextOrCopy(Tmp, SrcRegs[0], MIRBuilder)) - return false; - // Add Flag and input register operand (Tmp) to Inst. Tie Tmp to Def. + // When Def is physreg: use given input. + Register In = SrcRegs[0]; + // When Def is vreg: copy input to new vreg with same reg class as Def. + if (Def.isVirtual()) { + In = MRI->createVirtualRegister(MRI->getRegClass(Def)); + if (!buildAnyextOrCopy(In, SrcRegs[0], MIRBuilder)) + return false; + } + + // Add Flag and input register operand (In) to Inst. Tie In to Def. unsigned UseFlag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, 1); unsigned Flag = InlineAsm::getFlagWordForMatchingOp(UseFlag, DefIdx); Inst.addImm(Flag); - Inst.addReg(Tmp); + Inst.addReg(In); Inst->tieOperands(DefRegIdx, Inst->getNumOperands() - 1); break; } @@ -558,6 +562,11 @@ bool InlineAsmLowering::lowerInlineAsm( } unsigned Flag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, NumRegs); + if (OpInfo.Regs.front().isVirtual()) { + // Put the register class of the virtual registers in the flag word. 
+ const TargetRegisterClass *RC = MRI->getRegClass(OpInfo.Regs.front()); + Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); + } Inst.addImm(Flag); if (!buildAnyextOrCopy(OpInfo.Regs[0], SourceRegs[0], MIRBuilder)) return false; @@ -653,6 +662,7 @@ bool InlineAsmLowering::lowerAsmOperandForConstraint( default: return false; case 'i': // Simple Integer or Relocatable Constant + case 'n': // immediate integer with a known value. if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { assert(CI->getBitWidth() <= 64 && "expected immediate to fit into 64-bits"); diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp index f32278d07052..25fae5487187 100644 --- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -41,7 +41,7 @@ static cl::opt<std::string> cl::desc("Record GlobalISel rule coverage files of this " "prefix if instrumentation was generated")); #else -static const std::string CoveragePrefix = ""; +static const std::string CoveragePrefix; #endif char InstructionSelect::ID = 0; diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 2fedc034d315..4fec9e628ddb 100644 --- a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -33,24 +33,12 @@ InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers) InstructionSelector::InstructionSelector() = default; -bool InstructionSelector::constrainOperandRegToRegClass( - MachineInstr &I, unsigned OpIdx, const TargetRegisterClass &RC, - const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) const { - MachineBasicBlock &MBB = *I.getParent(); - MachineFunction &MF = *MBB.getParent(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - - return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, RC, - I.getOperand(OpIdx)); -} - bool 
InstructionSelector::isOperandImmEqual( const MachineOperand &MO, int64_t Value, const MachineRegisterInfo &MRI) const { if (MO.isReg() && MO.getReg()) if (auto VRegVal = getConstantVRegValWithLookThrough(MO.getReg(), MRI)) - return VRegVal->Value == Value; + return VRegVal->Value.getSExtValue() == Value; return false; } diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp index a83742f2138f..1993f6033291 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp @@ -10,6 +10,17 @@ // //===----------------------------------------------------------------------===// +// Enable optimizations to work around MSVC debug mode bug in 32-bit: +// https://developercommunity.visualstudio.com/content/problem/1179643/msvc-copies-overaligned-non-trivially-copyable-par.html +// FIXME: Remove this when the issue is closed. +#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_IX86) +// We have to disable runtime checks in order to enable optimizations. This is +// done for the entire file because the problem is actually observed in STL +// template functions. 
+#pragma runtime_checks("", off) +#pragma optimize("gs", on) +#endif + #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" using namespace llvm; @@ -24,7 +35,7 @@ LegalityPredicates::typeInSet(unsigned TypeIdx, std::initializer_list<LLT> TypesInit) { SmallVector<LLT, 4> Types = TypesInit; return [=](const LegalityQuery &Query) { - return std::find(Types.begin(), Types.end(), Query.Types[TypeIdx]) != Types.end(); + return llvm::is_contained(Types, Query.Types[TypeIdx]); }; } @@ -34,7 +45,7 @@ LegalityPredicate LegalityPredicates::typePairInSet( SmallVector<std::pair<LLT, LLT>, 4> Types = TypesInit; return [=](const LegalityQuery &Query) { std::pair<LLT, LLT> Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1]}; - return std::find(Types.begin(), Types.end(), Match) != Types.end(); + return llvm::is_contained(Types, Match); }; } @@ -46,11 +57,10 @@ LegalityPredicate LegalityPredicates::typePairAndMemDescInSet( TypePairAndMemDesc Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1], Query.MMODescrs[MMOIdx].SizeInBits, Query.MMODescrs[MMOIdx].AlignInBits}; - return std::find_if( - TypesAndMemDesc.begin(), TypesAndMemDesc.end(), - [=](const TypePairAndMemDesc &Entry) ->bool { - return Match.isCompatible(Entry); - }) != TypesAndMemDesc.end(); + return llvm::any_of(TypesAndMemDesc, + [=](const TypePairAndMemDesc &Entry) -> bool { + return Match.isCompatible(Entry); + }); }; } diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp index fcbecf90a845..f3ba3f080198 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp @@ -43,6 +43,16 @@ LegalizeMutation LegalizeMutations::changeElementTo(unsigned TypeIdx, }; } +LegalizeMutation LegalizeMutations::changeElementSizeTo(unsigned TypeIdx, + unsigned FromTypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT OldTy = Query.Types[TypeIdx]; + const LLT NewTy = Query.Types[FromTypeIdx]; + const LLT 
NewEltTy = LLT::scalar(NewTy.getScalarSizeInBits()); + return std::make_pair(TypeIdx, OldTy.changeElementType(NewEltTy)); + }; +} + LegalizeMutation LegalizeMutations::widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min) { return [=](const LegalityQuery &Query) { diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index 1d7be54de3b0..5ba9367cac8a 100644 --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -284,7 +284,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, WrapperObserver)) { WorkListObserver.printNewInstrs(); for (auto *DeadMI : DeadInstructions) { - LLVM_DEBUG(dbgs() << *DeadMI << "Is dead\n"); + LLVM_DEBUG(dbgs() << "Is dead: " << *DeadMI); RemoveDeadInstFromLists(DeadMI); DeadMI->eraseFromParentAndMarkDBGValuesForRemoval(); } diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index da519f99ad7e..e7f40523efaf 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -16,6 +16,7 @@ #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -29,6 +30,7 @@ using namespace llvm; using namespace LegalizeActions; +using namespace MIPatternMatch; /// Try to break down \p OrigTy into \p NarrowTy sized pieces. 
/// @@ -75,6 +77,8 @@ static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) { return Type::getFloatTy(Ctx); case 64: return Type::getDoubleTy(Ctx); + case 80: + return Type::getX86_FP80Ty(Ctx); case 128: return Type::getFP128Ty(Ctx); default: @@ -86,16 +90,15 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &Builder) : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()), - LI(*MF.getSubtarget().getLegalizerInfo()) { - MIRBuilder.setChangeObserver(Observer); -} + LI(*MF.getSubtarget().getLegalizerInfo()), + TLI(*MF.getSubtarget().getTargetLowering()) { } LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI, GISelChangeObserver &Observer, MachineIRBuilder &B) - : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI) { - MIRBuilder.setChangeObserver(Observer); -} + : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI), + TLI(*MF.getSubtarget().getTargetLowering()) { } + LegalizerHelper::LegalizeResult LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { LLVM_DEBUG(dbgs() << "Legalizing: " << MI); @@ -237,22 +240,21 @@ void LegalizerHelper::insertParts(Register DstReg, } } -/// Return the result registers of G_UNMERGE_VALUES \p MI in \p Regs +/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs. 
static void getUnmergeResults(SmallVectorImpl<Register> &Regs, const MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES); + const int StartIdx = Regs.size(); const int NumResults = MI.getNumOperands() - 1; - Regs.resize(NumResults); + Regs.resize(Regs.size() + NumResults); for (int I = 0; I != NumResults; ++I) - Regs[I] = MI.getOperand(I).getReg(); + Regs[StartIdx + I] = MI.getOperand(I).getReg(); } -LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy, - LLT NarrowTy, Register SrcReg) { +void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, + LLT GCDTy, Register SrcReg) { LLT SrcTy = MRI.getType(SrcReg); - - LLT GCDTy = getGCDType(DstTy, getGCDType(SrcTy, NarrowTy)); if (SrcTy == GCDTy) { // If the source already evenly divides the result type, we don't need to do // anything. @@ -262,7 +264,13 @@ LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy, auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg); getUnmergeResults(Parts, *Unmerge); } +} +LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy, + LLT NarrowTy, Register SrcReg) { + LLT SrcTy = MRI.getType(SrcReg); + LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy); + extractGCDType(Parts, GCDTy, SrcReg); return GCDTy; } @@ -376,7 +384,14 @@ void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy, } if (LCMTy.isVector()) { - MIRBuilder.buildExtract(DstReg, Remerge, 0); + unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits(); + SmallVector<Register, 8> UnmergeDefs(NumDefs); + UnmergeDefs[0] = DstReg; + for (unsigned I = 1; I != NumDefs; ++I) + UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy); + + MIRBuilder.buildUnmerge(UnmergeDefs, + MIRBuilder.buildMerge(LCMTy, RemergeRegs)); return; } @@ -384,7 +399,7 @@ void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy, } static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned 
Size) { -#define RTLIBCASE(LibcallPrefix) \ +#define RTLIBCASE_INT(LibcallPrefix) \ do { \ switch (Size) { \ case 32: \ @@ -398,19 +413,33 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { } \ } while (0) - assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); +#define RTLIBCASE(LibcallPrefix) \ + do { \ + switch (Size) { \ + case 32: \ + return RTLIB::LibcallPrefix##32; \ + case 64: \ + return RTLIB::LibcallPrefix##64; \ + case 80: \ + return RTLIB::LibcallPrefix##80; \ + case 128: \ + return RTLIB::LibcallPrefix##128; \ + default: \ + llvm_unreachable("unexpected size"); \ + } \ + } while (0) switch (Opcode) { case TargetOpcode::G_SDIV: - RTLIBCASE(SDIV_I); + RTLIBCASE_INT(SDIV_I); case TargetOpcode::G_UDIV: - RTLIBCASE(UDIV_I); + RTLIBCASE_INT(UDIV_I); case TargetOpcode::G_SREM: - RTLIBCASE(SREM_I); + RTLIBCASE_INT(SREM_I); case TargetOpcode::G_UREM: - RTLIBCASE(UREM_I); + RTLIBCASE_INT(UREM_I); case TargetOpcode::G_CTLZ_ZERO_UNDEF: - RTLIBCASE(CTLZ_I); + RTLIBCASE_INT(CTLZ_I); case TargetOpcode::G_FADD: RTLIBCASE(ADD_F); case TargetOpcode::G_FSUB: @@ -453,13 +482,16 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { RTLIBCASE(RINT_F); case TargetOpcode::G_FNEARBYINT: RTLIBCASE(NEARBYINT_F); + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: + RTLIBCASE(ROUNDEVEN_F); } llvm_unreachable("Unknown libcall function"); } /// True if an instruction is in tail position in its caller. Intended for /// legalizing libcalls as tail calls when possible. -static bool isLibCallInTailPosition(MachineInstr &MI) { +static bool isLibCallInTailPosition(const TargetInstrInfo &TII, + MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); const Function &F = MBB.getParent()->getFunction(); @@ -479,7 +511,6 @@ static bool isLibCallInTailPosition(MachineInstr &MI) { return false; // Only tail call if the following instruction is a standard return. 
- auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); auto Next = next_nodbg(MI.getIterator(), MBB.instr_end()); if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn()) return false; @@ -531,12 +562,11 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, LegalizerHelper::LegalizeResult llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); SmallVector<CallLowering::ArgInfo, 3> Args; // Add all the args, except for the last which is an imm denoting 'tail'. - for (unsigned i = 1; i < MI.getNumOperands() - 1; i++) { + for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) { Register Reg = MI.getOperand(i).getReg(); // Need derive an IR type for call lowering. @@ -551,31 +581,28 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); - Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID(); RTLIB::Libcall RTLibcall; - switch (ID) { - case Intrinsic::memcpy: + switch (MI.getOpcode()) { + case TargetOpcode::G_MEMCPY: RTLibcall = RTLIB::MEMCPY; break; - case Intrinsic::memset: - RTLibcall = RTLIB::MEMSET; - break; - case Intrinsic::memmove: + case TargetOpcode::G_MEMMOVE: RTLibcall = RTLIB::MEMMOVE; break; + case TargetOpcode::G_MEMSET: + RTLibcall = RTLIB::MEMSET; + break; default: return LegalizerHelper::UnableToLegalize; } const char *Name = TLI.getLibcallName(RTLibcall); - MIRBuilder.setInstrAndDebugLoc(MI); - CallLowering::CallLoweringInfo Info; Info.CallConv = TLI.getLibcallCallingConv(RTLibcall); Info.Callee = MachineOperand::CreateES(Name); Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx)); - Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() == 1 && - 
isLibCallInTailPosition(MI); + Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() && + isLibCallInTailPosition(MIRBuilder.getTII(), MI); std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs)); if (!CLI.lowerCall(MIRBuilder, Info)) @@ -668,10 +695,11 @@ LegalizerHelper::libcall(MachineInstr &MI) { case TargetOpcode::G_FMAXNUM: case TargetOpcode::G_FSQRT: case TargetOpcode::G_FRINT: - case TargetOpcode::G_FNEARBYINT: { + case TargetOpcode::G_FNEARBYINT: + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { Type *HLTy = getFloatTypeForLLT(Ctx, LLTy); - if (!HLTy || (Size != 32 && Size != 64 && Size != 128)) { - LLVM_DEBUG(dbgs() << "No libcall available for size " << Size << ".\n"); + if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) { + LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n"); return UnableToLegalize; } auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); @@ -720,6 +748,13 @@ LegalizerHelper::libcall(MachineInstr &MI) { return Status; break; } + case TargetOpcode::G_MEMCPY: + case TargetOpcode::G_MEMMOVE: + case TargetOpcode::G_MEMSET: { + LegalizeResult Result = createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI); + MI.eraseFromParent(); + return Result; + } } MI.eraseFromParent(); @@ -900,7 +935,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, case TargetOpcode::G_INSERT: return narrowScalarInsert(MI, TypeIdx, NarrowTy); case TargetOpcode::G_LOAD: { - const auto &MMO = **MI.memoperands_begin(); + auto &MMO = **MI.memoperands_begin(); Register DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); if (DstTy.isVector()) @@ -908,7 +943,6 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, if (8 * MMO.getSize() != DstTy.getSizeInBits()) { Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); - auto &MMO = **MI.memoperands_begin(); MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO); 
MIRBuilder.buildAnyExt(DstReg, TmpReg); MI.eraseFromParent(); @@ -925,10 +959,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); auto &MMO = **MI.memoperands_begin(); - if (MMO.getSizeInBits() == NarrowSize) { + unsigned MemSize = MMO.getSizeInBits(); + + if (MemSize == NarrowSize) { MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); - } else { + } else if (MemSize < NarrowSize) { MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO); + } else if (MemSize > NarrowSize) { + // FIXME: Need to split the load. + return UnableToLegalize; } if (ZExt) @@ -1204,6 +1243,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_PTR_ADD: case TargetOpcode::G_PTRMASK: { if (TypeIdx != 1) return UnableToLegalize; @@ -1212,6 +1252,29 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_FPTOUI: { + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); + return Legalized; + } + case TargetOpcode::G_FPTOSI: { + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_SEXT); + Observer.changedInstr(MI); + return Legalized; + } + case TargetOpcode::G_FPEXT: + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT); + Observer.changedInstr(MI); + return Legalized; } } @@ -1272,10 +1335,8 @@ void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy, void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx) { MachineOperand &MO = MI.getOperand(OpIdx); - Register DstExt = 
MRI.createGenericVirtualRegister(WideTy); MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); - MIRBuilder.buildExtract(MO, DstExt, 0); - MO.setReg(DstExt); + MO.setReg(widenWithUnmerge(WideTy, MO.getReg())); } void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, @@ -1443,6 +1504,40 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, return Legalized; } +Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) { + Register WideReg = MRI.createGenericVirtualRegister(WideTy); + LLT OrigTy = MRI.getType(OrigReg); + LLT LCMTy = getLCMType(WideTy, OrigTy); + + const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits(); + const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits(); + + Register UnmergeSrc = WideReg; + + // Create a merge to the LCM type, padding with undef + // %0:_(<3 x s32>) = G_FOO => <4 x s32> + // => + // %1:_(<4 x s32>) = G_FOO + // %2:_(<4 x s32>) = G_IMPLICIT_DEF + // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2 + // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3 + if (NumMergeParts > 1) { + Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0); + SmallVector<Register, 8> MergeParts(NumMergeParts, Undef); + MergeParts[0] = WideReg; + UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0); + } + + // Unmerge to the original register and pad with dead defs. 
+ SmallVector<Register, 8> UnmergeResults(NumUnmergeParts); + UnmergeResults[0] = OrigReg; + for (int I = 1; I != NumUnmergeParts; ++I) + UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy); + + MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc); + return WideReg; +} + LegalizerHelper::LegalizeResult LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { @@ -1512,35 +1607,60 @@ LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc); - // Create a sequence of unmerges to the original results. since we may have - // widened the source, we will need to pad the results with dead defs to cover - // the source register. - // e.g. widen s16 to s32: - // %1:_(s16), %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0:_(s48) + // Create a sequence of unmerges and merges to the original results. Since we + // may have widened the source, we will need to pad the results with dead defs + // to cover the source register. + // e.g. 
widen s48 to s64: + // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96) // // => - // %4:_(s64) = G_ANYEXT %0:_(s48) - // %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %4 ; Requested unmerge - // %1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %5 ; unpack to original regs - // %3:_(s16), dead %7 = G_UNMERGE_VALUES %6 ; original reg + extra dead def - + // %4:_(s192) = G_ANYEXT %0:_(s96) + // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge + // ; unpack to GCD type, with extra dead defs + // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64) + // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64) + // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64) + // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination + // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination + const LLT GCDTy = getGCDType(WideTy, DstTy); const int NumUnmerge = Unmerge->getNumOperands() - 1; - const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits(); - - for (int I = 0; I != NumUnmerge; ++I) { - auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); - - for (int J = 0; J != PartsPerUnmerge; ++J) { - int Idx = I * PartsPerUnmerge + J; - if (Idx < NumDst) - MIB.addDef(MI.getOperand(Idx).getReg()); - else { - // Create dead def for excess components. - MIB.addDef(MRI.createGenericVirtualRegister(DstTy)); + const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits(); + + // Directly unmerge to the destination without going through a GCD type + // if possible + if (PartsPerRemerge == 1) { + const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits(); + + for (int I = 0; I != NumUnmerge; ++I) { + auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); + + for (int J = 0; J != PartsPerUnmerge; ++J) { + int Idx = I * PartsPerUnmerge + J; + if (Idx < NumDst) + MIB.addDef(MI.getOperand(Idx).getReg()); + else { + // Create dead def for excess components. 
+ MIB.addDef(MRI.createGenericVirtualRegister(DstTy)); + } } + + MIB.addUse(Unmerge.getReg(I)); } + } else { + SmallVector<Register, 16> Parts; + for (int J = 0; J != NumUnmerge; ++J) + extractGCDType(Parts, GCDTy, Unmerge.getReg(J)); + + SmallVector<Register, 8> RemergeParts; + for (int I = 0; I != NumDst; ++I) { + for (int J = 0; J < PartsPerRemerge; ++J) { + const int Idx = I * PartsPerRemerge + J; + RemergeParts.emplace_back(Parts[Idx]); + } - MIB.addUse(Unmerge.getReg(I)); + MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts); + RemergeParts.clear(); + } } MI.eraseFromParent(); @@ -1590,8 +1710,7 @@ LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) { Src = MIRBuilder.buildAnyExt(WideTy, Src); ShiftTy = WideTy; - } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) - return UnableToLegalize; + } auto LShr = MIRBuilder.buildLShr( ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset)); @@ -1629,7 +1748,7 @@ LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, LegalizerHelper::LegalizeResult LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { - if (TypeIdx != 0) + if (TypeIdx != 0 || WideTy.isVector()) return UnableToLegalize; Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); @@ -1639,14 +1758,45 @@ LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, } LegalizerHelper::LegalizeResult -LegalizerHelper::widenScalarAddSubSat(MachineInstr &MI, unsigned TypeIdx, - LLT WideTy) { +LegalizerHelper::widenScalarAddoSubo(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { + if (TypeIdx == 1) + return UnableToLegalize; // TODO + unsigned Op = MI.getOpcode(); + unsigned Opcode = Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_SADDO + ? TargetOpcode::G_ADD + : TargetOpcode::G_SUB; + unsigned ExtOpcode = + Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_USUBO + ? 
TargetOpcode::G_ZEXT + : TargetOpcode::G_SEXT; + auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)}); + auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)}); + // Do the arithmetic in the larger type. + auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}); + LLT OrigTy = MRI.getType(MI.getOperand(0).getReg()); + auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp); + auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp}); + // There is no overflow if the ExtOp is the same as NewOp. + MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp); + // Now trunc the NewOp to the original result. + MIRBuilder.buildTrunc(MI.getOperand(0), NewOp); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT || - MI.getOpcode() == TargetOpcode::G_SSUBSAT; + MI.getOpcode() == TargetOpcode::G_SSUBSAT || + MI.getOpcode() == TargetOpcode::G_SSHLSAT; + bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT || + MI.getOpcode() == TargetOpcode::G_USHLSAT; // We can convert this to: // 1. Any extend iN to iM // 2. SHL by M-N - // 3. [US][ADD|SUB]SAT + // 3. [US][ADD|SUB|SHL]SAT // 4. L/ASHR by M-N // // It may be more efficient to lower this to a min and a max operation in @@ -1657,11 +1807,14 @@ LegalizerHelper::widenScalarAddSubSat(MachineInstr &MI, unsigned TypeIdx, unsigned NewBits = WideTy.getScalarSizeInBits(); unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits(); + // Shifts must zero-extend the RHS to preserve the unsigned quantity, and + // must not left shift the RHS to preserve the shift amount. auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1)); - auto RHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2)); + auto RHS = IsShift ? 
MIRBuilder.buildZExt(WideTy, MI.getOperand(2)) + : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2)); auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount); auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK); - auto ShiftR = MIRBuilder.buildShl(WideTy, RHS, ShiftK); + auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK); auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {ShiftL, ShiftR}, MI.getFlags()); @@ -1689,34 +1842,18 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return widenScalarMergeValues(MI, TypeIdx, WideTy); case TargetOpcode::G_UNMERGE_VALUES: return widenScalarUnmergeValues(MI, TypeIdx, WideTy); + case TargetOpcode::G_SADDO: + case TargetOpcode::G_SSUBO: case TargetOpcode::G_UADDO: - case TargetOpcode::G_USUBO: { - if (TypeIdx == 1) - return UnableToLegalize; // TODO - auto LHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(2)); - auto RHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(3)); - unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO - ? TargetOpcode::G_ADD - : TargetOpcode::G_SUB; - // Do the arithmetic in the larger type. - auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext}); - LLT OrigTy = MRI.getType(MI.getOperand(0).getReg()); - APInt Mask = - APInt::getLowBitsSet(WideTy.getSizeInBits(), OrigTy.getSizeInBits()); - auto AndOp = MIRBuilder.buildAnd( - WideTy, NewOp, MIRBuilder.buildConstant(WideTy, Mask)); - // There is no overflow if the AndOp is the same as NewOp. - MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, AndOp); - // Now trunc the NewOp to the original result. 
- MIRBuilder.buildTrunc(MI.getOperand(0), NewOp); - MI.eraseFromParent(); - return Legalized; - } + case TargetOpcode::G_USUBO: + return widenScalarAddoSubo(MI, TypeIdx, WideTy); case TargetOpcode::G_SADDSAT: case TargetOpcode::G_SSUBSAT: + case TargetOpcode::G_SSHLSAT: case TargetOpcode::G_UADDSAT: case TargetOpcode::G_USUBSAT: - return widenScalarAddSubSat(MI, TypeIdx, WideTy); + case TargetOpcode::G_USHLSAT: + return widenScalarAddSubShlSat(MI, TypeIdx, WideTy); case TargetOpcode::G_CTTZ: case TargetOpcode::G_CTTZ_ZERO_UNDEF: case TargetOpcode::G_CTLZ: @@ -1908,21 +2045,25 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_SITOFP: - if (TypeIdx != 1) - return UnableToLegalize; Observer.changingInstr(MI); - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); + + if (TypeIdx == 0) + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + else + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); + Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_UITOFP: - if (TypeIdx != 1) - return UnableToLegalize; Observer.changingInstr(MI); - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); + + if (TypeIdx == 0) + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + else + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_LOAD: case TargetOpcode::G_SEXTLOAD: case TargetOpcode::G_ZEXTLOAD: @@ -1936,7 +2077,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return UnableToLegalize; LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - if (!isPowerOf2_32(Ty.getSizeInBits())) + if (!Ty.isScalar()) return UnableToLegalize; Observer.changingInstr(MI); @@ -2134,6 +2275,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FPOW: case TargetOpcode::G_INTRINSIC_TRUNC: case TargetOpcode::G_INTRINSIC_ROUND: + 
case TargetOpcode::G_INTRINSIC_ROUNDEVEN: assert(TypeIdx == 0); Observer.changingInstr(MI); @@ -2143,6 +2285,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); Observer.changedInstr(MI); return Legalized; + case TargetOpcode::G_FPOWI: { + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT); + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + Observer.changedInstr(MI); + return Legalized; + } case TargetOpcode::G_INTTOPTR: if (TypeIdx != 1) return UnableToLegalize; @@ -2169,8 +2320,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { // Avoid changing the result vector type if the source element type was // requested. if (TypeIdx == 1) { - auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); - MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC)); + MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC)); } else { widenScalarDst(MI, WideTy, 0); } @@ -2273,6 +2423,376 @@ LegalizerHelper::lowerBitcast(MachineInstr &MI) { return UnableToLegalize; } +/// Figure out the bit offset into a register when coercing a vector index for +/// the wide element type. This is only for the case when promoting vector to +/// one with larger elements. +// +/// +/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize)) +/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize) +static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, + Register Idx, + unsigned NewEltSize, + unsigned OldEltSize) { + const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); + LLT IdxTy = B.getMRI()->getType(Idx); + + // Now figure out the amount we need to shift to get the target bits. 
+ auto OffsetMask = B.buildConstant( + IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio)); + auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask); + return B.buildShl(IdxTy, OffsetIdx, + B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0); +} + +/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this +/// is casting to a vector with a smaller element size, perform multiple element +/// extracts and merge the results. If this is coercing to a vector with larger +/// elements, index the bitcasted vector and extract the target element with bit +/// operations. This is intended to force the indexing in the native register +/// size for architectures that can dynamically index the register file. +LegalizerHelper::LegalizeResult +LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, + LLT CastTy) { + if (TypeIdx != 1) + return UnableToLegalize; + + Register Dst = MI.getOperand(0).getReg(); + Register SrcVec = MI.getOperand(1).getReg(); + Register Idx = MI.getOperand(2).getReg(); + LLT SrcVecTy = MRI.getType(SrcVec); + LLT IdxTy = MRI.getType(Idx); + + LLT SrcEltTy = SrcVecTy.getElementType(); + unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1; + unsigned OldNumElts = SrcVecTy.getNumElements(); + + LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy; + Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0); + + const unsigned NewEltSize = NewEltTy.getSizeInBits(); + const unsigned OldEltSize = SrcEltTy.getSizeInBits(); + if (NewNumElts > OldNumElts) { + // Decreasing the vector element size + // + // e.g. i64 = extract_vector_elt x:v2i64, y:i32 + // => + // v4i32:castx = bitcast x:v2i64 + // + // i64 = bitcast + // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))), + // (i32 (extract_vector_elt castx, (2 * y + 1))) + // + if (NewNumElts % OldNumElts != 0) + return UnableToLegalize; + + // Type of the intermediate result vector. 
+ const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts; + LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy); + + auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt); + + SmallVector<Register, 8> NewOps(NewEltsPerOldElt); + auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK); + + for (unsigned I = 0; I < NewEltsPerOldElt; ++I) { + auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I); + auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset); + auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx); + NewOps[I] = Elt.getReg(0); + } + + auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps); + MIRBuilder.buildBitcast(Dst, NewVec); + MI.eraseFromParent(); + return Legalized; + } + + if (NewNumElts < OldNumElts) { + if (NewEltSize % OldEltSize != 0) + return UnableToLegalize; + + // This only depends on powers of 2 because we use bit tricks to figure out + // the bit offset we need to shift to get the target element. A general + // expansion could emit division/multiply. + if (!isPowerOf2_32(NewEltSize / OldEltSize)) + return UnableToLegalize; + + // Increasing the vector element size. + // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx + // + // => + // + // %cast = G_BITCAST %vec + // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize) + // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx + // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize)) + // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize) + // %elt_bits = G_LSHR %wide_elt, %offset_bits + // %elt = G_TRUNC %elt_bits + + const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); + auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio); + + // Divide to get the index in the wider element type. 
+ auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio); + + Register WideElt = CastVec; + if (CastTy.isVector()) { + WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, + ScaledIdx).getReg(0); + } + + // Compute the bit offset into the register of the target element. + Register OffsetBits = getBitcastWiderVectorElementOffset( + MIRBuilder, Idx, NewEltSize, OldEltSize); + + // Shift the wide element to get the target element. + auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits); + MIRBuilder.buildTrunc(Dst, ExtractedBits); + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +/// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p +/// TargetReg, while preserving other bits in \p TargetReg. +/// +/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset) +static Register buildBitFieldInsert(MachineIRBuilder &B, + Register TargetReg, Register InsertReg, + Register OffsetBits) { + LLT TargetTy = B.getMRI()->getType(TargetReg); + LLT InsertTy = B.getMRI()->getType(InsertReg); + auto ZextVal = B.buildZExt(TargetTy, InsertReg); + auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits); + + // Produce a bitmask of the value to insert + auto EltMask = B.buildConstant( + TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(), + InsertTy.getSizeInBits())); + // Shift it into position + auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits); + auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask); + + // Clear out the bits in the wide element + auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask); + + // The value to insert has all zeros already, so stick it into the masked + // wide element. + return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0); +} + +/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. 
If this +/// is increasing the element size, perform the indexing in the target element +/// type, and use bit operations to insert at the element position. This is +/// intended for architectures that can dynamically index the register file and +/// want to force indexing in the native register size. +LegalizerHelper::LegalizeResult +LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, + LLT CastTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + Register Dst = MI.getOperand(0).getReg(); + Register SrcVec = MI.getOperand(1).getReg(); + Register Val = MI.getOperand(2).getReg(); + Register Idx = MI.getOperand(3).getReg(); + + LLT VecTy = MRI.getType(Dst); + LLT IdxTy = MRI.getType(Idx); + + LLT VecEltTy = VecTy.getElementType(); + LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy; + const unsigned NewEltSize = NewEltTy.getSizeInBits(); + const unsigned OldEltSize = VecEltTy.getSizeInBits(); + + unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1; + unsigned OldNumElts = VecTy.getNumElements(); + + Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0); + if (NewNumElts < OldNumElts) { + if (NewEltSize % OldEltSize != 0) + return UnableToLegalize; + + // This only depends on powers of 2 because we use bit tricks to figure out + // the bit offset we need to shift to get the target element. A general + // expansion could emit division/multiply. + if (!isPowerOf2_32(NewEltSize / OldEltSize)) + return UnableToLegalize; + + const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); + auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio); + + // Divide to get the index in the wider element type. 
+ auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio); + + Register ExtractedElt = CastVec; + if (CastTy.isVector()) { + ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, + ScaledIdx).getReg(0); + } + + // Compute the bit offset into the register of the target element. + Register OffsetBits = getBitcastWiderVectorElementOffset( + MIRBuilder, Idx, NewEltSize, OldEltSize); + + Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt, + Val, OffsetBits); + if (CastTy.isVector()) { + InsertedElt = MIRBuilder.buildInsertVectorElement( + CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0); + } + + MIRBuilder.buildBitcast(Dst, InsertedElt); + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerLoad(MachineInstr &MI) { + // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT + Register DstReg = MI.getOperand(0).getReg(); + Register PtrReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + auto &MMO = **MI.memoperands_begin(); + + if (DstTy.getSizeInBits() == MMO.getSizeInBits()) { + if (MI.getOpcode() == TargetOpcode::G_LOAD) { + // This load needs splitting into power of 2 sized loads. + if (DstTy.isVector()) + return UnableToLegalize; + if (isPowerOf2_32(DstTy.getSizeInBits())) + return UnableToLegalize; // Don't know what we're being asked to do. + + // Our strategy here is to generate anyextending loads for the smaller + // types up to next power-2 result type, and then combine the two larger + // result values together, before truncating back down to the non-pow-2 + // type. + // E.g. v1 = i24 load => + // v2 = i32 zextload (2 byte) + // v3 = i32 load (1 byte) + // v4 = i32 shl v3, 16 + // v5 = i32 or v4, v2 + // v1 = i24 trunc v5 + // By doing this we generate the correct truncate which should get + // combined away as an artifact with a matching extend. 
+ uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits()); + uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize; + + MachineFunction &MF = MIRBuilder.getMF(); + MachineMemOperand *LargeMMO = + MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); + MachineMemOperand *SmallMMO = MF.getMachineMemOperand( + &MMO, LargeSplitSize / 8, SmallSplitSize / 8); + + LLT PtrTy = MRI.getType(PtrReg); + unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits()); + LLT AnyExtTy = LLT::scalar(AnyExtSize); + Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy); + Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy); + auto LargeLoad = MIRBuilder.buildLoadInstr( + TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO); + + auto OffsetCst = MIRBuilder.buildConstant( + LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); + Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = + MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); + auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0), + *SmallMMO); + + auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize); + auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt); + auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad); + MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)}); + MI.eraseFromParent(); + return Legalized; + } + + MIRBuilder.buildLoad(DstReg, PtrReg, MMO); + MI.eraseFromParent(); + return Legalized; + } + + if (DstTy.isScalar()) { + Register TmpReg = + MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits())); + MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case TargetOpcode::G_LOAD: + MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg); + break; + case TargetOpcode::G_SEXTLOAD: + MIRBuilder.buildSExt(DstReg, TmpReg); + break; + case TargetOpcode::G_ZEXTLOAD: + MIRBuilder.buildZExt(DstReg, TmpReg); + break; + } + + 
MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerStore(MachineInstr &MI) { + // Lower a non-power of 2 store into multiple pow-2 stores. + // E.g. split an i24 store into an i16 store + i8 store. + // We do this by first extending the stored value to the next largest power + // of 2 type, and then using truncating stores to store the components. + // By doing this, likewise with G_LOAD, generate an extend that can be + // artifact-combined away instead of leaving behind extracts. + Register SrcReg = MI.getOperand(0).getReg(); + Register PtrReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + MachineMemOperand &MMO = **MI.memoperands_begin(); + if (SrcTy.getSizeInBits() != MMO.getSizeInBits()) + return UnableToLegalize; + if (SrcTy.isVector()) + return UnableToLegalize; + if (isPowerOf2_32(SrcTy.getSizeInBits())) + return UnableToLegalize; // Don't know what we're being asked to do. + + // Extend to the next pow-2. + const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits())); + auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg); + + // Obtain the smaller value by shifting away the larger value. + uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits()); + uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize; + auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize); + auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt); + + // Generate the PtrAdd and truncating stores. 
+ LLT PtrTy = MRI.getType(PtrReg); + auto OffsetCst = MIRBuilder.buildConstant( + LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); + Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = + MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); + + MachineFunction &MF = MIRBuilder.getMF(); + MachineMemOperand *LargeMMO = + MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); + MachineMemOperand *SmallMMO = + MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8); + MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO); + MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO); + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { switch (MI.getOpcode()) { @@ -2321,13 +2841,24 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_EXTRACT_VECTOR_ELT: + return bitcastExtractVectorElt(MI, TypeIdx, CastTy); + case TargetOpcode::G_INSERT_VECTOR_ELT: + return bitcastInsertVectorElt(MI, TypeIdx, CastTy); default: return UnableToLegalize; } } +// Legalize an instruction by changing the opcode in place. +void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) { + Observer.changingInstr(MI); + MI.setDesc(MIRBuilder.getTII().get(NewOpcode)); + Observer.changedInstr(MI); +} + LegalizerHelper::LegalizeResult -LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { +LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { using namespace TargetOpcode; switch(MI.getOpcode()) { @@ -2337,6 +2868,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return lowerBitcast(MI); case TargetOpcode::G_SREM: case TargetOpcode::G_UREM: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); auto Quot = MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? 
G_SDIV : G_UDIV, {Ty}, {MI.getOperand(1), MI.getOperand(2)}); @@ -2349,6 +2881,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { case TargetOpcode::G_SADDO: case TargetOpcode::G_SSUBO: return lowerSADDO_SSUBO(MI); + case TargetOpcode::G_UMULH: + case TargetOpcode::G_SMULH: + return lowerSMULH_UMULH(MI); case TargetOpcode::G_SMULO: case TargetOpcode::G_UMULO: { // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the @@ -2357,6 +2892,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { Register Overflow = MI.getOperand(1).getReg(); Register LHS = MI.getOperand(2).getReg(); Register RHS = MI.getOperand(3).getReg(); + LLT Ty = MRI.getType(Res); unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO ? TargetOpcode::G_SMULH @@ -2368,11 +2904,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { MI.RemoveOperand(1); Observer.changedInstr(MI); - MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); - auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS}); auto Zero = MIRBuilder.buildConstant(Ty, 0); + // Move insert point forward so we can use the Res register if needed. + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + // For *signed* multiply, overflow is detected by checking: // (hi != (lo >> bitwidth-1)) if (Opcode == TargetOpcode::G_SMULH) { @@ -2385,31 +2922,29 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return Legalized; } case TargetOpcode::G_FNEG: { + Register Res = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Res); + // TODO: Handle vector types once we are able to // represent them. 
if (Ty.isVector()) return UnableToLegalize; - Register Res = MI.getOperand(0).getReg(); - LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); - Type *ZeroTy = getFloatTypeForLLT(Ctx, Ty); - if (!ZeroTy) - return UnableToLegalize; - ConstantFP &ZeroForNegation = - *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy)); - auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation); + auto SignMask = + MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits())); Register SubByReg = MI.getOperand(1).getReg(); - Register ZeroReg = Zero.getReg(0); - MIRBuilder.buildFSub(Res, ZeroReg, SubByReg, MI.getFlags()); + MIRBuilder.buildXor(Res, SubByReg, SignMask); MI.eraseFromParent(); return Legalized; } case TargetOpcode::G_FSUB: { + Register Res = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Res); + // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)). // First, check if G_FNEG is marked as Lower. If so, we may // end up with an infinite loop as G_FSUB is used to legalize G_FNEG. if (LI.getAction({G_FNEG, {Ty}}).Action == Lower) return UnableToLegalize; - Register Res = MI.getOperand(0).getReg(); Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); Register Neg = MRI.createGenericVirtualRegister(Ty); @@ -2424,6 +2959,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return lowerFFloor(MI); case TargetOpcode::G_INTRINSIC_ROUND: return lowerIntrinsicRound(MI); + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { + // Since round even is the assumed rounding mode for unconstrained FP + // operations, rint and roundeven are the same operation. 
+ changeOpcode(MI, TargetOpcode::G_FRINT); + return Legalized; + } case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { Register OldValRes = MI.getOperand(0).getReg(); Register SuccessRes = MI.getOperand(1).getReg(); @@ -2438,145 +2979,16 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { } case TargetOpcode::G_LOAD: case TargetOpcode::G_SEXTLOAD: - case TargetOpcode::G_ZEXTLOAD: { - // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT - Register DstReg = MI.getOperand(0).getReg(); - Register PtrReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - auto &MMO = **MI.memoperands_begin(); - - if (DstTy.getSizeInBits() == MMO.getSizeInBits()) { - if (MI.getOpcode() == TargetOpcode::G_LOAD) { - // This load needs splitting into power of 2 sized loads. - if (DstTy.isVector()) - return UnableToLegalize; - if (isPowerOf2_32(DstTy.getSizeInBits())) - return UnableToLegalize; // Don't know what we're being asked to do. - - // Our strategy here is to generate anyextending loads for the smaller - // types up to next power-2 result type, and then combine the two larger - // result values together, before truncating back down to the non-pow-2 - // type. - // E.g. v1 = i24 load => - // v2 = i32 zextload (2 byte) - // v3 = i32 load (1 byte) - // v4 = i32 shl v3, 16 - // v5 = i32 or v4, v2 - // v1 = i24 trunc v5 - // By doing this we generate the correct truncate which should get - // combined away as an artifact with a matching extend. 
- uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits()); - uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize; - - MachineFunction &MF = MIRBuilder.getMF(); - MachineMemOperand *LargeMMO = - MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); - MachineMemOperand *SmallMMO = MF.getMachineMemOperand( - &MMO, LargeSplitSize / 8, SmallSplitSize / 8); - - LLT PtrTy = MRI.getType(PtrReg); - unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits()); - LLT AnyExtTy = LLT::scalar(AnyExtSize); - Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy); - Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy); - auto LargeLoad = MIRBuilder.buildLoadInstr( - TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO); - - auto OffsetCst = MIRBuilder.buildConstant( - LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); - Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); - auto SmallPtr = - MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); - auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0), - *SmallMMO); - - auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize); - auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt); - auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad); - MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)}); - MI.eraseFromParent(); - return Legalized; - } - MIRBuilder.buildLoad(DstReg, PtrReg, MMO); - MI.eraseFromParent(); - return Legalized; - } - - if (DstTy.isScalar()) { - Register TmpReg = - MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits())); - MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); - switch (MI.getOpcode()) { - default: - llvm_unreachable("Unexpected opcode"); - case TargetOpcode::G_LOAD: - MIRBuilder.buildExtOrTrunc(TargetOpcode::G_ANYEXT, DstReg, TmpReg); - break; - case TargetOpcode::G_SEXTLOAD: - MIRBuilder.buildSExt(DstReg, TmpReg); - break; - case TargetOpcode::G_ZEXTLOAD: - MIRBuilder.buildZExt(DstReg, TmpReg); - break; 
- } - MI.eraseFromParent(); - return Legalized; - } - - return UnableToLegalize; - } - case TargetOpcode::G_STORE: { - // Lower a non-power of 2 store into multiple pow-2 stores. - // E.g. split an i24 store into an i16 store + i8 store. - // We do this by first extending the stored value to the next largest power - // of 2 type, and then using truncating stores to store the components. - // By doing this, likewise with G_LOAD, generate an extend that can be - // artifact-combined away instead of leaving behind extracts. - Register SrcReg = MI.getOperand(0).getReg(); - Register PtrReg = MI.getOperand(1).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - MachineMemOperand &MMO = **MI.memoperands_begin(); - if (SrcTy.getSizeInBits() != MMO.getSizeInBits()) - return UnableToLegalize; - if (SrcTy.isVector()) - return UnableToLegalize; - if (isPowerOf2_32(SrcTy.getSizeInBits())) - return UnableToLegalize; // Don't know what we're being asked to do. - - // Extend to the next pow-2. - const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits())); - auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg); - - // Obtain the smaller value by shifting away the larger value. - uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits()); - uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize; - auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize); - auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt); - - // Generate the PtrAdd and truncating stores. 
- LLT PtrTy = MRI.getType(PtrReg); - auto OffsetCst = MIRBuilder.buildConstant( - LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); - Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); - auto SmallPtr = - MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); - - MachineFunction &MF = MIRBuilder.getMF(); - MachineMemOperand *LargeMMO = - MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); - MachineMemOperand *SmallMMO = - MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8); - MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO); - MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO); - MI.eraseFromParent(); - return Legalized; - } + case TargetOpcode::G_ZEXTLOAD: + return lowerLoad(MI); + case TargetOpcode::G_STORE: + return lowerStore(MI); case TargetOpcode::G_CTLZ_ZERO_UNDEF: case TargetOpcode::G_CTTZ_ZERO_UNDEF: case TargetOpcode::G_CTLZ: case TargetOpcode::G_CTTZ: case TargetOpcode::G_CTPOP: - return lowerBitCount(MI, TypeIdx, Ty); + return lowerBitCount(MI); case G_UADDO: { Register Res = MI.getOperand(0).getReg(); Register CarryOut = MI.getOperand(1).getReg(); @@ -2638,22 +3050,24 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return Legalized; } case G_UITOFP: - return lowerUITOFP(MI, TypeIdx, Ty); + return lowerUITOFP(MI); case G_SITOFP: - return lowerSITOFP(MI, TypeIdx, Ty); + return lowerSITOFP(MI); case G_FPTOUI: - return lowerFPTOUI(MI, TypeIdx, Ty); + return lowerFPTOUI(MI); case G_FPTOSI: return lowerFPTOSI(MI); case G_FPTRUNC: - return lowerFPTRUNC(MI, TypeIdx, Ty); + return lowerFPTRUNC(MI); + case G_FPOWI: + return lowerFPOWI(MI); case G_SMIN: case G_SMAX: case G_UMIN: case G_UMAX: - return lowerMinMax(MI, TypeIdx, Ty); + return lowerMinMax(MI); case G_FCOPYSIGN: - return lowerFCopySign(MI, TypeIdx, Ty); + return lowerFCopySign(MI); case G_FMINNUM: case G_FMAXNUM: return lowerFMinNumMaxNum(MI); @@ -2676,6 +3090,9 @@ LegalizerHelper::lower(MachineInstr 
&MI, unsigned TypeIdx, LLT Ty) { MI.eraseFromParent(); return Legalized; } + case G_EXTRACT_VECTOR_ELT: + case G_INSERT_VECTOR_ELT: + return lowerExtractInsertVectorElt(MI); case G_SHUFFLE_VECTOR: return lowerShuffleVector(MI); case G_DYN_STACKALLOC: @@ -2691,33 +3108,123 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { case G_READ_REGISTER: case G_WRITE_REGISTER: return lowerReadWriteRegister(MI); + case G_UADDSAT: + case G_USUBSAT: { + // Try to make a reasonable guess about which lowering strategy to use. The + // target can override this with custom lowering and calling the + // implementation functions. + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + if (LI.isLegalOrCustom({G_UMIN, Ty})) + return lowerAddSubSatToMinMax(MI); + return lowerAddSubSatToAddoSubo(MI); } + case G_SADDSAT: + case G_SSUBSAT: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + + // FIXME: It would probably make more sense to see if G_SADDO is preferred, + // since it's a shorter expansion. However, we would need to figure out the + // preferred boolean type for the carry out for the query. 
+ if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty})) + return lowerAddSubSatToMinMax(MI); + return lowerAddSubSatToAddoSubo(MI); + } + case G_SSHLSAT: + case G_USHLSAT: + return lowerShlSat(MI); + case G_ABS: { + // Expand %res = G_ABS %a into: + // %v1 = G_ASHR %a, scalar_size-1 + // %v2 = G_ADD %a, %v1 + // %res = G_XOR %v2, %v1 + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + Register OpReg = MI.getOperand(1).getReg(); + auto ShiftAmt = + MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1); + auto Shift = + MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt); + auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift); + MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift); + MI.eraseFromParent(); + return Legalized; + } + case G_SELECT: + return lowerSelect(MI); + } +} + +Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty, + Align MinAlign) const { + // FIXME: We're missing a way to go back from LLT to llvm::Type to query the + // datalayout for the preferred alignment. Also there should be a target hook + // for this to allow targets to reduce the alignment and ignore the + // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of + // the type. 
+ return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign); +} + +MachineInstrBuilder +LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment, + MachinePointerInfo &PtrInfo) { + MachineFunction &MF = MIRBuilder.getMF(); + const DataLayout &DL = MIRBuilder.getDataLayout(); + int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false); + + unsigned AddrSpace = DL.getAllocaAddrSpace(); + LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace)); + + PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx); + return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx); +} + +static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg, + LLT VecTy) { + int64_t IdxVal; + if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) + return IdxReg; + + LLT IdxTy = B.getMRI()->getType(IdxReg); + unsigned NElts = VecTy.getNumElements(); + if (isPowerOf2_32(NElts)) { + APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts)); + return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0); + } + + return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1)) + .getReg(0); +} + +Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy, + Register Index) { + LLT EltTy = VecTy.getElementType(); + + // Calculate the element offset and add it to the pointer. + unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size. 
+ assert(EltSize * 8 == EltTy.getSizeInBits() && + "Converting bits to bytes lost precision"); + + Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy); + + LLT IdxTy = MRI.getType(Index); + auto Mul = MIRBuilder.buildMul(IdxTy, Index, + MIRBuilder.buildConstant(IdxTy, EltSize)); + + LLT PtrTy = MRI.getType(VecPtr); + return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0); } LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef( MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { - SmallVector<Register, 2> DstRegs; - - unsigned NarrowSize = NarrowTy.getSizeInBits(); Register DstReg = MI.getOperand(0).getReg(); - unsigned Size = MRI.getType(DstReg).getSizeInBits(); - int NumParts = Size / NarrowSize; - // FIXME: Don't know how to handle the situation where the small vectors - // aren't all the same size yet. - if (Size % NarrowSize != 0) - return UnableToLegalize; + LLT DstTy = MRI.getType(DstReg); + LLT LCMTy = getLCMType(DstTy, NarrowTy); - for (int i = 0; i < NumParts; ++i) { - Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); - MIRBuilder.buildUndef(TmpReg); - DstRegs.push_back(TmpReg); - } + unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits(); - if (NarrowTy.isVector()) - MIRBuilder.buildConcatVectors(DstReg, DstRegs); - else - MIRBuilder.buildBuildVector(DstReg, DstRegs); + auto NewUndef = MIRBuilder.buildUndef(NarrowTy); + SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0)); + buildWidenedRemergeToDst(DstReg, LCMTy, Parts); MI.eraseFromParent(); return Legalized; } @@ -2838,7 +3345,7 @@ LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements()) return UnableToLegalize; - NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits()); + NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType()); } else { NumParts = DstTy.getNumElements(); NarrowTy1 = SrcTy.getElementType(); 
@@ -3111,63 +3618,116 @@ LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI, return Legalized; } +// Handle FewerElementsVector a G_BUILD_VECTOR or G_CONCAT_VECTORS that produces +// a vector +// +// Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with +// undef as necessary. +// +// %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2 +// -> <2 x s16> +// +// %4:_(s16) = G_IMPLICIT_DEF +// %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1 +// %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4 +// %7:_(<2 x s16>) = G_IMPLICIT_DEF +// %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7 +// %3:_(<3 x s16>), %8:_(<3 x s16>) = G_UNMERGE_VALUES %8 LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI, - unsigned TypeIdx, - LLT NarrowTy) { - assert(TypeIdx == 0 && "not a vector type index"); +LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { Register DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = DstTy.getElementType(); + LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy); - int DstNumElts = DstTy.getNumElements(); - int NarrowNumElts = NarrowTy.getNumElements(); - int NumConcat = (DstNumElts + NarrowNumElts - 1) / NarrowNumElts; - LLT WidenedDstTy = LLT::vector(NarrowNumElts * NumConcat, SrcTy); + // Break into a common type + SmallVector<Register, 16> Parts; + for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) + extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg()); - SmallVector<Register, 8> ConcatOps; - SmallVector<Register, 8> SubBuildVector; + // Build the requested new merge, padding with undef. + LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, + TargetOpcode::G_ANYEXT); - Register UndefReg; - if (WidenedDstTy != DstTy) - UndefReg = MIRBuilder.buildUndef(SrcTy).getReg(0); + // Pack into the original result register. 
+ buildWidenedRemergeToDst(DstReg, LCMTy, Parts); - // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as - // necessary. - // - // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2 - // -> <2 x s16> - // - // %4:_(s16) = G_IMPLICIT_DEF - // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1 - // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4 - // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6 - // %3:_(<3 x s16>) = G_EXTRACT %7, 0 - for (int I = 0; I != NumConcat; ++I) { - for (int J = 0; J != NarrowNumElts; ++J) { - int SrcIdx = NarrowNumElts * I + J; - - if (SrcIdx < DstNumElts) { - Register SrcReg = MI.getOperand(SrcIdx + 1).getReg(); - SubBuildVector.push_back(SrcReg); - } else - SubBuildVector.push_back(UndefReg); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowVecTy) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcVec = MI.getOperand(1).getReg(); + Register InsertVal; + bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT; + + assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index"); + if (IsInsert) + InsertVal = MI.getOperand(2).getReg(); + + Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg(); + + // TODO: Handle total scalarization case. + if (!NarrowVecTy.isVector()) + return UnableToLegalize; + + LLT VecTy = MRI.getType(SrcVec); + + // If the index is a constant, we can really break this down as you would + // expect, and index into the target size pieces. + int64_t IdxVal; + if (mi_match(Idx, MRI, m_ICst(IdxVal))) { + // Avoid out of bounds indexing the pieces. 
+ if (IdxVal >= VecTy.getNumElements()) { + MIRBuilder.buildUndef(DstReg); + MI.eraseFromParent(); + return Legalized; } - auto BuildVec = MIRBuilder.buildBuildVector(NarrowTy, SubBuildVector); - ConcatOps.push_back(BuildVec.getReg(0)); - SubBuildVector.clear(); - } + SmallVector<Register, 8> VecParts; + LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec); + + // Build a sequence of NarrowTy pieces in VecParts for this operand. + LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts, + TargetOpcode::G_ANYEXT); + + unsigned NewNumElts = NarrowVecTy.getNumElements(); - if (DstTy == WidenedDstTy) - MIRBuilder.buildConcatVectors(DstReg, ConcatOps); - else { - auto Concat = MIRBuilder.buildConcatVectors(WidenedDstTy, ConcatOps); - MIRBuilder.buildExtract(DstReg, Concat, 0); + LLT IdxTy = MRI.getType(Idx); + int64_t PartIdx = IdxVal / NewNumElts; + auto NewIdx = + MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx); + + if (IsInsert) { + LLT PartTy = MRI.getType(VecParts[PartIdx]); + + // Use the adjusted index to insert into one of the subvectors. + auto InsertPart = MIRBuilder.buildInsertVectorElement( + PartTy, VecParts[PartIdx], InsertVal, NewIdx); + VecParts[PartIdx] = InsertPart.getReg(0); + + // Recombine the inserted subvector with the others to reform the result + // vector. + buildWidenedRemergeToDst(DstReg, LCMTy, VecParts); + } else { + MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx); + } + + MI.eraseFromParent(); + return Legalized; } - MI.eraseFromParent(); - return Legalized; + // With a variable index, we can't perform the operation in a smaller type, so + // we're forced to expand this. + // + // TODO: We could emit a chain of compare/select to figure out which piece to + // index. 
+ return lowerExtractInsertVectorElt(MI); } LegalizerHelper::LegalizeResult @@ -3213,7 +3773,8 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, if (NumParts == -1) return UnableToLegalize; - const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits()); + LLT PtrTy = MRI.getType(AddrReg); + const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits()); unsigned TotalSize = ValTy.getSizeInBits(); @@ -3411,6 +3972,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_ADD: case G_SUB: case G_MUL: + case G_PTR_ADD: case G_SMULH: case G_UMULH: case G_FADD: @@ -3434,6 +3996,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FFLOOR: case G_FRINT: case G_INTRINSIC_ROUND: + case G_INTRINSIC_ROUNDEVEN: case G_INTRINSIC_TRUNC: case G_FCOS: case G_FSIN: @@ -3465,6 +4028,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_SHL: case G_LSHR: case G_ASHR: + case G_SSHLSAT: + case G_USHLSAT: case G_CTLZ: case G_CTLZ_ZERO_UNDEF: case G_CTTZ: @@ -3495,7 +4060,15 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_UNMERGE_VALUES: return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy); case G_BUILD_VECTOR: - return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy); + assert(TypeIdx == 0 && "not a vector type index"); + return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy); + case G_CONCAT_VECTORS: + if (TypeIdx != 1) // TODO: This probably does work as expected already. 
+ return UnableToLegalize; + return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy); + case G_EXTRACT_VECTOR_ELT: + case G_INSERT_VECTOR_ELT: + return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy); case G_LOAD: case G_STORE: return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); @@ -4267,9 +4840,9 @@ LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, } LegalizerHelper::LegalizeResult -LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { +LegalizerHelper::lowerBitCount(MachineInstr &MI) { unsigned Opc = MI.getOpcode(); - auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); + const auto &TII = MIRBuilder.getTII(); auto isSupported = [this](const LegalityQuery &Q) { auto QAction = LI.getAction(Q).Action; return QAction == Legal || QAction == Libcall || QAction == Custom; @@ -4357,15 +4930,15 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { // unless the target has ctlz but not ctpop, in which case we use: // { return 32 - nlz(~x & (x-1)); } // Ref: "Hacker's Delight" by Henry Warren - auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1); - auto MIBNot = MIRBuilder.buildXor(Ty, SrcReg, MIBCstNeg1); + auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1); + auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1); auto MIBTmp = MIRBuilder.buildAnd( - Ty, MIBNot, MIRBuilder.buildAdd(Ty, SrcReg, MIBCstNeg1)); - if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) && - isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) { - auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len); + SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1)); + if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) && + isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) { + auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len); MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen, - MIRBuilder.buildCTLZ(Ty, MIBTmp)); + MIRBuilder.buildCTLZ(SrcTy, MIBTmp)); MI.eraseFromParent(); return Legalized; } @@ 
-4374,6 +4947,8 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return Legalized; } case TargetOpcode::G_CTPOP: { + Register SrcReg = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(SrcReg); unsigned Size = Ty.getSizeInBits(); MachineIRBuilder &B = MIRBuilder; @@ -4383,11 +4958,11 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { // B2Count = val - { (val >> 1) & 0x55555555 } // since it gives same result in blocks of 2 with one instruction less. auto C_1 = B.buildConstant(Ty, 1); - auto B2Set1LoTo1Hi = B.buildLShr(Ty, MI.getOperand(1).getReg(), C_1); + auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1); APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55)); auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0); auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0); - auto B2Count = B.buildSub(Ty, MI.getOperand(1).getReg(), B2Count1Hi); + auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi); // In order to get count in blocks of 4 add values from adjacent block of 2. 
// B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 } @@ -4486,8 +5061,7 @@ LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) { return Legalized; } -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { +LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); LLT DstTy = MRI.getType(Dst); @@ -4515,8 +5089,7 @@ LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return UnableToLegalize; } -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { +LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); LLT DstTy = MRI.getType(Dst); @@ -4562,8 +5135,7 @@ LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return UnableToLegalize; } -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { +LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); LLT DstTy = MRI.getType(Dst); @@ -4780,7 +5352,7 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) { } LegalizerHelper::LegalizeResult -LegalizerHelper::lowerFPTRUNC(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { +LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); @@ -4795,6 +5367,20 @@ LegalizerHelper::lowerFPTRUNC(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return UnableToLegalize; } +// TODO: If RHS is a constant SelectionDAGBuilder expands this into a +// multiplication tree. 
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src0 = MI.getOperand(1).getReg(); + Register Src1 = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(Dst); + + auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1); + MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags()); + MI.eraseFromParent(); + return Legalized; +} + static CmpInst::Predicate minMaxToCompare(unsigned Opc) { switch (Opc) { case TargetOpcode::G_SMIN: @@ -4810,8 +5396,7 @@ static CmpInst::Predicate minMaxToCompare(unsigned Opc) { } } -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { +LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src0 = MI.getOperand(1).getReg(); Register Src1 = MI.getOperand(2).getReg(); @@ -4827,7 +5412,7 @@ LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { } LegalizerHelper::LegalizeResult -LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { +LegalizerHelper::lowerFCopySign(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src0 = MI.getOperand(1).getReg(); Register Src1 = MI.getOperand(2).getReg(); @@ -5049,6 +5634,71 @@ LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) { return Legalized; } +/// Lower a vector extract or insert by writing the vector to a stack temporary +/// and reloading the element or vector. 
+/// +/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx +/// => +/// %stack_temp = G_FRAME_INDEX +/// G_STORE %vec, %stack_temp +/// %idx = clamp(%idx, %vec.getNumElements()) +/// %element_ptr = G_PTR_ADD %stack_temp, %idx +/// %dst = G_LOAD %element_ptr +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcVec = MI.getOperand(1).getReg(); + Register InsertVal; + if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT) + InsertVal = MI.getOperand(2).getReg(); + + Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg(); + + LLT VecTy = MRI.getType(SrcVec); + LLT EltTy = VecTy.getElementType(); + if (!EltTy.isByteSized()) { // Not implemented. + LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n"); + return UnableToLegalize; + } + + unsigned EltBytes = EltTy.getSizeInBytes(); + Align VecAlign = getStackTemporaryAlignment(VecTy); + Align EltAlign; + + MachinePointerInfo PtrInfo; + auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()), + VecAlign, PtrInfo); + MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign); + + // Get the pointer to the element, and be sure not to hit undefined behavior + // if the index is out of bounds. + Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx); + + int64_t IdxVal; + if (mi_match(Idx, MRI, m_ICst(IdxVal))) { + int64_t Offset = IdxVal * EltBytes; + PtrInfo = PtrInfo.getWithOffset(Offset); + EltAlign = commonAlignment(VecAlign, Offset); + } else { + // We lose information with a variable offset. + EltAlign = getStackTemporaryAlignment(EltTy); + PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace()); + } + + if (InsertVal) { + // Write the inserted element + MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign); + + // Reload the whole vector. 
+ MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign); + } else { + MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign); + } + + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { Register DstReg = MI.getOperand(0).getReg(); @@ -5119,7 +5769,6 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { LLT PtrTy = MRI.getType(Dst); LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits()); - const auto &TLI = *MF.getSubtarget().getTargetLowering(); Register SPReg = TLI.getStackPointerRegisterToSaveRestore(); auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg); SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp); @@ -5265,6 +5914,185 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) { } LegalizerHelper::LegalizeResult +LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) { + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(Res); + bool IsSigned; + bool IsAdd; + unsigned BaseOp; + switch (MI.getOpcode()) { + default: + llvm_unreachable("unexpected addsat/subsat opcode"); + case TargetOpcode::G_UADDSAT: + IsSigned = false; + IsAdd = true; + BaseOp = TargetOpcode::G_ADD; + break; + case TargetOpcode::G_SADDSAT: + IsSigned = true; + IsAdd = true; + BaseOp = TargetOpcode::G_ADD; + break; + case TargetOpcode::G_USUBSAT: + IsSigned = false; + IsAdd = false; + BaseOp = TargetOpcode::G_SUB; + break; + case TargetOpcode::G_SSUBSAT: + IsSigned = true; + IsAdd = false; + BaseOp = TargetOpcode::G_SUB; + break; + } + + if (IsSigned) { + // sadd.sat(a, b) -> + // hi = 0x7fffffff - smax(a, 0) + // lo = 0x80000000 - smin(a, 0) + // a + smin(smax(lo, b), hi) + // ssub.sat(a, b) -> + // lo = smax(a, -1) - 0x7fffffff + // hi = smin(a, -1) - 0x80000000 + // a - smin(smax(lo, b), hi) + // TODO: AMDGPU can use a "median of 3" instruction here: + // a +/- med3(lo, b, hi) + uint64_t NumBits = 
Ty.getScalarSizeInBits(); + auto MaxVal = + MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits)); + auto MinVal = + MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits)); + MachineInstrBuilder Hi, Lo; + if (IsAdd) { + auto Zero = MIRBuilder.buildConstant(Ty, 0); + Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero)); + Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero)); + } else { + auto NegOne = MIRBuilder.buildConstant(Ty, -1); + Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne), + MaxVal); + Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne), + MinVal); + } + auto RHSClamped = + MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi); + MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped}); + } else { + // uadd.sat(a, b) -> a + umin(~a, b) + // usub.sat(a, b) -> a - umin(a, b) + Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS; + auto Min = MIRBuilder.buildUMin(Ty, Not, RHS); + MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min}); + } + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) { + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(Res); + LLT BoolTy = Ty.changeElementSize(1); + bool IsSigned; + bool IsAdd; + unsigned OverflowOp; + switch (MI.getOpcode()) { + default: + llvm_unreachable("unexpected addsat/subsat opcode"); + case TargetOpcode::G_UADDSAT: + IsSigned = false; + IsAdd = true; + OverflowOp = TargetOpcode::G_UADDO; + break; + case TargetOpcode::G_SADDSAT: + IsSigned = true; + IsAdd = true; + OverflowOp = TargetOpcode::G_SADDO; + break; + case TargetOpcode::G_USUBSAT: + IsSigned = false; + IsAdd = false; + OverflowOp = TargetOpcode::G_USUBO; + break; + case TargetOpcode::G_SSUBSAT: + IsSigned = true; + IsAdd = false; + OverflowOp = 
TargetOpcode::G_SSUBO; + break; + } + + auto OverflowRes = + MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS}); + Register Tmp = OverflowRes.getReg(0); + Register Ov = OverflowRes.getReg(1); + MachineInstrBuilder Clamp; + if (IsSigned) { + // sadd.sat(a, b) -> + // {tmp, ov} = saddo(a, b) + // ov ? (tmp >>s 31) + 0x80000000 : r + // ssub.sat(a, b) -> + // {tmp, ov} = ssubo(a, b) + // ov ? (tmp >>s 31) + 0x80000000 : r + uint64_t NumBits = Ty.getScalarSizeInBits(); + auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1); + auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount); + auto MinVal = + MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits)); + Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal); + } else { + // uadd.sat(a, b) -> + // {tmp, ov} = uaddo(a, b) + // ov ? 0xffffffff : tmp + // usub.sat(a, b) -> + // {tmp, ov} = usubo(a, b) + // ov ? 0 : tmp + Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0); + } + MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerShlSat(MachineInstr &MI) { + assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT || + MI.getOpcode() == TargetOpcode::G_USHLSAT) && + "Expected shlsat opcode!"); + bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT; + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(Res); + LLT BoolTy = Ty.changeElementSize(1); + + unsigned BW = Ty.getScalarSizeInBits(); + auto Result = MIRBuilder.buildShl(Ty, LHS, RHS); + auto Orig = IsSigned ? 
MIRBuilder.buildAShr(Ty, Result, RHS) + : MIRBuilder.buildLShr(Ty, Result, RHS); + + MachineInstrBuilder SatVal; + if (IsSigned) { + auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW)); + auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW)); + auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS, + MIRBuilder.buildConstant(Ty, 0)); + SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax); + } else { + SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW)); + } + auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig); + MIRBuilder.buildSelect(Res, Ov, SatVal, Result); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); @@ -5344,8 +6172,6 @@ LegalizerHelper::lowerBitreverse(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) { MachineFunction &MF = MIRBuilder.getMF(); - const TargetSubtargetInfo &STI = MF.getSubtarget(); - const TargetLowering *TLI = STI.getTargetLowering(); bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER; int NameOpIdx = IsRead ? 1 : 0; @@ -5356,7 +6182,7 @@ LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) { const MDString *RegStr = cast<MDString>( cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0)); - Register PhysReg = TLI->getRegisterByName(RegStr->getString().data(), Ty, MF); + Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF); if (!PhysReg.isValid()) return UnableToLegalize; @@ -5368,3 +6194,63 @@ LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) { MI.eraseFromParent(); return Legalized; } + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) { + bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH; + unsigned ExtOp = IsSigned ? 
TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; + Register Result = MI.getOperand(0).getReg(); + LLT OrigTy = MRI.getType(Result); + auto SizeInBits = OrigTy.getScalarSizeInBits(); + LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2); + + auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)}); + auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)}); + auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS); + unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR; + + auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits); + auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt}); + MIRBuilder.buildTrunc(Result, Shifted); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) { + // Implement vector G_SELECT in terms of XOR, AND, OR. + Register DstReg = MI.getOperand(0).getReg(); + Register MaskReg = MI.getOperand(1).getReg(); + Register Op1Reg = MI.getOperand(2).getReg(); + Register Op2Reg = MI.getOperand(3).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT MaskTy = MRI.getType(MaskReg); + LLT Op1Ty = MRI.getType(Op1Reg); + if (!DstTy.isVector()) + return UnableToLegalize; + + // Vector selects can have a scalar predicate. If so, splat into a vector and + // finish for later legalization attempts to try again. + if (MaskTy.isScalar()) { + Register MaskElt = MaskReg; + if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits()) + MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0); + // Generate a vector splat idiom to be pattern matched later. 
+ auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(ShufSplat.getReg(0)); + Observer.changedInstr(MI); + return Legalized; + } + + if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) { + return UnableToLegalize; + } + + auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg); + auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg); + auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask); + MIRBuilder.buildOr(DstReg, NewOp1, NewOp2); + MI.eraseFromParent(); + return Legalized; +} diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 4abd0c4df97a..30acac14bc5f 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -105,6 +105,7 @@ raw_ostream &LegalityQuery::print(raw_ostream &OS) const { static bool hasNoSimpleLoops(const LegalizeRule &Rule, const LegalityQuery &Q, const std::pair<unsigned, LLT> &Mutation) { switch (Rule.getAction()) { + case Legal: case Custom: case Lower: case MoreElements: @@ -122,7 +123,7 @@ static bool mutationIsSane(const LegalizeRule &Rule, std::pair<unsigned, LLT> Mutation) { // If the user wants a custom mutation, then we can't really say much about // it. Return true, and trust that they're doing the right thing. - if (Rule.getAction() == Custom) + if (Rule.getAction() == Custom || Rule.getAction() == Legal) return true; const unsigned TypeIdx = Mutation.first; @@ -147,7 +148,8 @@ static bool mutationIsSane(const LegalizeRule &Rule, if (NewTy.getNumElements() <= OldElts) return false; } - } + } else if (Rule.getAction() == MoreElements) + return false; // Make sure the element type didn't change. 
return NewTy.getScalarType() == OldTy.getScalarType(); diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp index a07416d08614..30c00c63f6f4 100644 --- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -11,6 +11,7 @@ #include "llvm/CodeGen/GlobalISel/Localizer.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -56,6 +57,20 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def, return InsertMBB == Def.getParent(); } +bool Localizer::isNonUniquePhiValue(MachineOperand &Op) const { + MachineInstr *MI = Op.getParent(); + if (!MI->isPHI()) + return false; + + Register SrcReg = Op.getReg(); + for (unsigned Idx = 1; Idx < MI->getNumOperands(); Idx += 2) { + auto &MO = MI->getOperand(Idx); + if (&MO != &Op && MO.isReg() && MO.getReg() == SrcReg) + return true; + } + return false; +} + bool Localizer::localizeInterBlock(MachineFunction &MF, LocalizedSetVecT &LocalizedInstrs) { bool Changed = false; @@ -93,6 +108,14 @@ bool Localizer::localizeInterBlock(MachineFunction &MF, LocalizedInstrs.insert(&MI); continue; } + + // If the use is a phi operand that's not unique, don't try to localize. + // If we do, we can cause unnecessary instruction bloat by duplicating + // into each predecessor block, when the existing one is sufficient and + // allows for easier optimization later. 
+ if (isNonUniquePhiValue(MOUse)) + continue; + LLVM_DEBUG(dbgs() << "Fixing non-local use\n"); Changed = true; auto MBBAndReg = std::make_pair(InsertMBB, Reg); diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 10f696d6a3b3..67ef02a4e7b2 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -9,8 +9,8 @@ /// This file implements the MachineIRBuidler class. //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" - #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -106,8 +106,8 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, } else if (auto *CFP = dyn_cast<ConstantFP>(&C)) { MIB.addFPImm(CFP); } else { - // Insert %noreg if we didn't find a usable constant and had to drop it. - MIB.addReg(0U); + // Insert $noreg if we didn't find a usable constant and had to drop it. 
+ MIB.addReg(Register()); } MIB.addImm(0).addMetadata(Variable).addMetadata(Expr); @@ -162,6 +162,11 @@ MachineInstrBuilder MachineIRBuilder::buildJumpTable(const LLT PtrTy, .addJumpTableIndex(JTI); } +void MachineIRBuilder::validateUnaryOp(const LLT Res, const LLT Op0) { + assert((Res.isScalar() || Res.isVector()) && "invalid operand type"); + assert((Res == Op0) && "type mismatch"); +} + void MachineIRBuilder::validateBinaryOp(const LLT Res, const LLT Op0, const LLT Op1) { assert((Res.isScalar() || Res.isVector()) && "invalid operand type"); @@ -312,17 +317,29 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res, return buildFConstant(Res, *CFP); } -MachineInstrBuilder MachineIRBuilder::buildBrCond(Register Tst, +MachineInstrBuilder MachineIRBuilder::buildBrCond(const SrcOp &Tst, MachineBasicBlock &Dest) { - assert(getMRI()->getType(Tst).isScalar() && "invalid operand type"); + assert(Tst.getLLTTy(*getMRI()).isScalar() && "invalid operand type"); - return buildInstr(TargetOpcode::G_BRCOND).addUse(Tst).addMBB(&Dest); + auto MIB = buildInstr(TargetOpcode::G_BRCOND); + Tst.addSrcToMIB(MIB); + MIB.addMBB(&Dest); + return MIB; } -MachineInstrBuilder MachineIRBuilder::buildLoad(const DstOp &Res, - const SrcOp &Addr, - MachineMemOperand &MMO) { - return buildLoadInstr(TargetOpcode::G_LOAD, Res, Addr, MMO); +MachineInstrBuilder +MachineIRBuilder::buildLoad(const DstOp &Dst, const SrcOp &Addr, + MachinePointerInfo PtrInfo, Align Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo) { + MMOFlags |= MachineMemOperand::MOLoad; + assert((MMOFlags & MachineMemOperand::MOStore) == 0); + + uint64_t Size = MemoryLocation::getSizeOrUnknown( + TypeSize::Fixed(Dst.getLLTTy(*getMRI()).getSizeInBytes())); + MachineMemOperand *MMO = + getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); + return buildLoad(Dst, Addr, *MMO); } MachineInstrBuilder MachineIRBuilder::buildLoadInstr(unsigned Opcode, @@ -369,6 +386,21 @@ 
MachineInstrBuilder MachineIRBuilder::buildStore(const SrcOp &Val, return MIB; } +MachineInstrBuilder +MachineIRBuilder::buildStore(const SrcOp &Val, const SrcOp &Addr, + MachinePointerInfo PtrInfo, Align Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo) { + MMOFlags |= MachineMemOperand::MOStore; + assert((MMOFlags & MachineMemOperand::MOLoad) == 0); + + uint64_t Size = MemoryLocation::getSizeOrUnknown( + TypeSize::Fixed(Val.getLLTTy(*getMRI()).getSizeInBytes())); + MachineMemOperand *MMO = + getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); + return buildStore(Val, Addr, *MMO); +} + MachineInstrBuilder MachineIRBuilder::buildAnyExt(const DstOp &Res, const SrcOp &Op) { return buildInstr(TargetOpcode::G_ANYEXT, Res, Op); @@ -603,6 +635,35 @@ MachineIRBuilder::buildBuildVectorTrunc(const DstOp &Res, return buildInstr(TargetOpcode::G_BUILD_VECTOR_TRUNC, Res, TmpVec); } +MachineInstrBuilder MachineIRBuilder::buildShuffleSplat(const DstOp &Res, + const SrcOp &Src) { + LLT DstTy = Res.getLLTTy(*getMRI()); + assert(Src.getLLTTy(*getMRI()) == DstTy.getElementType() && + "Expected Src to match Dst elt ty"); + auto UndefVec = buildUndef(DstTy); + auto Zero = buildConstant(LLT::scalar(64), 0); + auto InsElt = buildInsertVectorElement(DstTy, UndefVec, Src, Zero); + SmallVector<int, 16> ZeroMask(DstTy.getNumElements()); + return buildShuffleVector(DstTy, InsElt, UndefVec, ZeroMask); +} + +MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res, + const SrcOp &Src1, + const SrcOp &Src2, + ArrayRef<int> Mask) { + LLT DstTy = Res.getLLTTy(*getMRI()); + LLT Src1Ty = Src1.getLLTTy(*getMRI()); + LLT Src2Ty = Src2.getLLTTy(*getMRI()); + assert(Src1Ty.getNumElements() + Src2Ty.getNumElements() >= Mask.size()); + assert(DstTy.getElementType() == Src1Ty.getElementType() && + DstTy.getElementType() == Src2Ty.getElementType()); + (void)Src1Ty; + (void)Src2Ty; + ArrayRef<int> MaskAlloc = getMF().allocateShuffleMask(Mask); + 
return buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {DstTy}, {Src1, Src2}) + .addShuffleMask(MaskAlloc); +} + MachineInstrBuilder MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<Register> Ops) { // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>, @@ -925,6 +986,14 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, SrcOps[1].getLLTTy(*getMRI()), SrcOps[2].getLLTTy(*getMRI())); break; } + case TargetOpcode::G_FNEG: + case TargetOpcode::G_ABS: + // All these are unary ops. + assert(DstOps.size() == 1 && "Invalid Dst"); + assert(SrcOps.size() == 1 && "Invalid Srcs"); + validateUnaryOp(DstOps[0].getLLTTy(*getMRI()), + SrcOps[0].getLLTTy(*getMRI())); + break; case TargetOpcode::G_ADD: case TargetOpcode::G_AND: case TargetOpcode::G_MUL: @@ -953,7 +1022,9 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, } case TargetOpcode::G_SHL: case TargetOpcode::G_ASHR: - case TargetOpcode::G_LSHR: { + case TargetOpcode::G_LSHR: + case TargetOpcode::G_USHLSAT: + case TargetOpcode::G_SSHLSAT: { assert(DstOps.size() == 1 && "Invalid Dst"); assert(SrcOps.size() == 2 && "Invalid Srcs"); validateShiftOp(DstOps[0].getLLTTy(*getMRI()), @@ -1018,11 +1089,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, case TargetOpcode::G_UNMERGE_VALUES: { assert(!DstOps.empty() && "Invalid trivial sequence"); assert(SrcOps.size() == 1 && "Invalid src for Unmerge"); - assert(std::all_of(DstOps.begin(), DstOps.end(), - [&, this](const DstOp &Op) { - return Op.getLLTTy(*getMRI()) == - DstOps[0].getLLTTy(*getMRI()); - }) && + assert(llvm::all_of(DstOps, + [&, this](const DstOp &Op) { + return Op.getLLTTy(*getMRI()) == + DstOps[0].getLLTTy(*getMRI()); + }) && "type mismatch in output list"); assert(DstOps.size() * DstOps[0].getLLTTy(*getMRI()).getSizeInBits() == SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() && @@ -1032,11 +1103,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, case 
TargetOpcode::G_MERGE_VALUES: { assert(!SrcOps.empty() && "invalid trivial sequence"); assert(DstOps.size() == 1 && "Invalid Dst"); - assert(std::all_of(SrcOps.begin(), SrcOps.end(), - [&, this](const SrcOp &Op) { - return Op.getLLTTy(*getMRI()) == - SrcOps[0].getLLTTy(*getMRI()); - }) && + assert(llvm::all_of(SrcOps, + [&, this](const SrcOp &Op) { + return Op.getLLTTy(*getMRI()) == + SrcOps[0].getLLTTy(*getMRI()); + }) && "type mismatch in input list"); assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == DstOps[0].getLLTTy(*getMRI()).getSizeInBits() && @@ -1083,11 +1154,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, assert(DstOps.size() == 1 && "Invalid DstOps"); assert(DstOps[0].getLLTTy(*getMRI()).isVector() && "Res type must be a vector"); - assert(std::all_of(SrcOps.begin(), SrcOps.end(), - [&, this](const SrcOp &Op) { - return Op.getLLTTy(*getMRI()) == - SrcOps[0].getLLTTy(*getMRI()); - }) && + assert(llvm::all_of(SrcOps, + [&, this](const SrcOp &Op) { + return Op.getLLTTy(*getMRI()) == + SrcOps[0].getLLTTy(*getMRI()); + }) && "type mismatch in input list"); assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == DstOps[0].getLLTTy(*getMRI()).getSizeInBits() && @@ -1100,11 +1171,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, assert(DstOps.size() == 1 && "Invalid DstOps"); assert(DstOps[0].getLLTTy(*getMRI()).isVector() && "Res type must be a vector"); - assert(std::all_of(SrcOps.begin(), SrcOps.end(), - [&, this](const SrcOp &Op) { - return Op.getLLTTy(*getMRI()) == - SrcOps[0].getLLTTy(*getMRI()); - }) && + assert(llvm::all_of(SrcOps, + [&, this](const SrcOp &Op) { + return Op.getLLTTy(*getMRI()) == + SrcOps[0].getLLTTy(*getMRI()); + }) && "type mismatch in input list"); if (SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == DstOps[0].getLLTTy(*getMRI()).getElementType().getSizeInBits()) @@ -1115,12 +1186,12 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, 
assert(DstOps.size() == 1 && "Invalid DstOps"); assert((!SrcOps.empty() || SrcOps.size() < 2) && "Must have at least 2 operands"); - assert(std::all_of(SrcOps.begin(), SrcOps.end(), - [&, this](const SrcOp &Op) { - return (Op.getLLTTy(*getMRI()).isVector() && - Op.getLLTTy(*getMRI()) == - SrcOps[0].getLLTTy(*getMRI())); - }) && + assert(llvm::all_of(SrcOps, + [&, this](const SrcOp &Op) { + return (Op.getLLTTy(*getMRI()).isVector() && + Op.getLLTTy(*getMRI()) == + SrcOps[0].getLLTTy(*getMRI())); + }) && "type mismatch in input list"); assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == DstOps[0].getLLTTy(*getMRI()).getSizeInBits() && diff --git a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index 255ea693b5c4..e2a963747101 100644 --- a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -421,8 +421,7 @@ RegisterBankInfo::getInstrPossibleMappings(const MachineInstr &MI) const { // Then the alternative mapping, if any. 
InstructionMappings AltMappings = getInstrAlternativeMappings(MI); - for (const InstructionMapping *AltMapping : AltMappings) - PossibleMappings.push_back(AltMapping); + append_range(PossibleMappings, AltMappings); #ifndef NDEBUG for (const InstructionMapping *Mapping : PossibleMappings) assert(Mapping->verify(MI) && "Mapping is invalid"); diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 8a7fb4fbbf2d..cd2483224489 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -11,8 +11,11 @@ #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/ADT/APFloat.h" -#include "llvm/ADT/Twine.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -20,13 +23,16 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/Target/TargetMachine.h" #define DEBUG_TYPE "globalisel-utils" using namespace llvm; +using namespace MIPatternMatch; Register llvm::constrainRegToClass(MachineRegisterInfo &MRI, const TargetInstrInfo &TII, @@ -42,7 +48,7 @@ Register llvm::constrainOperandRegClass( const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, - const TargetRegisterClass &RegClass, const MachineOperand &RegMO) { + const TargetRegisterClass &RegClass, MachineOperand &RegMO) { Register Reg = RegMO.getReg(); // Assume physical registers are 
properly constrained. assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented"); @@ -63,6 +69,13 @@ Register llvm::constrainOperandRegClass( TII.get(TargetOpcode::COPY), Reg) .addReg(ConstrainedReg); } + if (GISelChangeObserver *Observer = MF.getObserver()) { + Observer->changingInstr(*RegMO.getParent()); + } + RegMO.setReg(ConstrainedReg); + if (GISelChangeObserver *Observer = MF.getObserver()) { + Observer->changedInstr(*RegMO.getParent()); + } } else { if (GISelChangeObserver *Observer = MF.getObserver()) { if (!RegMO.isDef()) { @@ -80,7 +93,7 @@ Register llvm::constrainOperandRegClass( const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II, - const MachineOperand &RegMO, unsigned OpIdx) { + MachineOperand &RegMO, unsigned OpIdx) { Register Reg = RegMO.getReg(); // Assume physical registers are properly constrained. assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented"); @@ -150,8 +163,7 @@ bool llvm::constrainSelectedInstRegOperands(MachineInstr &I, // If the operand is a vreg, we should constrain its regclass, and only // insert COPYs if that's impossible. // constrainOperandRegClass does that for us. - MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), - MO, OpI)); + constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), MO, OpI); // Tie uses to defs as indicated in MCInstrDesc if this hasn't already been // done. @@ -180,6 +192,14 @@ bool llvm::canReplaceReg(Register DstReg, Register SrcReg, bool llvm::isTriviallyDead(const MachineInstr &MI, const MachineRegisterInfo &MRI) { + // FIXME: This logical is mostly duplicated with + // DeadMachineInstructionElim::isDead. Why is LOCAL_ESCAPE not considered in + // MachineInstr::isLabel? + + // Don't delete frame allocation labels. 
+ if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) + return false; + // If we can move an instruction, we can remove it. Otherwise, it has // a side-effect of some sort. bool SawStore = false; @@ -242,8 +262,8 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, reportGISelFailure(MF, TPC, MORE, R); } -Optional<int64_t> llvm::getConstantVRegVal(Register VReg, - const MachineRegisterInfo &MRI) { +Optional<APInt> llvm::getConstantVRegVal(Register VReg, + const MachineRegisterInfo &MRI) { Optional<ValueAndVReg> ValAndVReg = getConstantVRegValWithLookThrough(VReg, MRI, /*LookThroughInstrs*/ false); assert((!ValAndVReg || ValAndVReg->VReg == VReg) && @@ -253,9 +273,17 @@ Optional<int64_t> llvm::getConstantVRegVal(Register VReg, return ValAndVReg->Value; } +Optional<int64_t> llvm::getConstantVRegSExtVal(Register VReg, + const MachineRegisterInfo &MRI) { + Optional<APInt> Val = getConstantVRegVal(VReg, MRI); + if (Val && Val->getBitWidth() <= 64) + return Val->getSExtValue(); + return None; +} + Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs, - bool HandleFConstant) { + bool HandleFConstant, bool LookThroughAnyExt) { SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes; MachineInstr *MI; auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) { @@ -282,6 +310,10 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) && LookThroughInstrs) { switch (MI->getOpcode()) { + case TargetOpcode::G_ANYEXT: + if (!LookThroughAnyExt) + return None; + LLVM_FALLTHROUGH; case TargetOpcode::G_TRUNC: case TargetOpcode::G_SEXT: case TargetOpcode::G_ZEXT: @@ -315,6 +347,7 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( case TargetOpcode::G_TRUNC: Val = Val.trunc(OpcodeAndSize.second); break; + case TargetOpcode::G_ANYEXT: case TargetOpcode::G_SEXT: Val = 
Val.sext(OpcodeAndSize.second); break; @@ -324,13 +357,10 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( } } - if (Val.getBitWidth() > 64) - return None; - - return ValueAndVReg{Val.getSExtValue(), VReg}; + return ValueAndVReg{Val, VReg}; } -const llvm::ConstantFP * +const ConstantFP * llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) { MachineInstr *MI = MRI.getVRegDef(VReg); if (TargetOpcode::G_FCONSTANT != MI->getOpcode()) @@ -338,15 +368,8 @@ llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) { return MI->getOperand(1).getFPImm(); } -namespace { -struct DefinitionAndSourceRegister { - llvm::MachineInstr *MI; - Register Reg; -}; -} // namespace - -static llvm::Optional<DefinitionAndSourceRegister> -getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) { +Optional<DefinitionAndSourceRegister> +llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) { Register DefSrcReg = Reg; auto *DefMI = MRI.getVRegDef(Reg); auto DstTy = MRI.getType(DefMI->getOperand(0).getReg()); @@ -355,7 +378,7 @@ getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) { while (DefMI->getOpcode() == TargetOpcode::COPY) { Register SrcReg = DefMI->getOperand(1).getReg(); auto SrcTy = MRI.getType(SrcReg); - if (!SrcTy.isValid() || SrcTy != DstTy) + if (!SrcTy.isValid()) break; DefMI = MRI.getVRegDef(SrcReg); DefSrcReg = SrcReg; @@ -363,8 +386,8 @@ getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) { return DefinitionAndSourceRegister{DefMI, DefSrcReg}; } -llvm::MachineInstr *llvm::getDefIgnoringCopies(Register Reg, - const MachineRegisterInfo &MRI) { +MachineInstr *llvm::getDefIgnoringCopies(Register Reg, + const MachineRegisterInfo &MRI) { Optional<DefinitionAndSourceRegister> DefSrcReg = getDefSrcRegIgnoringCopies(Reg, MRI); return DefSrcReg ? 
DefSrcReg->MI : nullptr; @@ -377,8 +400,8 @@ Register llvm::getSrcRegIgnoringCopies(Register Reg, return DefSrcReg ? DefSrcReg->Reg : Register(); } -llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, Register Reg, - const MachineRegisterInfo &MRI) { +MachineInstr *llvm::getOpcodeDef(unsigned Opcode, Register Reg, + const MachineRegisterInfo &MRI) { MachineInstr *DefMI = getDefIgnoringCopies(Reg, MRI); return DefMI && DefMI->getOpcode() == Opcode ? DefMI : nullptr; } @@ -407,9 +430,8 @@ Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1, if (!MaybeOp1Cst) return None; - LLT Ty = MRI.getType(Op1); - APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true); - APInt C2(Ty.getSizeInBits(), *MaybeOp2Cst, true); + const APInt &C1 = *MaybeOp1Cst; + const APInt &C2 = *MaybeOp2Cst; switch (Opcode) { default: break; @@ -458,7 +480,8 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, if (!DefMI) return false; - if (DefMI->getFlag(MachineInstr::FmNoNans)) + const TargetMachine& TM = DefMI->getMF()->getTarget(); + if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath) return true; if (SNaN) { @@ -489,75 +512,304 @@ Align llvm::inferAlignFromPtrInfo(MachineFunction &MF, return Align(1); } +Register llvm::getFunctionLiveInPhysReg(MachineFunction &MF, + const TargetInstrInfo &TII, + MCRegister PhysReg, + const TargetRegisterClass &RC, + LLT RegTy) { + DebugLoc DL; // FIXME: Is no location the right choice? + MachineBasicBlock &EntryMBB = MF.front(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + Register LiveIn = MRI.getLiveInVirtReg(PhysReg); + if (LiveIn) { + MachineInstr *Def = MRI.getVRegDef(LiveIn); + if (Def) { + // FIXME: Should the verifier check this is in the entry block? + assert(Def->getParent() == &EntryMBB && "live-in copy not in entry block"); + return LiveIn; + } + + // It's possible the incoming argument register and copy was added during + // lowering, but later deleted due to being/becoming dead. 
If this happens, + // re-insert the copy. + } else { + // The live in register was not present, so add it. + LiveIn = MF.addLiveIn(PhysReg, &RC); + if (RegTy.isValid()) + MRI.setType(LiveIn, RegTy); + } + + BuildMI(EntryMBB, EntryMBB.begin(), DL, TII.get(TargetOpcode::COPY), LiveIn) + .addReg(PhysReg); + if (!EntryMBB.isLiveIn(PhysReg)) + EntryMBB.addLiveIn(PhysReg); + return LiveIn; +} + Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1, uint64_t Imm, const MachineRegisterInfo &MRI) { auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI); if (MaybeOp1Cst) { - LLT Ty = MRI.getType(Op1); - APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true); switch (Opcode) { default: break; - case TargetOpcode::G_SEXT_INREG: - return C1.trunc(Imm).sext(C1.getBitWidth()); + case TargetOpcode::G_SEXT_INREG: { + LLT Ty = MRI.getType(Op1); + return MaybeOp1Cst->trunc(Imm).sext(Ty.getScalarSizeInBits()); + } } } return None; } +bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI, + GISelKnownBits *KB) { + Optional<DefinitionAndSourceRegister> DefSrcReg = + getDefSrcRegIgnoringCopies(Reg, MRI); + if (!DefSrcReg) + return false; + + const MachineInstr &MI = *DefSrcReg->MI; + const LLT Ty = MRI.getType(Reg); + + switch (MI.getOpcode()) { + case TargetOpcode::G_CONSTANT: { + unsigned BitWidth = Ty.getScalarSizeInBits(); + const ConstantInt *CI = MI.getOperand(1).getCImm(); + return CI->getValue().zextOrTrunc(BitWidth).isPowerOf2(); + } + case TargetOpcode::G_SHL: { + // A left-shift of a constant one will have exactly one bit set because + // shifting the bit off the end is undefined. 
+ + // TODO: Constant splat + if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) { + if (*ConstLHS == 1) + return true; + } + + break; + } + case TargetOpcode::G_LSHR: { + if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) { + if (ConstLHS->isSignMask()) + return true; + } + + break; + } + default: + break; + } + + // TODO: Are all operands of a build vector constant powers of two? + if (!KB) + return false; + + // More could be done here, though the above checks are enough + // to handle some common cases. + + // Fall back to computeKnownBits to catch other known cases. + KnownBits Known = KB->getKnownBits(Reg); + return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1); +} + void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) { AU.addPreserved<StackProtector>(); } -LLT llvm::getLCMType(LLT Ty0, LLT Ty1) { - if (!Ty0.isVector() && !Ty1.isVector()) { - unsigned Mul = Ty0.getSizeInBits() * Ty1.getSizeInBits(); - int GCDSize = greatestCommonDivisor(Ty0.getSizeInBits(), - Ty1.getSizeInBits()); - return LLT::scalar(Mul / GCDSize); - } +static unsigned getLCMSize(unsigned OrigSize, unsigned TargetSize) { + unsigned Mul = OrigSize * TargetSize; + unsigned GCDSize = greatestCommonDivisor(OrigSize, TargetSize); + return Mul / GCDSize; +} + +LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) { + const unsigned OrigSize = OrigTy.getSizeInBits(); + const unsigned TargetSize = TargetTy.getSizeInBits(); + + if (OrigSize == TargetSize) + return OrigTy; + + if (OrigTy.isVector()) { + const LLT OrigElt = OrigTy.getElementType(); - if (Ty0.isVector() && !Ty1.isVector()) { - assert(Ty0.getElementType() == Ty1 && "not yet handled"); - return Ty0; + if (TargetTy.isVector()) { + const LLT TargetElt = TargetTy.getElementType(); + + if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) { + int GCDElts = greatestCommonDivisor(OrigTy.getNumElements(), + TargetTy.getNumElements()); + // Prefer the original element 
type. + int Mul = OrigTy.getNumElements() * TargetTy.getNumElements(); + return LLT::vector(Mul / GCDElts, OrigTy.getElementType()); + } + } else { + if (OrigElt.getSizeInBits() == TargetSize) + return OrigTy; + } + + unsigned LCMSize = getLCMSize(OrigSize, TargetSize); + return LLT::vector(LCMSize / OrigElt.getSizeInBits(), OrigElt); } - if (Ty1.isVector() && !Ty0.isVector()) { - assert(Ty1.getElementType() == Ty0 && "not yet handled"); - return Ty1; + if (TargetTy.isVector()) { + unsigned LCMSize = getLCMSize(OrigSize, TargetSize); + return LLT::vector(LCMSize / OrigSize, OrigTy); } - if (Ty0.isVector() && Ty1.isVector()) { - assert(Ty0.getElementType() == Ty1.getElementType() && "not yet handled"); + unsigned LCMSize = getLCMSize(OrigSize, TargetSize); + + // Preserve pointer types. + if (LCMSize == OrigSize) + return OrigTy; + if (LCMSize == TargetSize) + return TargetTy; - int GCDElts = greatestCommonDivisor(Ty0.getNumElements(), - Ty1.getNumElements()); + return LLT::scalar(LCMSize); +} - int Mul = Ty0.getNumElements() * Ty1.getNumElements(); - return LLT::vector(Mul / GCDElts, Ty0.getElementType()); +LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) { + const unsigned OrigSize = OrigTy.getSizeInBits(); + const unsigned TargetSize = TargetTy.getSizeInBits(); + + if (OrigSize == TargetSize) + return OrigTy; + + if (OrigTy.isVector()) { + LLT OrigElt = OrigTy.getElementType(); + if (TargetTy.isVector()) { + LLT TargetElt = TargetTy.getElementType(); + if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) { + int GCD = greatestCommonDivisor(OrigTy.getNumElements(), + TargetTy.getNumElements()); + return LLT::scalarOrVector(GCD, OrigElt); + } + } else { + // If the source is a vector of pointers, return a pointer element. 
+ if (OrigElt.getSizeInBits() == TargetSize) + return OrigElt; + } + + unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize); + if (GCD == OrigElt.getSizeInBits()) + return OrigElt; + + // If we can't produce the original element type, we have to use a smaller + // scalar. + if (GCD < OrigElt.getSizeInBits()) + return LLT::scalar(GCD); + return LLT::vector(GCD / OrigElt.getSizeInBits(), OrigElt); + } + + if (TargetTy.isVector()) { + // Try to preserve the original element type. + LLT TargetElt = TargetTy.getElementType(); + if (TargetElt.getSizeInBits() == OrigSize) + return OrigTy; } - llvm_unreachable("not yet handled"); + unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize); + return LLT::scalar(GCD); } -LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) { - if (OrigTy.isVector() && TargetTy.isVector()) { - assert(OrigTy.getElementType() == TargetTy.getElementType()); - int GCD = greatestCommonDivisor(OrigTy.getNumElements(), - TargetTy.getNumElements()); - return LLT::scalarOrVector(GCD, OrigTy.getElementType()); +Optional<int> llvm::getSplatIndex(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && + "Only G_SHUFFLE_VECTOR can have a splat index!"); + ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); + auto FirstDefinedIdx = find_if(Mask, [](int Elt) { return Elt >= 0; }); + + // If all elements are undefined, this shuffle can be considered a splat. + // Return 0 for better potential for callers to simplify. + if (FirstDefinedIdx == Mask.end()) + return 0; + + // Make sure all remaining elements are either undef or the same + // as the first non-undef value. 
+ int SplatValue = *FirstDefinedIdx; + if (any_of(make_range(std::next(FirstDefinedIdx), Mask.end()), + [&SplatValue](int Elt) { return Elt >= 0 && Elt != SplatValue; })) + return None; + + return SplatValue; +} + +static bool isBuildVectorOp(unsigned Opcode) { + return Opcode == TargetOpcode::G_BUILD_VECTOR || + Opcode == TargetOpcode::G_BUILD_VECTOR_TRUNC; +} + +// TODO: Handle mixed undef elements. +static bool isBuildVectorConstantSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + int64_t SplatValue) { + if (!isBuildVectorOp(MI.getOpcode())) + return false; + + const unsigned NumOps = MI.getNumOperands(); + for (unsigned I = 1; I != NumOps; ++I) { + Register Element = MI.getOperand(I).getReg(); + if (!mi_match(Element, MRI, m_SpecificICst(SplatValue))) + return false; } - if (OrigTy.isVector() && !TargetTy.isVector()) { - assert(OrigTy.getElementType() == TargetTy); - return TargetTy; + return true; +} + +Optional<int64_t> +llvm::getBuildVectorConstantSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI) { + if (!isBuildVectorOp(MI.getOpcode())) + return None; + + const unsigned NumOps = MI.getNumOperands(); + Optional<int64_t> Scalar; + for (unsigned I = 1; I != NumOps; ++I) { + Register Element = MI.getOperand(I).getReg(); + int64_t ElementValue; + if (!mi_match(Element, MRI, m_ICst(ElementValue))) + return None; + if (!Scalar) + Scalar = ElementValue; + else if (*Scalar != ElementValue) + return None; } - assert(!OrigTy.isVector() && !TargetTy.isVector() && - "GCD type of vector and scalar not implemented"); + return Scalar; +} + +bool llvm::isBuildVectorAllZeros(const MachineInstr &MI, + const MachineRegisterInfo &MRI) { + return isBuildVectorConstantSplat(MI, MRI, 0); +} - int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(), - TargetTy.getSizeInBits()); - return LLT::scalar(GCD); +bool llvm::isBuildVectorAllOnes(const MachineInstr &MI, + const MachineRegisterInfo &MRI) { + return isBuildVectorConstantSplat(MI, MRI, -1); +} + 
+bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, + bool IsFP) { + switch (TLI.getBooleanContents(IsVector, IsFP)) { + case TargetLowering::UndefinedBooleanContent: + return Val & 0x1; + case TargetLowering::ZeroOrOneBooleanContent: + return Val == 1; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + return Val == -1; + } + llvm_unreachable("Invalid boolean contents"); +} + +int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector, + bool IsFP) { + switch (TLI.getBooleanContents(IsVector, IsFP)) { + case TargetLowering::UndefinedBooleanContent: + case TargetLowering::ZeroOrOneBooleanContent: + return 1; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + return -1; + } + llvm_unreachable("Invalid boolean contents"); } diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp index 1e20c02ba160..6c1ce4c1efb0 100644 --- a/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/llvm/lib/CodeGen/GlobalMerge.cpp @@ -223,8 +223,9 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, // FIXME: Find better heuristics llvm::stable_sort( Globals, [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) { - return DL.getTypeAllocSize(GV1->getValueType()) < - DL.getTypeAllocSize(GV2->getValueType()); + // We don't support scalable global variables. + return DL.getTypeAllocSize(GV1->getValueType()).getFixedSize() < + DL.getTypeAllocSize(GV2->getValueType()).getFixedSize(); }); // If we want to just blindly group all globals together, do so. diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp index 0ba7e920e507..810b10c9c82a 100644 --- a/llvm/lib/CodeGen/HardwareLoops.cpp +++ b/llvm/lib/CodeGen/HardwareLoops.cpp @@ -165,7 +165,7 @@ namespace { Value *InitLoopCount(); // Insert the set_loop_iteration intrinsic. - void InsertIterationSetup(Value *LoopCountInit); + Value *InsertIterationSetup(Value *LoopCountInit); // Insert the loop_decrement intrinsic. 
void InsertLoopDec(); @@ -187,7 +187,7 @@ namespace { const DataLayout &DL, OptimizationRemarkEmitter *ORE) : SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()), - ExitCount(Info.ExitCount), + TripCount(Info.TripCount), CountType(Info.CountType), ExitBranch(Info.ExitBranch), LoopDecrement(Info.LoopDecrement), @@ -202,7 +202,7 @@ namespace { OptimizationRemarkEmitter *ORE = nullptr; Loop *L = nullptr; Module *M = nullptr; - const SCEV *ExitCount = nullptr; + const SCEV *TripCount = nullptr; Type *CountType = nullptr; BranchInst *ExitBranch = nullptr; Value *LoopDecrement = nullptr; @@ -234,7 +234,7 @@ bool HardwareLoops::runOnFunction(Function &F) { for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) { Loop *L = *I; - if (!L->getParentLoop()) + if (L->isOutermost()) TryConvertLoop(L); } @@ -298,7 +298,7 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { } assert( - (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) && + (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.TripCount) && "Hardware Loop must have set exit info."); BasicBlock *Preheader = L->getLoopPreheader(); @@ -325,11 +325,11 @@ void HardwareLoop::Create() { return; } - InsertIterationSetup(LoopCountInit); + Value *Setup = InsertIterationSetup(LoopCountInit); if (UsePHICounter || ForceHardwareLoopPHI) { Instruction *LoopDec = InsertLoopRegDec(LoopCountInit); - Value *EltsRem = InsertPHICounter(LoopCountInit, LoopDec); + Value *EltsRem = InsertPHICounter(Setup, LoopDec); LoopDec->setOperand(0, EltsRem); UpdateBranch(LoopDec); } else @@ -383,18 +383,13 @@ Value *HardwareLoop::InitLoopCount() { // loop counter and tests that is not zero? 
SCEVExpander SCEVE(SE, DL, "loopcnt"); - if (!ExitCount->getType()->isPointerTy() && - ExitCount->getType() != CountType) - ExitCount = SE.getZeroExtendExpr(ExitCount, CountType); - - ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType)); // If we're trying to use the 'test and set' form of the intrinsic, we need // to replace a conditional branch that is controlling entry to the loop. It // is likely (guaranteed?) that the preheader has an unconditional branch to // the loop header, so also check if it has a single predecessor. - if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount, - SE.getZero(ExitCount->getType()))) { + if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, TripCount, + SE.getZero(TripCount->getType()))) { LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n"); UseLoopGuard |= ForceGuardLoopEntry; } else @@ -402,16 +397,23 @@ Value *HardwareLoop::InitLoopCount() { BasicBlock *BB = L->getLoopPreheader(); if (UseLoopGuard && BB->getSinglePredecessor() && - cast<BranchInst>(BB->getTerminator())->isUnconditional()) - BB = BB->getSinglePredecessor(); + cast<BranchInst>(BB->getTerminator())->isUnconditional()) { + BasicBlock *Predecessor = BB->getSinglePredecessor(); + // If it's not safe to create a while loop then don't force it and create a + // do-while loop instead + if (!isSafeToExpandAt(TripCount, Predecessor->getTerminator(), SE)) + UseLoopGuard = false; + else + BB = Predecessor; + } - if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) { - LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount " - << *ExitCount << "\n"); + if (!isSafeToExpandAt(TripCount, BB->getTerminator(), SE)) { + LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand TripCount " + << *TripCount << "\n"); return nullptr; } - Value *Count = SCEVE.expandCodeFor(ExitCount, CountType, + Value *Count = SCEVE.expandCodeFor(TripCount, CountType, BB->getTerminator()); // FIXME: We've expanded Count where we hope to insert the counter setting @@ 
-430,11 +432,13 @@ Value *HardwareLoop::InitLoopCount() { return Count; } -void HardwareLoop::InsertIterationSetup(Value *LoopCountInit) { +Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) { IRBuilder<> Builder(BeginBB->getTerminator()); Type *Ty = LoopCountInit->getType(); - Intrinsic::ID ID = UseLoopGuard ? - Intrinsic::test_set_loop_iterations : Intrinsic::set_loop_iterations; + bool UsePhi = UsePHICounter || ForceHardwareLoopPHI; + Intrinsic::ID ID = UseLoopGuard ? Intrinsic::test_set_loop_iterations + : (UsePhi ? Intrinsic::start_loop_iterations + : Intrinsic::set_loop_iterations); Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty); Value *SetCount = Builder.CreateCall(LoopIter, LoopCountInit); @@ -450,6 +454,7 @@ void HardwareLoop::InsertIterationSetup(Value *LoopCountInit) { } LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *SetCount << "\n"); + return UseLoopGuard ? LoopCountInit : SetCount; } void HardwareLoop::InsertLoopDec() { diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp index 1a5c5d685017..37be2eabf5fe 100644 --- a/llvm/lib/CodeGen/IfConversion.cpp +++ b/llvm/lib/CodeGen/IfConversion.cpp @@ -751,7 +751,7 @@ bool IfConverter::CountDuplicatedInstructions( // A pred-clobbering instruction in the shared portion prevents // if-conversion. std::vector<MachineOperand> PredDefs; - if (TII->DefinesPredicate(*TIB, PredDefs)) + if (TII->ClobbersPredicate(*TIB, PredDefs, false)) return false; // If we get all the way to the branch instructions, don't count them. if (!TIB->isBranch()) @@ -1146,7 +1146,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI, // FIXME: Make use of PredDefs? e.g. ADDC, SUBC sets predicates but are // still potentially predicable. 
std::vector<MachineOperand> PredDefs; - if (TII->DefinesPredicate(MI, PredDefs)) + if (TII->ClobbersPredicate(MI, PredDefs, true)) BBI.ClobbersPred = true; if (!TII->isPredicable(MI)) { @@ -2264,8 +2264,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { if (ToBBI.IsBrAnalyzable) ToBBI.BB->normalizeSuccProbs(); - SmallVector<MachineBasicBlock *, 4> FromSuccs(FromMBB.succ_begin(), - FromMBB.succ_end()); + SmallVector<MachineBasicBlock *, 4> FromSuccs(FromMBB.successors()); MachineBasicBlock *NBB = getNextBlock(FromMBB); MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr; // The edge probability from ToBBI.BB to FromMBB, which is only needed when diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp index 16c9bfc672af..5cdaa9b74e80 100644 --- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -200,10 +200,16 @@ class ImplicitNullChecks : public MachineFunctionPass { unsigned PointerReg, ArrayRef<MachineInstr *> PrevInsts); + /// Returns true if \p DependenceMI can clobber the liveIns in NullSucc block + /// if it was hoisted to the NullCheck block. This is used by caller + /// canHoistInst to decide if DependenceMI can be hoisted safely. + bool canDependenceHoistingClobberLiveIns(MachineInstr *DependenceMI, + MachineBasicBlock *NullSucc); + /// Return true if \p FaultingMI can be hoisted from after the /// instructions in \p InstsSeenSoFar to before them. Set \p Dependence to a - /// non-null value if we also need to (and legally can) hoist a depedency. - bool canHoistInst(MachineInstr *FaultingMI, unsigned PointerReg, + /// non-null value if we also need to (and legally can) hoist a dependency. 
+ bool canHoistInst(MachineInstr *FaultingMI, ArrayRef<MachineInstr *> InstsSeenSoFar, MachineBasicBlock *NullSucc, MachineInstr *&Dependence); @@ -275,12 +281,12 @@ bool ImplicitNullChecks::canReorder(const MachineInstr *A, // between A and B here -- for instance, we should not be dealing with heap // load-store dependencies here. - for (auto MOA : A->operands()) { + for (const auto &MOA : A->operands()) { if (!(MOA.isReg() && MOA.getReg())) continue; Register RegA = MOA.getReg(); - for (auto MOB : B->operands()) { + for (const auto &MOB : B->operands()) { if (!(MOB.isReg() && MOB.getReg())) continue; @@ -347,11 +353,9 @@ ImplicitNullChecks::areMemoryOpsAliased(const MachineInstr &MI, return AR_MayAlias; continue; } - llvm::AliasResult AAResult = - AA->alias(MemoryLocation(MMO1->getValue(), LocationSize::unknown(), - MMO1->getAAInfo()), - MemoryLocation(MMO2->getValue(), LocationSize::unknown(), - MMO2->getAAInfo())); + llvm::AliasResult AAResult = AA->alias( + MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()), + MemoryLocation::getAfter(MMO2->getValue(), MMO2->getAAInfo())); if (AAResult != NoAlias) return AR_MayAlias; } @@ -363,23 +367,105 @@ ImplicitNullChecks::SuitabilityResult ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI, unsigned PointerReg, ArrayRef<MachineInstr *> PrevInsts) { - int64_t Offset; - bool OffsetIsScalable; - const MachineOperand *BaseOp; + // Implementation restriction for faulting_op insertion + // TODO: This could be relaxed if we find a test case which warrants it. 
+ if (MI.getDesc().getNumDefs() > 1) + return SR_Unsuitable; + if (!MI.mayLoadOrStore() || MI.isPredicable()) + return SR_Unsuitable; + auto AM = TII->getAddrModeFromMemoryOp(MI, TRI); + if (!AM) + return SR_Unsuitable; + auto AddrMode = *AM; + const Register BaseReg = AddrMode.BaseReg, ScaledReg = AddrMode.ScaledReg; + int64_t Displacement = AddrMode.Displacement; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI) || - !BaseOp->isReg() || BaseOp->getReg() != PointerReg) + // We need the base of the memory instruction to be the same as the register + // where the null check is performed (i.e. PointerReg). + if (BaseReg != PointerReg && ScaledReg != PointerReg) + return SR_Unsuitable; + const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); + unsigned PointerRegSizeInBits = TRI->getRegSizeInBits(PointerReg, MRI); + // Bail out if the sizes of BaseReg, ScaledReg and PointerReg are not the + // same. + if ((BaseReg && + TRI->getRegSizeInBits(BaseReg, MRI) != PointerRegSizeInBits) || + (ScaledReg && + TRI->getRegSizeInBits(ScaledReg, MRI) != PointerRegSizeInBits)) return SR_Unsuitable; - // FIXME: This algorithm assumes instructions have fixed-size offsets. - if (OffsetIsScalable) + // Returns true if RegUsedInAddr is used for calculating the displacement + // depending on addressing mode. Also calculates the Displacement. + auto CalculateDisplacementFromAddrMode = [&](Register RegUsedInAddr, + int64_t Multiplier) { + // The register can be NoRegister, which is defined as zero for all targets. + // Consider instruction of interest as `movq 8(,%rdi,8), %rax`. Here the + // ScaledReg is %rdi, while there is no BaseReg.
+ if (!RegUsedInAddr) + return false; + assert(Multiplier && "expected to be non-zero!"); + MachineInstr *ModifyingMI = nullptr; + for (auto It = std::next(MachineBasicBlock::const_reverse_iterator(&MI)); + It != MI.getParent()->rend(); It++) { + const MachineInstr *CurrMI = &*It; + if (CurrMI->modifiesRegister(RegUsedInAddr, TRI)) { + ModifyingMI = const_cast<MachineInstr *>(CurrMI); + break; + } + } + if (!ModifyingMI) + return false; + // Check for the const value defined in register by ModifyingMI. This means + // all other previous values for that register have been invalidated. + int64_t ImmVal; + if (!TII->getConstValDefinedInReg(*ModifyingMI, RegUsedInAddr, ImmVal)) + return false; + // Calculate the reg size in bits, since this is needed for bailing out in + // case of overflow. + int32_t RegSizeInBits = TRI->getRegSizeInBits(RegUsedInAddr, MRI); + APInt ImmValC(RegSizeInBits, ImmVal, true /*IsSigned*/); + APInt MultiplierC(RegSizeInBits, Multiplier); + assert(MultiplierC.isStrictlyPositive() && + "expected to be a positive value!"); + bool IsOverflow; + // Sign of the product depends on the sign of the ImmVal, since Multiplier + // is always positive. + APInt Product = ImmValC.smul_ov(MultiplierC, IsOverflow); + if (IsOverflow) + return false; + APInt DisplacementC(64, Displacement, true /*isSigned*/); + DisplacementC = Product.sadd_ov(DisplacementC, IsOverflow); + if (IsOverflow) + return false; + + // We only handle displacements up to 64 bits wide. + if (DisplacementC.getActiveBits() > 64) + return false; + Displacement = DisplacementC.getSExtValue(); + return true; + }; + + // If a register used in the address is constant, fold its effect into the + // displacement for ease of analysis.
+ bool BaseRegIsConstVal = false, ScaledRegIsConstVal = false; + if (CalculateDisplacementFromAddrMode(BaseReg, 1)) + BaseRegIsConstVal = true; + if (CalculateDisplacementFromAddrMode(ScaledReg, AddrMode.Scale)) + ScaledRegIsConstVal = true; + + // The register which is not null checked should be part of the Displacement + // calculation, otherwise we do not know whether the Displacement is made up + // by some symbolic values. + // This matters because we do not want to incorrectly assume that load from + // falls in the zeroth faulting page in the "sane offset check" below. + if ((BaseReg && BaseReg != PointerReg && !BaseRegIsConstVal) || + (ScaledReg && ScaledReg != PointerReg && !ScaledRegIsConstVal)) return SR_Unsuitable; // We want the mem access to be issued at a sane offset from PointerReg, // so that if PointerReg is null then the access reliably page faults. - if (!(MI.mayLoadOrStore() && !MI.isPredicable() && - -PageSize < Offset && Offset < PageSize)) + if (!(-PageSize < Displacement && Displacement < PageSize)) return SR_Unsuitable; // Finally, check whether the current memory access aliases with previous one. @@ -393,8 +479,39 @@ ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI, return SR_Suitable; } +bool ImplicitNullChecks::canDependenceHoistingClobberLiveIns( + MachineInstr *DependenceMI, MachineBasicBlock *NullSucc) { + for (const auto &DependenceMO : DependenceMI->operands()) { + if (!(DependenceMO.isReg() && DependenceMO.getReg())) + continue; + + // Make sure that we won't clobber any live ins to the sibling block by + // hoisting Dependency. For instance, we can't hoist INST to before the + // null check (even if it safe, and does not violate any dependencies in + // the non_null_block) if %rdx is live in to _null_block. + // + // test %rcx, %rcx + // je _null_block + // _non_null_block: + // %rdx = INST + // ... 
+ // + // This restriction does not apply to the faulting load inst because in + // case the pointer loaded from is in the null page, the load will not + // semantically execute, and affect machine state. That is, if the load + // was loading into %rax and it faults, the value of %rax should stay the + // same as it would have been had the load not have executed and we'd have + // branched to NullSucc directly. + if (AnyAliasLiveIn(TRI, NullSucc, DependenceMO.getReg())) + return true; + + } + + // The dependence does not clobber live-ins in NullSucc block. + return false; +} + bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI, - unsigned PointerReg, ArrayRef<MachineInstr *> InstsSeenSoFar, MachineBasicBlock *NullSucc, MachineInstr *&Dependence) { @@ -419,37 +536,8 @@ bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI, if (DependenceMI->mayLoadOrStore()) return false; - for (auto &DependenceMO : DependenceMI->operands()) { - if (!(DependenceMO.isReg() && DependenceMO.getReg())) - continue; - - // Make sure that we won't clobber any live ins to the sibling block by - // hoisting Dependency. For instance, we can't hoist INST to before the - // null check (even if it safe, and does not violate any dependencies in - // the non_null_block) if %rdx is live in to _null_block. - // - // test %rcx, %rcx - // je _null_block - // _non_null_block: - // %rdx = INST - // ... - // - // This restriction does not apply to the faulting load inst because in - // case the pointer loaded from is in the null page, the load will not - // semantically execute, and affect machine state. That is, if the load - // was loading into %rax and it faults, the value of %rax should stay the - // same as it would have been had the load not have executed and we'd have - // branched to NullSucc directly. 
- if (AnyAliasLiveIn(TRI, NullSucc, DependenceMO.getReg())) - return false; - - // The Dependency can't be re-defining the base register -- then we won't - // get the memory operation on the address we want. This is already - // checked in \c IsSuitableMemoryOp. - assert(!(DependenceMO.isDef() && - TRI->regsOverlap(DependenceMO.getReg(), PointerReg)) && - "Should have been checked before!"); - } + if (canDependenceHoistingClobberLiveIns(DependenceMI, NullSucc)) + return false; auto DepDepResult = computeDependence(DependenceMI, {InstsSeenSoFar.begin(), DependenceItr}); @@ -486,9 +574,9 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( MBP.Predicate == MachineBranchPredicate::PRED_EQ))) return false; - // If we cannot erase the test instruction itself, then making the null check - // implicit does not buy us much. - if (!MBP.SingleUseCondition) + // If there is a separate condition generation instruction, we chose not to + // transform unless we can remove both condition and consuming branch. + if (MBP.ConditionDef && !MBP.SingleUseCondition) return false; MachineBasicBlock *NotNullSucc, *NullSucc; @@ -506,32 +594,34 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( if (NotNullSucc->pred_size() != 1) return false; - // To prevent the invalid transformation of the following code: - // - // mov %rax, %rcx - // test %rax, %rax - // %rax = ... - // je throw_npe - // mov(%rcx), %r9 - // mov(%rax), %r10 - // - // into: - // - // mov %rax, %rcx - // %rax = .... - // faulting_load_op("movl (%rax), %r10", throw_npe) - // mov(%rcx), %r9 - // - // we must ensure that there are no instructions between the 'test' and - // conditional jump that modify %rax. 
const Register PointerReg = MBP.LHS.getReg(); - assert(MBP.ConditionDef->getParent() == &MBB && "Should be in basic block"); - - for (auto I = MBB.rbegin(); MBP.ConditionDef != &*I; ++I) - if (I->modifiesRegister(PointerReg, TRI)) - return false; + if (MBP.ConditionDef) { + // To prevent the invalid transformation of the following code: + // + // mov %rax, %rcx + // test %rax, %rax + // %rax = ... + // je throw_npe + // mov(%rcx), %r9 + // mov(%rax), %r10 + // + // into: + // + // mov %rax, %rcx + // %rax = .... + // faulting_load_op("movl (%rax), %r10", throw_npe) + // mov(%rcx), %r9 + // + // we must ensure that there are no instructions between the 'test' and + // conditional jump that modify %rax. + assert(MBP.ConditionDef->getParent() == &MBB && + "Should be in basic block"); + for (auto I = MBB.rbegin(); MBP.ConditionDef != &*I; ++I) + if (I->modifiesRegister(PointerReg, TRI)) + return false; + } // Starting with a code fragment like: // // test %rax, %rax @@ -597,17 +687,15 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( if (SR == SR_Impossible) return false; if (SR == SR_Suitable && - canHoistInst(&MI, PointerReg, InstsSeenSoFar, NullSucc, Dependence)) { + canHoistInst(&MI, InstsSeenSoFar, NullSucc, Dependence)) { NullCheckList.emplace_back(&MI, MBP.ConditionDef, &MBB, NotNullSucc, NullSucc, Dependence); return true; } - // If MI re-defines the PointerReg then we cannot move further. - if (llvm::any_of(MI.operands(), [&](MachineOperand &MO) { - return MO.isReg() && MO.getReg() && MO.isDef() && - TRI->regsOverlap(MO.getReg(), PointerReg); - })) + // If MI re-defines the PointerReg in a way that changes the value of + // PointerReg if it was null, then we cannot move further. 
+ if (!TII->preservesZeroValueInReg(&MI, PointerReg, TRI)) return false; InstsSeenSoFar.push_back(&MI); } @@ -712,9 +800,11 @@ void ImplicitNullChecks::rewriteNullChecks( } NC.getMemOperation()->eraseFromParent(); - NC.getCheckOperation()->eraseFromParent(); + if (auto *CheckOp = NC.getCheckOperation()) + CheckOp->eraseFromParent(); - // Insert an *unconditional* branch to not-null successor. + // Insert an *unconditional* branch to not-null successor - we expect + // block placement to remove fallthroughs later. TII->insertBranch(*NC.getCheckBlock(), NC.getNotNullSucc(), nullptr, /*Cond=*/None, DL); diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index 41eef2fed840..876e1d3f932a 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -153,7 +153,7 @@ public: unsigned Original); bool rmFromMergeableSpills(MachineInstr &Spill, int StackSlot); void hoistAllSpills(); - void LRE_DidCloneVirtReg(unsigned, unsigned) override; + void LRE_DidCloneVirtReg(Register, Register) override; }; class InlineSpiller : public Spiller { @@ -269,6 +269,14 @@ static Register isFullCopyOf(const MachineInstr &MI, Register Reg) { return Register(); } +static void getVDefInterval(const MachineInstr &MI, LiveIntervals &LIS) { + for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI.getOperand(I); + if (MO.isReg() && MO.isDef() && Register::isVirtualRegister(MO.getReg())) + LIS.getInterval(MO.getReg()); + } +} + /// isSnippet - Identify if a live interval is a snippet that should be spilled. /// It is assumed that SnipLI is a virtual register with the same original as /// Edit->getReg(). @@ -289,8 +297,9 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) { // Check that all uses satisfy our criteria. 
for (MachineRegisterInfo::reg_instr_nodbg_iterator - RI = MRI.reg_instr_nodbg_begin(SnipLI.reg), - E = MRI.reg_instr_nodbg_end(); RI != E; ) { + RI = MRI.reg_instr_nodbg_begin(SnipLI.reg()), + E = MRI.reg_instr_nodbg_end(); + RI != E;) { MachineInstr &MI = *RI++; // Allow copies to/from Reg. @@ -299,11 +308,11 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) { // Allow stack slot loads. int FI; - if (SnipLI.reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) + if (SnipLI.reg() == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) continue; // Allow stack slot stores. - if (SnipLI.reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) + if (SnipLI.reg() == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) continue; // Allow a single additional instruction. @@ -409,14 +418,21 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI, MII = DefMI; ++MII; } + MachineInstrSpan MIS(MII, MBB); // Insert spill without kill flag immediately after def. TII.storeRegToStackSlot(*MBB, MII, SrcReg, false, StackSlot, MRI.getRegClass(SrcReg), &TRI); + LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MII); + for (const MachineInstr &MI : make_range(MIS.begin(), MII)) + getVDefInterval(MI, LIS); --MII; // Point to store instruction. - LIS.InsertMachineInstrInMaps(*MII); LLVM_DEBUG(dbgs() << "\thoisted: " << SrcVNI->def << '\t' << *MII); - HSpiller.addToMergeableSpills(*MII, StackSlot, Original); + // If there is only 1 store instruction is required for spill, add it + // to mergeable list. In X86 AMX, 2 intructions are required to store. + // We disable the merge for this case. 
+ if (MIS.begin() == MII) + HSpiller.addToMergeableSpills(*MII, StackSlot, Original); ++NumSpills; return true; } @@ -432,7 +448,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { do { LiveInterval *LI; std::tie(LI, VNI) = WorkList.pop_back_val(); - Register Reg = LI->reg; + Register Reg = LI->reg(); LLVM_DEBUG(dbgs() << "Checking redundant spills for " << VNI->id << '@' << VNI->def << " in " << *LI << '\n'); @@ -511,7 +527,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { if (!SnippetCopies.count(MI)) continue; LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg()); - assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy"); + assert(isRegToSpill(SnipLI.reg()) && "Unexpected register in copy"); VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getRegSlot(true)); assert(SnipVNI && "Snippet undefined before copy"); WorkList.push_back(std::make_pair(&SnipLI, SnipVNI)); @@ -556,7 +572,7 @@ bool InlineSpiller::canGuaranteeAssignmentAfterRemat(Register VReg, bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { // Analyze instruction SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops; - VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, VirtReg.reg, &Ops); + VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, VirtReg.reg(), &Ops); if (!RI.Reads) return false; @@ -568,7 +584,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { LLVM_DEBUG(dbgs() << "\tadding <undef> flags: "); for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); - if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) + if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg()) MO.setIsUndef(); } LLVM_DEBUG(dbgs() << UseIdx << '\t' << MI); @@ -608,7 +624,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { // If we can't guarantee that we'll be able to actually assign the new vreg, // we can't remat. 
- if (!canGuaranteeAssignmentAfterRemat(VirtReg.reg, MI)) { + if (!canGuaranteeAssignmentAfterRemat(VirtReg.reg(), MI)) { markValueUsed(&VirtReg, ParentVNI); LLVM_DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI); return false; @@ -633,7 +649,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { // Replace operands for (const auto &OpPair : Ops) { MachineOperand &MO = OpPair.first->getOperand(OpPair.second); - if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) { + if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg()) { MO.setReg(NewVReg); MO.setIsKill(); } @@ -810,6 +826,14 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, bool WasCopy = MI->isCopy(); Register ImpReg; + // TII::foldMemoryOperand will do what we need here for statepoint + // (fold load into use and remove corresponding def). We will replace + // uses of removed def with loads (spillAroundUses). + // For that to work we need to untie def and use to pass it through + // foldMemoryOperand and signal foldPatchpoint that it is allowed to + // fold them. + bool UntieRegs = MI->getOpcode() == TargetOpcode::STATEPOINT; + // Spill subregs if the target allows it. // We always want to spill subregs for stackmap/patchpoint pseudos. bool SpillSubRegs = TII.isSubregFoldable() || @@ -835,7 +859,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, if (LoadMI && MO.isDef()) return false; // Tied use operands should not be passed to foldMemoryOperand. 
- if (!MI->isRegTiedToDefOperand(Idx)) + if (UntieRegs || !MI->isRegTiedToDefOperand(Idx)) FoldOps.push_back(Idx); } @@ -846,11 +870,31 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, MachineInstrSpan MIS(MI, MI->getParent()); + SmallVector<std::pair<unsigned, unsigned> > TiedOps; + if (UntieRegs) + for (unsigned Idx : FoldOps) { + MachineOperand &MO = MI->getOperand(Idx); + if (!MO.isTied()) + continue; + unsigned Tied = MI->findTiedOperandIdx(Idx); + if (MO.isUse()) + TiedOps.emplace_back(Tied, Idx); + else { + assert(MO.isDef() && "Tied to not use and def?"); + TiedOps.emplace_back(Idx, Tied); + } + MI->untieRegOperand(Idx); + } + MachineInstr *FoldMI = LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS) : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS, &VRM); - if (!FoldMI) + if (!FoldMI) { + // Re-tie operands. + for (auto Tied : TiedOps) + MI->tieOperands(Tied.first, Tied.second); return false; + } // Remove LIS for any dead defs in the original MI not in FoldMI. for (MIBundleOperands MO(*MI); MO.isValid(); ++MO) { @@ -869,7 +913,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, // FoldMI does not define this physreg. Remove the LI segment. assert(MO->isDead() && "Cannot fold physreg def"); SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot(); - LIS.removePhysRegDefAt(Reg, Idx); + LIS.removePhysRegDefAt(Reg.asMCReg(), Idx); } int FI; @@ -906,7 +950,11 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, ++NumFolded; else if (Ops.front().second == 0) { ++NumSpills; - HSpiller.addToMergeableSpills(*FoldMI, StackSlot, Original); + // If there is only 1 store instruction is required for spill, add it + // to mergeable list. In X86 AMX, 2 intructions are required to store. + // We disable the merge for this case. 
+ if (std::distance(MIS.begin(), MIS.end()) <= 1) + HSpiller.addToMergeableSpills(*FoldMI, StackSlot, Original); } else ++NumReloads; return true; @@ -953,6 +1001,7 @@ void InlineSpiller::insertSpill(Register NewVReg, bool isKill, MachineInstrSpan MIS(MI, &MBB); MachineBasicBlock::iterator SpillBefore = std::next(MI); bool IsRealSpill = isRealSpill(*MI); + if (IsRealSpill) TII.storeRegToStackSlot(MBB, SpillBefore, NewVReg, isKill, StackSlot, MRI.getRegClass(NewVReg), &TRI); @@ -966,11 +1015,16 @@ void InlineSpiller::insertSpill(Register NewVReg, bool isKill, MachineBasicBlock::iterator Spill = std::next(MI); LIS.InsertMachineInstrRangeInMaps(Spill, MIS.end()); + for (const MachineInstr &MI : make_range(Spill, MIS.end())) + getVDefInterval(MI, LIS); LLVM_DEBUG( dumpMachineInstrRangeWithSlotIndex(Spill, MIS.end(), LIS, "spill")); ++NumSpills; - if (IsRealSpill) + // If there is only 1 store instruction is required for spill, add it + // to mergeable list. In X86 AMX, 2 intructions are required to store. + // We disable the merge for this case. + if (IsRealSpill && std::distance(Spill, MIS.end()) <= 1) HSpiller.addToMergeableSpills(*Spill, StackSlot, Original); } @@ -1160,7 +1214,7 @@ void HoistSpillHelper::addToMergeableSpills(MachineInstr &Spill, int StackSlot, // save a copy of LiveInterval in StackSlotToOrigLI because the original // LiveInterval may be cleared after all its references are spilled. 
if (StackSlotToOrigLI.find(StackSlot) == StackSlotToOrigLI.end()) { - auto LI = std::make_unique<LiveInterval>(OrigLI.reg, OrigLI.weight); + auto LI = std::make_unique<LiveInterval>(OrigLI.reg(), OrigLI.weight()); LI->assign(OrigLI, Allocator); StackSlotToOrigLI[StackSlot] = std::move(LI); } @@ -1188,7 +1242,7 @@ bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr &Spill, bool HoistSpillHelper::isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI, MachineBasicBlock &BB, Register &LiveReg) { SlotIndex Idx; - Register OrigReg = OrigLI.reg; + Register OrigReg = OrigLI.reg(); MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, BB); if (MI != BB.end()) Idx = LIS.getInstructionIndex(*MI); @@ -1516,10 +1570,13 @@ void HoistSpillHelper::hoistAllSpills() { for (auto const &Insert : SpillsToIns) { MachineBasicBlock *BB = Insert.first; Register LiveReg = Insert.second; - MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, *BB); - TII.storeRegToStackSlot(*BB, MI, LiveReg, false, Slot, + MachineBasicBlock::iterator MII = IPA.getLastInsertPointIter(OrigLI, *BB); + MachineInstrSpan MIS(MII, BB); + TII.storeRegToStackSlot(*BB, MII, LiveReg, false, Slot, MRI.getRegClass(LiveReg), &TRI); - LIS.InsertMachineInstrRangeInMaps(std::prev(MI), MI); + LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MII); + for (const MachineInstr &MI : make_range(MIS.begin(), MII)) + getVDefInterval(MI, LIS); ++NumSpills; } @@ -1539,11 +1596,13 @@ void HoistSpillHelper::hoistAllSpills() { /// For VirtReg clone, the \p New register should have the same physreg or /// stackslot as the \p old register. 
-void HoistSpillHelper::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { +void HoistSpillHelper::LRE_DidCloneVirtReg(Register New, Register Old) { if (VRM.hasPhys(Old)) VRM.assignVirt2Phys(New, VRM.getPhys(Old)); else if (VRM.getStackSlot(Old) != VirtRegMap::NO_STACK_SLOT) VRM.assignVirt2StackSlot(New, VRM.getStackSlot(Old)); else llvm_unreachable("VReg should be assigned either physreg or stackslot"); + if (VRM.hasShape(Old)) + VRM.assignVirt2Shape(New, VRM.getShape(Old)); } diff --git a/llvm/lib/CodeGen/InterferenceCache.cpp b/llvm/lib/CodeGen/InterferenceCache.cpp index 7b50dac4cd1a..a56485cdbc67 100644 --- a/llvm/lib/CodeGen/InterferenceCache.cpp +++ b/llvm/lib/CodeGen/InterferenceCache.cpp @@ -12,19 +12,15 @@ #include "InterferenceCache.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalUnion.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include <cassert> #include <cstdint> -#include <cstdlib> #include <tuple> using namespace llvm; @@ -64,8 +60,8 @@ void InterferenceCache::init(MachineFunction *mf, Entries[i].clear(mf, indexes, lis); } -InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) { - unsigned E = PhysRegEntries[PhysReg]; +InterferenceCache::Entry *InterferenceCache::get(MCRegister PhysReg) { + unsigned char E = PhysRegEntries[PhysReg.id()]; if (E < CacheEntries && Entries[E].getPhysReg() == PhysReg) { if (!Entries[E].valid(LIUArray, TRI)) Entries[E].revalidate(LIUArray, TRI); @@ -101,7 +97,7 @@ void InterferenceCache::Entry::revalidate(LiveIntervalUnion *LIUArray, RegUnits[i].VirtTag = LIUArray[*Units].getTag(); } -void InterferenceCache::Entry::reset(unsigned physReg, +void 
InterferenceCache::Entry::reset(MCRegister physReg, LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI, const MachineFunction *MF) { diff --git a/llvm/lib/CodeGen/InterferenceCache.h b/llvm/lib/CodeGen/InterferenceCache.h index 9019e9f61fa0..ace1691c1363 100644 --- a/llvm/lib/CodeGen/InterferenceCache.h +++ b/llvm/lib/CodeGen/InterferenceCache.h @@ -44,7 +44,7 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache { /// of PhysReg in all basic blocks. class Entry { /// PhysReg - The register currently represented. - unsigned PhysReg = 0; + MCRegister PhysReg = 0; /// Tag - Cache tag is changed when any of the underlying LiveIntervalUnions /// change. @@ -102,13 +102,13 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache { void clear(MachineFunction *mf, SlotIndexes *indexes, LiveIntervals *lis) { assert(!hasRefs() && "Cannot clear cache entry with references"); - PhysReg = 0; + PhysReg = MCRegister::NoRegister; MF = mf; Indexes = indexes; LIS = lis; } - unsigned getPhysReg() const { return PhysReg; } + MCRegister getPhysReg() const { return PhysReg; } void addRef(int Delta) { RefCount += Delta; } @@ -120,10 +120,8 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache { bool valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI); /// reset - Initialize entry to represent physReg's aliases. - void reset(unsigned physReg, - LiveIntervalUnion *LIUArray, - const TargetRegisterInfo *TRI, - const MachineFunction *MF); + void reset(MCRegister physReg, LiveIntervalUnion *LIUArray, + const TargetRegisterInfo *TRI, const MachineFunction *MF); /// get - Return an up to date BlockInterference. BlockInterference *get(unsigned MBBNum) { @@ -154,7 +152,7 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache { Entry Entries[CacheEntries]; // get - Get a valid entry for PhysReg. 
- Entry *get(unsigned PhysReg); + Entry *get(MCRegister PhysReg); public: InterferenceCache() = default; @@ -207,11 +205,11 @@ public: ~Cursor() { setEntry(nullptr); } /// setPhysReg - Point this cursor to PhysReg's interference. - void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) { + void setPhysReg(InterferenceCache &Cache, MCRegister PhysReg) { // Release reference before getting a new one. That guarantees we can // actually have CacheEntries live cursors. setEntry(nullptr); - if (PhysReg) + if (PhysReg.isValid()) setEntry(Cache.get(PhysReg)); } diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index c4d83547a06c..b22e6faeb91c 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -22,8 +22,8 @@ // // E.g. An interleaved load (Factor = 2): // %wide.vec = load <8 x i32>, <8 x i32>* %ptr -// %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <0, 2, 4, 6> -// %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <1, 3, 5, 7> +// %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <0, 2, 4, 6> +// %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <1, 3, 5, 7> // // It could be transformed into a ld2 intrinsic in AArch64 backend or a vld2 // intrinsic in ARM backend. @@ -66,6 +66,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/Local.h" #include <cassert> #include <utility> @@ -118,6 +119,15 @@ private: /// replacements are also performed. bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts, ArrayRef<ShuffleVectorInst *> Shuffles); + + /// Given a number of shuffles of the form shuffle(binop(x,y)), convert them + /// to binop(shuffle(x), shuffle(y)) to allow the formation of an + /// interleaving load. Any newly created shuffles that operate on \p LI will + /// be added to \p Shuffles. 
Returns true, if any changes to the IR have been + /// made. + bool replaceBinOpShuffles(ArrayRef<ShuffleVectorInst *> BinOpShuffles, + SmallVectorImpl<ShuffleVectorInst *> &Shuffles, + LoadInst *LI); }; } // end anonymous namespace. @@ -283,67 +293,97 @@ bool InterleavedAccess::lowerInterleavedLoad( if (!LI->isSimple() || isa<ScalableVectorType>(LI->getType())) return false; + // Check if all users of this load are shufflevectors. If we encounter any + // users that are extractelement instructions or binary operators, we save + // them to later check if they can be modified to extract from one of the + // shufflevectors instead of the load. + SmallVector<ShuffleVectorInst *, 4> Shuffles; SmallVector<ExtractElementInst *, 4> Extracts; + // BinOpShuffles need to be handled a single time in case both operands of the + // binop are the same load. + SmallSetVector<ShuffleVectorInst *, 4> BinOpShuffles; - // Check if all users of this load are shufflevectors. If we encounter any - // users that are extractelement instructions, we save them to later check if - // they can be modifed to extract from one of the shufflevectors instead of - // the load. 
- for (auto UI = LI->user_begin(), E = LI->user_end(); UI != E; UI++) { - auto *Extract = dyn_cast<ExtractElementInst>(*UI); + for (auto *User : LI->users()) { + auto *Extract = dyn_cast<ExtractElementInst>(User); if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) { Extracts.push_back(Extract); continue; } - ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(*UI); + auto *BI = dyn_cast<BinaryOperator>(User); + if (BI && BI->hasOneUse()) { + if (auto *SVI = dyn_cast<ShuffleVectorInst>(*BI->user_begin())) { + BinOpShuffles.insert(SVI); + continue; + } + } + auto *SVI = dyn_cast<ShuffleVectorInst>(User); if (!SVI || !isa<UndefValue>(SVI->getOperand(1))) return false; Shuffles.push_back(SVI); } - if (Shuffles.empty()) + if (Shuffles.empty() && BinOpShuffles.empty()) return false; unsigned Factor, Index; unsigned NumLoadElements = cast<FixedVectorType>(LI->getType())->getNumElements(); + auto *FirstSVI = Shuffles.size() > 0 ? Shuffles[0] : BinOpShuffles[0]; // Check if the first shufflevector is DE-interleave shuffle. - if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index, - MaxFactor, NumLoadElements)) + if (!isDeInterleaveMask(FirstSVI->getShuffleMask(), Factor, Index, MaxFactor, + NumLoadElements)) return false; // Holds the corresponding index for each DE-interleave shuffle. SmallVector<unsigned, 4> Indices; - Indices.push_back(Index); - Type *VecTy = Shuffles[0]->getType(); + Type *VecTy = FirstSVI->getType(); // Check if other shufflevectors are also DE-interleaved of the same type // and factor as the first shufflevector. 
- for (unsigned i = 1; i < Shuffles.size(); i++) { - if (Shuffles[i]->getType() != VecTy) + for (auto *Shuffle : Shuffles) { + if (Shuffle->getType() != VecTy) return false; - - if (!isDeInterleaveMaskOfFactor(Shuffles[i]->getShuffleMask(), Factor, + if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor, Index)) return false; + assert(Shuffle->getShuffleMask().size() <= NumLoadElements); Indices.push_back(Index); } + for (auto *Shuffle : BinOpShuffles) { + if (Shuffle->getType() != VecTy) + return false; + if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor, + Index)) + return false; + + assert(Shuffle->getShuffleMask().size() <= NumLoadElements); + + if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LI) + Indices.push_back(Index); + if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LI) + Indices.push_back(Index); + } // Try and modify users of the load that are extractelement instructions to // use the shufflevector instructions instead of the load. if (!tryReplaceExtracts(Extracts, Shuffles)) return false; + bool BinOpShuffleChanged = + replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI); + LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n"); // Try to create target specific intrinsics to replace the load and shuffles. - if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) - return false; + if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) { + // If Extracts is not empty, tryReplaceExtracts made changes earlier. 
+ return !Extracts.empty() || BinOpShuffleChanged; + } for (auto SVI : Shuffles) DeadInsts.push_back(SVI); @@ -352,6 +392,39 @@ bool InterleavedAccess::lowerInterleavedLoad( return true; } +bool InterleavedAccess::replaceBinOpShuffles( + ArrayRef<ShuffleVectorInst *> BinOpShuffles, + SmallVectorImpl<ShuffleVectorInst *> &Shuffles, LoadInst *LI) { + for (auto *SVI : BinOpShuffles) { + BinaryOperator *BI = cast<BinaryOperator>(SVI->getOperand(0)); + Type *BIOp0Ty = BI->getOperand(0)->getType(); + ArrayRef<int> Mask = SVI->getShuffleMask(); + assert(all_of(Mask, [&](int Idx) { + return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements(); + })); + + auto *NewSVI1 = + new ShuffleVectorInst(BI->getOperand(0), PoisonValue::get(BIOp0Ty), + Mask, SVI->getName(), SVI); + auto *NewSVI2 = new ShuffleVectorInst( + BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask, + SVI->getName(), SVI); + Value *NewBI = BinaryOperator::Create(BI->getOpcode(), NewSVI1, NewSVI2, + BI->getName(), SVI); + SVI->replaceAllUsesWith(NewBI); + LLVM_DEBUG(dbgs() << " Replaced: " << *BI << "\n And : " << *SVI + << "\n With : " << *NewSVI1 << "\n And : " + << *NewSVI2 << "\n And : " << *NewBI << "\n"); + RecursivelyDeleteTriviallyDeadInstructions(SVI); + if (NewSVI1->getOperand(0) == LI) + Shuffles.push_back(NewSVI1); + if (NewSVI2->getOperand(0) == LI) + Shuffles.push_back(NewSVI2); + } + + return !BinOpShuffles.empty(); +} + bool InterleavedAccess::tryReplaceExtracts( ArrayRef<ExtractElementInst *> Extracts, ArrayRef<ShuffleVectorInst *> Shuffles) { @@ -421,7 +494,7 @@ bool InterleavedAccess::lowerInterleavedStore( if (!SI->isSimple()) return false; - ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand()); + auto *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand()); if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType())) return false; @@ -461,10 +534,10 @@ bool InterleavedAccess::runOnFunction(Function &F) { bool Changed = false; 
for (auto &I : instructions(F)) { - if (LoadInst *LI = dyn_cast<LoadInst>(&I)) + if (auto *LI = dyn_cast<LoadInst>(&I)) Changed |= lowerInterleavedLoad(LI, DeadInsts); - if (StoreInst *SI = dyn_cast<StoreInst>(&I)) + if (auto *SI = dyn_cast<StoreInst>(&I)) Changed |= lowerInterleavedStore(SI, DeadInsts); } diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp index f7131926ee65..ff3f93d51ea8 100644 --- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -1104,10 +1104,8 @@ InterleavedLoadCombineImpl::findFirstLoad(const std::set<LoadInst *> &LIs) { // All LIs are within the same BB. Select the first for a reference. BasicBlock *BB = (*LIs.begin())->getParent(); - BasicBlock::iterator FLI = - std::find_if(BB->begin(), BB->end(), [&LIs](Instruction &I) -> bool { - return is_contained(LIs, &I); - }); + BasicBlock::iterator FLI = llvm::find_if( + *BB, [&LIs](Instruction &I) -> bool { return is_contained(LIs, &I); }); assert(FLI != BB->end()); return cast<LoadInst>(FLI); @@ -1130,8 +1128,8 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad, std::set<Instruction *> Is; std::set<Instruction *> SVIs; - unsigned InterleavedCost; - unsigned InstructionCost = 0; + InstructionCost InterleavedCost; + InstructionCost InstructionCost = 0; // Get the interleave factor unsigned Factor = InterleavedLoad.size(); @@ -1174,6 +1172,10 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad, } } + // We need to have a valid cost in order to proceed. + if (!InstructionCost.isValid()) + return false; + // We know that all LoadInst are within the same BB. This guarantees that // either everything or nothing is loaded. 
LoadInst *First = findFirstLoad(LIs); @@ -1236,8 +1238,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad, Mask.push_back(i + j * Factor); Builder.SetInsertPoint(VI.SVI); - auto SVI = Builder.CreateShuffleVector(LI, UndefValue::get(LI->getType()), - Mask, "interleaved.shuffle"); + auto SVI = Builder.CreateShuffleVector(LI, Mask, "interleaved.shuffle"); VI.SVI->replaceAllUsesWith(SVI); i++; } diff --git a/llvm/lib/CodeGen/IntrinsicLowering.cpp b/llvm/lib/CodeGen/IntrinsicLowering.cpp index e37c21e76597..55089d3b90d0 100644 --- a/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -329,6 +329,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; case Intrinsic::assume: + case Intrinsic::experimental_noalias_scope_decl: case Intrinsic::var_annotation: break; // Strip out these intrinsics diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp index b485f2cf7261..f9b7bf613ff6 100644 --- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -40,13 +40,16 @@ static cl::opt<bool> EnableTrapUnreachable("trap-unreachable", void LLVMTargetMachine::initAsmInfo() { MRI.reset(TheTarget.createMCRegInfo(getTargetTriple().str())); + assert(MRI && "Unable to create reg info"); MII.reset(TheTarget.createMCInstrInfo()); + assert(MII && "Unable to create instruction info"); // FIXME: Having an MCSubtargetInfo on the target machine is a hack due // to some backends having subtarget feature dependent module level // code generation. This is similar to the hack in the AsmPrinter for // module level assembly etc. 
STI.reset(TheTarget.createMCSubtargetInfo( getTargetTriple().str(), getTargetCPU(), getTargetFeatureString())); + assert(STI && "Unable to create subtarget info"); MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo( *MRI, getTargetTriple().str(), Options.MCOptions); @@ -58,6 +61,9 @@ void LLVMTargetMachine::initAsmInfo() { "Make sure you include the correct TargetSelect.h" "and that InitializeAllTargetMCs() is being invoked!"); + if (Options.BinutilsVersion.first > 0) + TmpAsmInfo->setBinutilsVersion(Options.BinutilsVersion); + if (Options.DisableIntegratedAS) TmpAsmInfo->setUseIntegratedAssembler(false); @@ -118,6 +124,24 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, raw_pwrite_stream *DwoOut, CodeGenFileType FileType, MCContext &Context) { + Expected<std::unique_ptr<MCStreamer>> MCStreamerOrErr = + createMCStreamer(Out, DwoOut, FileType, Context); + if (auto Err = MCStreamerOrErr.takeError()) + return true; + + // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. + FunctionPass *Printer = + getTarget().createAsmPrinter(*this, std::move(*MCStreamerOrErr)); + if (!Printer) + return true; + + PM.add(Printer); + return false; +} + +Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer( + raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, CodeGenFileType FileType, + MCContext &Context) { if (Options.MCOptions.MCSaveTempLabels) Context.setAllowTemporaryLabels(false); @@ -152,10 +176,14 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, // Create the code emitter for the target if it exists. If not, .o file // emission fails. 
MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, Context); + if (!MCE) + return make_error<StringError>("createMCCodeEmitter failed", + inconvertibleErrorCode()); MCAsmBackend *MAB = getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions); - if (!MCE || !MAB) - return true; + if (!MAB) + return make_error<StringError>("createMCAsmBackend failed", + inconvertibleErrorCode()); Triple T(getTargetTriple().str()); AsmStreamer.reset(getTarget().createMCObjectStreamer( @@ -174,14 +202,7 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, break; } - // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. - FunctionPass *Printer = - getTarget().createAsmPrinter(*this, std::move(AsmStreamer)); - if (!Printer) - return true; - - PM.add(Printer); - return false; + return std::move(AsmStreamer); } bool LLVMTargetMachine::addPassesToEmitFile( @@ -196,20 +217,14 @@ bool LLVMTargetMachine::addPassesToEmitFile( if (!PassConfig) return true; - if (!TargetPassConfig::willCompleteCodeGenPipeline()) { - if (this->getTargetTriple().isOSAIX()) { - // On AIX, we might manifest MCSymbols during SDAG lowering. For MIR - // testing to be meaningful, we need to ensure that the symbols created - // are MCSymbolXCOFF variants, which requires that - // the TargetLoweringObjectFile instance has been initialized. - MCContext &Ctx = MMIWP->getMMI().getContext(); - const_cast<TargetLoweringObjectFile &>(*this->getObjFileLowering()) - .Initialize(Ctx, *this); - } - PM.add(createPrintMIRPass(Out)); - } else if (addAsmPrinter(PM, Out, DwoOut, FileType, - MMIWP->getMMI().getContext())) - return true; + if (TargetPassConfig::willCompleteCodeGenPipeline()) { + if (addAsmPrinter(PM, Out, DwoOut, FileType, MMIWP->getMMI().getContext())) + return true; + } else { + // MIR printing is redundant with -filetype=null. 
+ if (FileType != CGFT_Null) + PM.add(createPrintMIRPass(Out)); + } PM.add(createFreeMachineFunctionPass()); return false; diff --git a/llvm/lib/CodeGen/LexicalScopes.cpp b/llvm/lib/CodeGen/LexicalScopes.cpp index 690b429832a5..8139c2cbb6cd 100644 --- a/llvm/lib/CodeGen/LexicalScopes.cpp +++ b/llvm/lib/CodeGen/LexicalScopes.cpp @@ -324,7 +324,7 @@ bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) { Set = std::make_unique<BlockSetT>(); getMachineBasicBlocks(DL, *Set); } - return Set->count(MBB) != 0; + return Set->contains(MBB); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp new file mode 100644 index 000000000000..18ffe8ba0669 --- /dev/null +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -0,0 +1,3363 @@ +//===- InstrRefBasedImpl.cpp - Tracking Debug Value MIs -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file InstrRefBasedImpl.cpp +/// +/// This is a separate implementation of LiveDebugValues, see +/// LiveDebugValues.cpp and VarLocBasedImpl.cpp for more information. +/// +/// This pass propagates variable locations between basic blocks, resolving +/// control flow conflicts between them. The problem is much like SSA +/// construction, where each DBG_VALUE instruction assigns the *value* that +/// a variable has, and every instruction where the variable is in scope uses +/// that variable. The resulting map of instruction-to-value is then translated +/// into a register (or spill) location for each variable over each instruction. 
+/// +/// This pass determines which DBG_VALUE dominates which instructions, or if +/// none do, where values must be merged (like PHI nodes). The added +/// complication is that because codegen has already finished, a PHI node may +/// be needed for a variable location to be correct, but no register or spill +/// slot merges the necessary values. In these circumstances, the variable +/// location is dropped. +/// +/// What makes this analysis non-trivial is loops: we cannot tell in advance +/// whether a variable location is live throughout a loop, or whether its +/// location is clobbered (or redefined by another DBG_VALUE), without +/// exploring all the way through. +/// +/// To make this simpler we perform two kinds of analysis. First, we identify +/// every value defined by every instruction (ignoring those that only move +/// another value), then compute a map of which values are available for each +/// instruction. This is stronger than a reaching-def analysis, as we create +/// PHI values where other values merge. +/// +/// Secondly, for each variable, we effectively re-construct SSA using each +/// DBG_VALUE as a def. The DBG_VALUEs read a value-number computed by the +/// first analysis from the location they refer to. We can then compute the +/// dominance frontiers of where a variable has a value, and create PHI nodes +/// where they merge. +/// This isn't precisely SSA-construction though, because the function shape +/// is pre-defined. If a variable location requires a PHI node, but no +/// PHI for the relevant values is present in the function (as computed by the +/// first analysis), the location must be dropped. +/// +/// Once both are complete, we can pass back over all instructions knowing: +/// * What _value_ each variable should contain, either defined by an +/// instruction or where control flow merges +/// * What the location of that value is (if any). 
+/// Allowing us to create appropriate live-in DBG_VALUEs, and DBG_VALUEs when +/// a value moves location. After this pass runs, all variable locations within +/// a block should be specified by DBG_VALUEs within that block, allowing +/// DbgEntityHistoryCalculator to focus on individual blocks. +/// +/// This pass is able to go fast because the size of the first +/// reaching-definition analysis is proportional to the working-set size of +/// the function, which the compiler tries to keep small. (It's also +/// proportional to the number of blocks). Additionally, we repeatedly perform +/// the second reaching-definition analysis with only the variables and blocks +/// in a single lexical scope, exploiting their locality. +/// +/// Determining where PHIs happen is trickier with this approach, and it comes +/// to a head in the major problem for LiveDebugValues: is a value live-through +/// a loop, or not? Your garden-variety dataflow analysis aims to build a set of +/// facts about a function, however this analysis needs to generate new value +/// numbers at joins. +/// +/// To do this, consider a lattice of all definition values, from instructions +/// and from PHIs. Each PHI is characterised by the RPO number of the block it +/// occurs in. Each value pair A, B can be ordered by RPO(A) < RPO(B): +/// with non-PHI values at the top, and any PHI value in the last block (by RPO +/// order) at the bottom. +/// +/// (Awkwardly: lower-down-the _lattice_ means a greater RPO _number_. Below, +/// "rank" always refers to the former). +/// +/// At any join, for each register, we consider: +/// * All incoming values, and +/// * The PREVIOUS live-in value at this join. +/// If all incoming values agree: that's the live-in value. If they do not, the +/// incoming values are ranked according to the partial order, and the NEXT +/// LOWEST rank after the PREVIOUS live-in value is picked (multiple values of +/// the same rank are ignored as conflicting). 
If there are no candidate values, +/// or if the rank of the live-in would be lower than the rank of the current +/// blocks PHIs, create a new PHI value. +/// +/// Intuitively: if it's not immediately obvious what value a join should result +/// in, we iteratively descend from instruction-definitions down through PHI +/// values, getting closer to the current block each time. If the current block +/// is a loop head, this ordering is effectively searching outer levels of +/// loops, to find a value that's live-through the current loop. +/// +/// If there is no value that's live-through this loop, a PHI is created for +/// this location instead. We can't use a lower-ranked PHI because by definition +/// it doesn't dominate the current block. We can't create a PHI value any +/// earlier, because we risk creating a PHI value at a location where values do +/// not in fact merge, thus misrepresenting the truth, and not making the true +/// live-through value for variable locations. +/// +/// This algorithm applies to both calculating the availability of values in +/// the first analysis, and the location of variables in the second. However +/// for the second we add an extra dimension of pain: creating a variable +/// location PHI is only valid if, for each incoming edge, +/// * There is a value for the variable on the incoming edge, and +/// * All the edges have that value in the same register. +/// Or put another way: we can only create a variable-location PHI if there is +/// a matching machine-location PHI, each input to which is the variables value +/// in the predecessor block. +/// +/// To accommodate this difference, each point on the lattice is split in +/// two: a "proposed" PHI and "definite" PHI. Any PHI that can immediately +/// have a location determined are "definite" PHIs, and no further work is +/// needed. Otherwise, a location that all non-backedge predecessors agree +/// on is picked and propagated as a "proposed" PHI value. 
If that PHI value +/// is truly live-through, it'll appear on the loop backedges on the next +/// dataflow iteration, after which the block live-in moves to be a "definite" +/// PHI. If it's not truly live-through, the variable value will be downgraded +/// further as we explore the lattice, or remains "proposed" and is considered +/// invalid once dataflow completes. +/// +/// ### Terminology +/// +/// A machine location is a register or spill slot, a value is something that's +/// defined by an instruction or PHI node, while a variable value is the value +/// assigned to a variable. A variable location is a machine location, that must +/// contain the appropriate variable value. A value that is a PHI node is +/// occasionally called an mphi. +/// +/// The first dataflow problem is the "machine value location" problem, +/// because we're determining which machine locations contain which values. +/// The "locations" are constant: what's unknown is what value they contain. +/// +/// The second dataflow problem (the one for variables) is the "variable value +/// problem", because it's determining what values a variable has, rather than +/// what location those values are placed in. Unfortunately, it's not that +/// simple, because producing a PHI value always involves picking a location. +/// This is an imperfection that we just have to accept, at least for now. 
+/// +/// TODO: +/// Overlapping fragments +/// Entry values +/// Add back DEBUG statements for debugging this +/// Collect statistics +/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/UniqueVector.h" +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/TypeSize.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <functional> +#include <queue> +#include <tuple> +#include <utility> +#include <vector> +#include <limits.h> +#include <limits> + +#include "LiveDebugValues.h" + +using namespace llvm; + +#define DEBUG_TYPE "livedebugvalues" + 
+STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted"); +STATISTIC(NumRemoved, "Number of DBG_VALUE instructions removed"); + +// Act more like the VarLoc implementation, by propagating some locations too +// far and ignoring some transfers. +static cl::opt<bool> EmulateOldLDV("emulate-old-livedebugvalues", cl::Hidden, + cl::desc("Act like old LiveDebugValues did"), + cl::init(false)); + +// Rely on isStoreToStackSlotPostFE and similar to observe all stack spills. +static cl::opt<bool> + ObserveAllStackops("observe-all-stack-ops", cl::Hidden, + cl::desc("Allow non-kill spill and restores"), + cl::init(false)); + +namespace { + +// The location at which a spilled value resides. It consists of a register and +// an offset. +struct SpillLoc { + unsigned SpillBase; + StackOffset SpillOffset; + bool operator==(const SpillLoc &Other) const { + return std::make_pair(SpillBase, SpillOffset) == + std::make_pair(Other.SpillBase, Other.SpillOffset); + } + bool operator<(const SpillLoc &Other) const { + return std::make_tuple(SpillBase, SpillOffset.getFixed(), + SpillOffset.getScalable()) < + std::make_tuple(Other.SpillBase, Other.SpillOffset.getFixed(), + Other.SpillOffset.getScalable()); + } +}; + +class LocIdx { + unsigned Location; + + // Default constructor is private, initializing to an illegal location number. + // Use only for "not an entry" elements in IndexedMaps. 
+ LocIdx() : Location(UINT_MAX) { } + +public: + #define NUM_LOC_BITS 24 + LocIdx(unsigned L) : Location(L) { + assert(L < (1 << NUM_LOC_BITS) && "Machine locations must fit in 24 bits"); + } + + static LocIdx MakeIllegalLoc() { + return LocIdx(); + } + + bool isIllegal() const { + return Location == UINT_MAX; + } + + uint64_t asU64() const { + return Location; + } + + bool operator==(unsigned L) const { + return Location == L; + } + + bool operator==(const LocIdx &L) const { + return Location == L.Location; + } + + bool operator!=(unsigned L) const { + return !(*this == L); + } + + bool operator!=(const LocIdx &L) const { + return !(*this == L); + } + + bool operator<(const LocIdx &Other) const { + return Location < Other.Location; + } +}; + +class LocIdxToIndexFunctor { +public: + using argument_type = LocIdx; + unsigned operator()(const LocIdx &L) const { + return L.asU64(); + } +}; + +/// Unique identifier for a value defined by an instruction, as a value type. +/// Casts back and forth to a uint64_t. Probably replacable with something less +/// bit-constrained. Each value identifies the instruction and machine location +/// where the value is defined, although there may be no corresponding machine +/// operand for it (ex: regmasks clobbering values). The instructions are +/// one-based, and definitions that are PHIs have instruction number zero. +/// +/// The obvious limits of a 1M block function or 1M instruction blocks are +/// problematic; but by that point we should probably have bailed out of +/// trying to analyse the function. +class ValueIDNum { + uint64_t BlockNo : 20; /// The block where the def happens. + uint64_t InstNo : 20; /// The Instruction where the def happens. + /// One based, is distance from start of block. + uint64_t LocNo : NUM_LOC_BITS; /// The machine location where the def happens. 
+ +public: + // XXX -- temporarily enabled while the live-in / live-out tables are moved + // to something more type-y + ValueIDNum() : BlockNo(0xFFFFF), + InstNo(0xFFFFF), + LocNo(0xFFFFFF) { } + + ValueIDNum(uint64_t Block, uint64_t Inst, uint64_t Loc) + : BlockNo(Block), InstNo(Inst), LocNo(Loc) { } + + ValueIDNum(uint64_t Block, uint64_t Inst, LocIdx Loc) + : BlockNo(Block), InstNo(Inst), LocNo(Loc.asU64()) { } + + uint64_t getBlock() const { return BlockNo; } + uint64_t getInst() const { return InstNo; } + uint64_t getLoc() const { return LocNo; } + bool isPHI() const { return InstNo == 0; } + + uint64_t asU64() const { + uint64_t TmpBlock = BlockNo; + uint64_t TmpInst = InstNo; + return TmpBlock << 44ull | TmpInst << NUM_LOC_BITS | LocNo; + } + + static ValueIDNum fromU64(uint64_t v) { + uint64_t L = (v & 0x3FFF); + return {v >> 44ull, ((v >> NUM_LOC_BITS) & 0xFFFFF), L}; + } + + bool operator<(const ValueIDNum &Other) const { + return asU64() < Other.asU64(); + } + + bool operator==(const ValueIDNum &Other) const { + return std::tie(BlockNo, InstNo, LocNo) == + std::tie(Other.BlockNo, Other.InstNo, Other.LocNo); + } + + bool operator!=(const ValueIDNum &Other) const { return !(*this == Other); } + + std::string asString(const std::string &mlocname) const { + return Twine("Value{bb: ") + .concat(Twine(BlockNo).concat( + Twine(", inst: ") + .concat((InstNo ? Twine(InstNo) : Twine("live-in")) + .concat(Twine(", loc: ").concat(Twine(mlocname))) + .concat(Twine("}"))))) + .str(); + } + + static ValueIDNum EmptyValue; +}; + +} // end anonymous namespace + +namespace { + +/// Meta qualifiers for a value. Pair of whatever expression is used to qualify +/// the the value, and Boolean of whether or not it's indirect. +class DbgValueProperties { +public: + DbgValueProperties(const DIExpression *DIExpr, bool Indirect) + : DIExpr(DIExpr), Indirect(Indirect) {} + + /// Extract properties from an existing DBG_VALUE instruction. 
+ DbgValueProperties(const MachineInstr &MI) { + assert(MI.isDebugValue()); + DIExpr = MI.getDebugExpression(); + Indirect = MI.getOperand(1).isImm(); + } + + bool operator==(const DbgValueProperties &Other) const { + return std::tie(DIExpr, Indirect) == std::tie(Other.DIExpr, Other.Indirect); + } + + bool operator!=(const DbgValueProperties &Other) const { + return !(*this == Other); + } + + const DIExpression *DIExpr; + bool Indirect; +}; + +/// Tracker for what values are in machine locations. Listens to the Things +/// being Done by various instructions, and maintains a table of what machine +/// locations have what values (as defined by a ValueIDNum). +/// +/// There are potentially a much larger number of machine locations on the +/// target machine than the actual working-set size of the function. On x86 for +/// example, we're extremely unlikely to want to track values through control +/// or debug registers. To avoid doing so, MLocTracker has several layers of +/// indirection going on, with two kinds of ``location'': +/// * A LocID uniquely identifies a register or spill location, with a +/// predictable value. +/// * A LocIdx is a key (in the database sense) for a LocID and a ValueIDNum. +/// Whenever a location is def'd or used by a MachineInstr, we automagically +/// create a new LocIdx for a location, but not otherwise. This ensures we only +/// account for locations that are actually used or defined. The cost is another +/// vector lookup (of LocID -> LocIdx) over any other implementation. This is +/// fairly cheap, and the compiler tries to reduce the working-set at any one +/// time in the function anyway. +/// +/// Register mask operands completely blow this out of the water; I've just +/// piled hacks on top of hacks to get around that. +class MLocTracker { +public: + MachineFunction &MF; + const TargetInstrInfo &TII; + const TargetRegisterInfo &TRI; + const TargetLowering &TLI; + + /// IndexedMap type, mapping from LocIdx to ValueIDNum. 
+ using LocToValueType = IndexedMap<ValueIDNum, LocIdxToIndexFunctor>; + + /// Map of LocIdxes to the ValueIDNums that they store. This is tightly + /// packed, entries only exist for locations that are being tracked. + LocToValueType LocIdxToIDNum; + + /// "Map" of machine location IDs (i.e., raw register or spill number) to the + /// LocIdx key / number for that location. There are always at least as many + /// as the number of registers on the target -- if the value in the register + /// is not being tracked, then the LocIdx value will be zero. New entries are + /// appended if a new spill slot begins being tracked. + /// This, and the corresponding reverse map persist for the analysis of the + /// whole function, and is necessarying for decoding various vectors of + /// values. + std::vector<LocIdx> LocIDToLocIdx; + + /// Inverse map of LocIDToLocIdx. + IndexedMap<unsigned, LocIdxToIndexFunctor> LocIdxToLocID; + + /// Unique-ification of spill slots. Used to number them -- their LocID + /// number is the index in SpillLocs minus one plus NumRegs. + UniqueVector<SpillLoc> SpillLocs; + + // If we discover a new machine location, assign it an mphi with this + // block number. + unsigned CurBB; + + /// Cached local copy of the number of registers the target has. + unsigned NumRegs; + + /// Collection of register mask operands that have been observed. Second part + /// of pair indicates the instruction that they happened in. Used to + /// reconstruct where defs happened if we start tracking a location later + /// on. + SmallVector<std::pair<const MachineOperand *, unsigned>, 32> Masks; + + /// Iterator for locations and the values they contain. Dereferencing + /// produces a struct/pair containing the LocIdx key for this location, + /// and a reference to the value currently stored. Simplifies the process + /// of seeking a particular location. 
+ class MLocIterator { + LocToValueType &ValueMap; + LocIdx Idx; + + public: + class value_type { + public: + value_type(LocIdx Idx, ValueIDNum &Value) : Idx(Idx), Value(Value) { } + const LocIdx Idx; /// Read-only index of this location. + ValueIDNum &Value; /// Reference to the stored value at this location. + }; + + MLocIterator(LocToValueType &ValueMap, LocIdx Idx) + : ValueMap(ValueMap), Idx(Idx) { } + + bool operator==(const MLocIterator &Other) const { + assert(&ValueMap == &Other.ValueMap); + return Idx == Other.Idx; + } + + bool operator!=(const MLocIterator &Other) const { + return !(*this == Other); + } + + void operator++() { + Idx = LocIdx(Idx.asU64() + 1); + } + + value_type operator*() { + return value_type(Idx, ValueMap[LocIdx(Idx)]); + } + }; + + MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI, const TargetLowering &TLI) + : MF(MF), TII(TII), TRI(TRI), TLI(TLI), + LocIdxToIDNum(ValueIDNum::EmptyValue), + LocIdxToLocID(0) { + NumRegs = TRI.getNumRegs(); + reset(); + LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc()); + assert(NumRegs < (1u << NUM_LOC_BITS)); // Detect bit packing failure + + // Always track SP. This avoids the implicit clobbering caused by regmasks + // from affectings its values. (LiveDebugValues disbelieves calls and + // regmasks that claim to clobber SP). + Register SP = TLI.getStackPointerRegisterToSaveRestore(); + if (SP) { + unsigned ID = getLocID(SP, false); + (void)lookupOrTrackRegister(ID); + } + } + + /// Produce location ID number for indexing LocIDToLocIdx. Takes the register + /// or spill number, and flag for whether it's a spill or not. + unsigned getLocID(Register RegOrSpill, bool isSpill) { + return (isSpill) ? RegOrSpill.id() + NumRegs - 1 : RegOrSpill.id(); + } + + /// Accessor for reading the value at Idx. 
+ ValueIDNum getNumAtPos(LocIdx Idx) const { + assert(Idx.asU64() < LocIdxToIDNum.size()); + return LocIdxToIDNum[Idx]; + } + + unsigned getNumLocs(void) const { return LocIdxToIDNum.size(); } + + /// Reset all locations to contain a PHI value at the designated block. Used + /// sometimes for actual PHI values, othertimes to indicate the block entry + /// value (before any more information is known). + void setMPhis(unsigned NewCurBB) { + CurBB = NewCurBB; + for (auto Location : locations()) + Location.Value = {CurBB, 0, Location.Idx}; + } + + /// Load values for each location from array of ValueIDNums. Take current + /// bbnum just in case we read a value from a hitherto untouched register. + void loadFromArray(ValueIDNum *Locs, unsigned NewCurBB) { + CurBB = NewCurBB; + // Iterate over all tracked locations, and load each locations live-in + // value into our local index. + for (auto Location : locations()) + Location.Value = Locs[Location.Idx.asU64()]; + } + + /// Wipe any un-necessary location records after traversing a block. + void reset(void) { + // We could reset all the location values too; however either loadFromArray + // or setMPhis should be called before this object is re-used. Just + // clear Masks, they're definitely not needed. + Masks.clear(); + } + + /// Clear all data. Destroys the LocID <=> LocIdx map, which makes most of + /// the information in this pass uninterpretable. + void clear(void) { + reset(); + LocIDToLocIdx.clear(); + LocIdxToLocID.clear(); + LocIdxToIDNum.clear(); + //SpillLocs.reset(); XXX UniqueVector::reset assumes a SpillLoc casts from 0 + SpillLocs = decltype(SpillLocs)(); + + LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc()); + } + + /// Set a locaiton to a certain value. + void setMLoc(LocIdx L, ValueIDNum Num) { + assert(L.asU64() < LocIdxToIDNum.size()); + LocIdxToIDNum[L] = Num; + } + + /// Create a LocIdx for an untracked register ID. 
Initialize it to either an + /// mphi value representing a live-in, or a recent register mask clobber. + LocIdx trackRegister(unsigned ID) { + assert(ID != 0); + LocIdx NewIdx = LocIdx(LocIdxToIDNum.size()); + LocIdxToIDNum.grow(NewIdx); + LocIdxToLocID.grow(NewIdx); + + // Default: it's an mphi. + ValueIDNum ValNum = {CurBB, 0, NewIdx}; + // Was this reg ever touched by a regmask? + for (const auto &MaskPair : reverse(Masks)) { + if (MaskPair.first->clobbersPhysReg(ID)) { + // There was an earlier def we skipped. + ValNum = {CurBB, MaskPair.second, NewIdx}; + break; + } + } + + LocIdxToIDNum[NewIdx] = ValNum; + LocIdxToLocID[NewIdx] = ID; + return NewIdx; + } + + LocIdx lookupOrTrackRegister(unsigned ID) { + LocIdx &Index = LocIDToLocIdx[ID]; + if (Index.isIllegal()) + Index = trackRegister(ID); + return Index; + } + + /// Record a definition of the specified register at the given block / inst. + /// This doesn't take a ValueIDNum, because the definition and its location + /// are synonymous. + void defReg(Register R, unsigned BB, unsigned Inst) { + unsigned ID = getLocID(R, false); + LocIdx Idx = lookupOrTrackRegister(ID); + ValueIDNum ValueID = {BB, Inst, Idx}; + LocIdxToIDNum[Idx] = ValueID; + } + + /// Set a register to a value number. To be used if the value number is + /// known in advance. + void setReg(Register R, ValueIDNum ValueID) { + unsigned ID = getLocID(R, false); + LocIdx Idx = lookupOrTrackRegister(ID); + LocIdxToIDNum[Idx] = ValueID; + } + + ValueIDNum readReg(Register R) { + unsigned ID = getLocID(R, false); + LocIdx Idx = lookupOrTrackRegister(ID); + return LocIdxToIDNum[Idx]; + } + + /// Reset a register value to zero / empty. Needed to replicate the + /// VarLoc implementation where a copy to/from a register effectively + /// clears the contents of the source register. (Values can only have one + /// machine location in VarLocBasedImpl). 
+ void wipeRegister(Register R) { + unsigned ID = getLocID(R, false); + LocIdx Idx = LocIDToLocIdx[ID]; + LocIdxToIDNum[Idx] = ValueIDNum::EmptyValue; + } + + /// Determine the LocIdx of an existing register. + LocIdx getRegMLoc(Register R) { + unsigned ID = getLocID(R, false); + return LocIDToLocIdx[ID]; + } + + /// Record a RegMask operand being executed. Defs any register we currently + /// track, stores a pointer to the mask in case we have to account for it + /// later. + void writeRegMask(const MachineOperand *MO, unsigned CurBB, unsigned InstID) { + // Ensure SP exists, so that we don't override it later. + Register SP = TLI.getStackPointerRegisterToSaveRestore(); + + // Def any register we track have that isn't preserved. The regmask + // terminates the liveness of a register, meaning its value can't be + // relied upon -- we represent this by giving it a new value. + for (auto Location : locations()) { + unsigned ID = LocIdxToLocID[Location.Idx]; + // Don't clobber SP, even if the mask says it's clobbered. + if (ID < NumRegs && ID != SP && MO->clobbersPhysReg(ID)) + defReg(ID, CurBB, InstID); + } + Masks.push_back(std::make_pair(MO, InstID)); + } + + /// Find LocIdx for SpillLoc \p L, creating a new one if it's not tracked. + LocIdx getOrTrackSpillLoc(SpillLoc L) { + unsigned SpillID = SpillLocs.idFor(L); + if (SpillID == 0) { + SpillID = SpillLocs.insert(L); + unsigned L = getLocID(SpillID, true); + LocIdx Idx = LocIdx(LocIdxToIDNum.size()); // New idx + LocIdxToIDNum.grow(Idx); + LocIdxToLocID.grow(Idx); + LocIDToLocIdx.push_back(Idx); + LocIdxToLocID[Idx] = L; + return Idx; + } else { + unsigned L = getLocID(SpillID, true); + LocIdx Idx = LocIDToLocIdx[L]; + return Idx; + } + } + + /// Set the value stored in a spill slot. + void setSpill(SpillLoc L, ValueIDNum ValueID) { + LocIdx Idx = getOrTrackSpillLoc(L); + LocIdxToIDNum[Idx] = ValueID; + } + + /// Read whatever value is in a spill slot, or None if it isn't tracked. 
+ Optional<ValueIDNum> readSpill(SpillLoc L) { + unsigned SpillID = SpillLocs.idFor(L); + if (SpillID == 0) + return None; + + unsigned LocID = getLocID(SpillID, true); + LocIdx Idx = LocIDToLocIdx[LocID]; + return LocIdxToIDNum[Idx]; + } + + /// Determine the LocIdx of a spill slot. Return None if it previously + /// hasn't had a value assigned. + Optional<LocIdx> getSpillMLoc(SpillLoc L) { + unsigned SpillID = SpillLocs.idFor(L); + if (SpillID == 0) + return None; + unsigned LocNo = getLocID(SpillID, true); + return LocIDToLocIdx[LocNo]; + } + + /// Return true if Idx is a spill machine location. + bool isSpill(LocIdx Idx) const { + return LocIdxToLocID[Idx] >= NumRegs; + } + + MLocIterator begin() { + return MLocIterator(LocIdxToIDNum, 0); + } + + MLocIterator end() { + return MLocIterator(LocIdxToIDNum, LocIdxToIDNum.size()); + } + + /// Return a range over all locations currently tracked. + iterator_range<MLocIterator> locations() { + return llvm::make_range(begin(), end()); + } + + std::string LocIdxToName(LocIdx Idx) const { + unsigned ID = LocIdxToLocID[Idx]; + if (ID >= NumRegs) + return Twine("slot ").concat(Twine(ID - NumRegs)).str(); + else + return TRI.getRegAsmName(ID).str(); + } + + std::string IDAsString(const ValueIDNum &Num) const { + std::string DefName = LocIdxToName(Num.getLoc()); + return Num.asString(DefName); + } + + LLVM_DUMP_METHOD + void dump() { + for (auto Location : locations()) { + std::string MLocName = LocIdxToName(Location.Value.getLoc()); + std::string DefName = Location.Value.asString(MLocName); + dbgs() << LocIdxToName(Location.Idx) << " --> " << DefName << "\n"; + } + } + + LLVM_DUMP_METHOD + void dump_mloc_map() { + for (auto Location : locations()) { + std::string foo = LocIdxToName(Location.Idx); + dbgs() << "Idx " << Location.Idx.asU64() << " " << foo << "\n"; + } + } + + /// Create a DBG_VALUE based on machine location \p MLoc. Qualify it with the + /// information in \pProperties, for variable Var. 
Don't insert it anywhere, + /// just return the builder for it. + MachineInstrBuilder emitLoc(Optional<LocIdx> MLoc, const DebugVariable &Var, + const DbgValueProperties &Properties) { + DebugLoc DL = DILocation::get(Var.getVariable()->getContext(), 0, 0, + Var.getVariable()->getScope(), + const_cast<DILocation *>(Var.getInlinedAt())); + auto MIB = BuildMI(MF, DL, TII.get(TargetOpcode::DBG_VALUE)); + + const DIExpression *Expr = Properties.DIExpr; + if (!MLoc) { + // No location -> DBG_VALUE $noreg + MIB.addReg(0, RegState::Debug); + MIB.addReg(0, RegState::Debug); + } else if (LocIdxToLocID[*MLoc] >= NumRegs) { + unsigned LocID = LocIdxToLocID[*MLoc]; + const SpillLoc &Spill = SpillLocs[LocID - NumRegs + 1]; + + auto *TRI = MF.getSubtarget().getRegisterInfo(); + Expr = TRI->prependOffsetExpression(Expr, DIExpression::ApplyOffset, + Spill.SpillOffset); + unsigned Base = Spill.SpillBase; + MIB.addReg(Base, RegState::Debug); + MIB.addImm(0); + } else { + unsigned LocID = LocIdxToLocID[*MLoc]; + MIB.addReg(LocID, RegState::Debug); + if (Properties.Indirect) + MIB.addImm(0); + else + MIB.addReg(0, RegState::Debug); + } + + MIB.addMetadata(Var.getVariable()); + MIB.addMetadata(Expr); + return MIB; + } +}; + +/// Class recording the (high level) _value_ of a variable. Identifies either +/// the value of the variable as a ValueIDNum, or a constant MachineOperand. +/// This class also stores meta-information about how the value is qualified. +/// Used to reason about variable values when performing the second +/// (DebugVariable specific) dataflow analysis. +class DbgValue { +public: + union { + /// If Kind is Def, the value number that this value is based on. + ValueIDNum ID; + /// If Kind is Const, the MachineOperand defining this value. + MachineOperand MO; + /// For a NoVal DbgValue, which block it was generated in. + unsigned BlockNo; + }; + /// Qualifiers for the ValueIDNum above. 
+ DbgValueProperties Properties; + + typedef enum { + Undef, // Represents a DBG_VALUE $noreg in the transfer function only. + Def, // This value is defined by an inst, or is a PHI value. + Const, // A constant value contained in the MachineOperand field. + Proposed, // This is a tentative PHI value, which may be confirmed or + // invalidated later. + NoVal // Empty DbgValue, generated during dataflow. BlockNo stores + // which block this was generated in. + } KindT; + /// Discriminator for whether this is a constant or an in-program value. + KindT Kind; + + DbgValue(const ValueIDNum &Val, const DbgValueProperties &Prop, KindT Kind) + : ID(Val), Properties(Prop), Kind(Kind) { + assert(Kind == Def || Kind == Proposed); + } + + DbgValue(unsigned BlockNo, const DbgValueProperties &Prop, KindT Kind) + : BlockNo(BlockNo), Properties(Prop), Kind(Kind) { + assert(Kind == NoVal); + } + + DbgValue(const MachineOperand &MO, const DbgValueProperties &Prop, KindT Kind) + : MO(MO), Properties(Prop), Kind(Kind) { + assert(Kind == Const); + } + + DbgValue(const DbgValueProperties &Prop, KindT Kind) + : Properties(Prop), Kind(Kind) { + assert(Kind == Undef && + "Empty DbgValue constructor must pass in Undef kind"); + } + + void dump(const MLocTracker *MTrack) const { + if (Kind == Const) { + MO.dump(); + } else if (Kind == NoVal) { + dbgs() << "NoVal(" << BlockNo << ")"; + } else if (Kind == Proposed) { + dbgs() << "VPHI(" << MTrack->IDAsString(ID) << ")"; + } else { + assert(Kind == Def); + dbgs() << MTrack->IDAsString(ID); + } + if (Properties.Indirect) + dbgs() << " indir"; + if (Properties.DIExpr) + dbgs() << " " << *Properties.DIExpr; + } + + bool operator==(const DbgValue &Other) const { + if (std::tie(Kind, Properties) != std::tie(Other.Kind, Other.Properties)) + return false; + else if (Kind == Proposed && ID != Other.ID) + return false; + else if (Kind == Def && ID != Other.ID) + return false; + else if (Kind == NoVal && BlockNo != Other.BlockNo) + return false; + else if 
(Kind == Const) + return MO.isIdenticalTo(Other.MO); + + return true; + } + + bool operator!=(const DbgValue &Other) const { return !(*this == Other); } +}; + +/// Types for recording sets of variable fragments that overlap. For a given +/// local variable, we record all other fragments of that variable that could +/// overlap it, to reduce search time. +using FragmentOfVar = + std::pair<const DILocalVariable *, DIExpression::FragmentInfo>; +using OverlapMap = + DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>; + +/// Collection of DBG_VALUEs observed when traversing a block. Records each +/// variable and the value the DBG_VALUE refers to. Requires the machine value +/// location dataflow algorithm to have run already, so that values can be +/// identified. +class VLocTracker { +public: + /// Map DebugVariable to the latest Value it's defined to have. + /// Needs to be a MapVector because we determine order-in-the-input-MIR from + /// the order in this container. + /// We only retain the last DbgValue in each block for each variable, to + /// determine the blocks live-out variable value. The Vars container forms the + /// transfer function for this block, as part of the dataflow analysis. The + /// movement of values between locations inside of a block is handled at a + /// much later stage, in the TransferTracker class. + MapVector<DebugVariable, DbgValue> Vars; + DenseMap<DebugVariable, const DILocation *> Scopes; + MachineBasicBlock *MBB; + +public: + VLocTracker() {} + + void defVar(const MachineInstr &MI, const DbgValueProperties &Properties, + Optional<ValueIDNum> ID) { + assert(MI.isDebugValue() || MI.isDebugRef()); + DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(), + MI.getDebugLoc()->getInlinedAt()); + DbgValue Rec = (ID) ? DbgValue(*ID, Properties, DbgValue::Def) + : DbgValue(Properties, DbgValue::Undef); + + // Attempt insertion; overwrite if it's already mapped. 
+ auto Result = Vars.insert(std::make_pair(Var, Rec)); + if (!Result.second) + Result.first->second = Rec; + Scopes[Var] = MI.getDebugLoc().get(); + } + + void defVar(const MachineInstr &MI, const MachineOperand &MO) { + // Only DBG_VALUEs can define constant-valued variables. + assert(MI.isDebugValue()); + DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(), + MI.getDebugLoc()->getInlinedAt()); + DbgValueProperties Properties(MI); + DbgValue Rec = DbgValue(MO, Properties, DbgValue::Const); + + // Attempt insertion; overwrite if it's already mapped. + auto Result = Vars.insert(std::make_pair(Var, Rec)); + if (!Result.second) + Result.first->second = Rec; + Scopes[Var] = MI.getDebugLoc().get(); + } +}; + +/// Tracker for converting machine value locations and variable values into +/// variable locations (the output of LiveDebugValues), recorded as DBG_VALUEs +/// specifying block live-in locations and transfers within blocks. +/// +/// Operating on a per-block basis, this class takes a (pre-loaded) MLocTracker +/// and must be initialized with the set of variable values that are live-in to +/// the block. The caller then repeatedly calls process(). TransferTracker picks +/// out variable locations for the live-in variable values (if there _is_ a +/// location) and creates the corresponding DBG_VALUEs. Then, as the block is +/// stepped through, transfers of values between machine locations are +/// identified and if profitable, a DBG_VALUE created. +/// +/// This is where debug use-before-defs would be resolved: a variable with an +/// unavailable value could materialize in the middle of a block, when the +/// value becomes available. Or, we could detect clobbers and re-specify the +/// variable in a backup location. (XXX these are unimplemented). +class TransferTracker { +public: + const TargetInstrInfo *TII; + /// This machine location tracker is assumed to always contain the up-to-date + /// value mapping for all machine locations. 
TransferTracker only reads + /// information from it. (XXX make it const?) + MLocTracker *MTracker; + MachineFunction &MF; + + /// Record of all changes in variable locations at a block position. Awkwardly + /// we allow inserting either before or after the point: MBB != nullptr + /// indicates it's before, otherwise after. + struct Transfer { + MachineBasicBlock::iterator Pos; /// Position to insert DBG_VALUes + MachineBasicBlock *MBB; /// non-null if we should insert after. + SmallVector<MachineInstr *, 4> Insts; /// Vector of DBG_VALUEs to insert. + }; + + typedef struct { + LocIdx Loc; + DbgValueProperties Properties; + } LocAndProperties; + + /// Collection of transfers (DBG_VALUEs) to be inserted. + SmallVector<Transfer, 32> Transfers; + + /// Local cache of what-value-is-in-what-LocIdx. Used to identify differences + /// between TransferTrackers view of variable locations and MLocTrackers. For + /// example, MLocTracker observes all clobbers, but TransferTracker lazily + /// does not. + std::vector<ValueIDNum> VarLocs; + + /// Map from LocIdxes to which DebugVariables are based that location. + /// Mantained while stepping through the block. Not accurate if + /// VarLocs[Idx] != MTracker->LocIdxToIDNum[Idx]. + std::map<LocIdx, SmallSet<DebugVariable, 4>> ActiveMLocs; + + /// Map from DebugVariable to it's current location and qualifying meta + /// information. To be used in conjunction with ActiveMLocs to construct + /// enough information for the DBG_VALUEs for a particular LocIdx. + DenseMap<DebugVariable, LocAndProperties> ActiveVLocs; + + /// Temporary cache of DBG_VALUEs to be entered into the Transfers collection. + SmallVector<MachineInstr *, 4> PendingDbgValues; + + /// Record of a use-before-def: created when a value that's live-in to the + /// current block isn't available in any machine location, but it will be + /// defined in this block. + struct UseBeforeDef { + /// Value of this variable, def'd in block. 
+ ValueIDNum ID; + /// Identity of this variable. + DebugVariable Var; + /// Additional variable properties. + DbgValueProperties Properties; + }; + + /// Map from instruction index (within the block) to the set of UseBeforeDefs + /// that become defined at that instruction. + DenseMap<unsigned, SmallVector<UseBeforeDef, 1>> UseBeforeDefs; + + /// The set of variables that are in UseBeforeDefs and can become a location + /// once the relevant value is defined. An element being erased from this + /// collection prevents the use-before-def materializing. + DenseSet<DebugVariable> UseBeforeDefVariables; + + const TargetRegisterInfo &TRI; + const BitVector &CalleeSavedRegs; + + TransferTracker(const TargetInstrInfo *TII, MLocTracker *MTracker, + MachineFunction &MF, const TargetRegisterInfo &TRI, + const BitVector &CalleeSavedRegs) + : TII(TII), MTracker(MTracker), MF(MF), TRI(TRI), + CalleeSavedRegs(CalleeSavedRegs) {} + + /// Load object with live-in variable values. \p mlocs contains the live-in + /// values in each machine location, while \p vlocs the live-in variable + /// values. This method picks variable locations for the live-in variables, + /// creates DBG_VALUEs and puts them in #Transfers, then prepares the other + /// object fields to track variable locations as we step through the block. + /// FIXME: could just examine mloctracker instead of passing in \p mlocs? 
+ void loadInlocs(MachineBasicBlock &MBB, ValueIDNum *MLocs, + SmallVectorImpl<std::pair<DebugVariable, DbgValue>> &VLocs, + unsigned NumLocs) { + ActiveMLocs.clear(); + ActiveVLocs.clear(); + VarLocs.clear(); + VarLocs.reserve(NumLocs); + UseBeforeDefs.clear(); + UseBeforeDefVariables.clear(); + + auto isCalleeSaved = [&](LocIdx L) { + unsigned Reg = MTracker->LocIdxToLocID[L]; + if (Reg >= MTracker->NumRegs) + return false; + for (MCRegAliasIterator RAI(Reg, &TRI, true); RAI.isValid(); ++RAI) + if (CalleeSavedRegs.test(*RAI)) + return true; + return false; + }; + + // Map of the preferred location for each value. + std::map<ValueIDNum, LocIdx> ValueToLoc; + + // Produce a map of value numbers to the current machine locs they live + // in. When emulating VarLocBasedImpl, there should only be one + // location; when not, we get to pick. + for (auto Location : MTracker->locations()) { + LocIdx Idx = Location.Idx; + ValueIDNum &VNum = MLocs[Idx.asU64()]; + VarLocs.push_back(VNum); + auto it = ValueToLoc.find(VNum); + // In order of preference, pick: + // * Callee saved registers, + // * Other registers, + // * Spill slots. + if (it == ValueToLoc.end() || MTracker->isSpill(it->second) || + (!isCalleeSaved(it->second) && isCalleeSaved(Idx.asU64()))) { + // Insert, or overwrite if insertion failed. + auto PrefLocRes = ValueToLoc.insert(std::make_pair(VNum, Idx)); + if (!PrefLocRes.second) + PrefLocRes.first->second = Idx; + } + } + + // Now map variables to their picked LocIdxes. + for (auto Var : VLocs) { + if (Var.second.Kind == DbgValue::Const) { + PendingDbgValues.push_back( + emitMOLoc(Var.second.MO, Var.first, Var.second.Properties)); + continue; + } + + // If the value has no location, we can't make a variable location. 
+ const ValueIDNum &Num = Var.second.ID; + auto ValuesPreferredLoc = ValueToLoc.find(Num); + if (ValuesPreferredLoc == ValueToLoc.end()) { + // If it's a def that occurs in this block, register it as a + // use-before-def to be resolved as we step through the block. + if (Num.getBlock() == (unsigned)MBB.getNumber() && !Num.isPHI()) + addUseBeforeDef(Var.first, Var.second.Properties, Num); + continue; + } + + LocIdx M = ValuesPreferredLoc->second; + auto NewValue = LocAndProperties{M, Var.second.Properties}; + auto Result = ActiveVLocs.insert(std::make_pair(Var.first, NewValue)); + if (!Result.second) + Result.first->second = NewValue; + ActiveMLocs[M].insert(Var.first); + PendingDbgValues.push_back( + MTracker->emitLoc(M, Var.first, Var.second.Properties)); + } + flushDbgValues(MBB.begin(), &MBB); + } + + /// Record that \p Var has value \p ID, a value that becomes available + /// later in the function. + void addUseBeforeDef(const DebugVariable &Var, + const DbgValueProperties &Properties, ValueIDNum ID) { + UseBeforeDef UBD = {ID, Var, Properties}; + UseBeforeDefs[ID.getInst()].push_back(UBD); + UseBeforeDefVariables.insert(Var); + } + + /// After the instruction at index \p Inst and position \p pos has been + /// processed, check whether it defines a variable value in a use-before-def. + /// If so, and the variable value hasn't changed since the start of the + /// block, create a DBG_VALUE. + void checkInstForNewValues(unsigned Inst, MachineBasicBlock::iterator pos) { + auto MIt = UseBeforeDefs.find(Inst); + if (MIt == UseBeforeDefs.end()) + return; + + for (auto &Use : MIt->second) { + LocIdx L = Use.ID.getLoc(); + + // If something goes very wrong, we might end up labelling a COPY + // instruction or similar with an instruction number, where it doesn't + // actually define a new value, instead it moves a value. In case this + // happens, discard. 
+ if (MTracker->LocIdxToIDNum[L] != Use.ID) + continue; + + // If a different debug instruction defined the variable value / location + // since the start of the block, don't materialize this use-before-def. + if (!UseBeforeDefVariables.count(Use.Var)) + continue; + + PendingDbgValues.push_back(MTracker->emitLoc(L, Use.Var, Use.Properties)); + } + flushDbgValues(pos, nullptr); + } + + /// Helper to move created DBG_VALUEs into Transfers collection. + void flushDbgValues(MachineBasicBlock::iterator Pos, MachineBasicBlock *MBB) { + if (PendingDbgValues.size() > 0) { + Transfers.push_back({Pos, MBB, PendingDbgValues}); + PendingDbgValues.clear(); + } + } + + /// Change a variable value after encountering a DBG_VALUE inside a block. + void redefVar(const MachineInstr &MI) { + DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(), + MI.getDebugLoc()->getInlinedAt()); + DbgValueProperties Properties(MI); + + const MachineOperand &MO = MI.getOperand(0); + + // Ignore non-register locations, we don't transfer those. + if (!MO.isReg() || MO.getReg() == 0) { + auto It = ActiveVLocs.find(Var); + if (It != ActiveVLocs.end()) { + ActiveMLocs[It->second.Loc].erase(Var); + ActiveVLocs.erase(It); + } + // Any use-before-defs no longer apply. + UseBeforeDefVariables.erase(Var); + return; + } + + Register Reg = MO.getReg(); + LocIdx NewLoc = MTracker->getRegMLoc(Reg); + redefVar(MI, Properties, NewLoc); + } + + /// Handle a change in variable location within a block. Terminate the + /// variables current location, and record the value it now refers to, so + /// that we can detect location transfers later on. + void redefVar(const MachineInstr &MI, const DbgValueProperties &Properties, + Optional<LocIdx> OptNewLoc) { + DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(), + MI.getDebugLoc()->getInlinedAt()); + // Any use-before-defs no longer apply. 
+ UseBeforeDefVariables.erase(Var); + + // Erase any previous location, + auto It = ActiveVLocs.find(Var); + if (It != ActiveVLocs.end()) + ActiveMLocs[It->second.Loc].erase(Var); + + // If there _is_ no new location, all we had to do was erase. + if (!OptNewLoc) + return; + LocIdx NewLoc = *OptNewLoc; + + // Check whether our local copy of values-by-location in #VarLocs is out of + // date. Wipe old tracking data for the location if it's been clobbered in + // the meantime. + if (MTracker->getNumAtPos(NewLoc) != VarLocs[NewLoc.asU64()]) { + for (auto &P : ActiveMLocs[NewLoc]) { + ActiveVLocs.erase(P); + } + ActiveMLocs[NewLoc.asU64()].clear(); + VarLocs[NewLoc.asU64()] = MTracker->getNumAtPos(NewLoc); + } + + ActiveMLocs[NewLoc].insert(Var); + if (It == ActiveVLocs.end()) { + ActiveVLocs.insert( + std::make_pair(Var, LocAndProperties{NewLoc, Properties})); + } else { + It->second.Loc = NewLoc; + It->second.Properties = Properties; + } + } + + /// Explicitly terminate variable locations based on \p mloc. Creates undef + /// DBG_VALUEs for any variables that were located there, and clears + /// #ActiveMLoc / #ActiveVLoc tracking information for that location. + void clobberMloc(LocIdx MLoc, MachineBasicBlock::iterator Pos) { + assert(MTracker->isSpill(MLoc)); + auto ActiveMLocIt = ActiveMLocs.find(MLoc); + if (ActiveMLocIt == ActiveMLocs.end()) + return; + + VarLocs[MLoc.asU64()] = ValueIDNum::EmptyValue; + + for (auto &Var : ActiveMLocIt->second) { + auto ActiveVLocIt = ActiveVLocs.find(Var); + // Create an undef. We can't feed in a nullptr DIExpression alas, + // so use the variables last expression. Pass None as the location. 
+ const DIExpression *Expr = ActiveVLocIt->second.Properties.DIExpr; + DbgValueProperties Properties(Expr, false); + PendingDbgValues.push_back(MTracker->emitLoc(None, Var, Properties)); + ActiveVLocs.erase(ActiveVLocIt); + } + flushDbgValues(Pos, nullptr); + + ActiveMLocIt->second.clear(); + } + + /// Transfer variables based on \p Src to be based on \p Dst. This handles + /// both register copies as well as spills and restores. Creates DBG_VALUEs + /// describing the movement. + void transferMlocs(LocIdx Src, LocIdx Dst, MachineBasicBlock::iterator Pos) { + // Does Src still contain the value num we expect? If not, it's been + // clobbered in the meantime, and our variable locations are stale. + if (VarLocs[Src.asU64()] != MTracker->getNumAtPos(Src)) + return; + + // assert(ActiveMLocs[Dst].size() == 0); + //^^^ Legitimate scenario on account of un-clobbered slot being assigned to? + ActiveMLocs[Dst] = ActiveMLocs[Src]; + VarLocs[Dst.asU64()] = VarLocs[Src.asU64()]; + + // For each variable based on Src; create a location at Dst. + for (auto &Var : ActiveMLocs[Src]) { + auto ActiveVLocIt = ActiveVLocs.find(Var); + assert(ActiveVLocIt != ActiveVLocs.end()); + ActiveVLocIt->second.Loc = Dst; + + assert(Dst != 0); + MachineInstr *MI = + MTracker->emitLoc(Dst, Var, ActiveVLocIt->second.Properties); + PendingDbgValues.push_back(MI); + } + ActiveMLocs[Src].clear(); + flushDbgValues(Pos, nullptr); + + // XXX XXX XXX "pretend to be old LDV" means dropping all tracking data + // about the old location. 
+ if (EmulateOldLDV) + VarLocs[Src.asU64()] = ValueIDNum::EmptyValue; + } + + MachineInstrBuilder emitMOLoc(const MachineOperand &MO, + const DebugVariable &Var, + const DbgValueProperties &Properties) { + DebugLoc DL = DILocation::get(Var.getVariable()->getContext(), 0, 0, + Var.getVariable()->getScope(), + const_cast<DILocation *>(Var.getInlinedAt())); + auto MIB = BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE)); + MIB.add(MO); + if (Properties.Indirect) + MIB.addImm(0); + else + MIB.addReg(0); + MIB.addMetadata(Var.getVariable()); + MIB.addMetadata(Properties.DIExpr); + return MIB; + } +}; + +class InstrRefBasedLDV : public LDVImpl { +private: + using FragmentInfo = DIExpression::FragmentInfo; + using OptFragmentInfo = Optional<DIExpression::FragmentInfo>; + + // Helper while building OverlapMap, a map of all fragments seen for a given + // DILocalVariable. + using VarToFragments = + DenseMap<const DILocalVariable *, SmallSet<FragmentInfo, 4>>; + + /// Machine location/value transfer function, a mapping of which locations + /// are assigned which new values. + using MLocTransferMap = std::map<LocIdx, ValueIDNum>; + + /// Live in/out structure for the variable values: a per-block map of + /// variables to their values. XXX, better name? + using LiveIdxT = + DenseMap<const MachineBasicBlock *, DenseMap<DebugVariable, DbgValue> *>; + + using VarAndLoc = std::pair<DebugVariable, DbgValue>; + + /// Type for a live-in value: the predecessor block, and its value. + using InValueT = std::pair<MachineBasicBlock *, DbgValue *>; + + /// Vector (per block) of a collection (inner smallvector) of live-ins. + /// Used as the result type for the variable value dataflow problem. 
+ using LiveInsT = SmallVector<SmallVector<VarAndLoc, 8>, 8>; + + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + const TargetFrameLowering *TFI; + BitVector CalleeSavedRegs; + LexicalScopes LS; + TargetPassConfig *TPC; + + /// Object to track machine locations as we step through a block. Could + /// probably be a field rather than a pointer, as it's always used. + MLocTracker *MTracker; + + /// Number of the current block LiveDebugValues is stepping through. + unsigned CurBB; + + /// Number of the current instruction LiveDebugValues is evaluating. + unsigned CurInst; + + /// Variable tracker -- listens to DBG_VALUEs occurring as InstrRefBasedImpl + /// steps through a block. Reads the values at each location from the + /// MLocTracker object. + VLocTracker *VTracker; + + /// Tracker for transfers, listens to DBG_VALUEs and transfers of values + /// between locations during stepping, creates new DBG_VALUEs when values move + /// location. + TransferTracker *TTracker; + + /// Blocks which are artificial, i.e. blocks which exclusively contain + /// instructions without DebugLocs, or with line 0 locations. + SmallPtrSet<const MachineBasicBlock *, 16> ArtificialBlocks; + + // Mapping of blocks to and from their RPOT order. + DenseMap<unsigned int, MachineBasicBlock *> OrderToBB; + DenseMap<MachineBasicBlock *, unsigned int> BBToOrder; + DenseMap<unsigned, unsigned> BBNumToRPO; + + /// Pair of MachineInstr, and its 1-based offset into the containing block. + using InstAndNum = std::pair<const MachineInstr *, unsigned>; + /// Map from debug instruction number to the MachineInstr labelled with that + /// number, and its location within the function. Used to transform + /// instruction numbers in DBG_INSTR_REFs into machine value numbers. + std::map<uint64_t, InstAndNum> DebugInstrNumToInstr; + + // Map of overlapping variable fragments. 
+ OverlapMap OverlapFragments; + VarToFragments SeenFragments; + + /// Tests whether this instruction is a spill to a stack slot. + bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF); + + /// Decide if @MI is a spill instruction and return true if it is. We use 2 + /// criteria to make this decision: + /// - Is this instruction a store to a spill slot? + /// - Is there a register operand that is both used and killed? + /// TODO: Store optimization can fold spills into other stores (including + /// other spills). We do not handle this yet (more than one memory operand). + bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF, + unsigned &Reg); + + /// If a given instruction is identified as a spill, return the spill slot + /// and set \p Reg to the spilled register. + Optional<SpillLoc> isRestoreInstruction(const MachineInstr &MI, + MachineFunction *MF, unsigned &Reg); + + /// Given a spill instruction, extract the register and offset used to + /// address the spill slot in a target independent way. + SpillLoc extractSpillBaseRegAndOffset(const MachineInstr &MI); + + /// Observe a single instruction while stepping through a block. + void process(MachineInstr &MI); + + /// Examines whether \p MI is a DBG_VALUE and notifies trackers. + /// \returns true if MI was recognized and processed. + bool transferDebugValue(const MachineInstr &MI); + + /// Examines whether \p MI is a DBG_INSTR_REF and notifies trackers. + /// \returns true if MI was recognized and processed. + bool transferDebugInstrRef(MachineInstr &MI); + + /// Examines whether \p MI is copy instruction, and notifies trackers. + /// \returns true if MI was recognized and processed. + bool transferRegisterCopy(MachineInstr &MI); + + /// Examines whether \p MI is stack spill or restore instruction, and + /// notifies trackers. \returns true if MI was recognized and processed. 
+ bool transferSpillOrRestoreInst(MachineInstr &MI); + + /// Examines \p MI for any registers that it defines, and notifies trackers. + void transferRegisterDef(MachineInstr &MI); + + /// Copy one location to the other, accounting for movement of subregisters + /// too. + void performCopy(Register Src, Register Dst); + + void accumulateFragmentMap(MachineInstr &MI); + + /// Step through the function, recording register definitions and movements + /// in an MLocTracker. Convert the observations into a per-block transfer + /// function in \p MLocTransfer, suitable for using with the machine value + /// location dataflow problem. + void + produceMLocTransferFunction(MachineFunction &MF, + SmallVectorImpl<MLocTransferMap> &MLocTransfer, + unsigned MaxNumBlocks); + + /// Solve the machine value location dataflow problem. Takes as input the + /// transfer functions in \p MLocTransfer. Writes the output live-in and + /// live-out arrays to the (initialized to zero) multidimensional arrays in + /// \p MInLocs and \p MOutLocs. The outer dimension is indexed by block + /// number, the inner by LocIdx. + void mlocDataflow(ValueIDNum **MInLocs, ValueIDNum **MOutLocs, + SmallVectorImpl<MLocTransferMap> &MLocTransfer); + + /// Perform a control flow join (lattice value meet) of the values in machine + /// locations at \p MBB. Follows the algorithm described in the file-comment, + /// reading live-outs of predecessors from \p OutLocs, the current live ins + /// from \p InLocs, and assigning the newly computed live ins back into + /// \p InLocs. \returns two bools -- the first indicates whether a change + /// was made, the second whether a lattice downgrade occurred. If the latter + /// is true, revisiting this block is necessary. + std::tuple<bool, bool> + mlocJoin(MachineBasicBlock &MBB, + SmallPtrSet<const MachineBasicBlock *, 16> &Visited, + ValueIDNum **OutLocs, ValueIDNum *InLocs); + + /// Solve the variable value dataflow problem, for a single lexical scope. 
+ /// Uses the algorithm from the file comment to resolve control flow joins, + /// although there are extra hacks, see vlocJoin. Reads the + /// locations of values from the \p MInLocs and \p MOutLocs arrays (see + /// mlocDataflow) and reads the variable values transfer function from + /// \p AllTheVlocs. Live-in and Live-out variable values are stored locally, + /// with the live-ins permanently stored to \p Output once the fixedpoint is + /// reached. + /// \p VarsWeCareAbout contains a collection of the variables in \p Scope + /// that we should be tracking. + /// \p AssignBlocks contains the set of blocks that aren't in \p Scope, but + /// which do contain DBG_VALUEs, which VarLocBasedImpl tracks locations + /// through. + void vlocDataflow(const LexicalScope *Scope, const DILocation *DILoc, + const SmallSet<DebugVariable, 4> &VarsWeCareAbout, + SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks, + LiveInsT &Output, ValueIDNum **MOutLocs, + ValueIDNum **MInLocs, + SmallVectorImpl<VLocTracker> &AllTheVLocs); + + /// Compute the live-ins to a block, considering control flow merges according + /// to the method in the file comment. Live out and live in variable values + /// are stored in \p VLOCOutLocs and \p VLOCInLocs. The live-ins for \p MBB + /// are computed and stored into \p VLOCInLocs. \returns true if the live-ins + /// are modified. + /// \p InLocsT Output argument, storage for calculated live-ins. + /// \returns two bools -- the first indicates whether a change + /// was made, the second whether a lattice downgrade occurred. If the latter + /// is true, revisiting this block is necessary. 
+ std::tuple<bool, bool> + vlocJoin(MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs, LiveIdxT &VLOCInLocs, + SmallPtrSet<const MachineBasicBlock *, 16> *VLOCVisited, + unsigned BBNum, const SmallSet<DebugVariable, 4> &AllVars, + ValueIDNum **MOutLocs, ValueIDNum **MInLocs, + SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks, + SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore, + DenseMap<DebugVariable, DbgValue> &InLocsT); + + /// Continue exploration of the variable-value lattice, as explained in the + /// file-level comment. \p OldLiveInLocation contains the current + /// exploration position, from which we need to descend further. \p Values + /// contains the set of live-in values, \p CurBlockRPONum the RPO number of + /// the current block, and \p CandidateLocations a set of locations that + /// should be considered as PHI locations, if we reach the bottom of the + /// lattice. \returns true if we should downgrade; the value is the agreeing + /// value number in a non-backedge predecessor. + bool vlocDowngradeLattice(const MachineBasicBlock &MBB, + const DbgValue &OldLiveInLocation, + const SmallVectorImpl<InValueT> &Values, + unsigned CurBlockRPONum); + + /// For the given block and live-outs feeding into it, try to find a + /// machine location where they all join. If a solution for all predecessors + /// can't be found, a location where all non-backedge-predecessors join + /// will be returned instead. While this method finds a join location, this + /// says nothing as to whether it should be used. + /// \returns Pair of value ID if found, and true when the correct value + /// is available on all predecessor edges, or false if it's only available + /// for non-backedge predecessors. 
+ std::tuple<Optional<ValueIDNum>, bool> + pickVPHILoc(MachineBasicBlock &MBB, const DebugVariable &Var, + const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs, + ValueIDNum **MInLocs, + const SmallVectorImpl<MachineBasicBlock *> &BlockOrders); + + /// Given the solutions to the two dataflow problems, machine value locations + /// in \p MInLocs and live-in variable values in \p SavedLiveIns, runs the + /// TransferTracker class over the function to produce live-in and transfer + /// DBG_VALUEs, then inserts them. Groups of DBG_VALUEs are inserted in the + /// order given by AllVarsNumbering -- this could be any stable order, but + /// right now "order of appearence in function, when explored in RPO", so + /// that we can compare explictly against VarLocBasedImpl. + void emitLocations(MachineFunction &MF, LiveInsT SavedLiveIns, + ValueIDNum **MInLocs, + DenseMap<DebugVariable, unsigned> &AllVarsNumbering); + + /// Boilerplate computation of some initial sets, artifical blocks and + /// RPOT block ordering. + void initialSetup(MachineFunction &MF); + + bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) override; + +public: + /// Default construct and initialize the pass. + InstrRefBasedLDV(); + + LLVM_DUMP_METHOD + void dump_mloc_transfer(const MLocTransferMap &mloc_transfer) const; + + bool isCalleeSaved(LocIdx L) { + unsigned Reg = MTracker->LocIdxToLocID[L]; + for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI) + if (CalleeSavedRegs.test(*RAI)) + return true; + return false; + } +}; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Implementation +//===----------------------------------------------------------------------===// + +ValueIDNum ValueIDNum::EmptyValue = {UINT_MAX, UINT_MAX, UINT_MAX}; + +/// Default construct and initialize the pass. 
+InstrRefBasedLDV::InstrRefBasedLDV() {} + +//===----------------------------------------------------------------------===// +// Debug Range Extension Implementation +//===----------------------------------------------------------------------===// + +#ifndef NDEBUG +// Something to restore in the future. +// void InstrRefBasedLDV::printVarLocInMBB(..) +#endif + +SpillLoc +InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) { + assert(MI.hasOneMemOperand() && + "Spill instruction does not have exactly one memory operand?"); + auto MMOI = MI.memoperands_begin(); + const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue(); + assert(PVal->kind() == PseudoSourceValue::FixedStack && + "Inconsistent memory operand in spill instruction"); + int FI = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex(); + const MachineBasicBlock *MBB = MI.getParent(); + Register Reg; + StackOffset Offset = TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg); + return {Reg, Offset}; +} + +/// End all previous ranges related to @MI and start a new range from @MI +/// if it is a DBG_VALUE instr. +bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) { + if (!MI.isDebugValue()) + return false; + + const DILocalVariable *Var = MI.getDebugVariable(); + const DIExpression *Expr = MI.getDebugExpression(); + const DILocation *DebugLoc = MI.getDebugLoc(); + const DILocation *InlinedAt = DebugLoc->getInlinedAt(); + assert(Var->isValidLocationForIntrinsic(DebugLoc) && + "Expected inlined-at fields to agree"); + + DebugVariable V(Var, Expr, InlinedAt); + DbgValueProperties Properties(MI); + + // If there are no instructions in this lexical scope, do no location tracking + // at all, this variable shouldn't get a legitimate location range. 
+ auto *Scope = LS.findLexicalScope(MI.getDebugLoc().get()); + if (Scope == nullptr) + return true; // handled it; by doing nothing + + const MachineOperand &MO = MI.getOperand(0); + + // MLocTracker needs to know that this register is read, even if it's only + // read by a debug inst. + if (MO.isReg() && MO.getReg() != 0) + (void)MTracker->readReg(MO.getReg()); + + // If we're preparing for the second analysis (variables), the machine value + // locations are already solved, and we report this DBG_VALUE and the value + // it refers to to VLocTracker. + if (VTracker) { + if (MO.isReg()) { + // Feed defVar the new variable location, or if this is a + // DBG_VALUE $noreg, feed defVar None. + if (MO.getReg()) + VTracker->defVar(MI, Properties, MTracker->readReg(MO.getReg())); + else + VTracker->defVar(MI, Properties, None); + } else if (MI.getOperand(0).isImm() || MI.getOperand(0).isFPImm() || + MI.getOperand(0).isCImm()) { + VTracker->defVar(MI, MI.getOperand(0)); + } + } + + // If performing final tracking of transfers, report this variable definition + // to the TransferTracker too. + if (TTracker) + TTracker->redefVar(MI); + return true; +} + +bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI) { + if (!MI.isDebugRef()) + return false; + + // Only handle this instruction when we are building the variable value + // transfer function. + if (!VTracker) + return false; + + unsigned InstNo = MI.getOperand(0).getImm(); + unsigned OpNo = MI.getOperand(1).getImm(); + + const DILocalVariable *Var = MI.getDebugVariable(); + const DIExpression *Expr = MI.getDebugExpression(); + const DILocation *DebugLoc = MI.getDebugLoc(); + const DILocation *InlinedAt = DebugLoc->getInlinedAt(); + assert(Var->isValidLocationForIntrinsic(DebugLoc) && + "Expected inlined-at fields to agree"); + + DebugVariable V(Var, Expr, InlinedAt); + + auto *Scope = LS.findLexicalScope(MI.getDebugLoc().get()); + if (Scope == nullptr) + return true; // Handled by doing nothing. 
This variable is never in scope. + + const MachineFunction &MF = *MI.getParent()->getParent(); + + // Various optimizations may have happened to the value during codegen, + // recorded in the value substitution table. Apply any substitutions to + // the instruction / operand number in this DBG_INSTR_REF. + auto Sub = MF.DebugValueSubstitutions.find(std::make_pair(InstNo, OpNo)); + while (Sub != MF.DebugValueSubstitutions.end()) { + InstNo = Sub->second.first; + OpNo = Sub->second.second; + Sub = MF.DebugValueSubstitutions.find(std::make_pair(InstNo, OpNo)); + } + + // Default machine value number is <None> -- if no instruction defines + // the corresponding value, it must have been optimized out. + Optional<ValueIDNum> NewID = None; + + // Try to lookup the instruction number, and find the machine value number + // that it defines. + auto InstrIt = DebugInstrNumToInstr.find(InstNo); + if (InstrIt != DebugInstrNumToInstr.end()) { + const MachineInstr &TargetInstr = *InstrIt->second.first; + uint64_t BlockNo = TargetInstr.getParent()->getNumber(); + + // Pick out the designated operand. + assert(OpNo < TargetInstr.getNumOperands()); + const MachineOperand &MO = TargetInstr.getOperand(OpNo); + + // Today, this can only be a register. + assert(MO.isReg() && MO.isDef()); + + unsigned LocID = MTracker->getLocID(MO.getReg(), false); + LocIdx L = MTracker->LocIDToLocIdx[LocID]; + NewID = ValueIDNum(BlockNo, InstrIt->second.second, L); + } + + // We, we have a value number or None. Tell the variable value tracker about + // it. The rest of this LiveDebugValues implementation acts exactly the same + // for DBG_INSTR_REFs as DBG_VALUEs (just, the former can refer to values that + // aren't immediately available). + DbgValueProperties Properties(Expr, false); + VTracker->defVar(MI, Properties, NewID); + + // If we're on the final pass through the function, decompose this INSTR_REF + // into a plain DBG_VALUE. 
+ if (!TTracker) + return true; + + // Pick a location for the machine value number, if such a location exists. + // (This information could be stored in TransferTracker to make it faster). + Optional<LocIdx> FoundLoc = None; + for (auto Location : MTracker->locations()) { + LocIdx CurL = Location.Idx; + ValueIDNum ID = MTracker->LocIdxToIDNum[CurL]; + if (NewID && ID == NewID) { + // If this is the first location with that value, pick it. Otherwise, + // consider whether it's a "longer term" location. + if (!FoundLoc) { + FoundLoc = CurL; + continue; + } + + if (MTracker->isSpill(CurL)) + FoundLoc = CurL; // Spills are a longer term location. + else if (!MTracker->isSpill(*FoundLoc) && + !MTracker->isSpill(CurL) && + !isCalleeSaved(*FoundLoc) && + isCalleeSaved(CurL)) + FoundLoc = CurL; // Callee saved regs are longer term than normal. + } + } + + // Tell transfer tracker that the variable value has changed. + TTracker->redefVar(MI, Properties, FoundLoc); + + // If there was a value with no location; but the value is defined in a + // later instruction in this block, this is a block-local use-before-def. + if (!FoundLoc && NewID && NewID->getBlock() == CurBB && + NewID->getInst() > CurInst) + TTracker->addUseBeforeDef(V, {MI.getDebugExpression(), false}, *NewID); + + // Produce a DBG_VALUE representing what this DBG_INSTR_REF meant. + // This DBG_VALUE is potentially a $noreg / undefined location, if + // FoundLoc is None. + // (XXX -- could morph the DBG_INSTR_REF in the future). + MachineInstr *DbgMI = MTracker->emitLoc(FoundLoc, V, Properties); + TTracker->PendingDbgValues.push_back(DbgMI); + TTracker->flushDbgValues(MI.getIterator(), nullptr); + + return true; +} + +void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) { + // Meta Instructions do not affect the debug liveness of any register they + // define. + if (MI.isImplicitDef()) { + // Except when there's an implicit def, and the location it's defining has + // no value number. 
The whole point of an implicit def is to announce that + // the register is live, without be specific about it's value. So define + // a value if there isn't one already. + ValueIDNum Num = MTracker->readReg(MI.getOperand(0).getReg()); + // Has a legitimate value -> ignore the implicit def. + if (Num.getLoc() != 0) + return; + // Otherwise, def it here. + } else if (MI.isMetaInstruction()) + return; + + MachineFunction *MF = MI.getMF(); + const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); + Register SP = TLI->getStackPointerRegisterToSaveRestore(); + + // Find the regs killed by MI, and find regmasks of preserved regs. + // Max out the number of statically allocated elements in `DeadRegs`, as this + // prevents fallback to std::set::count() operations. + SmallSet<uint32_t, 32> DeadRegs; + SmallVector<const uint32_t *, 4> RegMasks; + SmallVector<const MachineOperand *, 4> RegMaskPtrs; + for (const MachineOperand &MO : MI.operands()) { + // Determine whether the operand is a register def. + if (MO.isReg() && MO.isDef() && MO.getReg() && + Register::isPhysicalRegister(MO.getReg()) && + !(MI.isCall() && MO.getReg() == SP)) { + // Remove ranges of all aliased registers. + for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI) + // FIXME: Can we break out of this loop early if no insertion occurs? + DeadRegs.insert(*RAI); + } else if (MO.isRegMask()) { + RegMasks.push_back(MO.getRegMask()); + RegMaskPtrs.push_back(&MO); + } + } + + // Tell MLocTracker about all definitions, of regmasks and otherwise. + for (uint32_t DeadReg : DeadRegs) + MTracker->defReg(DeadReg, CurBB, CurInst); + + for (auto *MO : RegMaskPtrs) + MTracker->writeRegMask(MO, CurBB, CurInst); +} + +void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) { + ValueIDNum SrcValue = MTracker->readReg(SrcRegNum); + + MTracker->setReg(DstRegNum, SrcValue); + + // In all circumstances, re-def the super registers. It's definitely a new + // value now. 
This doesn't uniquely identify the composition of subregs, for + // example, two identical values in subregisters composed in different + // places would not get equal value numbers. + for (MCSuperRegIterator SRI(DstRegNum, TRI); SRI.isValid(); ++SRI) + MTracker->defReg(*SRI, CurBB, CurInst); + + // If we're emulating VarLocBasedImpl, just define all the subregisters. + // DBG_VALUEs of them will expect to be tracked from the DBG_VALUE, not + // through prior copies. + if (EmulateOldLDV) { + for (MCSubRegIndexIterator DRI(DstRegNum, TRI); DRI.isValid(); ++DRI) + MTracker->defReg(DRI.getSubReg(), CurBB, CurInst); + return; + } + + // Otherwise, actually copy subregisters from one location to another. + // XXX: in addition, any subregisters of DstRegNum that don't line up with + // the source register should be def'd. + for (MCSubRegIndexIterator SRI(SrcRegNum, TRI); SRI.isValid(); ++SRI) { + unsigned SrcSubReg = SRI.getSubReg(); + unsigned SubRegIdx = SRI.getSubRegIndex(); + unsigned DstSubReg = TRI->getSubReg(DstRegNum, SubRegIdx); + if (!DstSubReg) + continue; + + // Do copy. There are two matching subregisters, the source value should + // have been def'd when the super-reg was, the latter might not be tracked + // yet. + // This will force SrcSubReg to be tracked, if it isn't yet. + (void)MTracker->readReg(SrcSubReg); + LocIdx SrcL = MTracker->getRegMLoc(SrcSubReg); + assert(SrcL.asU64()); + (void)MTracker->readReg(DstSubReg); + LocIdx DstL = MTracker->getRegMLoc(DstSubReg); + assert(DstL.asU64()); + (void)DstL; + ValueIDNum CpyValue = {SrcValue.getBlock(), SrcValue.getInst(), SrcL}; + + MTracker->setReg(DstSubReg, CpyValue); + } +} + +bool InstrRefBasedLDV::isSpillInstruction(const MachineInstr &MI, + MachineFunction *MF) { + // TODO: Handle multiple stores folded into one. 
+ if (!MI.hasOneMemOperand()) + return false; + + if (!MI.getSpillSize(TII) && !MI.getFoldedSpillSize(TII)) + return false; // This is not a spill instruction, since no valid size was + // returned from either function. + + return true; +} + +bool InstrRefBasedLDV::isLocationSpill(const MachineInstr &MI, + MachineFunction *MF, unsigned &Reg) { + if (!isSpillInstruction(MI, MF)) + return false; + + // XXX FIXME: On x86, isStoreToStackSlotPostFE returns '1' instead of an + // actual register number. + if (ObserveAllStackops) { + int FI; + Reg = TII->isStoreToStackSlotPostFE(MI, FI); + return Reg != 0; + } + + auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) { + if (!MO.isReg() || !MO.isUse()) { + Reg = 0; + return false; + } + Reg = MO.getReg(); + return MO.isKill(); + }; + + for (const MachineOperand &MO : MI.operands()) { + // In a spill instruction generated by the InlineSpiller the spilled + // register has its kill flag set. + if (isKilledReg(MO, Reg)) + return true; + if (Reg != 0) { + // Check whether next instruction kills the spilled register. + // FIXME: Current solution does not cover search for killed register in + // bundles and instructions further down the chain. + auto NextI = std::next(MI.getIterator()); + // Skip next instruction that points to basic block end iterator. + if (MI.getParent()->end() == NextI) + continue; + unsigned RegNext; + for (const MachineOperand &MONext : NextI->operands()) { + // Return true if we came across the register from the + // previous spill instruction that is killed in NextI. + if (isKilledReg(MONext, RegNext) && RegNext == Reg) + return true; + } + } + } + // Return false if we didn't find spilled register. + return false; +} + +Optional<SpillLoc> +InstrRefBasedLDV::isRestoreInstruction(const MachineInstr &MI, + MachineFunction *MF, unsigned &Reg) { + if (!MI.hasOneMemOperand()) + return None; + + // FIXME: Handle folded restore instructions with more than one memory + // operand. 
+ if (MI.getRestoreSize(TII)) { + Reg = MI.getOperand(0).getReg(); + return extractSpillBaseRegAndOffset(MI); + } + return None; +} + +bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) { + // XXX -- it's too difficult to implement VarLocBasedImpl's stack location + // limitations under the new model. Therefore, when comparing them, compare + // versions that don't attempt spills or restores at all. + if (EmulateOldLDV) + return false; + + MachineFunction *MF = MI.getMF(); + unsigned Reg; + Optional<SpillLoc> Loc; + + LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump();); + + // First, if there are any DBG_VALUEs pointing at a spill slot that is + // written to, terminate that variable location. The value in memory + // will have changed. DbgEntityHistoryCalculator doesn't try to detect this. + if (isSpillInstruction(MI, MF)) { + Loc = extractSpillBaseRegAndOffset(MI); + + if (TTracker) { + Optional<LocIdx> MLoc = MTracker->getSpillMLoc(*Loc); + if (MLoc) + TTracker->clobberMloc(*MLoc, MI.getIterator()); + } + } + + // Try to recognise spill and restore instructions that may transfer a value. + if (isLocationSpill(MI, MF, Reg)) { + Loc = extractSpillBaseRegAndOffset(MI); + auto ValueID = MTracker->readReg(Reg); + + // If the location is empty, produce a phi, signify it's the live-in value. + if (ValueID.getLoc() == 0) + ValueID = {CurBB, 0, MTracker->getRegMLoc(Reg)}; + + MTracker->setSpill(*Loc, ValueID); + auto OptSpillLocIdx = MTracker->getSpillMLoc(*Loc); + assert(OptSpillLocIdx && "Spill slot set but has no LocIdx?"); + LocIdx SpillLocIdx = *OptSpillLocIdx; + + // Tell TransferTracker about this spill, produce DBG_VALUEs for it. + if (TTracker) + TTracker->transferMlocs(MTracker->getRegMLoc(Reg), SpillLocIdx, + MI.getIterator()); + } else { + if (!(Loc = isRestoreInstruction(MI, MF, Reg))) + return false; + + // Is there a value to be restored? 
+ auto OptValueID = MTracker->readSpill(*Loc); + if (OptValueID) { + ValueIDNum ValueID = *OptValueID; + LocIdx SpillLocIdx = *MTracker->getSpillMLoc(*Loc); + // XXX -- can we recover sub-registers of this value? Until we can, first + // overwrite all defs of the register being restored to. + for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI) + MTracker->defReg(*RAI, CurBB, CurInst); + + // Now override the reg we're restoring to. + MTracker->setReg(Reg, ValueID); + + // Report this restore to the transfer tracker too. + if (TTracker) + TTracker->transferMlocs(SpillLocIdx, MTracker->getRegMLoc(Reg), + MI.getIterator()); + } else { + // There isn't anything in the location; not clear if this is a code path + // that still runs. Def this register anyway just in case. + for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI) + MTracker->defReg(*RAI, CurBB, CurInst); + + // Force the spill slot to be tracked. + LocIdx L = MTracker->getOrTrackSpillLoc(*Loc); + + // Set the restored value to be a machine phi number, signifying that it's + // whatever the spills live-in value is in this block. Definitely has + // a LocIdx due to the setSpill above. + ValueIDNum ValueID = {CurBB, 0, L}; + MTracker->setReg(Reg, ValueID); + MTracker->setSpill(*Loc, ValueID); + } + } + return true; +} + +bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) { + auto DestSrc = TII->isCopyInstr(MI); + if (!DestSrc) + return false; + + const MachineOperand *DestRegOp = DestSrc->Destination; + const MachineOperand *SrcRegOp = DestSrc->Source; + + auto isCalleeSavedReg = [&](unsigned Reg) { + for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI) + if (CalleeSavedRegs.test(*RAI)) + return true; + return false; + }; + + Register SrcReg = SrcRegOp->getReg(); + Register DestReg = DestRegOp->getReg(); + + // Ignore identity copies. Yep, these make it as far as LiveDebugValues. 
+ if (SrcReg == DestReg) + return true; + + // For emulating VarLocBasedImpl: + // We want to recognize instructions where destination register is callee + // saved register. If register that could be clobbered by the call is + // included, there would be a great chance that it is going to be clobbered + // soon. It is more likely that previous register, which is callee saved, is + // going to stay unclobbered longer, even if it is killed. + // + // For InstrRefBasedImpl, we can track multiple locations per value, so + // ignore this condition. + if (EmulateOldLDV && !isCalleeSavedReg(DestReg)) + return false; + + // InstrRefBasedImpl only followed killing copies. + if (EmulateOldLDV && !SrcRegOp->isKill()) + return false; + + // Copy MTracker info, including subregs if available. + InstrRefBasedLDV::performCopy(SrcReg, DestReg); + + // Only produce a transfer of DBG_VALUE within a block where old LDV + // would have. We might make use of the additional value tracking in some + // other way, later. + if (TTracker && isCalleeSavedReg(DestReg) && SrcRegOp->isKill()) + TTracker->transferMlocs(MTracker->getRegMLoc(SrcReg), + MTracker->getRegMLoc(DestReg), MI.getIterator()); + + // VarLocBasedImpl would quit tracking the old location after copying. + if (EmulateOldLDV && SrcReg != DestReg) + MTracker->defReg(SrcReg, CurBB, CurInst); + + return true; +} + +/// Accumulate a mapping between each DILocalVariable fragment and other +/// fragments of that DILocalVariable which overlap. This reduces work during +/// the data-flow stage from "Find any overlapping fragments" to "Check if the +/// known-to-overlap fragments are present". +/// \param MI A previously unprocessed DEBUG_VALUE instruction to analyze for +/// fragment usage. 
+void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) { + DebugVariable MIVar(MI.getDebugVariable(), MI.getDebugExpression(), + MI.getDebugLoc()->getInlinedAt()); + FragmentInfo ThisFragment = MIVar.getFragmentOrDefault(); + + // If this is the first sighting of this variable, then we are guaranteed + // there are currently no overlapping fragments either. Initialize the set + // of seen fragments, record no overlaps for the current one, and return. + auto SeenIt = SeenFragments.find(MIVar.getVariable()); + if (SeenIt == SeenFragments.end()) { + SmallSet<FragmentInfo, 4> OneFragment; + OneFragment.insert(ThisFragment); + SeenFragments.insert({MIVar.getVariable(), OneFragment}); + + OverlapFragments.insert({{MIVar.getVariable(), ThisFragment}, {}}); + return; + } + + // If this particular Variable/Fragment pair already exists in the overlap + // map, it has already been accounted for. + auto IsInOLapMap = + OverlapFragments.insert({{MIVar.getVariable(), ThisFragment}, {}}); + if (!IsInOLapMap.second) + return; + + auto &ThisFragmentsOverlaps = IsInOLapMap.first->second; + auto &AllSeenFragments = SeenIt->second; + + // Otherwise, examine all other seen fragments for this variable, with "this" + // fragment being a previously unseen fragment. Record any pair of + // overlapping fragments. + for (auto &ASeenFragment : AllSeenFragments) { + // Does this previously seen fragment overlap? + if (DIExpression::fragmentsOverlap(ThisFragment, ASeenFragment)) { + // Yes: Mark the current fragment as being overlapped. + ThisFragmentsOverlaps.push_back(ASeenFragment); + // Mark the previously seen fragment as being overlapped by the current + // one. 
+ auto ASeenFragmentsOverlaps = + OverlapFragments.find({MIVar.getVariable(), ASeenFragment}); + assert(ASeenFragmentsOverlaps != OverlapFragments.end() && + "Previously seen var fragment has no vector of overlaps"); + ASeenFragmentsOverlaps->second.push_back(ThisFragment); + } + } + + AllSeenFragments.insert(ThisFragment); +} + +void InstrRefBasedLDV::process(MachineInstr &MI) { + // Try to interpret an MI as a debug or transfer instruction. Only if it's + // none of these should we interpret it's register defs as new value + // definitions. + if (transferDebugValue(MI)) + return; + if (transferDebugInstrRef(MI)) + return; + if (transferRegisterCopy(MI)) + return; + if (transferSpillOrRestoreInst(MI)) + return; + transferRegisterDef(MI); +} + +void InstrRefBasedLDV::produceMLocTransferFunction( + MachineFunction &MF, SmallVectorImpl<MLocTransferMap> &MLocTransfer, + unsigned MaxNumBlocks) { + // Because we try to optimize around register mask operands by ignoring regs + // that aren't currently tracked, we set up something ugly for later: RegMask + // operands that are seen earlier than the first use of a register, still need + // to clobber that register in the transfer function. But this information + // isn't actively recorded. Instead, we track each RegMask used in each block, + // and accumulated the clobbered but untracked registers in each block into + // the following bitvector. Later, if new values are tracked, we can add + // appropriate clobbers. + SmallVector<BitVector, 32> BlockMasks; + BlockMasks.resize(MaxNumBlocks); + + // Reserve one bit per register for the masks described above. + unsigned BVWords = MachineOperand::getRegMaskSize(TRI->getNumRegs()); + for (auto &BV : BlockMasks) + BV.resize(TRI->getNumRegs(), true); + + // Step through all instructions and inhale the transfer function. + for (auto &MBB : MF) { + // Object fields that are read by trackers to know where we are in the + // function. 
+ CurBB = MBB.getNumber(); + CurInst = 1; + + // Set all machine locations to a PHI value. For transfer function + // production only, this signifies the live-in value to the block. + MTracker->reset(); + MTracker->setMPhis(CurBB); + + // Step through each instruction in this block. + for (auto &MI : MBB) { + process(MI); + // Also accumulate fragment map. + if (MI.isDebugValue()) + accumulateFragmentMap(MI); + + // Create a map from the instruction number (if present) to the + // MachineInstr and its position. + if (uint64_t InstrNo = MI.peekDebugInstrNum()) { + auto InstrAndPos = std::make_pair(&MI, CurInst); + auto InsertResult = + DebugInstrNumToInstr.insert(std::make_pair(InstrNo, InstrAndPos)); + + // There should never be duplicate instruction numbers. + assert(InsertResult.second); + (void)InsertResult; + } + + ++CurInst; + } + + // Produce the transfer function, a map of machine location to new value. If + // any machine location has the live-in phi value from the start of the + // block, it's live-through and doesn't need recording in the transfer + // function. + for (auto Location : MTracker->locations()) { + LocIdx Idx = Location.Idx; + ValueIDNum &P = Location.Value; + if (P.isPHI() && P.getLoc() == Idx.asU64()) + continue; + + // Insert-or-update. + auto &TransferMap = MLocTransfer[CurBB]; + auto Result = TransferMap.insert(std::make_pair(Idx.asU64(), P)); + if (!Result.second) + Result.first->second = P; + } + + // Accumulate any bitmask operands into the clobberred reg mask for this + // block. + for (auto &P : MTracker->Masks) { + BlockMasks[CurBB].clearBitsNotInMask(P.first->getRegMask(), BVWords); + } + } + + // Compute a bitvector of all the registers that are tracked in this block. 
+ const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); + Register SP = TLI->getStackPointerRegisterToSaveRestore(); + BitVector UsedRegs(TRI->getNumRegs()); + for (auto Location : MTracker->locations()) { + unsigned ID = MTracker->LocIdxToLocID[Location.Idx]; + if (ID >= TRI->getNumRegs() || ID == SP) + continue; + UsedRegs.set(ID); + } + + // Check that any regmask-clobber of a register that gets tracked, is not + // live-through in the transfer function. It needs to be clobbered at the + // very least. + for (unsigned int I = 0; I < MaxNumBlocks; ++I) { + BitVector &BV = BlockMasks[I]; + BV.flip(); + BV &= UsedRegs; + // This produces all the bits that we clobber, but also use. Check that + // they're all clobbered or at least set in the designated transfer + // elem. + for (unsigned Bit : BV.set_bits()) { + unsigned ID = MTracker->getLocID(Bit, false); + LocIdx Idx = MTracker->LocIDToLocIdx[ID]; + auto &TransferMap = MLocTransfer[I]; + + // Install a value representing the fact that this location is effectively + // written to in this block. As there's no reserved value, instead use + // a value number that is never generated. Pick the value number for the + // first instruction in the block, def'ing this location, which we know + // this block never used anyway. + ValueIDNum NotGeneratedNum = ValueIDNum(I, 1, Idx); + auto Result = + TransferMap.insert(std::make_pair(Idx.asU64(), NotGeneratedNum)); + if (!Result.second) { + ValueIDNum &ValueID = Result.first->second; + if (ValueID.getBlock() == I && ValueID.isPHI()) + // It was left as live-through. Set it to clobbered. 
          ValueID = NotGeneratedNum;
      }
    }
  }
}

// Join the machine-location live-out values of MBB's already-visited
// predecessors into InLocs (the live-in array for MBB). Returns a pair of
// flags: whether the live-ins changed, and whether a lattice "downgrade"
// occurred (which books the block for re-examination).
std::tuple<bool, bool>
InstrRefBasedLDV::mlocJoin(MachineBasicBlock &MBB,
                           SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
                           ValueIDNum **OutLocs, ValueIDNum *InLocs) {
  LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
  bool Changed = false;
  bool DowngradeOccurred = false;

  // Collect predecessors that have been visited. Anything that hasn't been
  // visited yet is a backedge on the first iteration, and the meet of its
  // lattice value for all locations will be unaffected.
  SmallVector<const MachineBasicBlock *, 8> BlockOrders;
  for (auto Pred : MBB.predecessors()) {
    if (Visited.count(Pred)) {
      BlockOrders.push_back(Pred);
    }
  }

  // Visit predecessors in RPOT order.
  auto Cmp = [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
    return BBToOrder.find(A)->second < BBToOrder.find(B)->second;
  };
  llvm::sort(BlockOrders, Cmp);

  // Skip entry block: it has no predecessors to join.
  if (BlockOrders.size() == 0)
    return std::tuple<bool, bool>(false, false);

  // Step through all machine locations, then look at each predecessor and
  // detect disagreements.
  unsigned ThisBlockRPO = BBToOrder.find(&MBB)->second;
  for (auto Location : MTracker->locations()) {
    LocIdx Idx = Location.Idx;
    // Pick out the first predecessors live-out value for this location. It's
    // guaranteed to be not a backedge, as we order by RPO.
    ValueIDNum BaseVal = OutLocs[BlockOrders[0]->getNumber()][Idx.asU64()];

    // Some flags for whether there's a disagreement, and whether it's a
    // disagreement with a backedge or not.
    bool Disagree = false;
    bool NonBackEdgeDisagree = false;

    // Loop around everything that wasn't 'base'.
    for (unsigned int I = 1; I < BlockOrders.size(); ++I) {
      auto *MBB = BlockOrders[I];
      if (BaseVal != OutLocs[MBB->getNumber()][Idx.asU64()]) {
        // Live-out of a predecessor disagrees with the first predecessor.
        Disagree = true;

        // Test whether it's a disagreement in the backedges or not.
        if (BBToOrder.find(MBB)->second < ThisBlockRPO) // might be self b/e
          NonBackEdgeDisagree = true;
      }
    }

    bool OverRide = false;
    if (Disagree && !NonBackEdgeDisagree) {
      // Only the backedges disagree. Consider demoting the livein
      // lattice value, as per the file level comment. The value we consider
      // demoting to is the value that the non-backedge predecessors agree on.
      // The order of values is that non-PHIs are \top, a PHI at this block
      // \bot, and phis between the two are ordered by their RPO number.
      // If there's no agreement, or we've already demoted to this PHI value
      // before, replace with a PHI value at this block.

      // Calculate order numbers: zero means normal def, nonzero means RPO
      // number.
      unsigned BaseBlockRPONum = BBNumToRPO[BaseVal.getBlock()] + 1;
      if (!BaseVal.isPHI())
        BaseBlockRPONum = 0;

      ValueIDNum &InLocID = InLocs[Idx.asU64()];
      unsigned InLocRPONum = BBNumToRPO[InLocID.getBlock()] + 1;
      if (!InLocID.isPHI())
        InLocRPONum = 0;

      // Should we ignore the disagreeing backedges, and override with the
      // value the other predecessors agree on (in "base")?
      unsigned ThisBlockRPONum = BBNumToRPO[MBB.getNumber()] + 1;
      if (BaseBlockRPONum > InLocRPONum && BaseBlockRPONum < ThisBlockRPONum) {
        // Override.
        OverRide = true;
        DowngradeOccurred = true;
      }
    }
    // else: if we disagree in the non-backedges, then this is definitely
    // a control flow merge where different values merge. Make it a PHI.

    // Generate a phi...
    ValueIDNum PHI = {(uint64_t)MBB.getNumber(), 0, Idx};
    ValueIDNum NewVal = (Disagree && !OverRide) ? PHI : BaseVal;
    if (InLocs[Idx.asU64()] != NewVal) {
      Changed |= true;
      InLocs[Idx.asU64()] = NewVal;
    }
  }

  // TODO: Reimplement NumInserted and NumRemoved.
  return std::tuple<bool, bool>(Changed, DowngradeOccurred);
}

// Solve the machine-value dataflow problem: repeatedly join predecessor
// live-outs into live-ins (mlocJoin) and apply each block's transfer
// function, using an RPO-ordered worklist, until the live-ins stop changing.
// Results are stored into the MInLocs / MOutLocs array-of-arrays.
void InstrRefBasedLDV::mlocDataflow(
    ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
    SmallVectorImpl<MLocTransferMap> &MLocTransfer) {
  std::priority_queue<unsigned int, std::vector<unsigned int>,
                      std::greater<unsigned int>>
      Worklist, Pending;

  // We track what is on the current and pending worklist to avoid inserting
  // the same thing twice. We could avoid this with a custom priority queue,
  // but this is probably not worth it.
  SmallPtrSet<MachineBasicBlock *, 16> OnPending, OnWorklist;

  // Initialize worklist with every block to be visited.
  for (unsigned int I = 0; I < BBToOrder.size(); ++I) {
    Worklist.push(I);
    OnWorklist.insert(OrderToBB[I]);
  }

  MTracker->reset();

  // Set inlocs for entry block -- each as a PHI at the entry block. Represents
  // the incoming value to the function.
  MTracker->setMPhis(0);
  for (auto Location : MTracker->locations())
    MInLocs[0][Location.Idx.asU64()] = Location.Value;

  SmallPtrSet<const MachineBasicBlock *, 16> Visited;
  while (!Worklist.empty() || !Pending.empty()) {
    // Vector for storing the evaluated block transfer function.
    SmallVector<std::pair<LocIdx, ValueIDNum>, 32> ToRemap;

    while (!Worklist.empty()) {
      MachineBasicBlock *MBB = OrderToBB[Worklist.top()];
      CurBB = MBB->getNumber();
      Worklist.pop();

      // Join the values in all predecessor blocks.
      bool InLocsChanged, DowngradeOccurred;
      std::tie(InLocsChanged, DowngradeOccurred) =
          mlocJoin(*MBB, Visited, MOutLocs, MInLocs[CurBB]);
      InLocsChanged |= Visited.insert(MBB).second;

      // If a downgrade occurred, book us in for re-examination on the next
      // iteration.
      if (DowngradeOccurred && OnPending.insert(MBB).second)
        Pending.push(BBToOrder[MBB]);

      // Don't examine transfer function if we've visited this loc at least
      // once, and inlocs haven't changed.
      if (!InLocsChanged)
        continue;

      // Load the current set of live-ins into MLocTracker.
      MTracker->loadFromArray(MInLocs[CurBB], CurBB);

      // Each element of the transfer function can be a new def, or a read of
      // a live-in value. Evaluate each element, and store to "ToRemap".
      ToRemap.clear();
      for (auto &P : MLocTransfer[CurBB]) {
        if (P.second.getBlock() == CurBB && P.second.isPHI()) {
          // This is a movement of whatever was live in. Read it.
          ValueIDNum NewID = MTracker->getNumAtPos(P.second.getLoc());
          ToRemap.push_back(std::make_pair(P.first, NewID));
        } else {
          // It's a def. Just set it.
          assert(P.second.getBlock() == CurBB);
          ToRemap.push_back(std::make_pair(P.first, P.second));
        }
      }

      // Commit the transfer function changes into mloc tracker, which
      // transforms the contents of the MLocTracker into the live-outs.
      for (auto &P : ToRemap)
        MTracker->setMLoc(P.first, P.second);

      // Now copy out-locs from mloc tracker into out-loc vector, checking
      // whether changes have occurred. These changes can have come from both
      // the transfer function, and mlocJoin.
      bool OLChanged = false;
      for (auto Location : MTracker->locations()) {
        OLChanged |= MOutLocs[CurBB][Location.Idx.asU64()] != Location.Value;
        MOutLocs[CurBB][Location.Idx.asU64()] = Location.Value;
      }

      MTracker->reset();

      // No need to examine successors again if out-locs didn't change.
      if (!OLChanged)
        continue;

      // All successors should be visited: put any back-edges on the pending
      // list for the next dataflow iteration, and any other successors to be
      // visited this iteration, if they're not going to be already.
      for (auto s : MBB->successors()) {
        // Does branching to this successor represent a back-edge?
        if (BBToOrder[s] > BBToOrder[MBB]) {
          // No: visit it during this dataflow iteration.
          if (OnWorklist.insert(s).second)
            Worklist.push(BBToOrder[s]);
        } else {
          // Yes: visit it on the next iteration.
          if (OnPending.insert(s).second)
            Pending.push(BBToOrder[s]);
        }
      }
    }

    Worklist.swap(Pending);
    std::swap(OnPending, OnWorklist);
    OnPending.clear();
    // At this point, pending must be empty, since it was just the empty
    // worklist.
    assert(Pending.empty() && "Pending should be empty");
  }

  // Once all the live-ins don't change on mlocJoin(), we've reached a
  // fixed point.
}

// Decide whether the joined predecessor value (Values[0], guaranteed to be a
// non-backedge predecessor) represents a legitimate step down the exploration
// lattice from the previously computed live-in value; returns true if the old
// live-in should be overridden with it.
bool InstrRefBasedLDV::vlocDowngradeLattice(
    const MachineBasicBlock &MBB, const DbgValue &OldLiveInLocation,
    const SmallVectorImpl<InValueT> &Values, unsigned CurBlockRPONum) {
  // Ranking value preference: see file level comment, the highest rank is
  // a plain def, followed by PHI values in reverse post-order. Numerically,
  // we assign all defs the rank '0', all PHIs their blocks RPO number plus
  // one, and consider the lowest value the highest ranked.
  int OldLiveInRank = BBNumToRPO[OldLiveInLocation.ID.getBlock()] + 1;
  if (!OldLiveInLocation.ID.isPHI())
    OldLiveInRank = 0;

  // Allow any unresolvable conflict to be over-ridden.
  if (OldLiveInLocation.Kind == DbgValue::NoVal) {
    // Although if it was an unresolvable conflict from _this_ block, then
    // all other seeking of downgrades and PHIs must have failed before hand.
    if (OldLiveInLocation.BlockNo == (unsigned)MBB.getNumber())
      return false;
    OldLiveInRank = INT_MIN;
  }

  auto &InValue = *Values[0].second;

  if (InValue.Kind == DbgValue::Const || InValue.Kind == DbgValue::NoVal)
    return false;

  unsigned ThisRPO = BBNumToRPO[InValue.ID.getBlock()];
  int ThisRank = ThisRPO + 1;
  if (!InValue.ID.isPHI())
    ThisRank = 0;

  // Too far down the lattice?
  if (ThisRPO >= CurBlockRPONum)
    return false;

  // Higher in the lattice than what we've already explored?
  if (ThisRank <= OldLiveInRank)
    return false;

  return true;
}

// Try to find a machine location that, in every predecessor, holds Var's
// live-out value: such a location can host a variable-value PHI. Returns the
// PHI value number for the found location (or None), plus a flag saying
// whether the location is valid across all predecessors, or only the
// non-backedge ones.
std::tuple<Optional<ValueIDNum>, bool> InstrRefBasedLDV::pickVPHILoc(
    MachineBasicBlock &MBB, const DebugVariable &Var, const LiveIdxT &LiveOuts,
    ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
    const SmallVectorImpl<MachineBasicBlock *> &BlockOrders) {
  // Collect a set of locations from predecessor where its live-out value can
  // be found.
  SmallVector<SmallVector<LocIdx, 4>, 8> Locs;
  unsigned NumLocs = MTracker->getNumLocs();
  unsigned BackEdgesStart = 0;

  for (auto p : BlockOrders) {
    // Pick out where backedges start in the list of predecessors. Relies on
    // BlockOrders being sorted by RPO.
    if (BBToOrder[p] < BBToOrder[&MBB])
      ++BackEdgesStart;

    // For each predecessor, create a new set of locations.
    Locs.resize(Locs.size() + 1);
    unsigned ThisBBNum = p->getNumber();
    auto LiveOutMap = LiveOuts.find(p);
    if (LiveOutMap == LiveOuts.end())
      // This predecessor isn't in scope, it must have no live-in/live-out
      // locations.
      continue;

    auto It = LiveOutMap->second->find(Var);
    if (It == LiveOutMap->second->end())
      // There's no value recorded for this variable in this predecessor,
      // leave an empty set of locations.
      continue;

    const DbgValue &OutVal = It->second;

    if (OutVal.Kind == DbgValue::Const || OutVal.Kind == DbgValue::NoVal)
      // Consts and no-values cannot have locations we can join on.
      continue;

    assert(OutVal.Kind == DbgValue::Proposed || OutVal.Kind == DbgValue::Def);
    ValueIDNum ValToLookFor = OutVal.ID;

    // Search the live-outs of the predecessor for the specified value.
    for (unsigned int I = 0; I < NumLocs; ++I) {
      if (MOutLocs[ThisBBNum][I] == ValToLookFor)
        Locs.back().push_back(LocIdx(I));
    }
  }

  // If there were no locations at all, return an empty result.
  if (Locs.empty())
    return std::tuple<Optional<ValueIDNum>, bool>(None, false);

  // Lambda for seeking a common location within a range of location-sets.
  using LocsIt = SmallVector<SmallVector<LocIdx, 4>, 8>::iterator;
  auto SeekLocation =
      [&Locs](llvm::iterator_range<LocsIt> SearchRange) -> Optional<LocIdx> {
    // Starting with the first set of locations, take the intersection with
    // subsequent sets.
    SmallVector<LocIdx, 4> base = Locs[0];
    for (auto &S : SearchRange) {
      SmallVector<LocIdx, 4> new_base;
      std::set_intersection(base.begin(), base.end(), S.begin(), S.end(),
                            std::inserter(new_base, new_base.begin()));
      base = new_base;
    }
    if (base.empty())
      return None;

    // We now have a set of LocIdxes that contain the right output value in
    // each of the predecessors. Pick the lowest; if there's a register loc,
    // that'll be it.
    return *base.begin();
  };

  // Search for a common location for all predecessors. If we can't, then fall
  // back to only finding a common location between non-backedge predecessors.
  bool ValidForAllLocs = true;
  auto TheLoc = SeekLocation(Locs);
  if (!TheLoc) {
    ValidForAllLocs = false;
    TheLoc =
        SeekLocation(make_range(Locs.begin(), Locs.begin() + BackEdgesStart));
  }

  if (!TheLoc)
    return std::tuple<Optional<ValueIDNum>, bool>(None, false);

  // Return a PHI-value-number for the found location.
  LocIdx L = *TheLoc;
  ValueIDNum PHIVal = {(unsigned)MBB.getNumber(), 0, L};
  return std::tuple<Optional<ValueIDNum>, bool>(PHIVal, ValidForAllLocs);
}

// Join the predecessor live-out variable values of MBB into live-ins,
// written to InLocsT. Returns whether the live-ins changed and whether a
// lattice downgrade occurred (booking the block for re-examination).
std::tuple<bool, bool> InstrRefBasedLDV::vlocJoin(
    MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs, LiveIdxT &VLOCInLocs,
    SmallPtrSet<const MachineBasicBlock *, 16> *VLOCVisited, unsigned BBNum,
    const SmallSet<DebugVariable, 4> &AllVars, ValueIDNum **MOutLocs,
    ValueIDNum **MInLocs,
    SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
    SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
    DenseMap<DebugVariable, DbgValue> &InLocsT) {
  bool DowngradeOccurred = false;

  // To emulate VarLocBasedImpl, process this block if it's not in scope but
  // _does_ assign a variable value. No live-ins for this scope are transferred
  // in though, so we can return immediately.
  if (InScopeBlocks.count(&MBB) == 0 && !ArtificialBlocks.count(&MBB)) {
    if (VLOCVisited)
      return std::tuple<bool, bool>(true, false);
    return std::tuple<bool, bool>(false, false);
  }

  LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
  bool Changed = false;

  // Find any live-ins computed in a prior iteration.
  auto ILSIt = VLOCInLocs.find(&MBB);
  assert(ILSIt != VLOCInLocs.end());
  auto &ILS = *ILSIt->second;

  // Order predecessors by RPOT order, for exploring them in that order.
  SmallVector<MachineBasicBlock *, 8> BlockOrders;
  for (auto p : MBB.predecessors())
    BlockOrders.push_back(p);

  auto Cmp = [&](MachineBasicBlock *A, MachineBasicBlock *B) {
    return BBToOrder[A] < BBToOrder[B];
  };

  llvm::sort(BlockOrders, Cmp);

  unsigned CurBlockRPONum = BBToOrder[&MBB];

  // Force a re-visit to loop heads in the first dataflow iteration.
  // FIXME: if we could "propose" Const values this wouldn't be needed,
  // because they'd need to be confirmed before being emitted.
  if (!BlockOrders.empty() &&
      BBToOrder[BlockOrders[BlockOrders.size() - 1]] >= CurBlockRPONum &&
      VLOCVisited)
    DowngradeOccurred = true;

  // Record a definitive live-in value for a variable in InLocsT; each
  // variable must be confirmed at most once per join.
  auto ConfirmValue = [&InLocsT](const DebugVariable &DV, DbgValue VR) {
    auto Result = InLocsT.insert(std::make_pair(DV, VR));
    (void)Result;
    assert(Result.second);
  };

  // Record that no value could be resolved for a variable in this block.
  auto ConfirmNoVal = [&ConfirmValue, &MBB](const DebugVariable &Var,
                                            const DbgValueProperties &Properties) {
    DbgValue NoLocPHIVal(MBB.getNumber(), Properties, DbgValue::NoVal);

    ConfirmValue(Var, NoLocPHIVal);
  };

  // Attempt to join the values for each variable.
  for (auto &Var : AllVars) {
    // Collect all the DbgValues for this variable.
    SmallVector<InValueT, 8> Values;
    bool Bail = false;
    unsigned BackEdgesStart = 0;
    for (auto p : BlockOrders) {
      // If the predecessor isn't in scope / to be explored, we'll never be
      // able to join any locations.
      if (!BlocksToExplore.contains(p)) {
        Bail = true;
        break;
      }

      // Don't attempt to handle unvisited predecessors: they're implicitly
      // "unknown"s in the lattice.
      if (VLOCVisited && !VLOCVisited->count(p))
        continue;

      // If the predecessors OutLocs is absent, there's not much we can do.
      auto OL = VLOCOutLocs.find(p);
      if (OL == VLOCOutLocs.end()) {
        Bail = true;
        break;
      }

      // No live-out value for this predecessor also means we can't produce
      // a joined value.
      auto VIt = OL->second->find(Var);
      if (VIt == OL->second->end()) {
        Bail = true;
        break;
      }

      // Keep track of where back-edges begin in the Values vector. Relies on
      // BlockOrders being sorted by RPO.
      unsigned ThisBBRPONum = BBToOrder[p];
      if (ThisBBRPONum < CurBlockRPONum)
        ++BackEdgesStart;

      Values.push_back(std::make_pair(p, &VIt->second));
    }

    // If there were no values, or one of the predecessors couldn't have a
    // value, then give up immediately. It's not safe to produce a live-in
    // value.
    if (Bail || Values.size() == 0)
      continue;

    // Enumeration identifying the current state of the predecessors values.
    enum {
      Unset = 0,
      Agreed,       // All preds agree on the variable value.
      PropDisagree, // All preds agree, but the value kind is Proposed in some.
      BEDisagree,   // Only back-edges disagree on variable value.
      PHINeeded,    // Non-back-edge predecessors have conflicting values.
      NoSolution    // Conflicting Value metadata makes solution impossible.
    } OurState = Unset;

    // All (non-entry) blocks have at least one non-backedge predecessor.
    // Pick the variable value from the first of these, to compare against
    // all others.
    const DbgValue &FirstVal = *Values[0].second;
    const ValueIDNum &FirstID = FirstVal.ID;

    // Scan for variable values that can't be resolved: if they have different
    // DIExpressions, different indirectness, or are mixed constants /
    // non-constants.
    for (auto &V : Values) {
      if (V.second->Properties != FirstVal.Properties)
        OurState = NoSolution;
      if (V.second->Kind == DbgValue::Const && FirstVal.Kind != DbgValue::Const)
        OurState = NoSolution;
    }

    // Flags diagnosing _how_ the values disagree.
    bool NonBackEdgeDisagree = false;
    bool DisagreeOnPHINess = false;
    bool IDDisagree = false;
    bool Disagree = false;
    if (OurState == Unset) {
      for (auto &V : Values) {
        if (*V.second == FirstVal)
          continue; // No disagreement.

        Disagree = true;

        // Flag whether the value number actually disagrees.
        if (V.second->ID != FirstID)
          IDDisagree = true;

        // Distinguish whether disagreement happens in backedges or not.
        // Relies on Values (and BlockOrders) being sorted by RPO.
        unsigned ThisBBRPONum = BBToOrder[V.first];
        if (ThisBBRPONum < CurBlockRPONum)
          NonBackEdgeDisagree = true;

        // Is there a difference in whether the value is definite or only
        // proposed?
        if (V.second->Kind != FirstVal.Kind &&
            (V.second->Kind == DbgValue::Proposed ||
             V.second->Kind == DbgValue::Def) &&
            (FirstVal.Kind == DbgValue::Proposed ||
             FirstVal.Kind == DbgValue::Def))
          DisagreeOnPHINess = true;
      }

      // Collect those flags together and determine an overall state for
      // what extent the predecessors agree on a live-in value.
      if (!Disagree)
        OurState = Agreed;
      else if (!IDDisagree && DisagreeOnPHINess)
        OurState = PropDisagree;
      else if (!NonBackEdgeDisagree)
        OurState = BEDisagree;
      else
        OurState = PHINeeded;
    }

    // An extra indicator: if we only disagree on whether the value is a
    // Def, or proposed, then also flag whether that disagreement happens
    // in backedges only.
    bool PropOnlyInBEs = Disagree && !IDDisagree && DisagreeOnPHINess &&
                         !NonBackEdgeDisagree && FirstVal.Kind == DbgValue::Def;

    const auto &Properties = FirstVal.Properties;

    auto OldLiveInIt = ILS.find(Var);
    const DbgValue *OldLiveInLocation =
        (OldLiveInIt != ILS.end()) ? &OldLiveInIt->second : nullptr;

    bool OverRide = false;
    if (OurState == BEDisagree && OldLiveInLocation) {
      // Only backedges disagree: we can consider downgrading. If there was a
      // previous live-in value, use it to work out whether the current
      // incoming value represents a lattice downgrade or not.
      OverRide =
          vlocDowngradeLattice(MBB, *OldLiveInLocation, Values, CurBlockRPONum);
    }

    // Use the current state of predecessor agreement and other flags to work
    // out what to do next. Possibilities include:
    //  * Accept a value all predecessors agree on, or accept one that
    //    represents a step down the exploration lattice,
    //  * Use a PHI value number, if one can be found,
    //  * Propose a PHI value number, and see if it gets confirmed later,
    //  * Emit a 'NoVal' value, indicating we couldn't resolve anything.
    if (OurState == Agreed) {
      // Easiest solution: all predecessors agree on the variable value.
      ConfirmValue(Var, FirstVal);
    } else if (OurState == BEDisagree && OverRide) {
      // Only backedges disagree, and the other predecessors have produced
      // a new live-in value further down the exploration lattice.
      DowngradeOccurred = true;
      ConfirmValue(Var, FirstVal);
    } else if (OurState == PropDisagree) {
      // Predecessors agree on value, but some say it's only a proposed value.
      // Propagate it as proposed: unless it was proposed in this block, in
      // which case we're able to confirm the value.
      if (FirstID.getBlock() == (uint64_t)MBB.getNumber() && FirstID.isPHI()) {
        ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Def));
      } else if (PropOnlyInBEs) {
        // If only backedges disagree, a higher (in RPO) block confirmed this
        // location, and we need to propagate it into this loop.
        ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Def));
      } else {
        // Otherwise; a Def meeting a Proposed is still a Proposed.
        ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Proposed));
      }
    } else if ((OurState == PHINeeded || OurState == BEDisagree)) {
      // Predecessors disagree and can't be downgraded: this can only be
      // solved with a PHI. Use pickVPHILoc to go look for one.
      Optional<ValueIDNum> VPHI;
      bool AllEdgesVPHI = false;
      std::tie(VPHI, AllEdgesVPHI) =
          pickVPHILoc(MBB, Var, VLOCOutLocs, MOutLocs, MInLocs, BlockOrders);

      if (VPHI && AllEdgesVPHI) {
        // There's a PHI value that's valid for all predecessors -- we can use
        // it. If any of the non-backedge predecessors have proposed values
        // though, this PHI is also only proposed, until the predecessors are
        // confirmed.
        DbgValue::KindT K = DbgValue::Def;
        for (unsigned int I = 0; I < BackEdgesStart; ++I)
          if (Values[I].second->Kind == DbgValue::Proposed)
            K = DbgValue::Proposed;

        ConfirmValue(Var, DbgValue(*VPHI, Properties, K));
      } else if (VPHI) {
        // There's a PHI value, but it's only legal for backedges.
        // Leave this as a proposed PHI value: it might come back on the
        // backedges, and allow us to confirm it in the future.
        DbgValue NoBEValue = DbgValue(*VPHI, Properties, DbgValue::Proposed);
        ConfirmValue(Var, NoBEValue);
      } else {
        ConfirmNoVal(Var, Properties);
      }
    } else {
      // Otherwise: we don't know. Emit a "phi but no real loc" phi.
      ConfirmNoVal(Var, Properties);
    }
  }

  // Store newly calculated in-locs into VLOCInLocs, if they've changed.
  Changed = ILS != InLocsT;
  if (Changed)
    ILS = InLocsT;

  return std::tuple<bool, bool>(Changed, DowngradeOccurred);
}

void InstrRefBasedLDV::vlocDataflow(
    const LexicalScope *Scope, const DILocation *DILoc,
    const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
    SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks, LiveInsT &Output,
    ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
    SmallVectorImpl<VLocTracker> &AllTheVLocs) {
  // This method is much like mlocDataflow: but focuses on a single
  // LexicalScope at a time. Pick out a set of blocks and variables that are
  // to have their value assignments solved, then run our dataflow algorithm
  // until a fixed point is reached.
  std::priority_queue<unsigned int, std::vector<unsigned int>,
                      std::greater<unsigned int>>
      Worklist, Pending;
  SmallPtrSet<MachineBasicBlock *, 16> OnWorklist, OnPending;

  // The set of blocks we'll be examining.
  SmallPtrSet<const MachineBasicBlock *, 8> BlocksToExplore;

  // The order in which to examine them (RPO).
  SmallVector<MachineBasicBlock *, 8> BlockOrders;

  // RPO ordering function.
  auto Cmp = [&](MachineBasicBlock *A, MachineBasicBlock *B) {
    return BBToOrder[A] < BBToOrder[B];
  };

  LS.getMachineBasicBlocks(DILoc, BlocksToExplore);

  // A separate container to distinguish "blocks we're exploring" versus
  // "blocks that are potentially in scope". See comment at start of vlocJoin.
  SmallPtrSet<const MachineBasicBlock *, 8> InScopeBlocks = BlocksToExplore;

  // Old LiveDebugValues tracks variable locations that come out of blocks
  // not in scope, where DBG_VALUEs occur. This is something we could
  // legitimately ignore, but let's allow it for now.
  if (EmulateOldLDV)
    BlocksToExplore.insert(AssignBlocks.begin(), AssignBlocks.end());

  // We also need to propagate variable values through any artificial blocks
  // that immediately follow blocks in scope.
  DenseSet<const MachineBasicBlock *> ToAdd;

  // Helper lambda: For a given block in scope, perform a depth first search
  // of all the artificial successors, adding them to the ToAdd collection.
  auto AccumulateArtificialBlocks =
      [this, &ToAdd, &BlocksToExplore,
       &InScopeBlocks](const MachineBasicBlock *MBB) {
        // Depth-first-search state: each node is a block and which successor
        // we're currently exploring.
        SmallVector<std::pair<const MachineBasicBlock *,
                              MachineBasicBlock::const_succ_iterator>,
                    8>
            DFS;

        // Find any artificial successors not already tracked.
        for (auto *succ : MBB->successors()) {
          if (BlocksToExplore.count(succ) || InScopeBlocks.count(succ))
            continue;
          if (!ArtificialBlocks.count(succ))
            continue;
          DFS.push_back(std::make_pair(succ, succ->succ_begin()));
          ToAdd.insert(succ);
        }

        // Search all those blocks, depth first.
        while (!DFS.empty()) {
          const MachineBasicBlock *CurBB = DFS.back().first;
          MachineBasicBlock::const_succ_iterator &CurSucc = DFS.back().second;
          // Walk back if we've explored this blocks successors to the end.
          if (CurSucc == CurBB->succ_end()) {
            DFS.pop_back();
            continue;
          }

          // If the current successor is artificial and unexplored, descend
          // into it.
          if (!ToAdd.count(*CurSucc) && ArtificialBlocks.count(*CurSucc)) {
            DFS.push_back(std::make_pair(*CurSucc, (*CurSucc)->succ_begin()));
            ToAdd.insert(*CurSucc);
            continue;
          }

          ++CurSucc;
        }
      };

  // Search in-scope blocks and those containing a DBG_VALUE from this scope
  // for artificial successors.
  for (auto *MBB : BlocksToExplore)
    AccumulateArtificialBlocks(MBB);
  for (auto *MBB : InScopeBlocks)
    AccumulateArtificialBlocks(MBB);

  BlocksToExplore.insert(ToAdd.begin(), ToAdd.end());
  InScopeBlocks.insert(ToAdd.begin(), ToAdd.end());

  // Single block scope: not interesting! No propagation at all. Note that
  // this could probably go above ArtificialBlocks without damage, but
  // that then produces output differences from original-live-debug-values,
  // which propagates from a single block into many artificial ones.
  if (BlocksToExplore.size() == 1)
    return;

  // Pick out the relevant blocks in RPO order and sort them.
  for (auto *MBB : BlocksToExplore)
    BlockOrders.push_back(const_cast<MachineBasicBlock *>(MBB));

  llvm::sort(BlockOrders, Cmp);
  unsigned NumBlocks = BlockOrders.size();

  // Allocate some vectors for storing the live ins and live outs. Large.
  SmallVector<DenseMap<DebugVariable, DbgValue>, 32> LiveIns, LiveOuts;
  LiveIns.resize(NumBlocks);
  LiveOuts.resize(NumBlocks);

  // Produce by-MBB indexes of live-in/live-outs, to ease lookup within
  // vlocJoin.
  LiveIdxT LiveOutIdx, LiveInIdx;
  LiveOutIdx.reserve(NumBlocks);
  LiveInIdx.reserve(NumBlocks);
  for (unsigned I = 0; I < NumBlocks; ++I) {
    LiveOutIdx[BlockOrders[I]] = &LiveOuts[I];
    LiveInIdx[BlockOrders[I]] = &LiveIns[I];
  }

  for (auto *MBB : BlockOrders) {
    Worklist.push(BBToOrder[MBB]);
    OnWorklist.insert(MBB);
  }

  // Iterate over all the blocks we selected, propagating variable values.
  bool FirstTrip = true;
  SmallPtrSet<const MachineBasicBlock *, 16> VLOCVisited;
  while (!Worklist.empty() || !Pending.empty()) {
    while (!Worklist.empty()) {
      auto *MBB = OrderToBB[Worklist.top()];
      CurBB = MBB->getNumber();
      Worklist.pop();

      DenseMap<DebugVariable, DbgValue> JoinedInLocs;

      // Join values from predecessors. Updates LiveInIdx, and writes output
      // into JoinedInLocs.
      bool InLocsChanged, DowngradeOccurred;
      std::tie(InLocsChanged, DowngradeOccurred) = vlocJoin(
          *MBB, LiveOutIdx, LiveInIdx, (FirstTrip) ? &VLOCVisited : nullptr,
          CurBB, VarsWeCareAbout, MOutLocs, MInLocs, InScopeBlocks,
          BlocksToExplore, JoinedInLocs);

      bool FirstVisit = VLOCVisited.insert(MBB).second;

      // Always explore transfer function if inlocs changed, or if we've not
      // visited this block before.
      InLocsChanged |= FirstVisit;

      // If a downgrade occurred, book us in for re-examination on the next
      // iteration.
      if (DowngradeOccurred && OnPending.insert(MBB).second)
        Pending.push(BBToOrder[MBB]);

      if (!InLocsChanged)
        continue;

      // Do transfer function.
      auto &VTracker = AllTheVLocs[MBB->getNumber()];
      for (auto &Transfer : VTracker.Vars) {
        // Is this var we're mangling in this scope?
        if (VarsWeCareAbout.count(Transfer.first)) {
          // Erase on empty transfer (DBG_VALUE $noreg).
          if (Transfer.second.Kind == DbgValue::Undef) {
            JoinedInLocs.erase(Transfer.first);
          } else {
            // Insert new variable value; or overwrite.
            auto NewValuePair = std::make_pair(Transfer.first, Transfer.second);
            auto Result = JoinedInLocs.insert(NewValuePair);
            if (!Result.second)
              Result.first->second = Transfer.second;
          }
        }
      }

      // Did the live-out locations change?
      bool OLChanged = JoinedInLocs != *LiveOutIdx[MBB];

      // If they haven't changed, there's no need to explore further.
      if (!OLChanged)
        continue;

      // Commit to the live-out record.
      *LiveOutIdx[MBB] = JoinedInLocs;

      // We should visit all successors.
      // Ensure we'll visit any non-backedge successors during this dataflow
      // iteration; book backedge successors to be visited next time around.
      for (auto s : MBB->successors()) {
        // Ignore out of scope / not-to-be-explored successors.
        if (LiveInIdx.find(s) == LiveInIdx.end())
          continue;

        if (BBToOrder[s] > BBToOrder[MBB]) {
          if (OnWorklist.insert(s).second)
            Worklist.push(BBToOrder[s]);
        } else if (OnPending.insert(s).second && (FirstTrip || OLChanged)) {
          Pending.push(BBToOrder[s]);
        }
      }
    }
    Worklist.swap(Pending);
    std::swap(OnWorklist, OnPending);
    OnPending.clear();
    assert(Pending.empty());
    FirstTrip = false;
  }

  // Dataflow done. Now what? Save live-ins. Ignore any that are still marked
  // as being variable-PHIs, because those did not have their machine-PHI
  // value confirmed. Such variable values are places that could have been
  // PHIs, but are not.
  for (auto *MBB : BlockOrders) {
    auto &VarMap = *LiveInIdx[MBB];
    for (auto &P : VarMap) {
      if (P.second.Kind == DbgValue::Proposed ||
          P.second.Kind == DbgValue::NoVal)
        continue;
      Output[MBB->getNumber()].push_back(P);
    }
  }

  BlockOrders.clear();
  BlocksToExplore.clear();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug helper: print each entry of a machine-location transfer function.
void InstrRefBasedLDV::dump_mloc_transfer(
    const MLocTransferMap &mloc_transfer) const {
  for (auto &P : mloc_transfer) {
    std::string foo = MTracker->LocIdxToName(P.first);
    std::string bar = MTracker->IDAsString(P.second);
    dbgs() << "Loc " << foo << " --> " << bar << "\n";
  }
}
#endif

// Walk the function again with the solved machine-value live-ins loaded,
// letting the TransferTracker create the DBG_VALUEs to insert, then insert
// them into the function in a stable order.
void InstrRefBasedLDV::emitLocations(
    MachineFunction &MF, LiveInsT SavedLiveIns, ValueIDNum **MInLocs,
    DenseMap<DebugVariable, unsigned> &AllVarsNumbering) {
  TTracker = new TransferTracker(TII, MTracker, MF, *TRI, CalleeSavedRegs);
  unsigned NumLocs = MTracker->getNumLocs();

  // For each block, load in the machine value locations and variable value
  // live-ins, then step through each instruction in the block. New DBG_VALUEs
  // to be inserted will be created along the way.
  for (MachineBasicBlock &MBB : MF) {
    unsigned bbnum = MBB.getNumber();
    MTracker->reset();
    MTracker->loadFromArray(MInLocs[bbnum], bbnum);
    TTracker->loadInlocs(MBB, MInLocs[bbnum], SavedLiveIns[MBB.getNumber()],
                         NumLocs);

    CurBB = bbnum;
    CurInst = 1;
    for (auto &MI : MBB) {
      process(MI);
      TTracker->checkInstForNewValues(CurInst, MI.getIterator());
      ++CurInst;
    }
  }

  // We have to insert DBG_VALUEs in a consistent order, otherwise they appear
  // in DWARF in different orders. Use the order that they appear when walking
  // through each block / each instruction, stored in AllVarsNumbering.
  auto OrderDbgValues = [&](const MachineInstr *A,
                            const MachineInstr *B) -> bool {
    DebugVariable VarA(A->getDebugVariable(), A->getDebugExpression(),
                       A->getDebugLoc()->getInlinedAt());
    DebugVariable VarB(B->getDebugVariable(), B->getDebugExpression(),
                       B->getDebugLoc()->getInlinedAt());
    return AllVarsNumbering.find(VarA)->second <
           AllVarsNumbering.find(VarB)->second;
  };

  // Go through all the transfers recorded in the TransferTracker -- this is
  // both the live-ins to a block, and any movements of values that happen
  // in the middle.
  for (auto &P : TTracker->Transfers) {
    // Sort them according to appearance order.
    llvm::sort(P.Insts, OrderDbgValues);
    // Insert either before or after the designated point...
    if (P.MBB) {
      MachineBasicBlock &MBB = *P.MBB;
      for (auto *MI : P.Insts) {
        MBB.insert(P.Pos, MI);
      }
    } else {
      MachineBasicBlock &MBB = *P.Pos->getParent();
      for (auto *MI : P.Insts) {
        MBB.insertAfter(P.Pos, MI);
      }
    }
  }
}

// Precompute per-function data structures: the set of artificial blocks
// (blocks containing only line-zero locations) and block <=> RPO mappings.
void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {
  // Build some useful data structures.
  auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool {
    if (const DebugLoc &DL = MI.getDebugLoc())
      return DL.getLine() != 0;
    return false;
  };
  // Collect a set of all the artificial blocks.
+ for (auto &MBB : MF) + if (none_of(MBB.instrs(), hasNonArtificialLocation)) + ArtificialBlocks.insert(&MBB); + + // Compute mappings of block <=> RPO order. + ReversePostOrderTraversal<MachineFunction *> RPOT(&MF); + unsigned int RPONumber = 0; + for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) { + OrderToBB[RPONumber] = *RI; + BBToOrder[*RI] = RPONumber; + BBNumToRPO[(*RI)->getNumber()] = RPONumber; + ++RPONumber; + } +} + +/// Calculate the liveness information for the given machine function and +/// extend ranges across basic blocks. +bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, + TargetPassConfig *TPC) { + // No subprogram means this function contains no debuginfo. + if (!MF.getFunction().getSubprogram()) + return false; + + LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n"); + this->TPC = TPC; + + TRI = MF.getSubtarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TFI = MF.getSubtarget().getFrameLowering(); + TFI->getCalleeSaves(MF, CalleeSavedRegs); + LS.initialize(MF); + + MTracker = + new MLocTracker(MF, *TII, *TRI, *MF.getSubtarget().getTargetLowering()); + VTracker = nullptr; + TTracker = nullptr; + + SmallVector<MLocTransferMap, 32> MLocTransfer; + SmallVector<VLocTracker, 8> vlocs; + LiveInsT SavedLiveIns; + + int MaxNumBlocks = -1; + for (auto &MBB : MF) + MaxNumBlocks = std::max(MBB.getNumber(), MaxNumBlocks); + assert(MaxNumBlocks >= 0); + ++MaxNumBlocks; + + MLocTransfer.resize(MaxNumBlocks); + vlocs.resize(MaxNumBlocks); + SavedLiveIns.resize(MaxNumBlocks); + + initialSetup(MF); + + produceMLocTransferFunction(MF, MLocTransfer, MaxNumBlocks); + + // Allocate and initialize two array-of-arrays for the live-in and live-out + // machine values. The outer dimension is the block number; while the inner + // dimension is a LocIdx from MLocTracker. 
+ ValueIDNum **MOutLocs = new ValueIDNum *[MaxNumBlocks]; + ValueIDNum **MInLocs = new ValueIDNum *[MaxNumBlocks]; + unsigned NumLocs = MTracker->getNumLocs(); + for (int i = 0; i < MaxNumBlocks; ++i) { + MOutLocs[i] = new ValueIDNum[NumLocs]; + MInLocs[i] = new ValueIDNum[NumLocs]; + } + + // Solve the machine value dataflow problem using the MLocTransfer function, + // storing the computed live-ins / live-outs into the array-of-arrays. We use + // both live-ins and live-outs for decision making in the variable value + // dataflow problem. + mlocDataflow(MInLocs, MOutLocs, MLocTransfer); + + // Walk back through each block / instruction, collecting DBG_VALUE + // instructions and recording what machine value their operands refer to. + for (auto &OrderPair : OrderToBB) { + MachineBasicBlock &MBB = *OrderPair.second; + CurBB = MBB.getNumber(); + VTracker = &vlocs[CurBB]; + VTracker->MBB = &MBB; + MTracker->loadFromArray(MInLocs[CurBB], CurBB); + CurInst = 1; + for (auto &MI : MBB) { + process(MI); + ++CurInst; + } + MTracker->reset(); + } + + // Number all variables in the order that they appear, to be used as a stable + // insertion order later. + DenseMap<DebugVariable, unsigned> AllVarsNumbering; + + // Map from one LexicalScope to all the variables in that scope. + DenseMap<const LexicalScope *, SmallSet<DebugVariable, 4>> ScopeToVars; + + // Map from One lexical scope to all blocks in that scope. + DenseMap<const LexicalScope *, SmallPtrSet<MachineBasicBlock *, 4>> + ScopeToBlocks; + + // Store a DILocation that describes a scope. + DenseMap<const LexicalScope *, const DILocation *> ScopeToDILocation; + + // To mirror old LiveDebugValues, enumerate variables in RPOT order. Otherwise + // the order is unimportant, it just has to be stable. + for (unsigned int I = 0; I < OrderToBB.size(); ++I) { + auto *MBB = OrderToBB[I]; + auto *VTracker = &vlocs[MBB->getNumber()]; + // Collect each variable with a DBG_VALUE in this block. 
+ for (auto &idx : VTracker->Vars) { + const auto &Var = idx.first; + const DILocation *ScopeLoc = VTracker->Scopes[Var]; + assert(ScopeLoc != nullptr); + auto *Scope = LS.findLexicalScope(ScopeLoc); + + // No insts in scope -> shouldn't have been recorded. + assert(Scope != nullptr); + + AllVarsNumbering.insert(std::make_pair(Var, AllVarsNumbering.size())); + ScopeToVars[Scope].insert(Var); + ScopeToBlocks[Scope].insert(VTracker->MBB); + ScopeToDILocation[Scope] = ScopeLoc; + } + } + + // OK. Iterate over scopes: there might be something to be said for + // ordering them by size/locality, but that's for the future. For each scope, + // solve the variable value problem, producing a map of variables to values + // in SavedLiveIns. + for (auto &P : ScopeToVars) { + vlocDataflow(P.first, ScopeToDILocation[P.first], P.second, + ScopeToBlocks[P.first], SavedLiveIns, MOutLocs, MInLocs, + vlocs); + } + + // Using the computed value locations and variable values for each block, + // create the DBG_VALUE instructions representing the extended variable + // locations. + emitLocations(MF, SavedLiveIns, MInLocs, AllVarsNumbering); + + for (int Idx = 0; Idx < MaxNumBlocks; ++Idx) { + delete[] MOutLocs[Idx]; + delete[] MInLocs[Idx]; + } + delete[] MOutLocs; + delete[] MInLocs; + + // Did we actually make any changes? If we created any DBG_VALUEs, then yes. 
+ bool Changed = TTracker->Transfers.size() != 0; + + delete MTracker; + delete TTracker; + MTracker = nullptr; + VTracker = nullptr; + TTracker = nullptr; + + ArtificialBlocks.clear(); + OrderToBB.clear(); + BBToOrder.clear(); + BBNumToRPO.clear(); + DebugInstrNumToInstr.clear(); + + return Changed; +} + +LDVImpl *llvm::makeInstrRefBasedLiveDebugValues() { + return new InstrRefBasedLDV(); +} diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp new file mode 100644 index 000000000000..770c46ec8436 --- /dev/null +++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp @@ -0,0 +1,97 @@ +//===- LiveDebugValues.cpp - Tracking Debug Value MIs ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "LiveDebugValues.h" + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Target/TargetMachine.h" + +/// \file LiveDebugValues.cpp +/// +/// The LiveDebugValues pass extends the range of variable locations +/// (specified by DBG_VALUE instructions) from single blocks to successors +/// and any other code locations where the variable location is valid. +/// There are currently two implementations: the "VarLoc" implementation +/// explicitly tracks the location of a variable, while the "InstrRef" +/// implementation tracks the values defined by instructions through locations. +/// +/// This file implements neither; it merely registers the pass, allows the +/// user to pick which implementation will be used to propagate variable +/// locations. 
+ +#define DEBUG_TYPE "livedebugvalues" + +using namespace llvm; + +/// Generic LiveDebugValues pass. Calls through to VarLocBasedLDV or +/// InstrRefBasedLDV to perform location propagation, via the LDVImpl +/// base class. +class LiveDebugValues : public MachineFunctionPass { +public: + static char ID; + + LiveDebugValues(); + ~LiveDebugValues() { + if (TheImpl) + delete TheImpl; + } + + /// Calculate the liveness information for the given machine function. + bool runOnMachineFunction(MachineFunction &MF) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + +private: + LDVImpl *TheImpl; + TargetPassConfig *TPC; +}; + +char LiveDebugValues::ID = 0; + +char &llvm::LiveDebugValuesID = LiveDebugValues::ID; + +INITIALIZE_PASS(LiveDebugValues, DEBUG_TYPE, "Live DEBUG_VALUE analysis", false, + false) + +/// Default construct and initialize the pass. 
+LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) { + initializeLiveDebugValuesPass(*PassRegistry::getPassRegistry()); + TheImpl = nullptr; +} + +bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { + if (!TheImpl) { + TPC = getAnalysisIfAvailable<TargetPassConfig>(); + + bool InstrRefBased = false; + if (TPC) { + auto &TM = TPC->getTM<TargetMachine>(); + InstrRefBased = TM.Options.ValueTrackingVariableLocations; + } + + if (InstrRefBased) + TheImpl = llvm::makeInstrRefBasedLiveDebugValues(); + else + TheImpl = llvm::makeVarLocBasedLiveDebugValues(); + } + + return TheImpl->ExtendRanges(MF, TPC); +} diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h new file mode 100644 index 000000000000..6b05bc68d74d --- /dev/null +++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h @@ -0,0 +1,32 @@ +//===- LiveDebugValues.cpp - Tracking Debug Value MIs ---------*- C++ -*---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/TargetPassConfig.h" + +namespace llvm { + +// Inline namespace for types / symbols shared between different +// LiveDebugValues implementations. +inline namespace SharedLiveDebugValues { + +// Expose a base class for LiveDebugValues interfaces to inherit from. This +// allows the generic LiveDebugValues pass handles to call into the +// implementation. +class LDVImpl { +public: + virtual bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) = 0; + virtual ~LDVImpl() {} +}; + +} // namespace SharedLiveDebugValues + +// Factory functions for LiveDebugValues implementations. 
+extern LDVImpl *makeVarLocBasedLiveDebugValues(); +extern LDVImpl *makeInstrRefBasedLiveDebugValues(); +} // namespace llvm diff --git a/llvm/lib/CodeGen/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp index 07a275b546f6..e2daa46fe6b9 100644 --- a/llvm/lib/CodeGen/LiveDebugValues.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp @@ -1,4 +1,4 @@ -//===- LiveDebugValues.cpp - Tracking Debug Value MIs ---------------------===// +//===- VarLocBasedImpl.cpp - Tracking Debug Value MIs with VarLoc class----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// -/// \file LiveDebugValues.cpp +/// \file VarLocBasedImpl.cpp /// /// LiveDebugValues is an optimistic "available expressions" dataflow /// algorithm. The set of expressions is the set of machine locations @@ -17,7 +17,12 @@ /// DebugVariable, and continues until that location is clobbered or /// re-specified by a different DBG_VALUE for the same DebugVariable. /// -/// The cannonical "available expressions" problem doesn't have expression +/// The output of LiveDebugValues is additional DBG_VALUE instructions, +/// placed to extend variable locations as far as they're available. This file +/// and the VarLocBasedLDV class are an implementation that explicitly tracks +/// locations, using the VarLoc class. +/// +/// The canonical "available expressions" problem doesn't have expression /// clobbering, instead when a variable is re-assigned, any expressions using /// that variable get invalidated.
LiveDebugValues can map onto "available /// expressions" by having every register represented by a variable, which is @@ -101,6 +106,8 @@ /// //===----------------------------------------------------------------------===// +#include "LiveDebugValues.h" + #include "llvm/ADT/CoalescingBitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" @@ -138,6 +145,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/TypeSize.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include <algorithm> @@ -264,11 +272,12 @@ struct LocIndex { } }; -class LiveDebugValues : public MachineFunctionPass { +class VarLocBasedLDV : public LDVImpl { private: const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; const TargetFrameLowering *TFI; + TargetPassConfig *TPC; BitVector CalleeSavedRegs; LexicalScopes LS; VarLocSet::Allocator Alloc; @@ -284,7 +293,7 @@ private: // register and an offset. struct SpillLoc { unsigned SpillBase; - int SpillOffset; + StackOffset SpillOffset; bool operator==(const SpillLoc &Other) const { return SpillBase == Other.SpillBase && SpillOffset == Other.SpillOffset; } @@ -315,21 +324,20 @@ private: /// The value location. Stored separately to avoid repeatedly /// extracting it from MI. 
- union { + union LocUnion { uint64_t RegNo; SpillLoc SpillLocation; uint64_t Hash; int64_t Immediate; const ConstantFP *FPImm; const ConstantInt *CImm; + LocUnion() : Hash(0) {} } Loc; VarLoc(const MachineInstr &MI, LexicalScopes &LS) : Var(MI.getDebugVariable(), MI.getDebugExpression(), MI.getDebugLoc()->getInlinedAt()), Expr(MI.getDebugExpression()), MI(MI) { - static_assert((sizeof(Loc) == sizeof(uint64_t)), - "hash does not cover all members of Loc"); assert(MI.isDebugValue() && "not a DBG_VALUE"); assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE"); if (int RegNo = isDbgValueDescribedByReg(MI)) { @@ -405,7 +413,7 @@ private: /// Take the variable described by DBG_VALUE MI, and create a VarLoc /// locating it in the specified spill location. static VarLoc CreateSpillLoc(const MachineInstr &MI, unsigned SpillBase, - int SpillOffset, LexicalScopes &LS) { + StackOffset SpillOffset, LexicalScopes &LS) { VarLoc VL(MI, LS); assert(VL.Kind == RegisterKind); VL.Kind = SpillLocKind; @@ -442,7 +450,8 @@ private: // Use the original DBG_VALUEs expression to build the spilt location // on top of. FIXME: spill locations created before this pass runs // are not recognized, and not handled here. - auto *SpillExpr = DIExpression::prepend( + auto *TRI = MF.getSubtarget().getRegisterInfo(); + auto *SpillExpr = TRI->prependOffsetExpression( DIExpr, DIExpression::ApplyOffset, Loc.SpillLocation.SpillOffset); unsigned Base = Loc.SpillLocation.SpillBase; return BuildMI(MF, DbgLoc, IID, true, Base, Var, SpillExpr); @@ -457,7 +466,7 @@ private: llvm_unreachable( "Tried to produce DBG_VALUE for invalid or backup VarLoc"); } - llvm_unreachable("Unrecognized LiveDebugValues.VarLoc.Kind enum"); + llvm_unreachable("Unrecognized VarLocBasedLDV.VarLoc.Kind enum"); } /// Is the Loc field a constant or constant object? 
@@ -511,7 +520,9 @@ private: break; case SpillLocKind: Out << printReg(Loc.SpillLocation.SpillBase, TRI); - Out << "[" << Loc.SpillLocation.SpillOffset << "]"; + Out << "[" << Loc.SpillLocation.SpillOffset.getFixed() << " + " + << Loc.SpillLocation.SpillOffset.getScalable() << "x vscale" + << "]"; break; case ImmediateKind: Out << Loc.Immediate; @@ -534,14 +545,46 @@ private: #endif bool operator==(const VarLoc &Other) const { - return Kind == Other.Kind && Var == Other.Var && - Loc.Hash == Other.Loc.Hash && Expr == Other.Expr; + if (Kind != Other.Kind || !(Var == Other.Var) || Expr != Other.Expr) + return false; + + switch (Kind) { + case SpillLocKind: + return Loc.SpillLocation == Other.Loc.SpillLocation; + case RegisterKind: + case ImmediateKind: + case EntryValueKind: + case EntryValueBackupKind: + case EntryValueCopyBackupKind: + return Loc.Hash == Other.Loc.Hash; + default: + llvm_unreachable("Invalid kind"); + } } /// This operator guarantees that VarLocs are sorted by Variable first. bool operator<(const VarLoc &Other) const { - return std::tie(Var, Kind, Loc.Hash, Expr) < - std::tie(Other.Var, Other.Kind, Other.Loc.Hash, Other.Expr); + switch (Kind) { + case SpillLocKind: + return std::make_tuple(Var, Kind, Loc.SpillLocation.SpillBase, + Loc.SpillLocation.SpillOffset.getFixed(), + Loc.SpillLocation.SpillOffset.getScalable(), + Expr) < + std::make_tuple( + Other.Var, Other.Kind, Other.Loc.SpillLocation.SpillBase, + Other.Loc.SpillLocation.SpillOffset.getFixed(), + Other.Loc.SpillLocation.SpillOffset.getScalable(), + Other.Expr); + case RegisterKind: + case ImmediateKind: + case EntryValueKind: + case EntryValueBackupKind: + case EntryValueCopyBackupKind: + return std::tie(Var, Kind, Loc.Hash, Expr) < + std::tie(Other.Var, Other.Kind, Other.Loc.Hash, Other.Expr); + default: + llvm_unreachable("Invalid kind"); + } } }; @@ -793,30 +836,18 @@ private: /// had their instruction creation deferred. 
void flushPendingLocs(VarLocInMBB &PendingInLocs, VarLocMap &VarLocIDs); - bool ExtendRanges(MachineFunction &MF); + bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) override; public: - static char ID; - /// Default construct and initialize the pass. - LiveDebugValues(); + VarLocBasedLDV(); - /// Tell the pass manager which passes we depend on and what - /// information we preserve. - void getAnalysisUsage(AnalysisUsage &AU) const override; - - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoVRegs); - } + ~VarLocBasedLDV(); /// Print to ostream with a message. void printVarLocInMBB(const MachineFunction &MF, const VarLocInMBB &V, const VarLocMap &VarLocIDs, const char *msg, raw_ostream &Out) const; - - /// Calculate the liveness information for the given machine function. - bool runOnMachineFunction(MachineFunction &MF) override; }; } // end anonymous namespace @@ -825,31 +856,16 @@ public: // Implementation //===----------------------------------------------------------------------===// -char LiveDebugValues::ID = 0; - -char &llvm::LiveDebugValuesID = LiveDebugValues::ID; - -INITIALIZE_PASS(LiveDebugValues, DEBUG_TYPE, "Live DEBUG_VALUE analysis", - false, false) +VarLocBasedLDV::VarLocBasedLDV() { } -/// Default construct and initialize the pass. -LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) { - initializeLiveDebugValuesPass(*PassRegistry::getPassRegistry()); -} - -/// Tell the pass manager which passes we depend on and what information we -/// preserve. -void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); -} +VarLocBasedLDV::~VarLocBasedLDV() { } /// Erase a variable from the set of open ranges, and additionally erase any -/// fragments that may overlap it. If the VarLoc is a buckup location, erase +/// fragments that may overlap it. 
If the VarLoc is a backup location, erase /// the variable from the EntryValuesBackupVars set, indicating we should stop /// tracking its backup entry location. Otherwise, if the VarLoc is primary /// location, erase the variable from the Vars set. -void LiveDebugValues::OpenRangesSet::erase(const VarLoc &VL) { +void VarLocBasedLDV::OpenRangesSet::erase(const VarLoc &VL) { // Erasure helper. auto DoErase = [VL, this](DebugVariable VarToErase) { auto *EraseFrom = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars; @@ -875,15 +891,15 @@ void LiveDebugValues::OpenRangesSet::erase(const VarLoc &VL) { auto MapIt = OverlappingFragments.find({Var.getVariable(), ThisFragment}); if (MapIt != OverlappingFragments.end()) { for (auto Fragment : MapIt->second) { - LiveDebugValues::OptFragmentInfo FragmentHolder; + VarLocBasedLDV::OptFragmentInfo FragmentHolder; if (!DebugVariable::isDefaultFragment(Fragment)) - FragmentHolder = LiveDebugValues::OptFragmentInfo(Fragment); + FragmentHolder = VarLocBasedLDV::OptFragmentInfo(Fragment); DoErase({Var.getVariable(), FragmentHolder, Var.getInlinedAt()}); } } } -void LiveDebugValues::OpenRangesSet::erase(const VarLocSet &KillSet, +void VarLocBasedLDV::OpenRangesSet::erase(const VarLocSet &KillSet, const VarLocMap &VarLocIDs) { VarLocs.intersectWithComplement(KillSet); for (uint64_t ID : KillSet) { @@ -893,7 +909,7 @@ void LiveDebugValues::OpenRangesSet::erase(const VarLocSet &KillSet, } } -void LiveDebugValues::OpenRangesSet::insert(LocIndex VarLocID, +void VarLocBasedLDV::OpenRangesSet::insert(LocIndex VarLocID, const VarLoc &VL) { auto *InsertInto = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars; VarLocs.set(VarLocID.getAsRawInteger()); @@ -903,7 +919,7 @@ void LiveDebugValues::OpenRangesSet::insert(LocIndex VarLocID, /// Return the Loc ID of an entry value backup location, if it exists for the /// variable. 
llvm::Optional<LocIndex> -LiveDebugValues::OpenRangesSet::getEntryValueBackup(DebugVariable Var) { +VarLocBasedLDV::OpenRangesSet::getEntryValueBackup(DebugVariable Var) { auto It = EntryValuesBackupVars.find(Var); if (It != EntryValuesBackupVars.end()) return It->second; @@ -911,7 +927,7 @@ LiveDebugValues::OpenRangesSet::getEntryValueBackup(DebugVariable Var) { return llvm::None; } -void LiveDebugValues::collectIDsForRegs(VarLocSet &Collected, +void VarLocBasedLDV::collectIDsForRegs(VarLocSet &Collected, const DefinedRegsSet &Regs, const VarLocSet &CollectFrom) const { assert(!Regs.empty() && "Nothing to collect"); @@ -937,7 +953,7 @@ void LiveDebugValues::collectIDsForRegs(VarLocSet &Collected, } } -void LiveDebugValues::getUsedRegs(const VarLocSet &CollectFrom, +void VarLocBasedLDV::getUsedRegs(const VarLocSet &CollectFrom, SmallVectorImpl<uint32_t> &UsedRegs) const { // All register-based VarLocs are assigned indices greater than or equal to // FirstRegIndex. @@ -967,7 +983,7 @@ void LiveDebugValues::getUsedRegs(const VarLocSet &CollectFrom, //===----------------------------------------------------------------------===// #ifndef NDEBUG -void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF, +void VarLocBasedLDV::printVarLocInMBB(const MachineFunction &MF, const VarLocInMBB &V, const VarLocMap &VarLocIDs, const char *msg, @@ -991,8 +1007,8 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF, } #endif -LiveDebugValues::VarLoc::SpillLoc -LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI) { +VarLocBasedLDV::VarLoc::SpillLoc +VarLocBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) { assert(MI.hasOneMemOperand() && "Spill instruction does not have exactly one memory operand?"); auto MMOI = MI.memoperands_begin(); @@ -1002,14 +1018,14 @@ LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI) { int FI = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex(); const MachineBasicBlock *MBB = 
MI.getParent(); Register Reg; - int Offset = TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg); + StackOffset Offset = TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg); return {Reg, Offset}; } /// Try to salvage the debug entry value if we encounter a new debug value /// describing the same parameter, otherwise stop tracking the value. Return /// true if we should stop tracking the entry value, otherwise return false. -bool LiveDebugValues::removeEntryValue(const MachineInstr &MI, +bool VarLocBasedLDV::removeEntryValue(const MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, const VarLoc &EntryVL) { @@ -1061,7 +1077,7 @@ bool LiveDebugValues::removeEntryValue(const MachineInstr &MI, /// End all previous ranges related to @MI and start a new range from @MI /// if it is a DBG_VALUE instr. -void LiveDebugValues::transferDebugValue(const MachineInstr &MI, +void VarLocBasedLDV::transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs) { if (!MI.isDebugValue()) @@ -1112,7 +1128,7 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI, } /// Turn the entry value backup locations into primary locations. -void LiveDebugValues::emitEntryValues(MachineInstr &MI, +void VarLocBasedLDV::emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers, @@ -1150,7 +1166,7 @@ void LiveDebugValues::emitEntryValues(MachineInstr &MI, /// new VarLoc. If \p NewReg is different than default zero value then the /// new location will be register location created by the copy like instruction, /// otherwise it is variable's location on the stack. 
-void LiveDebugValues::insertTransferDebugPair( +void VarLocBasedLDV::insertTransferDebugPair( MachineInstr &MI, OpenRangesSet &OpenRanges, TransferMap &Transfers, VarLocMap &VarLocIDs, LocIndex OldVarID, TransferKind Kind, Register NewReg) { @@ -1217,7 +1233,7 @@ void LiveDebugValues::insertTransferDebugPair( } /// A definition of a register may mark the end of a range. -void LiveDebugValues::transferRegisterDef( +void VarLocBasedLDV::transferRegisterDef( MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers) { @@ -1278,14 +1294,14 @@ void LiveDebugValues::transferRegisterDef( collectIDsForRegs(KillSet, DeadRegs, OpenRanges.getVarLocs()); OpenRanges.erase(KillSet, VarLocIDs); - if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) { + if (TPC) { auto &TM = TPC->getTM<TargetMachine>(); if (TM.Options.ShouldEmitDebugEntryValues()) emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, KillSet); } } -bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI, +bool VarLocBasedLDV::isSpillInstruction(const MachineInstr &MI, MachineFunction *MF) { // TODO: Handle multiple stores folded into one. 
if (!MI.hasOneMemOperand()) @@ -1298,7 +1314,7 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI, return true; } -bool LiveDebugValues::isLocationSpill(const MachineInstr &MI, +bool VarLocBasedLDV::isLocationSpill(const MachineInstr &MI, MachineFunction *MF, Register &Reg) { if (!isSpillInstruction(MI, MF)) return false; @@ -1338,8 +1354,8 @@ bool LiveDebugValues::isLocationSpill(const MachineInstr &MI, return false; } -Optional<LiveDebugValues::VarLoc::SpillLoc> -LiveDebugValues::isRestoreInstruction(const MachineInstr &MI, +Optional<VarLocBasedLDV::VarLoc::SpillLoc> +VarLocBasedLDV::isRestoreInstruction(const MachineInstr &MI, MachineFunction *MF, Register &Reg) { if (!MI.hasOneMemOperand()) return None; @@ -1360,7 +1376,7 @@ LiveDebugValues::isRestoreInstruction(const MachineInstr &MI, /// the DBG_VALUE without inserting it and keep track of it in \p Transfers. /// It will be inserted into the BB when we're done iterating over the /// instructions. -void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI, +void VarLocBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers) { @@ -1449,7 +1465,7 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI, /// If \p MI is a register copy instruction, that copies a previously tracked /// value from one register to another register that is callee saved, we /// create new DBG_VALUE instruction described with copy destination register. -void LiveDebugValues::transferRegisterCopy(MachineInstr &MI, +void VarLocBasedLDV::transferRegisterCopy(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers) { @@ -1519,7 +1535,7 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI, } /// Terminate all open ranges at the end of the current basic block. 
-bool LiveDebugValues::transferTerminator(MachineBasicBlock *CurMBB, +bool VarLocBasedLDV::transferTerminator(MachineBasicBlock *CurMBB, OpenRangesSet &OpenRanges, VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs) { @@ -1551,7 +1567,7 @@ bool LiveDebugValues::transferTerminator(MachineBasicBlock *CurMBB, /// Variable which are known to exist. /// \param OverlappingFragments The overlap map being constructed, from one /// Var/Fragment pair to a vector of fragments known to overlap. -void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI, +void VarLocBasedLDV::accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments, OverlapMap &OverlappingFragments) { DebugVariable MIVar(MI.getDebugVariable(), MI.getDebugExpression(), @@ -1603,7 +1619,7 @@ void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI, } /// This routine creates OpenRanges. -void LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges, +void VarLocBasedLDV::process(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers) { transferDebugValue(MI, OpenRanges, VarLocIDs); transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers); @@ -1614,7 +1630,7 @@ void LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges, /// This routine joins the analysis results of all incoming edges in @MBB by /// inserting a new DBG_VALUE instruction at the start of the @MBB - if the same /// source variable in all the predecessors of @MBB reside in the same location. 
-bool LiveDebugValues::join( +bool VarLocBasedLDV::join( MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs, const VarLocMap &VarLocIDs, SmallPtrSet<const MachineBasicBlock *, 16> &Visited, @@ -1697,7 +1713,7 @@ bool LiveDebugValues::join( return Changed; } -void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs, +void VarLocBasedLDV::flushPendingLocs(VarLocInMBB &PendingInLocs, VarLocMap &VarLocIDs) { // PendingInLocs records all locations propagated into blocks, which have // not had DBG_VALUE insts created. Go through and create those insts now. @@ -1721,7 +1737,7 @@ void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs, } } -bool LiveDebugValues::isEntryValueCandidate( +bool VarLocBasedLDV::isEntryValueCandidate( const MachineInstr &MI, const DefinedRegsSet &DefinedRegs) const { assert(MI.isDebugValue() && "This must be DBG_VALUE."); @@ -1770,11 +1786,11 @@ static void collectRegDefs(const MachineInstr &MI, DefinedRegsSet &Regs, /// This routine records the entry values of function parameters. The values /// could be used as backup values. If we loose the track of some unmodified /// parameters, the backup values will be used as a primary locations. -void LiveDebugValues::recordEntryValue(const MachineInstr &MI, +void VarLocBasedLDV::recordEntryValue(const MachineInstr &MI, const DefinedRegsSet &DefinedRegs, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs) { - if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) { + if (TPC) { auto &TM = TPC->getTM<TargetMachine>(); if (!TM.Options.ShouldEmitDebugEntryValues()) return; @@ -1800,9 +1816,25 @@ void LiveDebugValues::recordEntryValue(const MachineInstr &MI, /// Calculate the liveness information for the given machine function and /// extend ranges across basic blocks. 
-bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { +bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) { LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n"); + if (!MF.getFunction().getSubprogram()) + // VarLocBaseLDV will already have removed all DBG_VALUEs. + return false; + + // Skip functions from NoDebug compilation units. + if (MF.getFunction().getSubprogram()->getUnit()->getEmissionKind() == + DICompileUnit::NoDebug) + return false; + + TRI = MF.getSubtarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TFI = MF.getSubtarget().getFrameLowering(); + TFI->getCalleeSaves(MF, CalleeSavedRegs); + this->TPC = TPC; + LS.initialize(MF); + bool Changed = false; bool OLChanged = false; bool MBBJoined = false; @@ -1840,8 +1872,8 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { MachineBasicBlock &First_MBB = *(MF.begin()); for (auto &MI : First_MBB) { collectRegDefs(MI, DefinedRegs, TRI); - if (MI.isDebugValue()) - recordEntryValue(MI, DefinedRegs, OpenRanges, VarLocIDs); + if (MI.isDebugValue()) + recordEntryValue(MI, DefinedRegs, OpenRanges, VarLocIDs); } // Initialize per-block structures and scan for fragment overlaps. @@ -1878,7 +1910,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { if (MI.isDebugValue()) ++NumInputDbgValues; if (NumInputDbgValues > InputDbgValueLimit) { - LLVM_DEBUG(dbgs() << "Disabling LiveDebugValues: " << MF.getName() + LLVM_DEBUG(dbgs() << "Disabling VarLocBasedLDV: " << MF.getName() << " has " << RPONumber << " basic blocks and " << NumInputDbgValues << " input DBG_VALUEs, exceeding limits.\n"); @@ -1955,22 +1987,8 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { return Changed; } -bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { - if (!MF.getFunction().getSubprogram()) - // LiveDebugValues will already have removed all DBG_VALUEs. - return false; - - // Skip functions from NoDebug compilation units. 
- if (MF.getFunction().getSubprogram()->getUnit()->getEmissionKind() == - DICompileUnit::NoDebug) - return false; - - TRI = MF.getSubtarget().getRegisterInfo(); - TII = MF.getSubtarget().getInstrInfo(); - TFI = MF.getSubtarget().getFrameLowering(); - TFI->getCalleeSaves(MF, CalleeSavedRegs); - LS.initialize(MF); - - bool Changed = ExtendRanges(MF); - return Changed; +LDVImpl * +llvm::makeVarLocBasedLiveDebugValues() +{ + return new VarLocBasedLDV(); } diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp index 158e873370b1..2325341070a3 100644 --- a/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -54,7 +54,6 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -96,6 +95,7 @@ LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID) { enum : unsigned { UndefLocNo = ~0U }; +namespace { /// Describes a debug variable value by location number and expression along /// with some flags about the original usage of the location. class DbgVariableValue { @@ -136,6 +136,7 @@ private: unsigned WasIndirect : 1; const DIExpression *Expression = nullptr; }; +} // namespace /// Map of where a user value is live to that value. using LocMap = IntervalMap<SlotIndex, DbgVariableValue, 4>; @@ -394,6 +395,11 @@ class LDVImpl { LiveIntervals *LIS; const TargetRegisterInfo *TRI; + using StashedInstrRef = + std::tuple<unsigned, unsigned, const DILocalVariable *, + const DIExpression *, DebugLoc>; + std::map<SlotIndex, std::vector<StashedInstrRef>> StashedInstrReferences; + /// Whether emitDebugValues is called. bool EmitDone = false; @@ -430,6 +436,16 @@ class LDVImpl { /// \returns True if the DBG_VALUE instruction should be deleted. bool handleDebugValue(MachineInstr &MI, SlotIndex Idx); + /// Track a DBG_INSTR_REF. 
This needs to be removed from the MachineFunction + /// during regalloc -- but there's no need to maintain live ranges, as we + /// refer to a value rather than a location. + /// + /// \param MI DBG_INSTR_REF instruction + /// \param Idx Last valid SlotIndex before instruction + /// + /// \returns True if the DBG_VALUE instruction should be deleted. + bool handleDebugInstrRef(MachineInstr &MI, SlotIndex Idx); + /// Add DBG_LABEL instruction to UserLabel. /// /// \param MI DBG_LABEL instruction @@ -458,6 +474,7 @@ public: /// Release all memory. void clear() { MF = nullptr; + StashedInstrReferences.clear(); userValues.clear(); userLabels.clear(); virtRegToEqClass.clear(); @@ -665,6 +682,19 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { return true; } +bool LDVImpl::handleDebugInstrRef(MachineInstr &MI, SlotIndex Idx) { + assert(MI.isDebugRef()); + unsigned InstrNum = MI.getOperand(0).getImm(); + unsigned OperandNum = MI.getOperand(1).getImm(); + auto *Var = MI.getDebugVariable(); + auto *Expr = MI.getDebugExpression(); + auto &DL = MI.getDebugLoc(); + StashedInstrRef Stashed = + std::make_tuple(InstrNum, OperandNum, Var, Expr, DL); + StashedInstrReferences[Idx].push_back(Stashed); + return true; +} + bool LDVImpl::handleDebugLabel(MachineInstr &MI, SlotIndex Idx) { // DBG_LABEL label if (MI.getNumOperands() != 1 || !MI.getOperand(0).isMetadata()) { @@ -712,6 +742,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { // Only handle DBG_VALUE in handleDebugValue(). Skip all other // kinds of debug instructions. if ((MBBI->isDebugValue() && handleDebugValue(*MBBI, Idx)) || + (MBBI->isDebugRef() && handleDebugInstrRef(*MBBI, Idx)) || (MBBI->isDebugLabel() && handleDebugLabel(*MBBI, Idx))) { MBBI = MBB->erase(MBBI); Changed = true; @@ -775,12 +806,12 @@ void UserValue::addDefsFromCopies( if (Kills.empty()) return; // Don't track copies from physregs, there are too many uses. 
- if (!Register::isVirtualRegister(LI->reg)) + if (!Register::isVirtualRegister(LI->reg())) return; // Collect all the (vreg, valno) pairs that are copies of LI. SmallVector<std::pair<LiveInterval*, const VNInfo*>, 8> CopyValues; - for (MachineOperand &MO : MRI.use_nodbg_operands(LI->reg)) { + for (MachineOperand &MO : MRI.use_nodbg_operands(LI->reg())) { MachineInstr *MI = MO.getParent(); // Copies of the full value. if (MO.getSubReg() || !MI->isCopy()) @@ -991,10 +1022,10 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { return Changed; } -static void removeDebugValues(MachineFunction &mf) { +static void removeDebugInstrs(MachineFunction &mf) { for (MachineBasicBlock &MBB : mf) { for (auto MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ) { - if (!MBBI->isDebugValue()) { + if (!MBBI->isDebugInstr()) { ++MBBI; continue; } @@ -1007,7 +1038,7 @@ bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) { if (!EnableLDV) return false; if (!mf.getFunction().getSubprogram()) { - removeDebugValues(mf); + removeDebugInstrs(mf); return false; } if (!pImpl) @@ -1064,7 +1095,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<Register> NewRegs, LII->start < LocMapI.stop()) { // Overlapping correct location. Allocate NewLocNo now. if (NewLocNo == UndefLocNo) { - MachineOperand MO = MachineOperand::CreateReg(LI->reg, false); + MachineOperand MO = MachineOperand::CreateReg(LI->reg(), false); MO.setSubReg(locations[OldLocNo].getSubReg()); NewLocNo = getLocationNo(MO); DidChange = true; @@ -1434,6 +1465,28 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { LLVM_DEBUG(userLabel->print(dbgs(), TRI)); userLabel->emitDebugLabel(*LIS, *TII); } + + LLVM_DEBUG(dbgs() << "********** EMITTING INSTR REFERENCES **********\n"); + + // Re-insert any DBG_INSTR_REFs back in the position they were. Ordering + // is preserved by vector. 
+ auto Slots = LIS->getSlotIndexes(); + const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF); + for (auto &P : StashedInstrReferences) { + const SlotIndex &Idx = P.first; + auto *MBB = Slots->getMBBFromIndex(Idx); + MachineBasicBlock::iterator insertPos = findInsertLocation(MBB, Idx, *LIS); + for (auto &Stashed : P.second) { + auto MIB = BuildMI(*MF, std::get<4>(Stashed), RefII); + MIB.addImm(std::get<0>(Stashed)); + MIB.addImm(std::get<1>(Stashed)); + MIB.addMetadata(std::get<2>(Stashed)); + MIB.addMetadata(std::get<3>(Stashed)); + MachineInstr *New = MIB; + MBB->insert(insertPos, New); + } + } + EmitDone = true; } @@ -1442,10 +1495,6 @@ void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) { static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM); } -bool LiveDebugVariables::doInitialization(Module &M) { - return Pass::doInitialization(M); -} - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void LiveDebugVariables::dump() const { if (pImpl) diff --git a/llvm/lib/CodeGen/LiveDebugVariables.h b/llvm/lib/CodeGen/LiveDebugVariables.h index 74e738ec3e56..07dd3a83866f 100644 --- a/llvm/lib/CodeGen/LiveDebugVariables.h +++ b/llvm/lib/CodeGen/LiveDebugVariables.h @@ -56,7 +56,6 @@ private: bool runOnMachineFunction(MachineFunction &) override; void releaseMemory() override; void getAnalysisUsage(AnalysisUsage &) const override; - bool doInitialization(Module &) override; }; } // end namespace llvm diff --git a/llvm/lib/CodeGen/LiveInterval.cpp b/llvm/lib/CodeGen/LiveInterval.cpp index 930dc116205a..ce0e58772068 100644 --- a/llvm/lib/CodeGen/LiveInterval.cpp +++ b/llvm/lib/CodeGen/LiveInterval.cpp @@ -951,9 +951,9 @@ void LiveInterval::refineSubRanges( MatchingRange = createSubRangeFrom(Allocator, Matching, SR); // Now that the subrange is split in half, make sure we // only keep in the subranges the VNIs that touch the related half. 
- stripValuesNotDefiningMask(reg, *MatchingRange, Matching, Indexes, TRI, + stripValuesNotDefiningMask(reg(), *MatchingRange, Matching, Indexes, TRI, ComposeSubRegIdx); - stripValuesNotDefiningMask(reg, SR, SR.LaneMask, Indexes, TRI, + stripValuesNotDefiningMask(reg(), SR, SR.LaneMask, Indexes, TRI, ComposeSubRegIdx); } Apply(*MatchingRange); @@ -977,11 +977,11 @@ void LiveInterval::computeSubRangeUndefs(SmallVectorImpl<SlotIndex> &Undefs, LaneBitmask LaneMask, const MachineRegisterInfo &MRI, const SlotIndexes &Indexes) const { - assert(Register::isVirtualRegister(reg)); - LaneBitmask VRegMask = MRI.getMaxLaneMaskForVReg(reg); + assert(Register::isVirtualRegister(reg())); + LaneBitmask VRegMask = MRI.getMaxLaneMaskForVReg(reg()); assert((VRegMask & LaneMask).any()); const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); - for (const MachineOperand &MO : MRI.def_operands(reg)) { + for (const MachineOperand &MO : MRI.def_operands(reg())) { if (!MO.isUndef()) continue; unsigned SubReg = MO.getSubReg(); @@ -1043,12 +1043,12 @@ void LiveInterval::SubRange::print(raw_ostream &OS) const { } void LiveInterval::print(raw_ostream &OS) const { - OS << printReg(reg) << ' '; + OS << printReg(reg()) << ' '; super::print(OS); // Print subranges for (const SubRange &SR : subranges()) OS << SR; - OS << " weight:" << weight; + OS << " weight:" << Weight; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1087,7 +1087,7 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const { // Make sure SubRanges are fine and LaneMasks are disjunct. LaneBitmask Mask; - LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg) + LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg()) : LaneBitmask::getAll(); for (const SubRange &SR : subranges()) { // Subrange lanemask should be disjunct to any previous subrange masks. 
@@ -1361,8 +1361,9 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveRange &LR) { void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[], MachineRegisterInfo &MRI) { // Rewrite instructions. - for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg), - RE = MRI.reg_end(); RI != RE;) { + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg()), + RE = MRI.reg_end(); + RI != RE;) { MachineOperand &MO = *RI; MachineInstr *MI = RI->getParent(); ++RI; @@ -1382,7 +1383,7 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[], if (!VNI) continue; if (unsigned EqClass = getEqClass(VNI)) - MO.setReg(LIV[EqClass-1]->reg); + MO.setReg(LIV[EqClass - 1]->reg()); } // Distribute subregister liveranges. diff --git a/llvm/lib/CodeGen/LiveIntervalCalc.cpp b/llvm/lib/CodeGen/LiveIntervalCalc.cpp index 30c2d74a71c5..2756086cb8b1 100644 --- a/llvm/lib/CodeGen/LiveIntervalCalc.cpp +++ b/llvm/lib/CodeGen/LiveIntervalCalc.cpp @@ -60,7 +60,7 @@ void LiveIntervalCalc::calculate(LiveInterval &LI, bool TrackSubRegs) { // Visit all def operands. If the same instruction has multiple defs of Reg, // createDeadDef() will deduplicate. const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); - unsigned Reg = LI.reg; + unsigned Reg = LI.reg(); for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) { if (!MO.isDef() && !MO.readsReg()) continue; @@ -127,7 +127,7 @@ void LiveIntervalCalc::constructMainRangeFromSubranges(LiveInterval &LI) { } } resetLiveOutMap(); - extendToUses(MainRange, LI.reg, LaneBitmask::getAll(), &LI); + extendToUses(MainRange, LI.reg(), LaneBitmask::getAll(), &LI); } void LiveIntervalCalc::createDeadDefs(LiveRange &LR, Register Reg) { @@ -202,4 +202,4 @@ void LiveIntervalCalc::extendToUses(LiveRange &LR, Register Reg, // reading Reg multiple times. That is OK, extend() is idempotent. extend(LR, UseIdx, Reg, Undefs); } -}
\ No newline at end of file +} diff --git a/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/llvm/lib/CodeGen/LiveIntervalUnion.cpp index 43fa8f2d7157..7ccb8df4bc05 100644 --- a/llvm/lib/CodeGen/LiveIntervalUnion.cpp +++ b/llvm/lib/CodeGen/LiveIntervalUnion.cpp @@ -85,8 +85,8 @@ LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { return; } for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) { - OS << " [" << SI.start() << ' ' << SI.stop() << "):" - << printReg(SI.value()->reg, TRI); + OS << " [" << SI.start() << ' ' << SI.stop() + << "):" << printReg(SI.value()->reg(), TRI); } OS << '\n'; } @@ -95,10 +95,20 @@ LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { // Verify the live intervals in this union and add them to the visited set. void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) { for (SegmentIter SI = Segments.begin(); SI.valid(); ++SI) - VisitedVRegs.set(SI.value()->reg); + VisitedVRegs.set(SI.value()->reg()); } #endif //!NDEBUG +LiveInterval *LiveIntervalUnion::getOneVReg() const { + if (empty()) + return nullptr; + for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) { + // return the first valid live interval + return SI.value(); + } + return nullptr; +} + // Scan the vector of interfering virtual registers in this union. Assume it's // quite small. 
bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp index e8ee0599e1a2..a32b486240c8 100644 --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -37,6 +37,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Config/llvm-config.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" @@ -159,7 +160,7 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const { // Dump the virtregs. for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = Register::index2VirtReg(i); + Register Reg = Register::index2VirtReg(i); if (hasInterval(Reg)) OS << getInterval(Reg) << '\n'; } @@ -183,7 +184,7 @@ LLVM_DUMP_METHOD void LiveIntervals::dumpInstrs() const { } #endif -LiveInterval* LiveIntervals::createInterval(unsigned reg) { +LiveInterval *LiveIntervals::createInterval(Register reg) { float Weight = Register::isPhysicalRegister(reg) ? 
huge_valf : 0.0F; return new LiveInterval(reg, Weight); } @@ -193,13 +194,13 @@ bool LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { assert(LICalc && "LICalc not initialized."); assert(LI.empty() && "Should only compute empty intervals."); LICalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); - LICalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg)); + LICalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg())); return computeDeadValues(LI, nullptr); } void LiveIntervals::computeVirtRegs() { for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = Register::index2VirtReg(i); + Register Reg = Register::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; LiveInterval &LI = createEmptyInterval(Reg); @@ -225,6 +226,15 @@ void LiveIntervals::computeRegMasks() { RegMaskBits.push_back(Mask); } + // Unwinders may clobber additional registers. + // FIXME: This functionality can possibly be merged into + // MachineBasicBlock::getBeginClobberMask(). 
+ if (MBB.isEHPad()) + if (auto *Mask = TRI->getCustomEHPadPreservedMask(*MBB.getParent())) { + RegMaskSlots.push_back(Indexes->getMBBStartIdx(&MBB)); + RegMaskBits.push_back(Mask); + } + for (const MachineInstr &MI : MBB) { for (const MachineOperand &MO : MI.operands()) { if (!MO.isRegMask()) @@ -277,7 +287,7 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) { bool IsRootReserved = true; for (MCSuperRegIterator Super(*Root, TRI, /*IncludeSelf=*/true); Super.isValid(); ++Super) { - unsigned Reg = *Super; + MCRegister Reg = *Super; if (!MRI->reg_empty(Reg)) LICalc->createDeadDefs(LR, Reg); // A register unit is considered reserved if all its roots and all their @@ -296,7 +306,7 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) { for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) { for (MCSuperRegIterator Super(*Root, TRI, /*IncludeSelf=*/true); Super.isValid(); ++Super) { - unsigned Reg = *Super; + MCRegister Reg = *Super; if (!MRI->reg_empty(Reg)) LICalc->extendToUses(LR, Reg); } @@ -362,7 +372,7 @@ static void createSegmentsForValues(LiveRange &LR, void LiveIntervals::extendSegmentsToUses(LiveRange &Segments, ShrinkToUsesWorkList &WorkList, - unsigned Reg, LaneBitmask LaneMask) { + Register Reg, LaneBitmask LaneMask) { // Keep track of the PHIs that are in use. SmallPtrSet<VNInfo*, 8> UsedPHIs; // Blocks that have already been added to WorkList as live-out. @@ -444,13 +454,13 @@ void LiveIntervals::extendSegmentsToUses(LiveRange &Segments, bool LiveIntervals::shrinkToUses(LiveInterval *li, SmallVectorImpl<MachineInstr*> *dead) { LLVM_DEBUG(dbgs() << "Shrink: " << *li << '\n'); - assert(Register::isVirtualRegister(li->reg) && + assert(Register::isVirtualRegister(li->reg()) && "Can only shrink virtual registers"); // Shrink subregister live ranges. 
bool NeedsCleanup = false; for (LiveInterval::SubRange &S : li->subranges()) { - shrinkToUses(S, li->reg); + shrinkToUses(S, li->reg()); if (S.empty()) NeedsCleanup = true; } @@ -460,8 +470,8 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // Find all the values used, including PHI kills. ShrinkToUsesWorkList WorkList; - // Visit all instructions reading li->reg. - unsigned Reg = li->reg; + // Visit all instructions reading li->reg(). + Register Reg = li->reg(); for (MachineInstr &UseMI : MRI->reg_instructions(Reg)) { if (UseMI.isDebugValue() || !UseMI.readsVirtualRegister(Reg)) continue; @@ -514,7 +524,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, // Is the register live before? Otherwise we may have to add a read-undef // flag for subregister defs. - unsigned VReg = LI.reg; + Register VReg = LI.reg(); if (MRI->shouldTrackSubRegLiveness(VReg)) { if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) { MachineInstr *MI = getInstructionFromIndex(Def); @@ -534,7 +544,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, // This is a dead def. Make sure the instruction knows. 
MachineInstr *MI = getInstructionFromIndex(Def); assert(MI && "No instruction defining live value"); - MI->addRegisterDead(LI.reg, TRI); + MI->addRegisterDead(LI.reg(), TRI); if (HaveDeadDef) MayHaveSplitComponents = true; HaveDeadDef = true; @@ -548,7 +558,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, return MayHaveSplitComponents; } -void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) { +void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, Register Reg) { LLVM_DEBUG(dbgs() << "Shrink: " << SR << '\n'); assert(Register::isVirtualRegister(Reg) && "Can only shrink virtual registers"); @@ -697,7 +707,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { LiveRange::const_iterator>, 4> SRs; for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = Register::index2VirtReg(i); + Register Reg = Register::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; const LiveInterval &LI = getInterval(Reg); @@ -868,14 +878,12 @@ float LiveIntervals::getSpillWeight(bool isDef, bool isUse, float LiveIntervals::getSpillWeight(bool isDef, bool isUse, const MachineBlockFrequencyInfo *MBFI, const MachineBasicBlock *MBB) { - BlockFrequency Freq = MBFI->getBlockFreq(MBB); - const float Scale = 1.0f / MBFI->getEntryFreq(); - return (isDef + isUse) * (Freq.getFrequency() * Scale); + return (isDef + isUse) * MBFI->getBlockFreqRelativeToEntryBlock(MBB); } LiveRange::Segment -LiveIntervals::addSegmentToEndOfBlock(unsigned reg, MachineInstr &startInst) { - LiveInterval& Interval = createEmptyInterval(reg); +LiveIntervals::addSegmentToEndOfBlock(Register Reg, MachineInstr &startInst) { + LiveInterval &Interval = createEmptyInterval(Reg); VNInfo *VN = Interval.getNextValue( SlotIndex(getInstructionIndex(startInst).getRegSlot()), getVNInfoAllocator()); @@ -1030,7 +1038,8 @@ public: // For physregs, only update the regunits that actually have a // precomputed live range. 
- for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) + for (MCRegUnitIterator Units(Reg.asMCReg(), &TRI); Units.isValid(); + ++Units) if (LiveRange *LR = getRegUnitLI(*Units)) updateRange(*LR, *Units, LaneBitmask::getNone()); } @@ -1041,7 +1050,7 @@ public: private: /// Update a single live range, assuming an instruction has been moved from /// OldIdx to NewIdx. - void updateRange(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) { + void updateRange(LiveRange &LR, Register Reg, LaneBitmask LaneMask) { if (!Updated.insert(&LR).second) return; LLVM_DEBUG({ @@ -1238,7 +1247,7 @@ private: /// Update LR to reflect an instruction has been moved upwards from OldIdx /// to NewIdx (NewIdx < OldIdx). - void handleMoveUp(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) { + void handleMoveUp(LiveRange &LR, Register Reg, LaneBitmask LaneMask) { LiveRange::iterator E = LR.end(); // Segment going into OldIdx. LiveRange::iterator OldIdxIn = LR.find(OldIdx.getBaseIndex()); @@ -1420,7 +1429,7 @@ private: } // Return the last use of reg between NewIdx and OldIdx. - SlotIndex findLastUseBefore(SlotIndex Before, unsigned Reg, + SlotIndex findLastUseBefore(SlotIndex Before, Register Reg, LaneBitmask LaneMask) { if (Register::isVirtualRegister(Reg)) { SlotIndex LastUse = Before; @@ -1533,17 +1542,17 @@ void LiveIntervals::handleMoveIntoNewBundle(MachineInstr &BundleStart, void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, const MachineBasicBlock::iterator End, - const SlotIndex endIdx, - LiveRange &LR, const unsigned Reg, + const SlotIndex EndIdx, LiveRange &LR, + const Register Reg, LaneBitmask LaneMask) { - LiveInterval::iterator LII = LR.find(endIdx); + LiveInterval::iterator LII = LR.find(EndIdx); SlotIndex lastUseIdx; if (LII == LR.begin()) { // This happens when the function is called for a subregister that only // occurs _after_ the range that is to be repaired. 
return; } - if (LII != LR.end() && LII->start < endIdx) + if (LII != LR.end() && LII->start < EndIdx) lastUseIdx = LII->end; else --LII; @@ -1637,11 +1646,11 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, while (End != MBB->end() && !Indexes->hasIndex(*End)) ++End; - SlotIndex endIdx; + SlotIndex EndIdx; if (End == MBB->end()) - endIdx = getMBBEndIdx(MBB).getPrevSlot(); + EndIdx = getMBBEndIdx(MBB).getPrevSlot(); else - endIdx = getInstructionIndex(*End); + EndIdx = getInstructionIndex(*End); Indexes->repairIndexesInRange(MBB, Begin, End); @@ -1670,13 +1679,13 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, continue; for (LiveInterval::SubRange &S : LI.subranges()) - repairOldRegInRange(Begin, End, endIdx, S, Reg, S.LaneMask); + repairOldRegInRange(Begin, End, EndIdx, S, Reg, S.LaneMask); - repairOldRegInRange(Begin, End, endIdx, LI, Reg); + repairOldRegInRange(Begin, End, EndIdx, LI, Reg); } } -void LiveIntervals::removePhysRegDefAt(unsigned Reg, SlotIndex Pos) { +void LiveIntervals::removePhysRegDefAt(MCRegister Reg, SlotIndex Pos) { for (MCRegUnitIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) { if (LiveRange *LR = getCachedRegUnit(*Unit)) if (VNInfo *VNI = LR->getVNInfoAt(Pos)) @@ -1709,7 +1718,7 @@ void LiveIntervals::splitSeparateComponents(LiveInterval &LI, if (NumComp <= 1) return; LLVM_DEBUG(dbgs() << " Split " << NumComp << " components: " << LI << '\n'); - unsigned Reg = LI.reg; + Register Reg = LI.reg(); const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); for (unsigned I = 1; I < NumComp; ++I) { Register NewVReg = MRI->createVirtualRegister(RegClass); diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp index 9de77c19a23a..037cb5426235 100644 --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -12,7 +12,6 @@ #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" #include 
"llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -188,7 +187,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, MachineInstr *DefMI = nullptr, *UseMI = nullptr; // Check that there is a single def and a single use. - for (MachineOperand &MO : MRI.reg_nodbg_operands(LI->reg)) { + for (MachineOperand &MO : MRI.reg_nodbg_operands(LI->reg())) { MachineInstr *MI = MO.getParent(); if (MO.isDef()) { if (DefMI && DefMI != MI) @@ -224,7 +223,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, << " into single use: " << *UseMI); SmallVector<unsigned, 8> Ops; - if (UseMI->readsWritesVirtualRegister(LI->reg, &Ops).second) + if (UseMI->readsWritesVirtualRegister(LI->reg(), &Ops).second) return false; MachineInstr *FoldMI = TII.foldMemoryOperand(*UseMI, Ops, *DefMI, &LIS); @@ -236,7 +235,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, if (UseMI->shouldUpdateCallSiteInfo()) UseMI->getMF()->moveCallSiteInfo(UseMI, FoldMI); UseMI->eraseFromParent(); - DefMI->addRegisterDead(LI->reg, nullptr); + DefMI->addRegisterDead(LI->reg(), nullptr); Dead.push_back(DefMI); ++NumDCEFoldedLoads; return true; @@ -316,7 +315,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, if (Reg && MOI->readsReg() && !MRI.isReserved(Reg)) ReadsPhysRegs = true; else if (MOI->isDef()) - LIS.removePhysRegDefAt(Reg, Idx); + LIS.removePhysRegDefAt(Reg.asMCReg(), Idx); continue; } LiveInterval &LI = LIS.getInterval(Reg); @@ -332,7 +331,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, // Remove defined value. 
if (MOI->isDef()) { if (TheDelegate && LI.getVNInfoAt(Idx) != nullptr) - TheDelegate->LRE_WillShrinkVirtReg(LI.reg); + TheDelegate->LRE_WillShrinkVirtReg(LI.reg()); LIS.removeVRegDefAt(LI, Idx); if (LI.empty()) RegsToErase.push_back(Reg); @@ -369,7 +368,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, pop_back(); DeadRemats->insert(MI); const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); - MI->substituteRegister(Dest, NewLI.reg, 0, TRI); + MI->substituteRegister(Dest, NewLI.reg(), 0, TRI); MI->getOperand(0).setIsDead(true); } else { if (TheDelegate) @@ -409,7 +408,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead, ToShrink.pop_back(); if (foldAsLoad(LI, Dead)) continue; - unsigned VReg = LI->reg; + unsigned VReg = LI->reg(); if (TheDelegate) TheDelegate->LRE_WillShrinkVirtReg(VReg); if (!LIS.shrinkToUses(LI, &Dead)) @@ -436,15 +435,15 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead, if (!SplitLIs.empty()) ++NumFracRanges; - unsigned Original = VRM ? VRM->getOriginal(VReg) : 0; + Register Original = VRM ? VRM->getOriginal(VReg) : Register(); for (const LiveInterval *SplitLI : SplitLIs) { // If LI is an original interval that hasn't been split yet, make the new // intervals their own originals instead of referring to LI. The original // interval must contain all the split products, and LI doesn't. 
if (Original != VReg && Original != 0) - VRM->setIsSplitFromReg(SplitLI->reg, Original); + VRM->setIsSplitFromReg(SplitLI->reg(), Original); if (TheDelegate) - TheDelegate->LRE_DidCloneVirtReg(SplitLI->reg, VReg); + TheDelegate->LRE_DidCloneVirtReg(SplitLI->reg(), VReg); } } } @@ -463,14 +462,14 @@ void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, const MachineLoopInfo &Loops, const MachineBlockFrequencyInfo &MBFI) { - VirtRegAuxInfo VRAI(MF, LIS, VRM, Loops, MBFI); + VirtRegAuxInfo VRAI(MF, LIS, *VRM, Loops, MBFI); for (unsigned I = 0, Size = size(); I < Size; ++I) { LiveInterval &LI = LIS.getInterval(get(I)); - if (MRI.recomputeRegClass(LI.reg)) + if (MRI.recomputeRegClass(LI.reg())) LLVM_DEBUG({ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - dbgs() << "Inflated " << printReg(LI.reg) << " to " - << TRI->getRegClassName(MRI.getRegClass(LI.reg)) << '\n'; + dbgs() << "Inflated " << printReg(LI.reg()) << " to " + << TRI->getRegClassName(MRI.getRegClass(LI.reg())) << '\n'; }); VRAI.calculateSpillWeightAndHint(LI); } diff --git a/llvm/lib/CodeGen/LiveRegMatrix.cpp b/llvm/lib/CodeGen/LiveRegMatrix.cpp index 08f046420fa1..a69aa6557e46 100644 --- a/llvm/lib/CodeGen/LiveRegMatrix.cpp +++ b/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -78,7 +78,7 @@ void LiveRegMatrix::releaseMemory() { template <typename Callable> static bool foreachUnit(const TargetRegisterInfo *TRI, - LiveInterval &VRegInterval, unsigned PhysReg, + LiveInterval &VRegInterval, MCRegister PhysReg, Callable Func) { if (VRegInterval.hasSubRanges()) { for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { @@ -101,11 +101,11 @@ static bool foreachUnit(const TargetRegisterInfo *TRI, return false; } -void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { - LLVM_DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg, TRI) << " to " +void LiveRegMatrix::assign(LiveInterval &VirtReg, MCRegister PhysReg) { + LLVM_DEBUG(dbgs() << "assigning " << 
printReg(VirtReg.reg(), TRI) << " to " << printReg(PhysReg, TRI) << ':'); - assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); - VRM->assignVirt2Phys(VirtReg.reg, PhysReg); + assert(!VRM->hasPhys(VirtReg.reg()) && "Duplicate VirtReg assignment"); + VRM->assignVirt2Phys(VirtReg.reg(), PhysReg); foreachUnit( TRI, VirtReg, PhysReg, [&](unsigned Unit, const LiveRange &Range) { @@ -119,10 +119,10 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { } void LiveRegMatrix::unassign(LiveInterval &VirtReg) { - Register PhysReg = VRM->getPhys(VirtReg.reg); - LLVM_DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg, TRI) << " from " - << printReg(PhysReg, TRI) << ':'); - VRM->clearVirt(VirtReg.reg); + Register PhysReg = VRM->getPhys(VirtReg.reg()); + LLVM_DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg(), TRI) + << " from " << printReg(PhysReg, TRI) << ':'); + VRM->clearVirt(VirtReg.reg()); foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, const LiveRange &Range) { @@ -135,7 +135,7 @@ void LiveRegMatrix::unassign(LiveInterval &VirtReg) { LLVM_DEBUG(dbgs() << '\n'); } -bool LiveRegMatrix::isPhysRegUsed(unsigned PhysReg) const { +bool LiveRegMatrix::isPhysRegUsed(MCRegister PhysReg) const { for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) { if (!Matrix[*Unit].empty()) return true; @@ -144,12 +144,12 @@ bool LiveRegMatrix::isPhysRegUsed(unsigned PhysReg) const { } bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg, - unsigned PhysReg) { + MCRegister PhysReg) { // Check if the cached information is valid. // The same BitVector can be reused for all PhysRegs. // We could cache multiple VirtRegs if it becomes necessary. 
- if (RegMaskVirtReg != VirtReg.reg || RegMaskTag != UserTag) { - RegMaskVirtReg = VirtReg.reg; + if (RegMaskVirtReg != VirtReg.reg() || RegMaskTag != UserTag) { + RegMaskVirtReg = VirtReg.reg(); RegMaskTag = UserTag; RegMaskUsable.clear(); LIS->checkRegMaskInterference(VirtReg, RegMaskUsable); @@ -162,10 +162,10 @@ bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg, } bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, - unsigned PhysReg) { + MCRegister PhysReg) { if (VirtReg.empty()) return false; - CoalescerPair CP(VirtReg.reg, PhysReg, *TRI); + CoalescerPair CP(VirtReg.reg(), PhysReg, *TRI); bool Result = foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, const LiveRange &Range) { @@ -176,14 +176,14 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, } LiveIntervalUnion::Query &LiveRegMatrix::query(const LiveRange &LR, - unsigned RegUnit) { + MCRegister RegUnit) { LiveIntervalUnion::Query &Q = Queries[RegUnit]; Q.init(UserTag, LR, Matrix[RegUnit]); return Q; } LiveRegMatrix::InterferenceKind -LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) { +LiveRegMatrix::checkInterference(LiveInterval &VirtReg, MCRegister PhysReg) { if (VirtReg.empty()) return IK_Free; @@ -197,9 +197,9 @@ LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) { // Check the matrix for virtual register interference. bool Interference = foreachUnit(TRI, VirtReg, PhysReg, - [&](unsigned Unit, const LiveRange &LR) { - return query(LR, Unit).checkInterference(); - }); + [&](MCRegister Unit, const LiveRange &LR) { + return query(LR, Unit).checkInterference(); + }); if (Interference) return IK_VirtReg; @@ -207,7 +207,7 @@ LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) { } bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End, - unsigned PhysReg) { + MCRegister PhysReg) { // Construct artificial live range containing only one segment [Start, End). 
VNInfo valno(0, Start); LiveRange::Segment Seg(Start, End, &valno); @@ -221,3 +221,13 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End, } return false; } + +Register LiveRegMatrix::getOneVReg(unsigned PhysReg) const { + LiveInterval *VRegInterval = nullptr; + for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) { + if ((VRegInterval = Matrix[*Unit].getOneVReg())) + return VRegInterval->reg(); + } + + return MCRegister::NoRegister; +} diff --git a/llvm/lib/CodeGen/LiveRegUnits.cpp b/llvm/lib/CodeGen/LiveRegUnits.cpp index b2731aa0e7db..ea2075bc139d 100644 --- a/llvm/lib/CodeGen/LiveRegUnits.cpp +++ b/llvm/lib/CodeGen/LiveRegUnits.cpp @@ -11,15 +11,11 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveRegUnits.h" - #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/MC/MCRegisterInfo.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp index 6610491dd111..49b880c30936 100644 --- a/llvm/lib/CodeGen/LiveVariables.cpp +++ b/llvm/lib/CodeGen/LiveVariables.cpp @@ -82,17 +82,15 @@ LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const { #endif /// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg. 
-LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) { - assert(Register::isVirtualRegister(RegIdx) && - "getVarInfo: not a virtual register!"); - VirtRegInfo.grow(RegIdx); - return VirtRegInfo[RegIdx]; +LiveVariables::VarInfo &LiveVariables::getVarInfo(Register Reg) { + assert(Reg.isVirtual() && "getVarInfo: not a virtual register!"); + VirtRegInfo.grow(Reg); + return VirtRegInfo[Reg]; } -void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo, - MachineBasicBlock *DefBlock, - MachineBasicBlock *MBB, - std::vector<MachineBasicBlock*> &WorkList) { +void LiveVariables::MarkVirtRegAliveInBlock( + VarInfo &VRInfo, MachineBasicBlock *DefBlock, MachineBasicBlock *MBB, + SmallVectorImpl<MachineBasicBlock *> &WorkList) { unsigned BBNum = MBB->getNumber(); // Check to see if this basic block is one of the killing blocks. If so, @@ -118,7 +116,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo, void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo, MachineBasicBlock *DefBlock, MachineBasicBlock *MBB) { - std::vector<MachineBasicBlock*> WorkList; + SmallVector<MachineBasicBlock *, 16> WorkList; MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList); while (!WorkList.empty()) { @@ -128,13 +126,13 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo, } } -void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB, +void LiveVariables::HandleVirtRegUse(Register Reg, MachineBasicBlock *MBB, MachineInstr &MI) { - assert(MRI->getVRegDef(reg) && "Register use before def!"); + assert(MRI->getVRegDef(Reg) && "Register use before def!"); unsigned BBNum = MBB->getNumber(); - VarInfo& VRInfo = getVarInfo(reg); + VarInfo &VRInfo = getVarInfo(Reg); // Check to see if this basic block is already a kill block. 
if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) { @@ -165,7 +163,8 @@ void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB, // where there is a use in a PHI node that's a predecessor to the defining // block. We don't want to mark all predecessors as having the value "alive" // in this case. - if (MBB == MRI->getVRegDef(reg)->getParent()) return; + if (MBB == MRI->getVRegDef(Reg)->getParent()) + return; // Add a new kill entry for this basic block. If this virtual register is // already marked as alive in this basic block, that means it is alive in at @@ -176,10 +175,10 @@ void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB, // Update all dominating blocks to mark them as "known live". for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), E = MBB->pred_end(); PI != E; ++PI) - MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(reg)->getParent(), *PI); + MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(Reg)->getParent(), *PI); } -void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr &MI) { +void LiveVariables::HandleVirtRegDef(Register Reg, MachineInstr &MI) { VarInfo &VRInfo = getVarInfo(Reg); if (VRInfo.AliveBlocks.empty()) @@ -189,8 +188,9 @@ void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr &MI) { /// FindLastPartialDef - Return the last partial def of the specified register. /// Also returns the sub-registers that're defined by the instruction. -MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, - SmallSet<unsigned,4> &PartDefRegs) { +MachineInstr * +LiveVariables::FindLastPartialDef(Register Reg, + SmallSet<unsigned, 4> &PartDefRegs) { unsigned LastDefReg = 0; unsigned LastDefDist = 0; MachineInstr *LastDef = nullptr; @@ -228,7 +228,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, /// HandlePhysRegUse - Turn previous partial def's into read/mod/writes. 
Add /// implicit defs to a machine instruction if there was an earlier def of its /// super-register. -void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr &MI) { +void LiveVariables::HandlePhysRegUse(Register Reg, MachineInstr &MI) { MachineInstr *LastDef = PhysRegDef[Reg]; // If there was a previous use or a "full" def all is well. if (!LastDef && !PhysRegUse[Reg]) { @@ -278,7 +278,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr &MI) { /// FindLastRefOrPartRef - Return the last reference or partial reference of /// the specified register. -MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) { +MachineInstr *LiveVariables::FindLastRefOrPartRef(Register Reg) { MachineInstr *LastDef = PhysRegDef[Reg]; MachineInstr *LastUse = PhysRegUse[Reg]; if (!LastDef && !LastUse) @@ -308,7 +308,7 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) { return LastRefOrPartRef; } -bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { +bool LiveVariables::HandlePhysRegKill(Register Reg, MachineInstr *MI) { MachineInstr *LastDef = PhysRegDef[Reg]; MachineInstr *LastUse = PhysRegUse[Reg]; if (!LastDef && !LastUse) @@ -440,7 +440,7 @@ void LiveVariables::HandleRegMask(const MachineOperand &MO) { } } -void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, +void LiveVariables::HandlePhysRegDef(Register Reg, MachineInstr *MI, SmallVectorImpl<unsigned> &Defs) { // What parts of the register are previously defined? 
SmallSet<unsigned, 32> Live; @@ -486,7 +486,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI, SmallVectorImpl<unsigned> &Defs) { while (!Defs.empty()) { - unsigned Reg = Defs.back(); + Register Reg = Defs.back(); Defs.pop_back(); for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) { @@ -653,7 +653,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // Convert and transfer the dead / killed information we have gathered into // VirtRegInfo onto MI's. for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) { - const unsigned Reg = Register::index2VirtReg(i); + const Register Reg = Register::index2VirtReg(i); for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j) if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg)) VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI); @@ -666,7 +666,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // other part of the code generator if this happens. #ifndef NDEBUG for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i) - assert(Visited.count(&*i) != 0 && "unreachable basic block found"); + assert(Visited.contains(&*i) && "unreachable basic block found"); #endif PhysRegDef.clear(); @@ -678,7 +678,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { /// replaceKillInstruction - Update register kill info by replacing a kill /// instruction with a new one. 
-void LiveVariables::replaceKillInstruction(unsigned Reg, MachineInstr &OldMI, +void LiveVariables::replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI) { VarInfo &VI = getVarInfo(Reg); std::replace(VI.Kills.begin(), VI.Kills.end(), &OldMI, &NewMI); @@ -718,8 +718,7 @@ void LiveVariables::analyzePHINodes(const MachineFunction& Fn) { } bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB, - unsigned Reg, - MachineRegisterInfo &MRI) { + Register Reg, MachineRegisterInfo &MRI) { unsigned Num = MBB.getNumber(); // Reg is live-through. @@ -735,7 +734,7 @@ bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB, return findKill(&MBB); } -bool LiveVariables::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB) { +bool LiveVariables::isLiveOut(Register Reg, const MachineBasicBlock &MBB) { LiveVariables::VarInfo &VI = getVarInfo(Reg); SmallPtrSet<const MachineBasicBlock *, 8> Kills; @@ -793,7 +792,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB, // Update info for all live variables for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = Register::index2VirtReg(i); + Register Reg = Register::index2VirtReg(i); // If the Defs is defined in the successor it can't be live in BB. 
if (Defs.count(Reg)) @@ -819,7 +818,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB, SparseBitVector<> &BV = LiveInSets[SuccBB->getNumber()]; for (auto R = BV.begin(), E = BV.end(); R != E; R++) { - unsigned VirtReg = Register::index2VirtReg(*R); + Register VirtReg = Register::index2VirtReg(*R); LiveVariables::VarInfo &VI = getVarInfo(VirtReg); VI.AliveBlocks.set(NumNew); } diff --git a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index 6c5ef0255a08..ec6e693e8a46 100644 --- a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -117,7 +117,7 @@ bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { // If the target doesn't want/need this pass, or if there are no locals // to consider, early exit. - if (!TRI->requiresVirtualBaseRegisters(MF) || LocalObjectCount == 0) + if (LocalObjectCount == 0 || !TRI->requiresVirtualBaseRegisters(MF)) return true; // Make sure we have enough space to store the local offsets. 
@@ -220,6 +220,8 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { continue; if (StackProtectorFI == (int)i) continue; + if (!TFI.isStackIdSafeForLocalArea(MFI.getStackID(i))) + continue; switch (MFI.getObjectSSPLayout(i)) { case MachineFrameInfo::SSPLK_None: @@ -254,6 +256,8 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { continue; if (ProtectedObjs.count(i)) continue; + if (!TFI.isStackIdSafeForLocalArea(MFI.getStackID(i))) + continue; AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign); } @@ -412,15 +416,16 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF); BaseReg = Fn.getRegInfo().createVirtualRegister(RC); - LLVM_DEBUG(dbgs() << " Materializing base register " << BaseReg + LLVM_DEBUG(dbgs() << " Materializing base register" << " at frame local offset " - << LocalOffset + InstrOffset << "\n"); + << LocalOffset + InstrOffset); // Tell the target to insert the instruction to initialize // the base register. 
// MachineBasicBlock::iterator InsertionPt = Entry->begin(); - TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx, - InstrOffset); + BaseReg = TRI->materializeFrameBaseRegister(Entry, FrameIdx, InstrOffset); + + LLVM_DEBUG(dbgs() << " into " << printReg(BaseReg, TRI) << '\n'); // The base register already includes any offset specified // by the instruction, so account for that so it doesn't get diff --git a/llvm/lib/CodeGen/LowLevelType.cpp b/llvm/lib/CodeGen/LowLevelType.cpp index 33752a1f9230..2bda586db8c7 100644 --- a/llvm/lib/CodeGen/LowLevelType.cpp +++ b/llvm/lib/CodeGen/LowLevelType.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LowLevelType.h" +#include "llvm/ADT/APFloat.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/Support/raw_ostream.h" @@ -58,3 +59,18 @@ LLT llvm::getLLTForMVT(MVT Ty) { return LLT::vector(Ty.getVectorNumElements(), Ty.getVectorElementType().getSizeInBits()); } + +const llvm::fltSemantics &llvm::getFltSemanticForLLT(LLT Ty) { + assert(Ty.isScalar() && "Expected a scalar type."); + switch (Ty.getSizeInBits()) { + case 16: + return APFloat::IEEEhalf(); + case 32: + return APFloat::IEEEsingle(); + case 64: + return APFloat::IEEEdouble(); + case 128: + return APFloat::IEEEquad(); + } + llvm_unreachable("Invalid FP type size."); +} diff --git a/llvm/lib/CodeGen/LowerEmuTLS.cpp b/llvm/lib/CodeGen/LowerEmuTLS.cpp index 36b863178b47..a06d1d6255c7 100644 --- a/llvm/lib/CodeGen/LowerEmuTLS.cpp +++ b/llvm/lib/CodeGen/LowerEmuTLS.cpp @@ -15,8 +15,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" @@ -44,6 +44,7 @@ private: GlobalVariable *to) { to->setLinkage(from->getLinkage()); 
to->setVisibility(from->getVisibility()); + to->setDSOLocal(from->isDSOLocal()); if (from->hasComdat()) { to->setComdat(M.getOrInsertComdat(to->getName())); to->getComdat()->setSelectionKind(from->getComdat()->getSelectionKind()); diff --git a/llvm/lib/CodeGen/MBFIWrapper.cpp b/llvm/lib/CodeGen/MBFIWrapper.cpp index 5110f75ebb42..4755defec793 100644 --- a/llvm/lib/CodeGen/MBFIWrapper.cpp +++ b/llvm/lib/CodeGen/MBFIWrapper.cpp @@ -30,6 +30,18 @@ void MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB, MergedBBFreq[MBB] = F; } +Optional<uint64_t> +MBFIWrapper::getBlockProfileCount(const MachineBasicBlock *MBB) const { + auto I = MergedBBFreq.find(MBB); + + // Modified block frequency also impacts profile count. So we should compute + // profile count from new block frequency if it has been changed. + if (I != MergedBBFreq.end()) + return MBFI.getProfileCountFromFreq(I->second.getFrequency()); + + return MBFI.getBlockProfileCount(MBB); +} + raw_ostream & MBFIWrapper::printBlockFreq(raw_ostream &OS, const MachineBasicBlock *MBB) const { return MBFI.printBlockFreq(OS, getBlockFreq(MBB)); diff --git a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp index 9eddb8626f60..8ef6aca602a1 100644 --- a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp +++ b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -85,9 +85,7 @@ static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) { return {}; ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); std::vector<MachineBasicBlock *> RPOList; - for (auto MBB : RPOT) { - RPOList.push_back(MBB); - } + append_range(RPOList, RPOT); return RPOList; } @@ -108,7 +106,7 @@ rescheduleLexographically(std::vector<MachineInstr *> instructions, OS.flush(); // Trim the assignment, or start from the beginning in the case of a store. - const size_t i = S.find("="); + const size_t i = S.find('='); StringInstrMap.push_back({(i == std::string::npos) ? 
S : S.substr(i), II}); } @@ -198,8 +196,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, if (II->getOperand(i).isReg()) { if (!Register::isVirtualRegister(II->getOperand(i).getReg())) - if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) == - PhysRegDefs.end()) { + if (!llvm::is_contained(PhysRegDefs, II->getOperand(i).getReg())) { continue; } } @@ -276,9 +273,9 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, // Sort the defs for users of multiple defs lexographically. for (const auto &E : MultiUserLookup) { - auto UseI = - std::find_if(MBB->instr_begin(), MBB->instr_end(), - [&](MachineInstr &MI) -> bool { return &MI == E.second; }); + auto UseI = llvm::find_if(MBB->instrs(), [&](MachineInstr &MI) -> bool { + return &MI == E.second; + }); if (UseI == MBB->instr_end()) continue; diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 98af46dc4872..b86fd6b41318 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -212,11 +212,12 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("contract", MIToken::kw_contract) .Case("afn", MIToken::kw_afn) .Case("reassoc", MIToken::kw_reassoc) - .Case("nuw" , MIToken::kw_nuw) - .Case("nsw" , MIToken::kw_nsw) - .Case("exact" , MIToken::kw_exact) + .Case("nuw", MIToken::kw_nuw) + .Case("nsw", MIToken::kw_nsw) + .Case("exact", MIToken::kw_exact) .Case("nofpexcept", MIToken::kw_nofpexcept) .Case("debug-location", MIToken::kw_debug_location) + .Case("debug-instr-number", MIToken::kw_debug_instr_number) .Case("same_value", MIToken::kw_cfi_same_value) .Case("offset", MIToken::kw_cfi_offset) .Case("rel_offset", MIToken::kw_cfi_rel_offset) @@ -231,7 +232,8 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("undefined", MIToken::kw_cfi_undefined) .Case("register", MIToken::kw_cfi_register) .Case("window_save", MIToken::kw_cfi_window_save) - 
.Case("negate_ra_sign_state", MIToken::kw_cfi_aarch64_negate_ra_sign_state) + .Case("negate_ra_sign_state", + MIToken::kw_cfi_aarch64_negate_ra_sign_state) .Case("blockaddress", MIToken::kw_blockaddress) .Case("intrinsic", MIToken::kw_intrinsic) .Case("target-index", MIToken::kw_target_index) @@ -247,6 +249,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("dereferenceable", MIToken::kw_dereferenceable) .Case("invariant", MIToken::kw_invariant) .Case("align", MIToken::kw_align) + .Case("basealign", MIToken::kw_align) .Case("addrspace", MIToken::kw_addrspace) .Case("stack", MIToken::kw_stack) .Case("got", MIToken::kw_got) diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h index ef16da94d21b..452eda721331 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -74,6 +74,7 @@ struct MIToken { kw_exact, kw_nofpexcept, kw_debug_location, + kw_debug_instr_number, kw_cfi_same_value, kw_cfi_offset, kw_cfi_rel_offset, @@ -103,6 +104,7 @@ struct MIToken { kw_non_temporal, kw_invariant, kw_align, + kw_basealign, kw_addrspace, kw_stack, kw_got, diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index ded31cd08fb5..fe979b981886 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -369,10 +369,7 @@ static void initSlots2Values(const Function &F, const Value* PerFunctionMIParsingState::getIRValue(unsigned Slot) { if (Slots2Values.empty()) initSlots2Values(MF.getFunction(), Slots2Values); - auto ValueInfo = Slots2Values.find(Slot); - if (ValueInfo == Slots2Values.end()) - return nullptr; - return ValueInfo->second; + return Slots2Values.lookup(Slot); } namespace { @@ -984,6 +981,7 @@ bool MIParser::parse(MachineInstr *&MI) { Token.isNot(MIToken::kw_post_instr_symbol) && Token.isNot(MIToken::kw_heap_alloc_marker) && Token.isNot(MIToken::kw_debug_location) && + 
Token.isNot(MIToken::kw_debug_instr_number) && Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) { auto Loc = Token.location(); Optional<unsigned> TiedDefIdx; @@ -1014,6 +1012,19 @@ bool MIParser::parse(MachineInstr *&MI) { if (parseHeapAllocMarker(HeapAllocMarker)) return true; + unsigned InstrNum = 0; + if (Token.is(MIToken::kw_debug_instr_number)) { + lex(); + if (Token.isNot(MIToken::IntegerLiteral)) + return error("expected an integer literal after 'debug-instr-number'"); + if (getUnsigned(InstrNum)) + return true; + lex(); + // Lex past trailing comma if present. + if (Token.is(MIToken::comma)) + lex(); + } + DebugLoc DebugLocation; if (Token.is(MIToken::kw_debug_location)) { lex(); @@ -1070,6 +1081,8 @@ bool MIParser::parse(MachineInstr *&MI) { MI->setHeapAllocMarker(MF, HeapAllocMarker); if (!MemOperands.empty()) MI->setMemRefs(MF, MemOperands); + if (InstrNum) + MI->setDebugInstrNum(InstrNum); return false; } @@ -2713,7 +2726,7 @@ bool MIParser::parseOffset(int64_t &Offset) { } bool MIParser::parseAlignment(unsigned &Alignment) { - assert(Token.is(MIToken::kw_align)); + assert(Token.is(MIToken::kw_align) || Token.is(MIToken::kw_basealign)); lex(); if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned()) return error("expected an integer literal after 'align'"); @@ -3061,6 +3074,12 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { while (consumeIfPresent(MIToken::comma)) { switch (Token.kind()) { case MIToken::kw_align: + // align is printed if it is different than size. + if (parseAlignment(BaseAlignment)) + return true; + break; + case MIToken::kw_basealign: + // basealign is printed if it is different than align. 
if (parseAlignment(BaseAlignment)) return true; break; @@ -3153,10 +3172,7 @@ static void initSlots2BasicBlocks( static const BasicBlock *getIRBlockFromSlot( unsigned Slot, const DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) { - auto BlockInfo = Slots2BasicBlocks.find(Slot); - if (BlockInfo == Slots2BasicBlocks.end()) - return nullptr; - return BlockInfo->second; + return Slots2BasicBlocks.lookup(Slot); } const BasicBlock *MIParser::getIRBlock(unsigned Slot) { diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 2e0b0e745e9e..ffa9aeb21edb 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -161,6 +161,9 @@ private: SMRange SourceRange); void computeFunctionProperties(MachineFunction &MF); + + void setupDebugValueTracking(MachineFunction &MF, + PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF); }; } // end namespace llvm @@ -322,9 +325,14 @@ bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) { static bool isSSA(const MachineFunction &MF) { const MachineRegisterInfo &MRI = MF.getRegInfo(); for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - unsigned Reg = Register::index2VirtReg(I); + Register Reg = Register::index2VirtReg(I); if (!MRI.hasOneDef(Reg) && !MRI.def_empty(Reg)) return false; + + // Subregister defs are invalid in SSA. + const MachineOperand *RegDef = MRI.getOneDef(Reg); + if (RegDef && RegDef->getSubReg() != 0) + return false; } return true; } @@ -397,6 +405,23 @@ bool MIRParserImpl::initializeCallSiteInfo( return false; } +void MIRParserImpl::setupDebugValueTracking( + MachineFunction &MF, PerFunctionMIParsingState &PFS, + const yaml::MachineFunction &YamlMF) { + // Compute the value of the "next instruction number" field. 
+ unsigned MaxInstrNum = 0; + for (auto &MBB : MF) + for (auto &MI : MBB) + MaxInstrNum = std::max((unsigned)MI.peekDebugInstrNum(), MaxInstrNum); + MF.setDebugInstrNumberingCount(MaxInstrNum); + + // Load any substitutions. + for (auto &Sub : YamlMF.DebugValueSubstitutions) { + MF.makeDebugValueSubstitution(std::make_pair(Sub.SrcInst, Sub.SrcOp), + std::make_pair(Sub.DstInst, Sub.DstOp)); + } +} + bool MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, MachineFunction &MF) { @@ -446,10 +471,8 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, } // Check Basic Block Section Flags. if (MF.getTarget().getBBSectionsType() == BasicBlockSection::Labels) { - MF.createBBLabels(); MF.setBBSectionsType(BasicBlockSection::Labels); } else if (MF.hasBBSections()) { - MF.createBBLabels(); MF.assignBeginEndSections(); } PFS.SM = &SM; @@ -507,6 +530,8 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, if (initializeCallSiteInfo(PFS, YamlMF)) return false; + setupDebugValueTracking(MF, PFS, YamlMF); + MF.getSubtarget().mirFileLoaded(MF); MF.verify(); @@ -634,6 +659,12 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS, // Compute MachineRegisterInfo::UsedPhysRegMask for (const MachineBasicBlock &MBB : MF) { + // Make sure MRI knows about registers clobbered by unwinder. 
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (MBB.isEHPad()) + if (auto *RegMask = TRI->getCustomEHPadPreservedMask(MF)) + MRI.addPhysRegsUsedFromRegMask(RegMask); + for (const MachineInstr &MI : MBB) { for (const MachineOperand &MO : MI.operands()) { if (!MO.isRegMask()) diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index fa23df6288e9..eae174019b56 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -220,6 +220,10 @@ void MIRPrinter::print(const MachineFunction &MF) { convert(MST, YamlMF.FrameInfo, MF.getFrameInfo()); convertStackObjects(YamlMF, MF, MST); convertCallSiteObjects(YamlMF, MF, MST); + for (auto &Sub : MF.DebugValueSubstitutions) + YamlMF.DebugValueSubstitutions.push_back({Sub.first.first, Sub.first.second, + Sub.second.first, + Sub.second.second}); if (const auto *ConstantPool = MF.getConstantPool()) convert(YamlMF, *ConstantPool); if (const auto *JumpTableInfo = MF.getJumpTableInfo()) @@ -363,9 +367,17 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, ModuleSlotTracker &MST) { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + // Process fixed stack objects. + assert(YMF.FixedStackObjects.empty()); + SmallVector<int, 32> FixedStackObjectsIdx; + const int BeginIdx = MFI.getObjectIndexBegin(); + if (BeginIdx < 0) + FixedStackObjectsIdx.reserve(-BeginIdx); + unsigned ID = 0; - for (int I = MFI.getObjectIndexBegin(); I < 0; ++I, ++ID) { + for (int I = BeginIdx; I < 0; ++I, ++ID) { + FixedStackObjectsIdx.push_back(-1); // Fill index for possible dead. 
if (MFI.isDeadObjectIndex(I)) continue; @@ -380,14 +392,22 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, YamlObject.StackID = (TargetStackID::Value)MFI.getStackID(I); YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I); YamlObject.IsAliased = MFI.isAliasedObjectIndex(I); + // Save the ID' position in FixedStackObjects storage vector. + FixedStackObjectsIdx[ID] = YMF.FixedStackObjects.size(); YMF.FixedStackObjects.push_back(YamlObject); StackObjectOperandMapping.insert( std::make_pair(I, FrameIndexOperand::createFixed(ID))); } // Process ordinary stack objects. + assert(YMF.StackObjects.empty()); + SmallVector<unsigned, 32> StackObjectsIdx; + const int EndIdx = MFI.getObjectIndexEnd(); + if (EndIdx > 0) + StackObjectsIdx.reserve(EndIdx); ID = 0; - for (int I = 0, E = MFI.getObjectIndexEnd(); I < E; ++I, ++ID) { + for (int I = 0; I < EndIdx; ++I, ++ID) { + StackObjectsIdx.push_back(-1); // Fill index for possible dead. if (MFI.isDeadObjectIndex(I)) continue; @@ -395,7 +415,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, YamlObject.ID = ID; if (const auto *Alloca = MFI.getObjectAllocation(I)) YamlObject.Name.Value = std::string( - Alloca->hasName() ? Alloca->getName() : "<unnamed alloca>"); + Alloca->hasName() ? Alloca->getName() : ""); YamlObject.Type = MFI.isSpillSlotObjectIndex(I) ? yaml::MachineStackObject::SpillSlot : MFI.isVariableSizedObjectIndex(I) @@ -406,41 +426,42 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, YamlObject.Alignment = MFI.getObjectAlign(I); YamlObject.StackID = (TargetStackID::Value)MFI.getStackID(I); + // Save the ID' position in StackObjects storage vector. 
+ StackObjectsIdx[ID] = YMF.StackObjects.size(); YMF.StackObjects.push_back(YamlObject); StackObjectOperandMapping.insert(std::make_pair( I, FrameIndexOperand::create(YamlObject.Name.Value, ID))); } for (const auto &CSInfo : MFI.getCalleeSavedInfo()) { - if (!CSInfo.isSpilledToReg() && MFI.isDeadObjectIndex(CSInfo.getFrameIdx())) + const int FrameIdx = CSInfo.getFrameIdx(); + if (!CSInfo.isSpilledToReg() && MFI.isDeadObjectIndex(FrameIdx)) continue; yaml::StringValue Reg; printRegMIR(CSInfo.getReg(), Reg, TRI); if (!CSInfo.isSpilledToReg()) { - auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx()); - assert(StackObjectInfo != StackObjectOperandMapping.end() && + assert(FrameIdx >= MFI.getObjectIndexBegin() && + FrameIdx < MFI.getObjectIndexEnd() && "Invalid stack object index"); - const FrameIndexOperand &StackObject = StackObjectInfo->second; - if (StackObject.IsFixed) { - YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg; - YMF.FixedStackObjects[StackObject.ID].CalleeSavedRestored = - CSInfo.isRestored(); + if (FrameIdx < 0) { // Negative index means fixed objects. 
+ auto &Object = + YMF.FixedStackObjects + [FixedStackObjectsIdx[FrameIdx + MFI.getNumFixedObjects()]]; + Object.CalleeSavedRegister = Reg; + Object.CalleeSavedRestored = CSInfo.isRestored(); } else { - YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg; - YMF.StackObjects[StackObject.ID].CalleeSavedRestored = - CSInfo.isRestored(); + auto &Object = YMF.StackObjects[StackObjectsIdx[FrameIdx]]; + Object.CalleeSavedRegister = Reg; + Object.CalleeSavedRestored = CSInfo.isRestored(); } } } for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) { auto LocalObject = MFI.getLocalFrameObjectMap(I); - auto StackObjectInfo = StackObjectOperandMapping.find(LocalObject.first); - assert(StackObjectInfo != StackObjectOperandMapping.end() && - "Invalid stack object index"); - const FrameIndexOperand &StackObject = StackObjectInfo->second; - assert(!StackObject.IsFixed && "Expected a locally mapped stack object"); - YMF.StackObjects[StackObject.ID].LocalOffset = LocalObject.second; + assert(LocalObject.first >= 0 && "Expected a locally mapped stack object"); + YMF.StackObjects[StackObjectsIdx[LocalObject.first]].LocalOffset = + LocalObject.second; } // Print the stack object references in the frame information class after @@ -454,15 +475,16 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, // Print the debug variable information. for (const MachineFunction::VariableDbgInfo &DebugVar : MF.getVariableDbgInfo()) { - auto StackObjectInfo = StackObjectOperandMapping.find(DebugVar.Slot); - assert(StackObjectInfo != StackObjectOperandMapping.end() && + assert(DebugVar.Slot >= MFI.getObjectIndexBegin() && + DebugVar.Slot < MFI.getObjectIndexEnd() && "Invalid stack object index"); - const FrameIndexOperand &StackObject = StackObjectInfo->second; - if (StackObject.IsFixed) { - auto &Object = YMF.FixedStackObjects[StackObject.ID]; + if (DebugVar.Slot < 0) { // Negative index means fixed objects. 
+ auto &Object = + YMF.FixedStackObjects[FixedStackObjectsIdx[DebugVar.Slot + + MFI.getNumFixedObjects()]]; printStackObjectDbgInfo(DebugVar, Object, MST); } else { - auto &Object = YMF.StackObjects[StackObject.ID]; + auto &Object = YMF.StackObjects[StackObjectsIdx[DebugVar.Slot]]; printStackObjectDbgInfo(DebugVar, Object, MST); } } @@ -608,58 +630,10 @@ bool MIPrinter::canPredictSuccessors(const MachineBasicBlock &MBB) const { void MIPrinter::print(const MachineBasicBlock &MBB) { assert(MBB.getNumber() >= 0 && "Invalid MBB number"); - OS << "bb." << MBB.getNumber(); - bool HasAttributes = false; - if (const auto *BB = MBB.getBasicBlock()) { - if (BB->hasName()) { - OS << "." << BB->getName(); - } else { - HasAttributes = true; - OS << " ("; - int Slot = MST.getLocalSlot(BB); - if (Slot == -1) - OS << "<ir-block badref>"; - else - OS << (Twine("%ir-block.") + Twine(Slot)).str(); - } - } - if (MBB.hasAddressTaken()) { - OS << (HasAttributes ? ", " : " ("); - OS << "address-taken"; - HasAttributes = true; - } - if (MBB.isEHPad()) { - OS << (HasAttributes ? ", " : " ("); - OS << "landing-pad"; - HasAttributes = true; - } - if (MBB.isEHFuncletEntry()) { - OS << (HasAttributes ? ", " : " ("); - OS << "ehfunclet-entry"; - HasAttributes = true; - } - if (MBB.getAlignment() != Align(1)) { - OS << (HasAttributes ? ", " : " ("); - OS << "align " << MBB.getAlignment().value(); - HasAttributes = true; - } - if (MBB.getSectionID() != MBBSectionID(0)) { - OS << (HasAttributes ? 
", " : " ("); - OS << "bbsections "; - switch (MBB.getSectionID().Type) { - case MBBSectionID::SectionType::Exception: - OS << "Exception"; - break; - case MBBSectionID::SectionType::Cold: - OS << "Cold"; - break; - default: - OS << MBB.getSectionID().Number; - } - HasAttributes = true; - } - if (HasAttributes) - OS << ")"; + MBB.printName(OS, + MachineBasicBlock::PrintNameIr | + MachineBasicBlock::PrintNameAttributes, + &MST); OS << ":\n"; bool HasLineAttributes = false; @@ -818,6 +792,13 @@ void MIPrinter::print(const MachineInstr &MI) { NeedComma = true; } + if (auto Num = MI.peekDebugInstrNum()) { + if (NeedComma) + OS << ','; + OS << " debug-instr-number " << Num; + NeedComma = true; + } + if (PrintLocations) { if (const DebugLoc &DL = MI.getDebugLoc()) { if (NeedComma) diff --git a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp index 54441301d65b..3d4f66f31174 100644 --- a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp +++ b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp @@ -8,6 +8,7 @@ #include "MIRVRegNamerUtils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineStableHash.h" #include "llvm/IR/Constants.h" #include "llvm/Support/Debug.h" @@ -15,6 +16,11 @@ using namespace llvm; #define DEBUG_TYPE "mir-vregnamer-utils" +static cl::opt<bool> + UseStableNamerHash("mir-vreg-namer-use-stable-hash", cl::init(false), + cl::Hidden, + cl::desc("Use Stable Hashing for MIR VReg Renaming")); + using VRegRenameMap = std::map<unsigned, unsigned>; bool VRegRenamer::doVRegRenaming(const VRegRenameMap &VRM) { @@ -52,6 +58,14 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) { std::string S; raw_string_ostream OS(S); + if (UseStableNamerHash) { + auto Hash = stableHashValue(MI, /* HashVRegs */ true, + /* HashConstantPoolIndices */ true, + /* HashMemOperands */ true); + assert(Hash && "Expected non-zero Hash"); + return std::to_string(Hash).substr(0, 5); + } + // Gets a hashable artifact from a given 
MachineOperand (ie an unsigned). auto GetHashableMO = [this](const MachineOperand &MO) -> unsigned { switch (MO.getType()) { diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 2d4b60435d96..b4187af02975 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -60,38 +60,25 @@ MCSymbol *MachineBasicBlock::getSymbol() const { if (!CachedMCSymbol) { const MachineFunction *MF = getParent(); MCContext &Ctx = MF->getContext(); - auto Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix(); - assert(getNumber() >= 0 && "cannot get label for unreachable MBB"); - - // We emit a non-temporary symbol for every basic block if we have BBLabels - // or -- with basic block sections -- when a basic block begins a section. - // With basic block symbols, we use a unary encoding which can - // compress the symbol names significantly. For basic block sections where - // this block is the first in a cluster, we use a non-temp descriptive name. - // Otherwise we fall back to use temp label. - if (MF->hasBBLabels()) { - auto Iter = MF->getBBSectionsSymbolPrefix().begin(); - if (getNumber() < 0 || - getNumber() >= (int)MF->getBBSectionsSymbolPrefix().size()) - report_fatal_error("Unreachable MBB: " + Twine(getNumber())); - // The basic blocks for function foo are named a.BB.foo, aa.BB.foo, and - // so on. - std::string Prefix(Iter + 1, Iter + getNumber() + 1); - std::reverse(Prefix.begin(), Prefix.end()); - CachedMCSymbol = - Ctx.getOrCreateSymbol(Twine(Prefix) + ".BB." + Twine(MF->getName())); - } else if (MF->hasBBSections() && isBeginSection()) { + // We emit a non-temporary symbol -- with a descriptive name -- if it begins + // a section (with basic block sections). Otherwise we fall back to use temp + // label. 
+ if (MF->hasBBSections() && isBeginSection()) { SmallString<5> Suffix; if (SectionID == MBBSectionID::ColdSectionID) { Suffix += ".cold"; } else if (SectionID == MBBSectionID::ExceptionSectionID) { Suffix += ".eh"; } else { - Suffix += "." + std::to_string(SectionID.Number); + // For symbols that represent basic block sections, we add ".__part." to + // allow tools like symbolizers to know that this represents a part of + // the original function. + Suffix = (Suffix + Twine(".__part.") + Twine(SectionID.Number)).str(); } CachedMCSymbol = Ctx.getOrCreateSymbol(MF->getName() + Suffix); } else { + const StringRef Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix(); CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber())); @@ -100,6 +87,17 @@ MCSymbol *MachineBasicBlock::getSymbol() const { return CachedMCSymbol; } +MCSymbol *MachineBasicBlock::getEndSymbol() const { + if (!CachedEndMCSymbol) { + const MachineFunction *MF = getParent(); + MCContext &Ctx = MF->getContext(); + auto Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix(); + CachedEndMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB_END" + + Twine(MF->getFunctionNumber()) + + "_" + Twine(getNumber())); + } + return CachedEndMCSymbol; +} raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) { MBB.print(OS); @@ -271,6 +269,10 @@ bool MachineBasicBlock::hasEHPadSuccessor() const { return false; } +bool MachineBasicBlock::isEntryBlock() const { + return getParent()->begin() == getIterator(); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void MachineBasicBlock::dump() const { print(dbgs()); @@ -338,39 +340,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, if (Indexes && PrintSlotIndexes) OS << Indexes->getMBBStartIdx(this) << '\t'; - OS << "bb." << getNumber(); - bool HasAttributes = false; - if (const auto *BB = getBasicBlock()) { - if (BB->hasName()) { - OS << "." 
<< BB->getName(); - } else { - HasAttributes = true; - OS << " ("; - int Slot = MST.getLocalSlot(BB); - if (Slot == -1) - OS << "<ir-block badref>"; - else - OS << (Twine("%ir-block.") + Twine(Slot)).str(); - } - } - - if (hasAddressTaken()) { - OS << (HasAttributes ? ", " : " ("); - OS << "address-taken"; - HasAttributes = true; - } - if (isEHPad()) { - OS << (HasAttributes ? ", " : " ("); - OS << "landing-pad"; - HasAttributes = true; - } - if (getAlignment() != Align(1)) { - OS << (HasAttributes ? ", " : " ("); - OS << "align " << Log2(getAlignment()); - HasAttributes = true; - } - if (HasAttributes) - OS << ")"; + printName(OS, PrintNameIr | PrintNameAttributes, &MST); OS << ":\n"; const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); @@ -383,11 +353,9 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, if (Indexes) OS << '\t'; // Don't indent(2), align with previous line attributes. OS << "; predecessors: "; - for (auto I = pred_begin(), E = pred_end(); I != E; ++I) { - if (I != pred_begin()) - OS << ", "; - OS << printMBBReference(**I); - } + ListSeparator LS; + for (auto *Pred : predecessors()) + OS << LS << printMBBReference(*Pred); OS << '\n'; HasLineAttributes = true; } @@ -396,10 +364,9 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, if (Indexes) OS << '\t'; // Print the successors OS.indent(2) << "successors: "; + ListSeparator LS; for (auto I = succ_begin(), E = succ_end(); I != E; ++I) { - if (I != succ_begin()) - OS << ", "; - OS << printMBBReference(**I); + OS << LS << printMBBReference(**I); if (!Probs.empty()) OS << '(' << format("0x%08" PRIx32, getSuccProbability(I).getNumerator()) @@ -408,11 +375,10 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, if (!Probs.empty() && IsStandalone) { // Print human readable probabilities as comments. 
OS << "; "; + ListSeparator LS; for (auto I = succ_begin(), E = succ_end(); I != E; ++I) { const BranchProbability &BP = getSuccProbability(I); - if (I != succ_begin()) - OS << ", "; - OS << printMBBReference(**I) << '(' + OS << LS << printMBBReference(**I) << '(' << format("%.2f%%", rint(((double)BP.getNumerator() / BP.getDenominator()) * 100.0 * 100.0) / @@ -429,12 +395,9 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, if (Indexes) OS << '\t'; OS.indent(2) << "liveins: "; - bool First = true; + ListSeparator LS; for (const auto &LI : liveins()) { - if (!First) - OS << ", "; - First = false; - OS << printReg(LI.PhysReg, TRI); + OS << LS << printReg(LI.PhysReg, TRI); if (!LI.LaneMask.all()) OS << ":0x" << PrintLaneMask(LI.LaneMask); } @@ -478,9 +441,99 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, } } +/// Print the basic block's name as: +/// +/// bb.{number}[.{ir-name}] [(attributes...)] +/// +/// The {ir-name} is only printed when the \ref PrintNameIr flag is passed +/// (which is the default). If the IR block has no name, it is identified +/// numerically using the attribute syntax as "(%ir-block.{ir-slot})". +/// +/// When the \ref PrintNameAttributes flag is passed, additional attributes +/// of the block are printed when set. +/// +/// \param printNameFlags Combination of \ref PrintNameFlag flags indicating +/// the parts to print. +/// \param moduleSlotTracker Optional ModuleSlotTracker. This method will +/// incorporate its own tracker when necessary to +/// determine the block's IR name. +void MachineBasicBlock::printName(raw_ostream &os, unsigned printNameFlags, + ModuleSlotTracker *moduleSlotTracker) const { + os << "bb." << getNumber(); + bool hasAttributes = false; + + if (printNameFlags & PrintNameIr) { + if (const auto *bb = getBasicBlock()) { + if (bb->hasName()) { + os << '.' 
<< bb->getName(); + } else { + hasAttributes = true; + os << " ("; + + int slot = -1; + + if (moduleSlotTracker) { + slot = moduleSlotTracker->getLocalSlot(bb); + } else if (bb->getParent()) { + ModuleSlotTracker tmpTracker(bb->getModule(), false); + tmpTracker.incorporateFunction(*bb->getParent()); + slot = tmpTracker.getLocalSlot(bb); + } + + if (slot == -1) + os << "<ir-block badref>"; + else + os << (Twine("%ir-block.") + Twine(slot)).str(); + } + } + } + + if (printNameFlags & PrintNameAttributes) { + if (hasAddressTaken()) { + os << (hasAttributes ? ", " : " ("); + os << "address-taken"; + hasAttributes = true; + } + if (isEHPad()) { + os << (hasAttributes ? ", " : " ("); + os << "landing-pad"; + hasAttributes = true; + } + if (isEHFuncletEntry()) { + os << (hasAttributes ? ", " : " ("); + os << "ehfunclet-entry"; + hasAttributes = true; + } + if (getAlignment() != Align(1)) { + os << (hasAttributes ? ", " : " ("); + os << "align " << getAlignment().value(); + hasAttributes = true; + } + if (getSectionID() != MBBSectionID(0)) { + os << (hasAttributes ? ", " : " ("); + os << "bbsections "; + switch (getSectionID().Type) { + case MBBSectionID::SectionType::Exception: + os << "Exception"; + break; + case MBBSectionID::SectionType::Cold: + os << "Cold"; + break; + default: + os << getSectionID().Number; + } + hasAttributes = true; + } + } + + if (hasAttributes) + os << ')'; +} + void MachineBasicBlock::printAsOperand(raw_ostream &OS, bool /*PrintType*/) const { - OS << "%bb." 
<< getNumber(); + OS << '%'; + printName(OS, 0); } void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) { @@ -530,7 +583,7 @@ void MachineBasicBlock::sortUniqueLiveIns() { Register MachineBasicBlock::addLiveIn(MCRegister PhysReg, const TargetRegisterClass *RC) { assert(getParent() && "MBB must be inserted in function"); - assert(PhysReg.isPhysical() && "Expected physreg"); + assert(Register::isPhysicalRegister(PhysReg) && "Expected physreg"); assert(RC && "Register class is required"); assert((isEHPad() || this == &getParent()->front()) && "Only the entry block and landing pads can have physreg live ins"); @@ -696,7 +749,7 @@ void MachineBasicBlock::splitSuccessor(MachineBasicBlock *Old, bool NormalizeSuccProbs) { succ_iterator OldI = llvm::find(successors(), Old); assert(OldI != succ_end() && "Old is not a successor of this block!"); - assert(llvm::find(successors(), New) == succ_end() && + assert(!llvm::is_contained(successors(), New) && "New is already a successor of this block!"); // Add a new successor with equal probability as the original one. Note @@ -775,7 +828,7 @@ void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old, void MachineBasicBlock::copySuccessor(MachineBasicBlock *Orig, succ_iterator I) { - if (Orig->Probs.empty()) + if (!Orig->Probs.empty()) addSuccessor(*I, Orig->getSuccProbability(I)); else addSuccessorWithoutProb(*I); @@ -891,6 +944,47 @@ bool MachineBasicBlock::canFallThrough() { return getFallThrough() != nullptr; } +MachineBasicBlock *MachineBasicBlock::splitAt(MachineInstr &MI, + bool UpdateLiveIns, + LiveIntervals *LIS) { + MachineBasicBlock::iterator SplitPoint(&MI); + ++SplitPoint; + + if (SplitPoint == end()) { + // Don't bother with a new block. + return this; + } + + MachineFunction *MF = getParent(); + + LivePhysRegs LiveRegs; + if (UpdateLiveIns) { + // Make sure we add any physregs we define in the block as liveins to the + // new block. 
+ MachineBasicBlock::iterator Prev(&MI); + LiveRegs.init(*MF->getSubtarget().getRegisterInfo()); + LiveRegs.addLiveOuts(*this); + for (auto I = rbegin(), E = Prev.getReverse(); I != E; ++I) + LiveRegs.stepBackward(*I); + } + + MachineBasicBlock *SplitBB = MF->CreateMachineBasicBlock(getBasicBlock()); + + MF->insert(++MachineFunction::iterator(this), SplitBB); + SplitBB->splice(SplitBB->begin(), this, SplitPoint, end()); + + SplitBB->transferSuccessorsAndUpdatePHIs(this); + addSuccessor(SplitBB); + + if (UpdateLiveIns) + addLiveIns(*SplitBB, LiveRegs); + + if (LIS) + LIS->insertMBBInMaps(SplitBB); + + return SplitBB; +} + MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( MachineBasicBlock *Succ, Pass &P, std::vector<SparseBitVector<>> *LiveInSets) { diff --git a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 1168b01a835f..54e0a14e0555 100644 --- a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -241,16 +241,21 @@ MachineBlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const { return MBFI ? 
MBFI->getProfileCountFromFreq(F, Freq) : None; } -bool -MachineBlockFrequencyInfo::isIrrLoopHeader(const MachineBasicBlock *MBB) { +bool MachineBlockFrequencyInfo::isIrrLoopHeader( + const MachineBasicBlock *MBB) const { assert(MBFI && "Expected analysis to be available"); return MBFI->isIrrLoopHeader(MBB); } -void MachineBlockFrequencyInfo::setBlockFreq(const MachineBasicBlock *MBB, - uint64_t Freq) { +void MachineBlockFrequencyInfo::onEdgeSplit( + const MachineBasicBlock &NewPredecessor, + const MachineBasicBlock &NewSuccessor, + const MachineBranchProbabilityInfo &MBPI) { assert(MBFI && "Expected analysis to be available"); - MBFI->setBlockFreq(MBB, Freq); + auto NewSuccFreq = MBFI->getBlockFreq(&NewPredecessor) * + MBPI.getEdgeProbability(&NewPredecessor, &NewSuccessor); + + MBFI->setBlockFreq(&NewSuccessor, NewSuccFreq.getFrequency()); } const MachineFunction *MachineBlockFrequencyInfo::getFunction() const { diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 783d22fafee9..048baa460e49 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -177,6 +177,14 @@ static cl::opt<unsigned> TailDupPlacementPenalty( cl::init(2), cl::Hidden); +// Heuristic for tail duplication if profile count is used in cost model. +static cl::opt<unsigned> TailDupProfilePercentThreshold( + "tail-dup-profile-percent-threshold", + cl::desc("If profile count information is used in tail duplication cost " + "model, the gained fall through number from tail duplication " + "should be at least this percent of hot count."), + cl::init(50), cl::Hidden); + // Heuristic for triangle chains. static cl::opt<unsigned> TriangleChainCount( "triangle-chain-count", @@ -377,6 +385,10 @@ class MachineBlockPlacement : public MachineFunctionPass { /// Partial tail duplication threshold. BlockFrequency DupThreshold; + /// True: use block profile count to compute tail duplication cost. 
+ /// False: use block frequency to compute tail duplication cost. + bool UseProfileCount; + /// Allocator and owner of BlockChain structures. /// /// We build BlockChains lazily while processing the loop structure of @@ -402,6 +414,19 @@ class MachineBlockPlacement : public MachineFunctionPass { SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits; #endif + /// Get block profile count or frequency according to UseProfileCount. + /// The return value is used to model tail duplication cost. + BlockFrequency getBlockCountOrFrequency(const MachineBasicBlock *BB) { + if (UseProfileCount) { + auto Count = MBFI->getBlockProfileCount(BB); + if (Count) + return *Count; + else + return 0; + } else + return MBFI->getBlockFreq(BB); + } + /// Scale the DupThreshold according to basic block size. BlockFrequency scaleThreshold(MachineBasicBlock *BB); void initDupThreshold(); @@ -424,10 +449,6 @@ class MachineBlockPlacement : public MachineFunctionPass { const MachineBasicBlock *BB, const BlockChain &Chain, const BlockFilterSet *BlockFilter, SmallVector<MachineBasicBlock *, 4> &Successors); - bool shouldPredBlockBeOutlined( - const MachineBasicBlock *BB, const MachineBasicBlock *Succ, - const BlockChain &Chain, const BlockFilterSet *BlockFilter, - BranchProbability SuccProb, BranchProbability HotProb); bool isBestSuccessor(MachineBasicBlock *BB, MachineBasicBlock *Pred, BlockFilterSet *BlockFilter); void findDuplicateCandidates(SmallVectorImpl<MachineBasicBlock *> &Candidates, @@ -1652,11 +1673,9 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( // worklist of already placed entries. // FIXME: If this shows up on profiles, it could be folded (at the cost of // some code complexity) into the loop below. 
- WorkList.erase(llvm::remove_if(WorkList, - [&](MachineBasicBlock *BB) { - return BlockToChain.lookup(BB) == &Chain; - }), - WorkList.end()); + llvm::erase_if(WorkList, [&](MachineBasicBlock *BB) { + return BlockToChain.lookup(BB) == &Chain; + }); if (WorkList.empty()) return nullptr; @@ -2287,6 +2306,10 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, if (Bottom == ExitingBB) return; + // The entry block should always be the first BB in a function. + if (Top->isEntryBlock()) + return; + bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet); // If the header has viable fallthrough, check whether the current loop @@ -2361,6 +2384,11 @@ void MachineBlockPlacement::rotateLoopWithProfile( BlockChain &LoopChain, const MachineLoop &L, const BlockFilterSet &LoopBlockSet) { auto RotationPos = LoopChain.end(); + MachineBasicBlock *ChainHeaderBB = *LoopChain.begin(); + + // The entry block should always be the first BB in a function. + if (ChainHeaderBB->isEntryBlock()) + return; BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency(); @@ -2379,7 +2407,6 @@ void MachineBlockPlacement::rotateLoopWithProfile( // chain head is not the loop header. As we only consider natural loops with // single header, this computation can be done only once. 
BlockFrequency HeaderFallThroughCost(0); - MachineBasicBlock *ChainHeaderBB = *LoopChain.begin(); for (auto *Pred : ChainHeaderBB->predecessors()) { BlockChain *PredChain = BlockToChain[Pred]; if (!LoopBlockSet.count(Pred) && @@ -2516,10 +2543,14 @@ MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) { MBPI->getEdgeProbability(LoopPred, L.getHeader()); for (MachineBasicBlock *LoopBB : L.getBlocks()) { + if (LoopBlockSet.count(LoopBB)) + continue; auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency(); if (Freq == 0 || LoopFreq.getFrequency() / Freq > LoopToColdBlockRatio) continue; - LoopBlockSet.insert(LoopBB); + BlockChain *Chain = BlockToChain[LoopBB]; + for (MachineBasicBlock *ChainBB : *Chain) + LoopBlockSet.insert(ChainBB); } } else LoopBlockSet.insert(L.block_begin(), L.block_end()); @@ -3011,12 +3042,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock( SmallVectorImpl<MachineBasicBlock *> &RemoveList = BlockWorkList; if (RemBB->isEHPad()) RemoveList = EHPadWorkList; - RemoveList.erase( - llvm::remove_if(RemoveList, - [RemBB](MachineBasicBlock *BB) { - return BB == RemBB; - }), - RemoveList.end()); + llvm::erase_value(RemoveList, RemBB); } // Handle the filter set @@ -3120,7 +3146,7 @@ bool MachineBlockPlacement::isBestSuccessor(MachineBasicBlock *BB, // Compute the number of reduced taken branches if Pred falls through to BB // instead of another successor. Then compare it with threshold. 
- BlockFrequency PredFreq = MBFI->getBlockFreq(Pred); + BlockFrequency PredFreq = getBlockCountOrFrequency(Pred); BlockFrequency Gain = PredFreq * (BBProb - BestProb); return Gain > scaleThreshold(BB); } @@ -3134,8 +3160,8 @@ void MachineBlockPlacement::findDuplicateCandidates( MachineBasicBlock *Fallthrough = nullptr; BranchProbability DefaultBranchProb = BranchProbability::getZero(); BlockFrequency BBDupThreshold(scaleThreshold(BB)); - SmallVector<MachineBasicBlock *, 8> Preds(BB->pred_begin(), BB->pred_end()); - SmallVector<MachineBasicBlock *, 8> Succs(BB->succ_begin(), BB->succ_end()); + SmallVector<MachineBasicBlock *, 8> Preds(BB->predecessors()); + SmallVector<MachineBasicBlock *, 8> Succs(BB->successors()); // Sort for highest frequency. auto CmpSucc = [&](MachineBasicBlock *A, MachineBasicBlock *B) { @@ -3194,7 +3220,7 @@ void MachineBlockPlacement::findDuplicateCandidates( // it. But it can beneficially fall through to BB, and duplicate BB into other // predecessors. for (MachineBasicBlock *Pred : Preds) { - BlockFrequency PredFreq = MBFI->getBlockFreq(Pred); + BlockFrequency PredFreq = getBlockCountOrFrequency(Pred); if (!TailDup.canTailDuplicate(BB, Pred)) { // BB can't be duplicated into Pred, but it is possible to be layout @@ -3243,6 +3269,15 @@ void MachineBlockPlacement::initDupThreshold() { if (!F->getFunction().hasProfileData()) return; + // We prefer to use profile count. + uint64_t HotThreshold = PSI->getOrCompHotCountThreshold(); + if (HotThreshold != UINT64_MAX) { + UseProfileCount = true; + DupThreshold = HotThreshold * TailDupProfilePercentThreshold / 100; + return; + } + + // Profile count is not available, we can use block frequency instead. BlockFrequency MaxFreq = 0; for (MachineBasicBlock &MBB : *F) { BlockFrequency Freq = MBFI->getBlockFreq(&MBB); @@ -3250,10 +3285,9 @@ MaxFreq = Freq; } - // FIXME: we may use profile count instead of frequency, - // and need more fine tuning. 
BranchProbability ThresholdProb(TailDupPlacementPenalty, 100); DupThreshold = MaxFreq * ThresholdProb; + UseProfileCount = false; } bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { @@ -3326,8 +3360,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { // No tail merging opportunities if the block number is less than four. if (MF.size() > 3 && EnableTailMerge) { unsigned TailMergeSize = TailDupSize + 1; - BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI, - *MBPI, PSI, TailMergeSize); + BranchFolder BF(/*DefaultEnableTailMerge=*/true, /*CommonHoist=*/false, + *MBFI, *MBPI, PSI, TailMergeSize); if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), MLI, /*AfterPlacement=*/true)) { diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp index 09531276bc10..199fe2dc6454 100644 --- a/llvm/lib/CodeGen/MachineCSE.cpp +++ b/llvm/lib/CodeGen/MachineCSE.cpp @@ -35,6 +35,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegister.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Allocator.h" @@ -115,18 +116,18 @@ namespace { bool PerformTrivialCopyPropagation(MachineInstr *MI, MachineBasicBlock *MBB); - bool isPhysDefTriviallyDead(unsigned Reg, + bool isPhysDefTriviallyDead(MCRegister Reg, MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator E) const; bool hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, - SmallSet<unsigned, 8> &PhysRefs, + SmallSet<MCRegister, 8> &PhysRefs, PhysDefVector &PhysDefs, bool &PhysUseDef) const; bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, - SmallSet<unsigned, 8> &PhysRefs, + SmallSet<MCRegister, 8> &PhysRefs, PhysDefVector &PhysDefs, bool &NonLocal) const; bool isCSECandidate(MachineInstr *MI); - bool isProfitableToCSE(unsigned CSReg, unsigned Reg, + bool 
isProfitableToCSE(Register CSReg, Register Reg, MachineBasicBlock *CSBB, MachineInstr *MI); void EnterScope(MachineBasicBlock *MBB); void ExitScope(MachineBasicBlock *MBB); @@ -218,10 +219,9 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI, return Changed; } -bool -MachineCSE::isPhysDefTriviallyDead(unsigned Reg, - MachineBasicBlock::const_iterator I, - MachineBasicBlock::const_iterator E) const { +bool MachineCSE::isPhysDefTriviallyDead( + MCRegister Reg, MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator E) const { unsigned LookAheadLeft = LookAheadLimit; while (LookAheadLeft) { // Skip over dbg_value's. @@ -255,7 +255,7 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, return false; } -static bool isCallerPreservedOrConstPhysReg(unsigned Reg, +static bool isCallerPreservedOrConstPhysReg(MCRegister Reg, const MachineFunction &MF, const TargetRegisterInfo &TRI) { // MachineRegisterInfo::isConstantPhysReg directly called by @@ -276,7 +276,7 @@ static bool isCallerPreservedOrConstPhysReg(unsigned Reg, /// instruction does not uses a physical register. bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, - SmallSet<unsigned, 8> &PhysRefs, + SmallSet<MCRegister, 8> &PhysRefs, PhysDefVector &PhysDefs, bool &PhysUseDef) const { // First, add all uses to PhysRefs. @@ -289,7 +289,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, if (Register::isVirtualRegister(Reg)) continue; // Reading either caller preserved or constant physregs is ok. - if (!isCallerPreservedOrConstPhysReg(Reg, *MI->getMF(), *TRI)) + if (!isCallerPreservedOrConstPhysReg(Reg.asMCReg(), *MI->getMF(), *TRI)) for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) PhysRefs.insert(*AI); } @@ -308,12 +308,12 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, if (Register::isVirtualRegister(Reg)) continue; // Check against PhysRefs even if the def is "dead". 
- if (PhysRefs.count(Reg)) + if (PhysRefs.count(Reg.asMCReg())) PhysUseDef = true; // If the def is dead, it's ok. But the def may not marked "dead". That's // common since this pass is run before livevariables. We can scan // forward a few instructions and check if it is obviously dead. - if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end())) + if (!MO.isDead() && !isPhysDefTriviallyDead(Reg.asMCReg(), I, MBB->end())) PhysDefs.push_back(std::make_pair(MOP.index(), Reg)); } @@ -327,7 +327,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, } bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, - SmallSet<unsigned, 8> &PhysRefs, + SmallSet<MCRegister, 8> &PhysRefs, PhysDefVector &PhysDefs, bool &NonLocal) const { // For now conservatively returns false if the common subexpression is @@ -382,7 +382,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, Register MOReg = MO.getReg(); if (Register::isVirtualRegister(MOReg)) continue; - if (PhysRefs.count(MOReg)) + if (PhysRefs.count(MOReg.asMCReg())) return false; } @@ -429,7 +429,7 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) { /// isProfitableToCSE - Return true if it's profitable to eliminate MI with a /// common expression that defines Reg. CSBB is basic block where CSReg is /// defined. -bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, +bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg, MachineBasicBlock *CSBB, MachineInstr *MI) { // FIXME: Heuristics that works around the lack the live range splitting. @@ -556,7 +556,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) { // used, then it's not safe to replace it with a common subexpression. // It's also not safe if the instruction uses physical registers. 
bool CrossMBBPhysDef = false; - SmallSet<unsigned, 8> PhysRefs; + SmallSet<MCRegister, 8> PhysRefs; PhysDefVector PhysDefs; bool PhysUseDef = false; if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, @@ -640,7 +640,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) { // Actually perform the elimination. if (DoCSE) { - for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) { + for (const std::pair<unsigned, unsigned> &CSEPair : CSEPairs) { unsigned OldReg = CSEPair.first; unsigned NewReg = CSEPair.second; // OldReg may have been unused but is used now, clear the Dead flag @@ -656,7 +656,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) { // we should make sure it is not dead at CSMI. for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate) CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false); - for (auto PhysDef : PhysDefs) + for (const auto &PhysDef : PhysDefs) if (!MI->getOperand(PhysDef.first).isDead()) CSMI->getOperand(PhysDef.first).setIsDead(false); @@ -748,8 +748,7 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { Node = WorkList.pop_back_val(); Scopes.push_back(Node); OpenChildren[Node] = Node->getNumChildren(); - for (MachineDomTreeNode *Child : Node->children()) - WorkList.push_back(Child); + append_range(WorkList, Node->children()); } while (!WorkList.empty()); // Now perform CSE. 
@@ -777,11 +776,11 @@ bool MachineCSE::isPRECandidate(MachineInstr *MI) { MI->getNumExplicitDefs() != 1) return false; - for (auto def : MI->defs()) + for (const auto &def : MI->defs()) if (!Register::isVirtualRegister(def.getReg())) return false; - for (auto use : MI->uses()) + for (const auto &use : MI->uses()) if (use.isReg() && !Register::isVirtualRegister(use.getReg())) return false; @@ -861,8 +860,7 @@ bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) { BBs.push_back(DT->getRootNode()); do { auto Node = BBs.pop_back_val(); - for (MachineDomTreeNode *Child : Node->children()) - BBs.push_back(Child); + append_range(BBs, Node->children()); MachineBasicBlock *MBB = Node->getBlock(); Changed |= ProcessBlockPRE(DT, MBB); diff --git a/llvm/lib/CodeGen/MachineCheckDebugify.cpp b/llvm/lib/CodeGen/MachineCheckDebugify.cpp new file mode 100644 index 000000000000..bd7f0f862947 --- /dev/null +++ b/llvm/lib/CodeGen/MachineCheckDebugify.cpp @@ -0,0 +1,126 @@ +//===- MachineCheckDebugify.cpp - Check debug info ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file This checks debug info after mir-debugify (+ pass-to-test). Currently +/// it simply checks the integrity of line info in DILocation and +/// DILocalVariable which mir-debugifiy generated before. 
+//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/Debugify.h" + +#define DEBUG_TYPE "mir-check-debugify" + +using namespace llvm; + +namespace { + +struct CheckDebugMachineModule : public ModulePass { + bool runOnModule(Module &M) override { + MachineModuleInfo &MMI = + getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); + + NamedMDNode *NMD = M.getNamedMetadata("llvm.mir.debugify"); + if (!NMD) { + errs() << "WARNING: Please run mir-debugify to generate " + "llvm.mir.debugify metadata first.\n"; + return false; + } + + auto getDebugifyOperand = [&](unsigned Idx) -> unsigned { + return mdconst::extract<ConstantInt>(NMD->getOperand(Idx)->getOperand(0)) + ->getZExtValue(); + }; + assert(NMD->getNumOperands() == 2 && + "llvm.mir.debugify should have exactly 2 operands!"); + unsigned NumLines = getDebugifyOperand(0); + unsigned NumVars = getDebugifyOperand(1); + BitVector MissingLines{NumLines, true}; + BitVector MissingVars{NumVars, true}; + + for (Function &F : M.functions()) { + MachineFunction *MF = MMI.getMachineFunction(F); + if (!MF) + continue; + for (MachineBasicBlock &MBB : *MF) { + // Find missing lines. + // TODO: Avoid meta instructions other than dbg_val. + for (MachineInstr &MI : MBB) { + if (MI.isDebugValue()) + continue; + const DebugLoc DL = MI.getDebugLoc(); + if (DL && DL.getLine() != 0) { + MissingLines.reset(DL.getLine() - 1); + continue; + } + + if (!DL) { + errs() << "WARNING: Instruction with empty DebugLoc in function "; + errs() << F.getName() << " --"; + MI.print(errs()); + } + } + + // Find missing variables. + // TODO: Handle DBG_INSTR_REF which is under an experimental option now. 
+ for (MachineInstr &MI : MBB) { + if (!MI.isDebugValue()) + continue; + const DILocalVariable *LocalVar = MI.getDebugVariable(); + unsigned Var = ~0U; + + (void)to_integer(LocalVar->getName(), Var, 10); + assert(Var <= NumVars && "Unexpected name for DILocalVariable"); + MissingVars.reset(Var - 1); + } + } + } + + bool Fail = false; + for (unsigned Idx : MissingLines.set_bits()) { + errs() << "WARNING: Missing line " << Idx + 1 << "\n"; + Fail = true; + } + + for (unsigned Idx : MissingVars.set_bits()) { + errs() << "WARNING: Missing variable " << Idx + 1 << "\n"; + Fail = true; + } + errs() << "Machine IR debug info check: "; + errs() << (Fail ? "FAIL" : "PASS") << "\n"; + + return false; + } + + CheckDebugMachineModule() : ModulePass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineModuleInfoWrapperPass>(); + AU.addPreserved<MachineModuleInfoWrapperPass>(); + AU.setPreservesCFG(); + } + + static char ID; // Pass identification. +}; +char CheckDebugMachineModule::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS_BEGIN(CheckDebugMachineModule, DEBUG_TYPE, + "Machine Check Debug Module", false, false) +INITIALIZE_PASS_END(CheckDebugMachineModule, DEBUG_TYPE, + "Machine Check Debug Module", false, false) + +ModulePass *llvm::createCheckDebugMachineModulePass() { + return new CheckDebugMachineModule(); +} diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp index f241435a0482..e2b6cfe55c16 100644 --- a/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/llvm/lib/CodeGen/MachineCombiner.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" @@ -72,6 +73,7 @@ class MachineCombiner : public MachineFunctionPass { 
MachineTraceMetrics::Ensemble *MinInstr; MachineBlockFrequencyInfo *MBFI; ProfileSummaryInfo *PSI; + RegisterClassInfo RegClassInfo; TargetSchedModel TSchedModel; @@ -103,6 +105,10 @@ private: SmallVectorImpl<MachineInstr *> &DelInstrs, DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, MachineCombinerPattern Pattern, bool SlackIsAccurate); + bool reduceRegisterPressure(MachineInstr &Root, MachineBasicBlock *MBB, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, + MachineCombinerPattern Pattern); bool preservesResourceLen(MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl<MachineInstr *> &InsInstrs, @@ -257,8 +263,9 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, /// The combiner's goal may differ based on which pattern it is attempting /// to optimize. enum class CombinerObjective { - MustReduceDepth, // The data dependency chain must be improved. - Default // The critical path must not be lengthened. + MustReduceDepth, // The data dependency chain must be improved. + MustReduceRegisterPressure, // The register pressure must be reduced. + Default // The critical path must not be lengthened. 
}; static CombinerObjective getCombinerObjective(MachineCombinerPattern P) { @@ -272,6 +279,9 @@ static CombinerObjective getCombinerObjective(MachineCombinerPattern P) { case MachineCombinerPattern::REASSOC_XY_AMM_BMM: case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: return CombinerObjective::MustReduceDepth; + case MachineCombinerPattern::REASSOC_XY_BCA: + case MachineCombinerPattern::REASSOC_XY_BAC: + return CombinerObjective::MustReduceRegisterPressure; default: return CombinerObjective::Default; } @@ -300,6 +310,18 @@ std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences( return {NewRootLatency, RootLatency}; } +bool MachineCombiner::reduceRegisterPressure( + MachineInstr &Root, MachineBasicBlock *MBB, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, + MachineCombinerPattern Pattern) { + // FIXME: for now, we don't do any check for the register pressure patterns. + // We treat them as always profitable. But we can do better if we make + // RegPressureTracker class be aware of TIE attribute. Then we can get an + // accurate compare of register pressure with DelInstrs or InsInstrs. + return true; +} + /// The DAGCombine code sequence ends in MI (Machine Instruction) Root. /// The new code sequence ends in MI NewRoot. 
A necessary condition for the new /// sequence to replace the old sequence is that it cannot lengthen the critical @@ -438,6 +460,8 @@ bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize, /// \param DelInstrs instruction to delete from \p MBB /// \param MinInstr is a pointer to the machine trace information /// \param RegUnits set of live registers, needed to compute instruction depths +/// \param TII is target instruction info, used to call target hook +/// \param Pattern is used to call target hook finalizeInsInstrs /// \param IncrementalUpdate if true, compute instruction depths incrementally, /// otherwise invalidate the trace static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI, @@ -445,7 +469,18 @@ static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI, SmallVector<MachineInstr *, 16> DelInstrs, MachineTraceMetrics::Ensemble *MinInstr, SparseSet<LiveRegUnit> &RegUnits, + const TargetInstrInfo *TII, + MachineCombinerPattern Pattern, bool IncrementalUpdate) { + // If we want to fix up some placeholder for some target, do it now. + // We need this because in genAlternativeCodeSequence, we have not decided the + // better pattern InsInstrs or DelInstrs, so we don't want generate some + // sideeffect to the function. For example we need to delay the constant pool + // entry creation here after InsInstrs is selected as better pattern. + // Otherwise the constant pool entry created for InsInstrs will not be deleted + // even if InsInstrs is not the better pattern. 
+ TII->finalizeInsInstrs(MI, Pattern, InsInstrs); + for (auto *InstrPtr : InsInstrs) MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr); @@ -522,6 +557,9 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { bool OptForSize = OptSize || llvm::shouldOptimizeForSize(MBB, PSI, MBFI); + bool DoRegPressureReduce = + TII->shouldReduceRegisterPressure(MBB, &RegClassInfo); + while (BlockIter != MBB->end()) { auto &MI = *BlockIter++; SmallVector<MachineCombinerPattern, 16> Patterns; @@ -552,7 +590,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { // machine-combiner-verify-pattern-order is enabled, all patterns are // checked to ensure later patterns do not provide better latency savings. - if (!TII->getMachineCombinerPatterns(MI, Patterns)) + if (!TII->getMachineCombinerPatterns(MI, Patterns, DoRegPressureReduce)) continue; if (VerifyPatternOrder) @@ -588,12 +626,33 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { if (ML && TII->isThroughputPattern(P)) SubstituteAlways = true; - if (IncrementalUpdate) { + if (IncrementalUpdate && LastUpdate != BlockIter) { // Update depths since the last incremental update. MinInstr->updateDepths(LastUpdate, BlockIter, RegUnits); LastUpdate = BlockIter; } + if (DoRegPressureReduce && + getCombinerObjective(P) == + CombinerObjective::MustReduceRegisterPressure) { + if (MBB->size() > inc_threshold) { + // Use incremental depth updates for basic blocks above threshold + IncrementalUpdate = true; + LastUpdate = BlockIter; + } + if (reduceRegisterPressure(MI, MBB, InsInstrs, DelInstrs, P)) { + // Replace DelInstrs with InsInstrs. + insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr, + RegUnits, TII, P, IncrementalUpdate); + Changed |= true; + + // Go back to previous instruction as it may have ILP reassociation + // opportunity. 
+ BlockIter--; + break; + } + } + // Substitute when we optimize for codesize and the new sequence has // fewer instructions OR // the new sequence neither lengthens the critical path nor increases @@ -601,7 +660,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount, OptForSize)) { insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr, - RegUnits, IncrementalUpdate); + RegUnits, TII, P, IncrementalUpdate); // Eagerly stop after the first pattern fires. Changed = true; break; @@ -624,7 +683,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { } insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr, - RegUnits, IncrementalUpdate); + RegUnits, TII, P, IncrementalUpdate); // Eagerly stop after the first pattern fires. Changed = true; @@ -660,6 +719,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { nullptr; MinInstr = nullptr; OptSize = MF.getFunction().hasOptSize(); + RegClassInfo.runOnMachineFunction(MF); LLVM_DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); if (!TII->useMachineCombiner()) { diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp index 70d6dcc2e3e2..d8659c1c7853 100644 --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -88,18 +88,18 @@ namespace { class CopyTracker { struct CopyInfo { MachineInstr *MI; - SmallVector<unsigned, 4> DefRegs; + SmallVector<MCRegister, 4> DefRegs; bool Avail; }; - DenseMap<unsigned, CopyInfo> Copies; + DenseMap<MCRegister, CopyInfo> Copies; public: /// Mark all of the given registers and their subregisters as unavailable for /// copying. 
- void markRegsUnavailable(ArrayRef<unsigned> Regs, + void markRegsUnavailable(ArrayRef<MCRegister> Regs, const TargetRegisterInfo &TRI) { - for (unsigned Reg : Regs) { + for (MCRegister Reg : Regs) { // Source of copy is no longer available for propagation. for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) { auto CI = Copies.find(*RUI); @@ -110,30 +110,30 @@ public: } /// Remove register from copy maps. - void invalidateRegister(unsigned Reg, const TargetRegisterInfo &TRI) { + void invalidateRegister(MCRegister Reg, const TargetRegisterInfo &TRI) { // Since Reg might be a subreg of some registers, only invalidate Reg is not // enough. We have to find the COPY defines Reg or registers defined by Reg // and invalidate all of them. - SmallSet<unsigned, 8> RegsToInvalidate; + SmallSet<MCRegister, 8> RegsToInvalidate; RegsToInvalidate.insert(Reg); for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) { auto I = Copies.find(*RUI); if (I != Copies.end()) { if (MachineInstr *MI = I->second.MI) { - RegsToInvalidate.insert(MI->getOperand(0).getReg()); - RegsToInvalidate.insert(MI->getOperand(1).getReg()); + RegsToInvalidate.insert(MI->getOperand(0).getReg().asMCReg()); + RegsToInvalidate.insert(MI->getOperand(1).getReg().asMCReg()); } RegsToInvalidate.insert(I->second.DefRegs.begin(), I->second.DefRegs.end()); } } - for (unsigned InvalidReg : RegsToInvalidate) + for (MCRegister InvalidReg : RegsToInvalidate) for (MCRegUnitIterator RUI(InvalidReg, &TRI); RUI.isValid(); ++RUI) Copies.erase(*RUI); } /// Clobber a single register, removing it from the tracker's copy maps. - void clobberRegister(unsigned Reg, const TargetRegisterInfo &TRI) { + void clobberRegister(MCRegister Reg, const TargetRegisterInfo &TRI) { for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) { auto I = Copies.find(*RUI); if (I != Copies.end()) { @@ -143,7 +143,7 @@ public: // When we clobber the destination of a copy, we need to clobber the // whole register it defined. 
if (MachineInstr *MI = I->second.MI) - markRegsUnavailable({MI->getOperand(0).getReg()}, TRI); + markRegsUnavailable({MI->getOperand(0).getReg().asMCReg()}, TRI); // Now we can erase the copy. Copies.erase(I); } @@ -154,8 +154,8 @@ public: void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI) { assert(MI->isCopy() && "Tracking non-copy?"); - Register Def = MI->getOperand(0).getReg(); - Register Src = MI->getOperand(1).getReg(); + MCRegister Def = MI->getOperand(0).getReg().asMCReg(); + MCRegister Src = MI->getOperand(1).getReg().asMCReg(); // Remember Def is defined by the copy. for (MCRegUnitIterator RUI(Def, &TRI); RUI.isValid(); ++RUI) @@ -175,8 +175,9 @@ public: return !Copies.empty(); } - MachineInstr *findCopyForUnit(unsigned RegUnit, const TargetRegisterInfo &TRI, - bool MustBeAvailable = false) { + MachineInstr *findCopyForUnit(MCRegister RegUnit, + const TargetRegisterInfo &TRI, + bool MustBeAvailable = false) { auto CI = Copies.find(RegUnit); if (CI == Copies.end()) return nullptr; @@ -185,8 +186,8 @@ public: return CI->second.MI; } - MachineInstr *findCopyDefViaUnit(unsigned RegUnit, - const TargetRegisterInfo &TRI) { + MachineInstr *findCopyDefViaUnit(MCRegister RegUnit, + const TargetRegisterInfo &TRI) { auto CI = Copies.find(RegUnit); if (CI == Copies.end()) return nullptr; @@ -196,7 +197,7 @@ public: return findCopyForUnit(*RUI, TRI, true); } - MachineInstr *findAvailBackwardCopy(MachineInstr &I, unsigned Reg, + MachineInstr *findAvailBackwardCopy(MachineInstr &I, MCRegister Reg, const TargetRegisterInfo &TRI) { MCRegUnitIterator RUI(Reg, &TRI); MachineInstr *AvailCopy = findCopyDefViaUnit(*RUI, TRI); @@ -217,7 +218,7 @@ public: return AvailCopy; } - MachineInstr *findAvailCopy(MachineInstr &DestCopy, unsigned Reg, + MachineInstr *findAvailCopy(MachineInstr &DestCopy, MCRegister Reg, const TargetRegisterInfo &TRI) { // We check the first RegUnit here, since we'll only be interested in the // copy if it copies the entire register anyway. 
@@ -274,12 +275,10 @@ public: private: typedef enum { DebugUse = false, RegularUse = true } DebugType; - void ClobberRegister(unsigned Reg); - void ReadRegister(unsigned Reg, MachineInstr &Reader, - DebugType DT); + void ReadRegister(MCRegister Reg, MachineInstr &Reader, DebugType DT); void ForwardCopyPropagateBlock(MachineBasicBlock &MBB); void BackwardCopyPropagateBlock(MachineBasicBlock &MBB); - bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def); + bool eraseIfRedundant(MachineInstr &Copy, MCRegister Src, MCRegister Def); void forwardUses(MachineInstr &MI); void propagateDefs(MachineInstr &MI); bool isForwardableRegClassCopy(const MachineInstr &Copy, @@ -288,6 +287,8 @@ private: const MachineInstr &UseI, unsigned UseIdx); bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use); + bool hasOverlappingMultipleDef(const MachineInstr &MI, + const MachineOperand &MODef, Register Def); /// Candidates for deletion. SmallSetVector<MachineInstr *, 8> MaybeDeadCopies; @@ -309,7 +310,7 @@ char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID; INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE, "Machine Copy Propagation Pass", false, false) -void MachineCopyPropagation::ReadRegister(unsigned Reg, MachineInstr &Reader, +void MachineCopyPropagation::ReadRegister(MCRegister Reg, MachineInstr &Reader, DebugType DT) { // If 'Reg' is defined by a copy, the copy is no longer a candidate // for elimination. If a copy is "read" by a debug user, record the user @@ -332,14 +333,12 @@ void MachineCopyPropagation::ReadRegister(unsigned Reg, MachineInstr &Reader, /// PreviousCopy. e.g. 
/// isNopCopy("ecx = COPY eax", AX, CX) == true /// isNopCopy("ecx = COPY eax", AH, CL) == false -static bool isNopCopy(const MachineInstr &PreviousCopy, unsigned Src, - unsigned Def, const TargetRegisterInfo *TRI) { - Register PreviousSrc = PreviousCopy.getOperand(1).getReg(); - Register PreviousDef = PreviousCopy.getOperand(0).getReg(); - if (Src == PreviousSrc) { - assert(Def == PreviousDef); +static bool isNopCopy(const MachineInstr &PreviousCopy, MCRegister Src, + MCRegister Def, const TargetRegisterInfo *TRI) { + MCRegister PreviousSrc = PreviousCopy.getOperand(1).getReg().asMCReg(); + MCRegister PreviousDef = PreviousCopy.getOperand(0).getReg().asMCReg(); + if (Src == PreviousSrc && Def == PreviousDef) return true; - } if (!TRI->isSubRegister(PreviousSrc, Src)) return false; unsigned SubIdx = TRI->getSubRegIndex(PreviousSrc, Src); @@ -349,8 +348,8 @@ static bool isNopCopy(const MachineInstr &PreviousCopy, unsigned Src, /// Remove instruction \p Copy if there exists a previous copy that copies the /// register \p Src to the register \p Def; This may happen indirectly by /// copying the super registers. -bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src, - unsigned Def) { +bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, + MCRegister Src, MCRegister Def) { // Avoid eliminating a copy from/to a reserved registers as we cannot predict // the value (Example: The sparc zero register is writable but stays zero). if (MRI->isReserved(Src) || MRI->isReserved(Def)) @@ -461,6 +460,21 @@ bool MachineCopyPropagation::hasImplicitOverlap(const MachineInstr &MI, return false; } +/// For an MI that has multiple definitions, check whether \p MI has +/// a definition that overlaps with another of its definitions. +/// For example, on ARM: umull r9, r9, lr, r0 +/// The umull instruction is unpredictable unless RdHi and RdLo are different. 
+bool MachineCopyPropagation::hasOverlappingMultipleDef( + const MachineInstr &MI, const MachineOperand &MODef, Register Def) { + for (const MachineOperand &MIDef : MI.defs()) { + if ((&MIDef != &MODef) && MIDef.isReg() && + TRI->regsOverlap(Def, MIDef.getReg())) + return true; + } + + return false; +} + /// Look for available copies whose destination register is used by \p MI and /// replace the use in \p MI with the copy's source register. void MachineCopyPropagation::forwardUses(MachineInstr &MI) { @@ -491,7 +505,8 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) { if (!MOUse.isRenamable()) continue; - MachineInstr *Copy = Tracker.findAvailCopy(MI, MOUse.getReg(), *TRI); + MachineInstr *Copy = + Tracker.findAvailCopy(MI, MOUse.getReg().asMCReg(), *TRI); if (!Copy) continue; @@ -563,13 +578,13 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { // Analyze copies (which don't overlap themselves). if (MI->isCopy() && !TRI->regsOverlap(MI->getOperand(0).getReg(), MI->getOperand(1).getReg())) { - Register Def = MI->getOperand(0).getReg(); - Register Src = MI->getOperand(1).getReg(); - - assert(!Register::isVirtualRegister(Def) && - !Register::isVirtualRegister(Src) && + assert(MI->getOperand(0).getReg().isPhysical() && + MI->getOperand(1).getReg().isPhysical() && "MachineCopyPropagation should be run after register allocation!"); + MCRegister Def = MI->getOperand(0).getReg().asMCReg(); + MCRegister Src = MI->getOperand(1).getReg().asMCReg(); + // The two copies cancel out and the source of the first copy // hasn't been overridden, eliminate the second one. e.g. // %ecx = COPY %eax @@ -591,7 +606,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { forwardUses(*MI); // Src may have been changed by forwardUses() - Src = MI->getOperand(1).getReg(); + Src = MI->getOperand(1).getReg().asMCReg(); // If Src is defined by a previous copy, the previous copy cannot be // eliminated. 
@@ -599,7 +614,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { for (const MachineOperand &MO : MI->implicit_operands()) { if (!MO.isReg() || !MO.readsReg()) continue; - Register Reg = MO.getReg(); + MCRegister Reg = MO.getReg().asMCReg(); if (!Reg) continue; ReadRegister(Reg, *MI, RegularUse); @@ -622,7 +637,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { for (const MachineOperand &MO : MI->implicit_operands()) { if (!MO.isReg() || !MO.isDef()) continue; - Register Reg = MO.getReg(); + MCRegister Reg = MO.getReg().asMCReg(); if (!Reg) continue; Tracker.clobberRegister(Reg, *TRI); @@ -636,7 +651,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { // Clobber any earlyclobber regs first. for (const MachineOperand &MO : MI->operands()) if (MO.isReg() && MO.isEarlyClobber()) { - Register Reg = MO.getReg(); + MCRegister Reg = MO.getReg().asMCReg(); // If we have a tied earlyclobber, that means it is also read by this // instruction, so we need to make sure we don't remove it as dead // later. @@ -648,7 +663,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { forwardUses(*MI); // Not a copy. - SmallVector<unsigned, 2> Defs; + SmallVector<Register, 2> Defs; const MachineOperand *RegMask = nullptr; for (const MachineOperand &MO : MI->operands()) { if (MO.isRegMask()) @@ -659,14 +674,14 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { if (!Reg) continue; - assert(!Register::isVirtualRegister(Reg) && + assert(!Reg.isVirtual() && "MachineCopyPropagation should be run after register allocation!"); if (MO.isDef() && !MO.isEarlyClobber()) { - Defs.push_back(Reg); + Defs.push_back(Reg.asMCReg()); continue; } else if (MO.readsReg()) - ReadRegister(Reg, *MI, MO.isDebug() ? DebugUse : RegularUse); + ReadRegister(Reg.asMCReg(), *MI, MO.isDebug() ? 
DebugUse : RegularUse); } // The instruction has a register mask operand which means that it clobbers @@ -678,7 +693,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { MaybeDeadCopies.begin(); DI != MaybeDeadCopies.end();) { MachineInstr *MaybeDead = *DI; - Register Reg = MaybeDead->getOperand(0).getReg(); + MCRegister Reg = MaybeDead->getOperand(0).getReg().asMCReg(); assert(!MRI->isReserved(Reg)); if (!RegMask->clobbersPhysReg(Reg)) { @@ -703,7 +718,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { } // Any previous copy definition or reading the Defs is no longer available. - for (unsigned Reg : Defs) + for (MCRegister Reg : Defs) Tracker.clobberRegister(Reg, *TRI); } @@ -718,7 +733,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { // Update matching debug values, if any. assert(MaybeDead->isCopy()); - unsigned SrcReg = MaybeDead->getOperand(1).getReg(); + Register SrcReg = MaybeDead->getOperand(1).getReg(); MRI->updateDbgUsersToReg(SrcReg, CopyDbgUsers[MaybeDead]); MaybeDead->eraseFromParent(); @@ -770,7 +785,7 @@ void MachineCopyPropagation::propagateDefs(MachineInstr &MI) { continue; MachineInstr *Copy = - Tracker.findAvailBackwardCopy(MI, MODef.getReg(), *TRI); + Tracker.findAvailBackwardCopy(MI, MODef.getReg().asMCReg(), *TRI); if (!Copy) continue; @@ -786,6 +801,9 @@ void MachineCopyPropagation::propagateDefs(MachineInstr &MI) { if (hasImplicitOverlap(MI, MODef)) continue; + if (hasOverlappingMultipleDef(MI, MODef, Def)) + continue; + LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MODef.getReg(), TRI) << "\n with " << printReg(Def, TRI) << "\n in " << MI << " from " << *Copy); @@ -815,8 +833,8 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( !TRI->regsOverlap(MI->getOperand(0).getReg(), MI->getOperand(1).getReg())) { - Register Def = MI->getOperand(0).getReg(); - Register Src = MI->getOperand(1).getReg(); + MCRegister Def = 
MI->getOperand(0).getReg().asMCReg(); + MCRegister Src = MI->getOperand(1).getReg().asMCReg(); // Unlike forward cp, we don't invoke propagateDefs here, // just let forward cp do COPY-to-COPY propagation. @@ -831,7 +849,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( // Invalidate any earlyclobber regs first. for (const MachineOperand &MO : MI->operands()) if (MO.isReg() && MO.isEarlyClobber()) { - Register Reg = MO.getReg(); + MCRegister Reg = MO.getReg().asMCReg(); if (!Reg) continue; Tracker.invalidateRegister(Reg, *TRI); @@ -846,10 +864,10 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( continue; if (MO.isDef()) - Tracker.invalidateRegister(MO.getReg(), *TRI); + Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI); if (MO.readsReg()) - Tracker.invalidateRegister(MO.getReg(), *TRI); + Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI); } } diff --git a/llvm/lib/CodeGen/MachineDebugify.cpp b/llvm/lib/CodeGen/MachineDebugify.cpp index bf57ec0e8c28..599a81847592 100644 --- a/llvm/lib/CodeGen/MachineDebugify.cpp +++ b/llvm/lib/CodeGen/MachineDebugify.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -89,10 +90,11 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI, // Do this by introducing debug uses of each register definition. If that is // not possible (e.g. we have a phi or a meta instruction), emit a constant. 
uint64_t NextImm = 0; + SmallSet<DILocalVariable *, 16> VarSet; const MCInstrDesc &DbgValDesc = TII.get(TargetOpcode::DBG_VALUE); for (MachineBasicBlock &MBB : MF) { MachineBasicBlock::iterator FirstNonPHIIt = MBB.getFirstNonPHI(); - for (auto I = MBB.begin(), E = MBB.end(); I != E; ) { + for (auto I = MBB.begin(), E = MBB.end(); I != E;) { MachineInstr &MI = *I; ++I; @@ -113,6 +115,7 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI, Line = EarliestDVI->getDebugLoc().getLine(); DILocalVariable *LocalVar = Line2Var[Line]; assert(LocalVar && "No variable for current line?"); + VarSet.insert(LocalVar); // Emit DBG_VALUEs for register definitions. SmallVector<MachineOperand *, 4> RegDefs; @@ -132,6 +135,33 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI, } } + // Here we save the number of lines and variables into "llvm.mir.debugify". + // It is useful for mir-check-debugify. + NamedMDNode *NMD = M.getNamedMetadata("llvm.mir.debugify"); + IntegerType *Int32Ty = Type::getInt32Ty(Ctx); + if (!NMD) { + NMD = M.getOrInsertNamedMetadata("llvm.mir.debugify"); + auto addDebugifyOperand = [&](unsigned N) { + NMD->addOperand(MDNode::get( + Ctx, ValueAsMetadata::getConstant(ConstantInt::get(Int32Ty, N)))); + }; + // Add number of lines. + addDebugifyOperand(NextLine - 1); + // Add number of variables. + addDebugifyOperand(VarSet.size()); + } else { + assert(NMD->getNumOperands() == 2 && + "llvm.mir.debugify should have exactly 2 operands!"); + auto setDebugifyOperand = [&](unsigned Idx, unsigned N) { + NMD->setOperand(Idx, MDNode::get(Ctx, ValueAsMetadata::getConstant( + ConstantInt::get(Int32Ty, N)))); + }; + // Set number of lines. + setDebugifyOperand(0, NextLine - 1); + // Set number of variables. 
+ setDebugifyOperand(1, VarSet.size()); + } + return true; } diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 6d45f08804ed..3f44578b1a2c 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -273,20 +273,7 @@ getOrCreateJumpTableInfo(unsigned EntryKind) { } DenormalMode MachineFunction::getDenormalMode(const fltSemantics &FPType) const { - if (&FPType == &APFloat::IEEEsingle()) { - Attribute Attr = F.getFnAttribute("denormal-fp-math-f32"); - StringRef Val = Attr.getValueAsString(); - if (!Val.empty()) - return parseDenormalFPAttribute(Val); - - // If the f32 variant of the attribute isn't specified, try to use the - // generic one. - } - - // TODO: Should probably avoid the connection to the IR and store directly - // in the MachineFunction. - Attribute Attr = F.getFnAttribute("denormal-fp-math"); - return parseDenormalFPAttribute(Attr.getValueAsString()); + return F.getDenormalMode(FPType); } /// Should we be emitting segmented stack stuff for the function @@ -341,33 +328,6 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { MBBNumbering.resize(BlockNo); } -/// This is used with -fbasic-block-sections or -fbasicblock-labels option. -/// A unary encoding of basic block labels is done to keep ".strtab" sizes -/// small. -void MachineFunction::createBBLabels() { - const TargetInstrInfo *TII = getSubtarget().getInstrInfo(); - this->BBSectionsSymbolPrefix.resize(getNumBlockIDs(), 'a'); - for (auto MBBI = begin(), E = end(); MBBI != E; ++MBBI) { - assert( - (MBBI->getNumber() >= 0 && MBBI->getNumber() < (int)getNumBlockIDs()) && - "BasicBlock number was out of range!"); - // 'a' - Normal block. - // 'r' - Return block. - // 'l' - Landing Pad. - // 'L' - Return and landing pad. 
- bool isEHPad = MBBI->isEHPad(); - bool isRetBlock = MBBI->isReturnBlock() && !TII->isTailCall(MBBI->back()); - char type = 'a'; - if (isEHPad && isRetBlock) - type = 'L'; - else if (isEHPad) - type = 'l'; - else if (isRetBlock) - type = 'r'; - BBSectionsSymbolPrefix[MBBI->getNumber()] = type; - } -} - /// This method iterates over the basic blocks and assigns their IsBeginSection /// and IsEndSection fields. This must be called after MBB layout is finalized /// and the SectionID's are assigned to MBBs. @@ -387,9 +347,9 @@ void MachineFunction::assignBeginEndSections() { /// Allocate a new MachineInstr. Use this instead of `new MachineInstr'. MachineInstr *MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID, const DebugLoc &DL, - bool NoImp) { + bool NoImplicit) { return new (InstructionRecycler.Allocate<MachineInstr>(Allocator)) - MachineInstr(*this, MCID, DL, NoImp); + MachineInstr(*this, MCID, DL, NoImplicit); } /// Create a new MachineInstr which is a copy of the 'Orig' instruction, @@ -460,6 +420,9 @@ MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) { void MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { assert(MBB->getParent() == this && "MBB parent mismatch!"); + // Clean up any references to MBB in jump tables before deleting it. 
+ if (JumpTableInfo) + JumpTableInfo->RemoveMBBFromJumpTables(MBB); MBB->~MachineBasicBlock(); BasicBlockRecycler.Deallocate(Allocator, MBB); } @@ -474,6 +437,13 @@ MachineMemOperand *MachineFunction::getMachineMemOperand( SSID, Ordering, FailureOrdering); } +MachineMemOperand *MachineFunction::getMachineMemOperand( + const MachineMemOperand *MMO, MachinePointerInfo &PtrInfo, uint64_t Size) { + return new (Allocator) MachineMemOperand( + PtrInfo, MMO->getFlags(), Size, MMO->getBaseAlign(), AAMDNodes(), nullptr, + MMO->getSyncScopeID(), MMO->getOrdering(), MMO->getFailureOrdering()); +} + MachineMemOperand * MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, int64_t Offset, uint64_t Size) { @@ -485,9 +455,11 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, ? commonAlignment(MMO->getBaseAlign(), Offset) : MMO->getBaseAlign(); + // Do not preserve ranges, since we don't necessarily know what the high bits + // are anymore. return new (Allocator) MachineMemOperand(PtrInfo.getWithOffset(Offset), MMO->getFlags(), Size, - Alignment, AAMDNodes(), nullptr, MMO->getSyncScopeID(), + Alignment, MMO->getAAInfo(), nullptr, MMO->getSyncScopeID(), MMO->getOrdering(), MMO->getFailureOrdering()); } @@ -896,7 +868,7 @@ try_next:; // Add the new filter. 
int FilterID = -(1 + FilterIds.size()); FilterIds.reserve(FilterIds.size() + TyIds.size() + 1); - FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end()); + llvm::append_range(FilterIds, TyIds); FilterEnds.push_back(FilterIds.size()); FilterIds.push_back(0); // terminator return FilterID; @@ -974,6 +946,46 @@ void MachineFunction::moveCallSiteInfo(const MachineInstr *Old, CallSitesInfo[New] = CSInfo; } +void MachineFunction::setDebugInstrNumberingCount(unsigned Num) { + DebugInstrNumberingCount = Num; +} + +void MachineFunction::makeDebugValueSubstitution(DebugInstrOperandPair A, + DebugInstrOperandPair B) { + auto Result = DebugValueSubstitutions.insert(std::make_pair(A, B)); + (void)Result; + assert(Result.second && "Substitution for an already substituted value?"); +} + +void MachineFunction::substituteDebugValuesForInst(const MachineInstr &Old, + MachineInstr &New, + unsigned MaxOperand) { + // If the Old instruction wasn't tracked at all, there is no work to do. + unsigned OldInstrNum = Old.peekDebugInstrNum(); + if (!OldInstrNum) + return; + + // Iterate over all operands looking for defs to create substitutions for. + // Avoid creating new instr numbers unless we create a new substitution. + // While this has no functional effect, it risks confusing someone reading + // MIR output. + // Examine all the operands, or the first N specified by the caller. 
+ MaxOperand = std::min(MaxOperand, Old.getNumOperands()); + for (unsigned int I = 0; I < Old.getNumOperands(); ++I) { + const auto &OldMO = Old.getOperand(I); + auto &NewMO = New.getOperand(I); + (void)NewMO; + + if (!OldMO.isReg() || !OldMO.isDef()) + continue; + assert(NewMO.isDef()); + + unsigned NewInstrNum = New.getDebugInstrNum(); + makeDebugValueSubstitution(std::make_pair(OldInstrNum, I), + std::make_pair(NewInstrNum, I)); + } +} + /// \} //===----------------------------------------------------------------------===// @@ -1038,6 +1050,17 @@ bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old, return MadeChange; } +/// If MBB is present in any jump tables, remove it. +bool MachineJumpTableInfo::RemoveMBBFromJumpTables(MachineBasicBlock *MBB) { + bool MadeChange = false; + for (MachineJumpTableEntry &JTE : JumpTables) { + auto removeBeginItr = std::remove(JTE.MBBs.begin(), JTE.MBBs.end(), MBB); + MadeChange |= (removeBeginItr != JTE.MBBs.end()); + JTE.MBBs.erase(removeBeginItr, JTE.MBBs.end()); + } + return MadeChange; +} + /// If Old is a target of the jump tables, update the jump table to branch to /// New instead. 
bool MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx, @@ -1084,10 +1107,14 @@ Printable llvm::printJumpTableEntryReference(unsigned Idx) { void MachineConstantPoolValue::anchor() {} -Type *MachineConstantPoolEntry::getType() const { +unsigned MachineConstantPoolValue::getSizeInBytes(const DataLayout &DL) const { + return DL.getTypeAllocSize(Ty); +} + +unsigned MachineConstantPoolEntry::getSizeInBytes(const DataLayout &DL) const { if (isMachineConstantPoolEntry()) - return Val.MachineCPVal->getType(); - return Val.ConstVal->getType(); + return Val.MachineCPVal->getSizeInBytes(DL); + return DL.getTypeAllocSize(Val.ConstVal->getType()); } bool MachineConstantPoolEntry::needsRelocation() const { @@ -1100,7 +1127,7 @@ SectionKind MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const { if (needsRelocation()) return SectionKind::getReadOnlyWithRel(); - switch (DL->getTypeAllocSize(getType())) { + switch (getSizeInBytes(*DL)) { case 4: return SectionKind::getMergeableConst4(); case 8: diff --git a/llvm/lib/CodeGen/MachineFunctionPass.cpp b/llvm/lib/CodeGen/MachineFunctionPass.cpp index 03149aa7db4a..16cde1f601f9 100644 --- a/llvm/lib/CodeGen/MachineFunctionPass.cpp +++ b/llvm/lib/CodeGen/MachineFunctionPass.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/DominanceFrontier.h" #include "llvm/Analysis/GlobalsModRef.h" diff --git a/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp index 3645a4e3466b..c31c065b1976 100644 --- a/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -14,7 +14,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/IR/IRPrintingPasses.h" 
+#include "llvm/IR/PrintPasses.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -44,7 +44,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass { } bool runOnMachineFunction(MachineFunction &MF) override { - if (!llvm::isFunctionInPrintList(MF.getName())) + if (!isFunctionInPrintList(MF.getName())) return false; OS << "# " << Banner << ":\n"; MF.print(OS, getAnalysisIfAvailable<SlotIndexes>()); diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp new file mode 100644 index 000000000000..483809a8ed96 --- /dev/null +++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp @@ -0,0 +1,155 @@ +//===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// \file +// Uses profile information to split out cold blocks. +// +// This pass splits out cold machine basic blocks from the parent function. This +// implementation leverages the basic block section framework. Blocks marked +// cold by this pass are grouped together in a separate section prefixed with +// ".text.unlikely.*". The linker can then group these together as a cold +// section. The split part of the function is a contiguous region identified by +// the symbol "foo.cold". Grouping all cold blocks across functions together +// decreases fragmentation and improves icache and itlb utilization. Note that +// the overall changes to the binary size are negligible; only a small number of +// additional jump instructions may be introduced. 
+// +// For the original RFC of this pass please see +// https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/CodeGen/BasicBlockSectionUtils.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +// FIXME: This cutoff value is CPU dependent and should be moved to +// TargetTransformInfo once we consider enabling this on other platforms. +// The value is expressed as a ProfileSummaryInfo integer percentile cutoff. +// Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split. +// The default was empirically determined to be optimal when considering cutoff +// values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on +// Intel CPUs. +static cl::opt<unsigned> + PercentileCutoff("mfs-psi-cutoff", + cl::desc("Percentile profile summary cutoff used to " + "determine cold blocks. 
Unused if set to zero."), + cl::init(999950), cl::Hidden); + +static cl::opt<unsigned> ColdCountThreshold( + "mfs-count-threshold", + cl::desc( + "Minimum number of times a block must be executed to be retained."), + cl::init(1), cl::Hidden); + +namespace { + +class MachineFunctionSplitter : public MachineFunctionPass { +public: + static char ID; + MachineFunctionSplitter() : MachineFunctionPass(ID) { + initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "Machine Function Splitter Transformation"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnMachineFunction(MachineFunction &F) override; +}; +} // end anonymous namespace + +static bool isColdBlock(MachineBasicBlock &MBB, + const MachineBlockFrequencyInfo *MBFI, + ProfileSummaryInfo *PSI) { + Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB); + if (!Count.hasValue()) + return true; + + if (PercentileCutoff > 0) { + return PSI->isColdCountNthPercentile(PercentileCutoff, *Count); + } + return (*Count < ColdCountThreshold); +} + +bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { + // TODO: We only target functions with profile data. Static information may + // also be considered but we don't see performance improvements yet. + if (!MF.getFunction().hasProfileData()) + return false; + + // TODO: We don't split functions where a section attribute has been set + // since the split part may not be placed in a contiguous region. It may also + // be more beneficial to augment the linker to ensure contiguous layout of + // split functions within the same section as specified by the attribute. + if (!MF.getFunction().getSection().empty()) + return false; + + // We don't want to proceed further for cold functions + // or functions of unknown hotness. Lukewarm functions have no prefix. 
+ Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix(); + if (SectionPrefix.hasValue() && + (SectionPrefix.getValue().equals("unlikely") || + SectionPrefix.getValue().equals("unknown"))) { + return false; + } + + // Renumbering blocks here preserves the order of the blocks as + // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort + // blocks. Preserving the order of blocks is essential to retaining decisions + // made by prior passes such as MachineBlockPlacement. + MF.RenumberBlocks(); + MF.setBBSectionsType(BasicBlockSection::Preset); + auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + + for (auto &MBB : MF) { + // FIXME: We retain the entry block and conservatively keep all landing pad + // blocks as part of the original function. Once D73739 is submitted, we can + // improve the handling of ehpads. + if ((MBB.pred_empty() || MBB.isEHPad())) + continue; + if (isColdBlock(MBB, MBFI, PSI)) + MBB.setSectionID(MBBSectionID::ColdSectionID); + } + + auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) { + return X.getSectionID().Type < Y.getSectionID().Type; + }; + llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator); + + return true; +} + +void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineModuleInfoWrapperPass>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); +} + +char MachineFunctionSplitter::ID = 0; +INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter", + "Split machine functions using profile information", false, + false) + +MachineFunctionPass *llvm::createMachineFunctionSplitterPass() { + return new MachineFunctionSplitter(); +} diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index d4181591deab..59d98054e3a2 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ 
b/llvm/lib/CodeGen/MachineInstr.cpp @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -116,7 +117,7 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) { /// the MCInstrDesc. MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, DebugLoc dl, bool NoImp) - : MCID(&tid), debugLoc(std::move(dl)) { + : MCID(&tid), debugLoc(std::move(dl)), DebugInstrNum(0) { assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); // Reserve space for the expected number of operands. @@ -130,10 +131,12 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, addImplicitDefUseOperands(MF); } -/// MachineInstr ctor - Copies MachineInstr arg exactly -/// +/// MachineInstr ctor - Copies MachineInstr arg exactly. +/// Does not copy the number from debug instruction numbering, to preserve +/// uniqueness. MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) - : MCID(&MI.getDesc()), Info(MI.Info), debugLoc(MI.getDebugLoc()) { + : MCID(&MI.getDesc()), Info(MI.Info), debugLoc(MI.getDebugLoc()), + DebugInstrNum(0) { assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); CapOperands = OperandCapacity::get(MI.getNumOperands()); @@ -147,6 +150,10 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) setFlags(MI.Flags); } +void MachineInstr::moveBefore(MachineInstr *MovePos) { + MovePos->getParent()->splice(MovePos, getParent(), getIterator()); +} + /// getRegInfo - If this instruction is embedded into a MachineFunction, /// return the MachineRegisterInfo object for the current function, otherwise /// return null. 
@@ -701,11 +708,10 @@ bool MachineInstr::isCandidateForCallSiteEntry(QueryType Type) const { if (!isCall(Type)) return false; switch (getOpcode()) { - case TargetOpcode::PATCHABLE_EVENT_CALL: - case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL: case TargetOpcode::PATCHPOINT: case TargetOpcode::STACKMAP: case TargetOpcode::STATEPOINT: + case TargetOpcode::FENTRY_CALL: return false; } return true; @@ -835,27 +841,27 @@ const DILabel *MachineInstr::getDebugLabel() const { } const MachineOperand &MachineInstr::getDebugVariableOp() const { - assert(isDebugValue() && "not a DBG_VALUE"); + assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE"); return getOperand(2); } MachineOperand &MachineInstr::getDebugVariableOp() { - assert(isDebugValue() && "not a DBG_VALUE"); + assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE"); return getOperand(2); } const DILocalVariable *MachineInstr::getDebugVariable() const { - assert(isDebugValue() && "not a DBG_VALUE"); + assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE"); return cast<DILocalVariable>(getOperand(2).getMetadata()); } MachineOperand &MachineInstr::getDebugExpressionOp() { - assert(isDebugValue() && "not a DBG_VALUE"); + assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE"); return getOperand(3); } const DIExpression *MachineInstr::getDebugExpression() const { - assert(isDebugValue() && "not a DBG_VALUE"); + assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE"); return cast<DIExpression>(getOperand(3).getMetadata()); } @@ -1094,10 +1100,12 @@ void MachineInstr::tieOperands(unsigned DefIdx, unsigned UseIdx) { if (DefIdx < TiedMax) UseMO.TiedTo = DefIdx + 1; else { - // Inline asm can use the group descriptors to find tied operands, but on - // normal instruction, the tied def must be within the first TiedMax + // Inline asm can use the group descriptors to find tied operands, + // statepoint tied operands are trivial to match (1-1 reg def with reg use), + // but on normal instruction, 
the tied def must be within the first TiedMax // operands. - assert(isInlineAsm() && "DefIdx out of range"); + assert((isInlineAsm() || getOpcode() == TargetOpcode::STATEPOINT) && + "DefIdx out of range"); UseMO.TiedTo = TiedMax; } @@ -1117,7 +1125,7 @@ unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const { return MO.TiedTo - 1; // Uses on normal instructions can be out of range. - if (!isInlineAsm()) { + if (!isInlineAsm() && getOpcode() != TargetOpcode::STATEPOINT) { // Normal tied defs must be in the 0..TiedMax-1 range. if (MO.isUse()) return TiedMax - 1; @@ -1130,6 +1138,25 @@ unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const { llvm_unreachable("Can't find tied use"); } + if (getOpcode() == TargetOpcode::STATEPOINT) { + // In STATEPOINT defs correspond 1-1 to GC pointer operands passed + // on registers. + StatepointOpers SO(this); + unsigned CurUseIdx = SO.getFirstGCPtrIdx(); + assert(CurUseIdx != -1U && "only gc pointer statepoint operands can be tied"); + unsigned NumDefs = getNumDefs(); + for (unsigned CurDefIdx = 0; CurDefIdx < NumDefs; ++CurDefIdx) { + while (!getOperand(CurUseIdx).isReg()) + CurUseIdx = StackMaps::getNextMetaArgIdx(this, CurUseIdx); + if (OpIdx == CurDefIdx) + return CurUseIdx; + if (OpIdx == CurUseIdx) + return CurDefIdx; + CurUseIdx = StackMaps::getNextMetaArgIdx(this, CurUseIdx); + } + llvm_unreachable("Can't find tied use"); + } + // Now deal with inline asm by parsing the operand group descriptor flags. // Find the beginning of each operand group. SmallVector<unsigned, 8> GroupIdx; @@ -1213,7 +1240,7 @@ bool MachineInstr::isSafeToMove(AAResults *AA, bool &SawStore) const { // See if this instruction does a load. If so, we have to guarantee that the // loaded value doesn't change between the load and the its intended - // destination. The check for isInvariantLoad gives the targe the chance to + // destination. 
The check for isInvariantLoad gives the target the chance to // classify the load as always returning a constant, e.g. a constant pool // load. if (mayLoad() && !isDereferenceableInvariantLoad(AA)) @@ -1224,47 +1251,21 @@ bool MachineInstr::isSafeToMove(AAResults *AA, bool &SawStore) const { return true; } -bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other, - bool UseTBAA) const { - const MachineFunction *MF = getMF(); - const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); - const MachineFrameInfo &MFI = MF->getFrameInfo(); - - // If neither instruction stores to memory, they can't alias in any - // meaningful way, even if they read from the same address. - if (!mayStore() && !Other.mayStore()) - return false; - - // Both instructions must be memory operations to be able to alias. - if (!mayLoadOrStore() || !Other.mayLoadOrStore()) - return false; - - // Let the target decide if memory accesses cannot possibly overlap. - if (TII->areMemAccessesTriviallyDisjoint(*this, Other)) - return false; - - // FIXME: Need to handle multiple memory operands to support all targets. - if (!hasOneMemOperand() || !Other.hasOneMemOperand()) - return true; - - MachineMemOperand *MMOa = *memoperands_begin(); - MachineMemOperand *MMOb = *Other.memoperands_begin(); - - // The following interface to AA is fashioned after DAGCombiner::isAlias - // and operates with MachineMemOperand offset with some important - // assumptions: +static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA, + bool UseTBAA, const MachineMemOperand *MMOa, + const MachineMemOperand *MMOb) { + // The following interface to AA is fashioned after DAGCombiner::isAlias and + // operates with MachineMemOperand offset with some important assumptions: // - LLVM fundamentally assumes flat address spaces. - // - MachineOperand offset can *only* result from legalization and - // cannot affect queries other than the trivial case of overlap - // checking. 
- // - These offsets never wrap and never step outside - // of allocated objects. + // - MachineOperand offset can *only* result from legalization and cannot + // affect queries other than the trivial case of overlap checking. + // - These offsets never wrap and never step outside of allocated objects. // - There should never be any negative offsets here. // // FIXME: Modify API to hide this math from "user" - // Even before we go to AA we can reason locally about some - // memory objects. It can save compile time, and possibly catch some - // corner cases not currently covered. + // Even before we go to AA we can reason locally about some memory objects. It + // can save compile time, and possibly catch some corner cases not currently + // covered. int64_t OffsetA = MMOa->getOffset(); int64_t OffsetB = MMOb->getOffset(); @@ -1306,20 +1307,63 @@ bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other, assert((OffsetA >= 0) && "Negative MachineMemOperand offset"); assert((OffsetB >= 0) && "Negative MachineMemOperand offset"); - int64_t OverlapA = KnownWidthA ? WidthA + OffsetA - MinOffset - : MemoryLocation::UnknownSize; - int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset - : MemoryLocation::UnknownSize; + int64_t OverlapA = + KnownWidthA ? WidthA + OffsetA - MinOffset : MemoryLocation::UnknownSize; + int64_t OverlapB = + KnownWidthB ? WidthB + OffsetB - MinOffset : MemoryLocation::UnknownSize; AliasResult AAResult = AA->alias( - MemoryLocation(ValA, OverlapA, - UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), + MemoryLocation(ValA, OverlapA, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), MemoryLocation(ValB, OverlapB, UseTBAA ? 
MMOb->getAAInfo() : AAMDNodes())); return (AAResult != NoAlias); } +bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other, + bool UseTBAA) const { + const MachineFunction *MF = getMF(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + const MachineFrameInfo &MFI = MF->getFrameInfo(); + + // Exclude call instruction which may alter the memory but can not be handled + // by this function. + if (isCall() || Other.isCall()) + return true; + + // If neither instruction stores to memory, they can't alias in any + // meaningful way, even if they read from the same address. + if (!mayStore() && !Other.mayStore()) + return false; + + // Both instructions must be memory operations to be able to alias. + if (!mayLoadOrStore() || !Other.mayLoadOrStore()) + return false; + + // Let the target decide if memory accesses cannot possibly overlap. + if (TII->areMemAccessesTriviallyDisjoint(*this, Other)) + return false; + + // Memory operations without memory operands may access anything. Be + // conservative and assume `MayAlias`. + if (memoperands_empty() || Other.memoperands_empty()) + return true; + + // Skip if there are too many memory operands. + auto NumChecks = getNumMemOperands() * Other.getNumMemOperands(); + if (NumChecks > TII->getMemOperandAACheckLimit()) + return true; + + // Check each pair of memory operands from both instructions, which can't + // alias only if all pairs won't alias. + for (auto *MMOa : memoperands()) + for (auto *MMOb : Other.memoperands()) + if (MemOperandsHaveAlias(MFI, AA, UseTBAA, MMOa, MMOb)) + return true; + + return false; +} + /// hasOrderedMemoryRef - Return true if this instruction may have an ordered /// or volatile memory reference, or if the information describing the memory /// reference is not available. 
Return false if it is known to have no ordered @@ -1447,6 +1491,8 @@ void MachineInstr::copyImplicitOps(MachineFunction &MF, bool MachineInstr::hasComplexRegisterTies() const { const MCInstrDesc &MCID = getDesc(); + if (MCID.Opcode == TargetOpcode::STATEPOINT) + return true; for (unsigned I = 0, E = getNumOperands(); I < E; ++I) { const auto &Operand = getOperand(I); if (!Operand.isReg() || Operand.isDef()) @@ -1753,6 +1799,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, HeapAllocMarker->printAsOperand(OS, MST); } + if (DebugInstrNum) { + if (!FirstOp) + OS << ","; + OS << " debug-instr-number " << DebugInstrNum; + } + if (!SkipDebugLoc) { if (const DebugLoc &DL = getDebugLoc()) { if (!FirstOp) @@ -2227,3 +2279,9 @@ MachineInstr::getFoldedRestoreSize(const TargetInstrInfo *TII) const { return getSpillSlotSize(Accesses, getMF()->getFrameInfo()); return None; } + +unsigned MachineInstr::getDebugInstrNum() { + if (DebugInstrNum == 0) + DebugInstrNum = getParent()->getParent()->getNewDebugInstrNum(); + return DebugInstrNum; +} diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 5e8a916b3b3b..c06bc39b4940 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -42,6 +42,7 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/InitializePasses.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegister.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" @@ -90,7 +91,7 @@ static cl::opt<UseBFI> DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks", cl::desc("Disable hoisting instructions to" " hotter blocks"), - cl::init(UseBFI::None), cl::Hidden, + cl::init(UseBFI::PGO), cl::Hidden, cl::values(clEnumValN(UseBFI::None, "none", "disable the feature"), clEnumValN(UseBFI::PGO, "pgo", @@ -145,7 +146,7 @@ namespace { } // Track 'estimated' register pressure. 
- SmallSet<unsigned, 32> RegSeen; + SmallSet<Register, 32> RegSeen; SmallVector<unsigned, 8> RegPressure; // Register pressure "limit" per register pressure set. If the pressure @@ -156,7 +157,7 @@ namespace { SmallVector<SmallVector<unsigned, 8>, 16> BackTrace; // For each opcode, keep a list of potential CSE instructions. - DenseMap<unsigned, std::vector<const MachineInstr *>> CSEMap; + DenseMap<unsigned, std::vector<MachineInstr *>> CSEMap; enum { SpeculateFalse = 0, @@ -212,7 +213,7 @@ namespace { BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs, SmallVectorImpl<CandidateInfo> &Candidates); - void AddToLiveIns(unsigned Reg); + void AddToLiveIns(MCRegister Reg); bool IsLICMCandidate(MachineInstr &I); @@ -221,7 +222,7 @@ namespace { bool HasLoopPHIUse(const MachineInstr *MI) const; bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, - unsigned Reg) const; + Register Reg) const; bool IsCheapInstruction(MachineInstr &MI) const; @@ -245,8 +246,6 @@ namespace { void HoistOutOfLoop(MachineDomTreeNode *HeaderN); - void HoistRegion(MachineDomTreeNode *N, bool IsHeader); - void SinkIntoLoop(); void InitRegPressure(MachineBasicBlock *BB); @@ -260,13 +259,12 @@ namespace { MachineInstr *ExtractHoistableLoad(MachineInstr *MI); - const MachineInstr * - LookForDuplicate(const MachineInstr *MI, - std::vector<const MachineInstr *> &PrevMIs); + MachineInstr *LookForDuplicate(const MachineInstr *MI, + std::vector<MachineInstr *> &PrevMIs); - bool EliminateCSE( - MachineInstr *MI, - DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI); + bool + EliminateCSE(MachineInstr *MI, + DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI); bool MayCSE(MachineInstr *MI); @@ -606,7 +604,7 @@ void MachineLICMBase::HoistRegionPostRA() { /// Add register 'Reg' to the livein sets of BBs in the current loop, and make /// sure it is not killed by any instructions in the loop. 
-void MachineLICMBase::AddToLiveIns(unsigned Reg) { +void MachineLICMBase::AddToLiveIns(MCRegister Reg) { for (MachineBasicBlock *BB : CurLoop->getBlocks()) { if (!BB->isLiveIn(Reg)) BB->addLiveIn(Reg); @@ -802,8 +800,13 @@ void MachineLICMBase::SinkIntoLoop() { I != Preheader->instr_end(); ++I) { // We need to ensure that we can safely move this instruction into the loop. // As such, it must not have side-effects, e.g. such as a call has. - if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I)) + LLVM_DEBUG(dbgs() << "LICM: Analysing sink candidate: " << *I); + if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I)) { + LLVM_DEBUG(dbgs() << "LICM: Added as sink candidate.\n"); Candidates.push_back(&*I); + continue; + } + LLVM_DEBUG(dbgs() << "LICM: Not added as sink candidate.\n"); } for (MachineInstr *I : Candidates) { @@ -813,8 +816,11 @@ void MachineLICMBase::SinkIntoLoop() { if (!MRI->hasOneDef(MO.getReg())) continue; bool CanSink = true; - MachineBasicBlock *B = nullptr; + MachineBasicBlock *SinkBlock = nullptr; + LLVM_DEBUG(dbgs() << "LICM: Try sinking: " << *I); + for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { + LLVM_DEBUG(dbgs() << "LICM: Analysing use: "; MI.dump()); // FIXME: Come up with a proper cost model that estimates whether sinking // the instruction (and thus possibly executing it on every loop // iteration) is more expensive than a register. 
@@ -823,24 +829,40 @@ void MachineLICMBase::SinkIntoLoop() { CanSink = false; break; } - if (!B) { - B = MI.getParent(); + if (!SinkBlock) { + SinkBlock = MI.getParent(); + LLVM_DEBUG(dbgs() << "LICM: Setting sink block to: " + << printMBBReference(*SinkBlock) << "\n"); continue; } - B = DT->findNearestCommonDominator(B, MI.getParent()); - if (!B) { + SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent()); + if (!SinkBlock) { + LLVM_DEBUG(dbgs() << "LICM: Can't find nearest dominator\n"); CanSink = false; break; } + LLVM_DEBUG(dbgs() << "LICM: Setting nearest common dom block: " << + printMBBReference(*SinkBlock) << "\n"); + } + if (!CanSink) { + LLVM_DEBUG(dbgs() << "LICM: Can't sink instruction.\n"); + continue; + } + if (!SinkBlock) { + LLVM_DEBUG(dbgs() << "LICM: Not sinking, can't find sink block.\n"); + continue; } - if (!CanSink || !B || B == Preheader) + if (SinkBlock == Preheader) { + LLVM_DEBUG(dbgs() << "LICM: Not sinking, sink block is the preheader\n"); continue; + } - LLVM_DEBUG(dbgs() << "Sinking to " << printMBBReference(*B) << " from " - << printMBBReference(*I->getParent()) << ": " << *I); - B->splice(B->getFirstNonPHI(), Preheader, I); + LLVM_DEBUG(dbgs() << "LICM: Sinking to " << printMBBReference(*SinkBlock) + << " from " << printMBBReference(*I->getParent()) + << ": " << *I); + SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I); - // The instruction is is moved from its basic block, so do not retain the + // The instruction is moved from its basic block, so do not retain the // debug information. 
assert(!I->isDebugInstr() && "Should not sink debug inst"); I->setDebugLoc(DebugLoc()); @@ -978,7 +1000,7 @@ static bool isInvariantStore(const MachineInstr &MI, Reg = TRI->lookThruCopyLike(MO.getReg(), MRI); if (Register::isVirtualRegister(Reg)) return false; - if (!TRI->isCallerPreservedPhysReg(Reg, *MI.getMF())) + if (!TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *MI.getMF())) return false; else FoundCallerPresReg = true; @@ -1008,7 +1030,7 @@ static bool isCopyFeedingInvariantStore(const MachineInstr &MI, if (Register::isVirtualRegister(CopySrcReg)) return false; - if (!TRI->isCallerPreservedPhysReg(CopySrcReg, *MF)) + if (!TRI->isCallerPreservedPhysReg(CopySrcReg.asMCReg(), *MF)) return false; Register CopyDstReg = MI.getOperand(0).getReg(); @@ -1030,6 +1052,7 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) { bool DontMoveAcrossStore = true; if ((!I.isSafeToMove(AA, DontMoveAcrossStore)) && !(HoistConstStores && isInvariantStore(I, TRI, MRI))) { + LLVM_DEBUG(dbgs() << "LICM: Instruction not safe to move.\n"); return false; } @@ -1040,65 +1063,28 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) { // indexed load from a jump table. // Stores and side effects are already checked by isSafeToMove. if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) && - !IsGuaranteedToExecute(I.getParent())) + !IsGuaranteedToExecute(I.getParent())) { + LLVM_DEBUG(dbgs() << "LICM: Load not guaranteed to execute.\n"); + return false; + } + + // Convergent attribute has been used on operations that involve inter-thread + // communication which results are implicitly affected by the enclosing + // control flows. It is not safe to hoist or sink such operations across + // control flow. + if (I.isConvergent()) return false; return true; } /// Returns true if the instruction is loop invariant. 
-/// I.e., all virtual register operands are defined outside of the loop, -/// physical registers aren't accessed explicitly, and there are no side -/// effects that aren't captured by the operands or other flags. bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) { - if (!IsLICMCandidate(I)) + if (!IsLICMCandidate(I)) { + LLVM_DEBUG(dbgs() << "LICM: Instruction not a LICM candidate\n"); return false; - - // The instruction is loop invariant if all of its operands are. - for (const MachineOperand &MO : I.operands()) { - if (!MO.isReg()) - continue; - - Register Reg = MO.getReg(); - if (Reg == 0) continue; - - // Don't hoist an instruction that uses or defines a physical register. - if (Register::isPhysicalRegister(Reg)) { - if (MO.isUse()) { - // If the physreg has no defs anywhere, it's just an ambient register - // and we can freely move its uses. Alternatively, if it's allocatable, - // it could get allocated to something with a def during allocation. - // However, if the physreg is known to always be caller saved/restored - // then this use is safe to hoist. - if (!MRI->isConstantPhysReg(Reg) && - !(TRI->isCallerPreservedPhysReg(Reg, *I.getMF()))) - return false; - // Otherwise it's safe to move. - continue; - } else if (!MO.isDead()) { - // A def that isn't dead. We can't move it. - return false; - } else if (CurLoop->getHeader()->isLiveIn(Reg)) { - // If the reg is live into the loop, we can't hoist an instruction - // which would clobber it. - return false; - } - } - - if (!MO.isUse()) - continue; - - assert(MRI->getVRegDef(Reg) && - "Machine instr not mapped for this vreg?!"); - - // If the loop contains the definition of an operand, then the instruction - // isn't loop invariant. - if (CurLoop->contains(MRI->getVRegDef(Reg))) - return false; } - - // If we got this far, the instruction is loop invariant! 
- return true; + return CurLoop->isLoopInvariant(I); } /// Return true if the specified instruction is used by a phi node and hoisting @@ -1138,9 +1124,8 @@ bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const { /// Compute operand latency between a def of 'Reg' and an use in the current /// loop, return true if the target considered it high. -bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI, - unsigned DefIdx, - unsigned Reg) const { +bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, + Register Reg) const { if (MRI->use_nodbg_empty(Reg)) return false; @@ -1400,10 +1385,10 @@ void MachineLICMBase::InitCSEMap(MachineBasicBlock *BB) { /// Find an instruction amount PrevMIs that is a duplicate of MI. /// Return this instruction if it's found. -const MachineInstr* +MachineInstr * MachineLICMBase::LookForDuplicate(const MachineInstr *MI, - std::vector<const MachineInstr*> &PrevMIs) { - for (const MachineInstr *PrevMI : PrevMIs) + std::vector<MachineInstr *> &PrevMIs) { + for (MachineInstr *PrevMI : PrevMIs) if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr))) return PrevMI; @@ -1414,14 +1399,15 @@ MachineLICMBase::LookForDuplicate(const MachineInstr *MI, /// computes the same value. If it's found, do a RAU on with the definition of /// the existing instruction rather than hoisting the instruction to the /// preheader. -bool MachineLICMBase::EliminateCSE(MachineInstr *MI, - DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI) { +bool MachineLICMBase::EliminateCSE( + MachineInstr *MI, + DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI) { // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate // the undef property onto uses. 
if (CI == CSEMap.end() || MI->isImplicitDef()) return false; - if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { + if (MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { LLVM_DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup); // Replace virtual registers defined by MI by their counterparts defined @@ -1461,6 +1447,9 @@ bool MachineLICMBase::EliminateCSE(MachineInstr *MI, Register DupReg = Dup->getOperand(Idx).getReg(); MRI->replaceRegWith(Reg, DupReg); MRI->clearKillFlags(DupReg); + // Clear Dup dead flag if any, we reuse it for Reg. + if (!MRI->use_nodbg_empty(DupReg)) + Dup->getOperand(Idx).setIsDead(false); } MI->eraseFromParent(); @@ -1474,8 +1463,8 @@ bool MachineLICMBase::EliminateCSE(MachineInstr *MI, /// the loop. bool MachineLICMBase::MayCSE(MachineInstr *MI) { unsigned Opcode = MI->getOpcode(); - DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator - CI = CSEMap.find(Opcode); + DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI = + CSEMap.find(Opcode); // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate // the undef property onto uses. if (CI == CSEMap.end() || MI->isImplicitDef()) @@ -1529,8 +1518,8 @@ bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { // Look for opportunity to CSE the hoisted instruction. unsigned Opcode = MI->getOpcode(); - DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator - CI = CSEMap.find(Opcode); + DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI = + CSEMap.find(Opcode); if (!EliminateCSE(MI, CI)) { // Otherwise, splice the instruction to the preheader. 
Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI); diff --git a/llvm/lib/CodeGen/MachineLoopInfo.cpp b/llvm/lib/CodeGen/MachineLoopInfo.cpp index 0c1439da9b29..78480d0e1488 100644 --- a/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -16,11 +16,14 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/Analysis/LoopInfoImpl.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; // Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops. @@ -146,6 +149,59 @@ MachineLoopInfo::findLoopPreheader(MachineLoop *L, return Preheader; } +bool MachineLoop::isLoopInvariant(MachineInstr &I) const { + MachineFunction *MF = I.getParent()->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + + // The instruction is loop invariant if all of its operands are. + for (const MachineOperand &MO : I.operands()) { + if (!MO.isReg()) + continue; + + Register Reg = MO.getReg(); + if (Reg == 0) continue; + + // An instruction that uses or defines a physical register can't e.g. be + // hoisted, so mark this as not invariant. + if (Register::isPhysicalRegister(Reg)) { + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + // However, if the physreg is known to always be caller saved/restored + // then this use is safe to hoist. + if (!MRI->isConstantPhysReg(Reg) && + !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF()))) + return false; + // Otherwise it's safe to move. 
+ continue; + } else if (!MO.isDead()) { + // A def that isn't dead can't be moved. + return false; + } else if (getHeader()->isLiveIn(Reg)) { + // If the reg is live into the loop, we can't hoist an instruction + // which would clobber it. + return false; + } + } + + if (!MO.isUse()) + continue; + + assert(MRI->getVRegDef(Reg) && + "Machine instr not mapped for this vreg?!"); + + // If the loop contains the definition of an operand, then the instruction + // isn't loop invariant. + if (contains(MRI->getVRegDef(Reg))) + return false; + } + + // If we got this far, the instruction is loop invariant! + return true; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void MachineLoop::dump() const { print(dbgs()); diff --git a/llvm/lib/CodeGen/MachineLoopUtils.cpp b/llvm/lib/CodeGen/MachineLoopUtils.cpp index 2295e1ca6d4e..fdcc8472f1c2 100644 --- a/llvm/lib/CodeGen/MachineLoopUtils.cpp +++ b/llvm/lib/CodeGen/MachineLoopUtils.cpp @@ -130,14 +130,3 @@ MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction, return NewBB; } - -bool llvm::isRegLiveInExitBlocks(MachineLoop *Loop, int PhysReg) { - SmallVector<MachineBasicBlock *, 4> ExitBlocks; - Loop->getExitBlocks(ExitBlocks); - - for (auto *MBB : ExitBlocks) - if (MBB->isLiveIn(PhysReg)) - return true; - - return false; -} diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp index f866c7ca53c6..5565b9cededa 100644 --- a/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -104,7 +104,8 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { BBCallbacks.back().setMap(this); Entry.Index = BBCallbacks.size() - 1; Entry.Fn = BB->getParent(); - MCSymbol *Sym = Context.createTempSymbol(!BB->hasAddressTaken()); + MCSymbol *Sym = BB->hasAddressTaken() ? 
Context.createNamedTempSymbol() + : Context.createTempSymbol(); Entry.Symbols.push_back(Sym); return Entry.Symbols; } @@ -143,8 +144,7 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) { BBCallbacks[OldEntry.Index] = nullptr; // Update the callback. // Otherwise, we need to add the old symbols to the new block's set. - NewEntry.Symbols.insert(NewEntry.Symbols.end(), OldEntry.Symbols.begin(), - OldEntry.Symbols.end()); + llvm::append_range(NewEntry.Symbols, OldEntry.Symbols); } void MMIAddrLabelMapCallbackPtr::deleted() { @@ -170,6 +170,7 @@ void MachineModuleInfo::finalize() { AddrLabelSymbols = nullptr; Context.reset(); + // We don't clear the ExternalContext. delete ObjFileMMI; ObjFileMMI = nullptr; @@ -178,7 +179,8 @@ void MachineModuleInfo::finalize() { MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI) : TM(std::move(MMI.TM)), Context(MMI.TM.getMCAsmInfo(), MMI.TM.getMCRegisterInfo(), - MMI.TM.getObjFileLowering(), nullptr, nullptr, false) { + MMI.TM.getObjFileLowering(), nullptr, nullptr, false), + MachineFunctions(std::move(MMI.MachineFunctions)) { ObjFileMMI = MMI.ObjFileMMI; CurCallSite = MMI.CurCallSite; UsesMSVCFloatingPoint = MMI.UsesMSVCFloatingPoint; @@ -186,6 +188,7 @@ MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI) HasSplitStack = MMI.HasSplitStack; HasNosplitStack = MMI.HasNosplitStack; AddrLabelSymbols = MMI.AddrLabelSymbols; + ExternalContext = MMI.ExternalContext; TheModule = MMI.TheModule; } @@ -195,6 +198,14 @@ MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM) initialize(); } +MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM, + MCContext *ExtContext) + : TM(*TM), Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(), + TM->getObjFileLowering(), nullptr, nullptr, false), + ExternalContext(ExtContext) { + initialize(); +} + MachineModuleInfo::~MachineModuleInfo() { finalize(); } //===- Address of Block Management ----------------------------------------===// @@ 
-203,7 +214,7 @@ ArrayRef<MCSymbol *> MachineModuleInfo::getAddrLabelSymbolToEmit(const BasicBlock *BB) { // Lazily create AddrLabelSymbols. if (!AddrLabelSymbols) - AddrLabelSymbols = new MMIAddrLabelMap(Context); + AddrLabelSymbols = new MMIAddrLabelMap(getContext()); return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB)); } @@ -295,6 +306,12 @@ MachineModuleInfoWrapperPass::MachineModuleInfoWrapperPass( initializeMachineModuleInfoWrapperPassPass(*PassRegistry::getPassRegistry()); } +MachineModuleInfoWrapperPass::MachineModuleInfoWrapperPass( + const LLVMTargetMachine *TM, MCContext *ExtContext) + : ImmutablePass(ID), MMI(TM, ExtContext) { + initializeMachineModuleInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} + // Handle the Pass registration stuff necessary to use DataLayout's. INITIALIZE_PASS(MachineModuleInfoWrapperPass, "machinemoduleinfo", "Machine Module Information", false, false) diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index 2b4fd654e46c..9b09f5273298 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -85,7 +85,7 @@ void MachineOperand::substVirtReg(Register Reg, unsigned SubIdx, } void MachineOperand::substPhysReg(MCRegister Reg, const TargetRegisterInfo &TRI) { - assert(Reg.isPhysical()); + assert(Register::isPhysicalRegister(Reg)); if (getSubReg()) { Reg = TRI.getSubReg(Reg, getSubReg()); // Note that getSubReg() may return 0 if the sub-register doesn't exist. @@ -153,22 +153,25 @@ void MachineOperand::removeRegFromUses() { /// ChangeToImmediate - Replace this operand with a new immediate operand of /// the specified value. If an operand is known to be an immediate already, /// the setImm method should be used. 
-void MachineOperand::ChangeToImmediate(int64_t ImmVal) { +void MachineOperand::ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); removeRegFromUses(); OpKind = MO_Immediate; Contents.ImmVal = ImmVal; + setTargetFlags(TargetFlags); } -void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) { +void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm, + unsigned TargetFlags) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); removeRegFromUses(); OpKind = MO_FPImmediate; Contents.CFP = FPImm; + setTargetFlags(TargetFlags); } void MachineOperand::ChangeToES(const char *SymName, @@ -197,7 +200,7 @@ void MachineOperand::ChangeToGA(const GlobalValue *GV, int64_t Offset, setTargetFlags(TargetFlags); } -void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) { +void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym, unsigned TargetFlags) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into an MCSymbol"); @@ -205,9 +208,10 @@ void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) { OpKind = MO_MCSymbol; Contents.Sym = Sym; + setTargetFlags(TargetFlags); } -void MachineOperand::ChangeToFrameIndex(int Idx) { +void MachineOperand::ChangeToFrameIndex(int Idx, unsigned TargetFlags) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into a FrameIndex"); @@ -215,6 +219,7 @@ void MachineOperand::ChangeToFrameIndex(int Idx) { OpKind = MO_FrameIndex; setIndex(Idx); + setTargetFlags(TargetFlags); } void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset, @@ -415,6 +420,11 @@ static const char *getTargetIndexName(const MachineFunction &MF, int Index) { return nullptr; } +const char *MachineOperand::getTargetIndexName() const { + const MachineFunction *MF = getMFIfAvailable(*this); + return MF ? 
::getTargetIndexName(*MF, this->getIndex()) : nullptr; +} + static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) { auto Flags = TII->getSerializableDirectMachineOperandTargetFlags(); for (const auto &I : Flags) { @@ -823,7 +833,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "target-index("; const char *Name = "<unknown>"; if (const MachineFunction *MF = getMFIfAvailable(*this)) - if (const auto *TargetIndexName = getTargetIndexName(*MF, getIndex())) + if (const auto *TargetIndexName = ::getTargetIndexName(*MF, getIndex())) Name = TargetIndexName; OS << Name << ')'; printOperandOffset(OS, getOffset()); @@ -1142,7 +1152,7 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, const MIRFormatter *Formatter = TII->getMIRFormatter(); // FIXME: This is not necessarily the correct MIR serialization format for // a custom pseudo source value, but at least it allows - // -print-machineinstrs to work on a target with custom pseudo source + // MIR printing to work on a target with custom pseudo source // values. 
OS << "custom \""; Formatter->printCustomPseudoSourceValue(OS, MST, *PVal); @@ -1152,8 +1162,10 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, } } MachineOperand::printOperandOffset(OS, getOffset()); - if (getBaseAlign() != getSize()) - OS << ", align " << getBaseAlign().value(); + if (getAlign() != getSize()) + OS << ", align " << getAlign().value(); + if (getAlign() != getBaseAlign()) + OS << ", basealign " << getBaseAlign().value(); auto AAInfo = getAAInfo(); if (AAInfo.TBAA) { OS << ", !tbaa "; diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp index f9d099e02995..02998d41d831 100644 --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -59,10 +59,8 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -309,10 +307,8 @@ struct InstructionMapper { // repeated substring. 
mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB, InstrListForMBB); - InstrList.insert(InstrList.end(), InstrListForMBB.begin(), - InstrListForMBB.end()); - UnsignedVec.insert(UnsignedVec.end(), UnsignedVecForMBB.begin(), - UnsignedVecForMBB.end()); + llvm::append_range(InstrList, InstrListForMBB); + llvm::append_range(UnsignedVec, UnsignedVecForMBB); } } @@ -549,11 +545,10 @@ void MachineOutliner::findCandidates( // That is, one must either // * End before the other starts // * Start after the other ends - if (std::all_of( - CandidatesForRepeatedSeq.begin(), CandidatesForRepeatedSeq.end(), - [&StartIdx, &EndIdx](const Candidate &C) { - return (EndIdx < C.getStartIdx() || StartIdx > C.getEndIdx()); - })) { + if (llvm::all_of(CandidatesForRepeatedSeq, [&StartIdx, + &EndIdx](const Candidate &C) { + return (EndIdx < C.getStartIdx() || StartIdx > C.getEndIdx()); + })) { // It doesn't overlap with anything, so we can outline it. // Each sequence is over [StartIt, EndIt]. // Save the candidate and its location. @@ -656,6 +651,8 @@ MachineFunction *MachineOutliner::createOutlinedFunction( OriginalMF->getFrameInstructions(); for (auto I = FirstCand.front(), E = std::next(FirstCand.back()); I != E; ++I) { + if (I->isDebugInstr()) + continue; MachineInstr *NewMI = MF.CloneMachineInstr(&*I); if (I->isCFIInstruction()) { unsigned CFIIndex = NewMI->getOperand(0).getCFIIndex(); @@ -691,7 +688,7 @@ MachineFunction *MachineOutliner::createOutlinedFunction( // The live-in set for the outlined function is the union of the live-ins // from all the outlining points. 
- for (MCPhysReg Reg : make_range(CandLiveIns.begin(), CandLiveIns.end())) + for (MCPhysReg Reg : CandLiveIns) LiveIns.addReg(Reg); } addLiveIns(MBB, LiveIns); diff --git a/llvm/lib/CodeGen/MachinePassManager.cpp b/llvm/lib/CodeGen/MachinePassManager.cpp new file mode 100644 index 000000000000..e81575c88935 --- /dev/null +++ b/llvm/lib/CodeGen/MachinePassManager.cpp @@ -0,0 +1,121 @@ +//===---------- MachinePassManager.cpp ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the pass management machinery for machine functions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachinePassManager.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/PassManagerImpl.h" + +using namespace llvm; + +namespace llvm { +template class AllAnalysesOn<MachineFunction>; +template class AnalysisManager<MachineFunction>; +template class PassManager<MachineFunction>; + +Error MachineFunctionPassManager::run(Module &M, + MachineFunctionAnalysisManager &MFAM) { + // MachineModuleAnalysis is a module analysis pass that is never invalidated + // because we don't run any module pass in codegen pipeline. This is very + // important because the codegen state is stored in MMI which is the analysis + // result of MachineModuleAnalysis. MMI should not be recomputed. + auto &MMI = MFAM.getResult<MachineModuleAnalysis>(M); + + (void)RequireCodeGenSCCOrder; + assert(!RequireCodeGenSCCOrder && "not implemented"); + + // Add a PIC to verify machine functions. 
+ if (VerifyMachineFunction) { + PassInstrumentation PI = MFAM.getResult<PassInstrumentationAnalysis>(M); + + // No need to pop this callback later since MIR pipeline is flat which means + // current pipeline is the top-level pipeline. Callbacks are not used after + // current pipeline. + PI.pushBeforeNonSkippedPassCallback([&MFAM](StringRef PassID, Any IR) { + assert(any_isa<const MachineFunction *>(IR)); + const MachineFunction *MF = any_cast<const MachineFunction *>(IR); + assert(MF && "Machine function should be valid for printing"); + std::string Banner = std::string("After ") + std::string(PassID); + verifyMachineFunction(&MFAM, Banner, *MF); + }); + } + + if (DebugLogging) { + dbgs() << "Starting " << getTypeName<MachineFunction>() + << " pass manager run.\n"; + } + + for (auto &F : InitializationFuncs) { + if (auto Err = F(M, MFAM)) + return Err; + } + + unsigned Idx = 0; + size_t Size = Passes.size(); + do { + // Run machine module passes + for (; MachineModulePasses.count(Idx) && Idx != Size; ++Idx) { + if (DebugLogging) + dbgs() << "Running pass: " << Passes[Idx]->name() << " on " + << M.getName() << '\n'; + if (auto Err = MachineModulePasses.at(Idx)(M, MFAM)) + return Err; + } + + // Finish running all passes. + if (Idx == Size) + break; + + // Run machine function passes + + // Get index range of machine function passes. + unsigned Begin = Idx; + for (; !MachineModulePasses.count(Idx) && Idx != Size; ++Idx) + ; + + for (Function &F : M) { + // Do not codegen any 'available_externally' functions at all, they have + // definitions outside the translation unit. 
+ if (F.hasAvailableExternallyLinkage()) + continue; + + MachineFunction &MF = MMI.getOrCreateMachineFunction(F); + PassInstrumentation PI = MFAM.getResult<PassInstrumentationAnalysis>(MF); + + for (unsigned I = Begin, E = Idx; I != E; ++I) { + auto *P = Passes[I].get(); + + if (!PI.runBeforePass<MachineFunction>(*P, MF)) + continue; + + // TODO: EmitSizeRemarks + PreservedAnalyses PassPA = P->run(MF, MFAM); + PI.runAfterPass(*P, MF, PassPA); + MFAM.invalidate(MF, PassPA); + } + } + } while (true); + + for (auto &F : FinalizationFuncs) { + if (auto Err = F(M, MFAM)) + return Err; + } + + if (DebugLogging) { + dbgs() << "Finished " << getTypeName<MachineFunction>() + << " pass manager run.\n"; + } + + return Error::success(); +} + +} // namespace llvm diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index ef4b02ca9e3e..d0fe29f65ede 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -268,6 +268,7 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) { void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) { // Reset the pragma for the next loop in iteration. 
disabledByPragma = false; + II_setByPragma = 0; MachineBasicBlock *LBLK = L.getTopBlock(); @@ -441,6 +442,16 @@ bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) { return SMS.hasNewSchedule(); } +void MachinePipeliner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<LiveIntervals>(); + AU.addRequired<MachineOptimizationRemarkEmitterPass>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) { if (II_setByPragma > 0) MII = II_setByPragma; @@ -705,14 +716,13 @@ static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) { /// This function calls the code in ValueTracking, but first checks that the /// instruction has a memory operand. static void getUnderlyingObjects(const MachineInstr *MI, - SmallVectorImpl<const Value *> &Objs, - const DataLayout &DL) { + SmallVectorImpl<const Value *> &Objs) { if (!MI->hasOneMemOperand()) return; MachineMemOperand *MM = *MI->memoperands_begin(); if (!MM->getValue()) return; - GetUnderlyingObjects(MM->getValue(), Objs, DL); + getUnderlyingObjects(MM->getValue(), Objs); for (const Value *V : Objs) { if (!isIdentifiedObject(V)) { Objs.clear(); @@ -736,7 +746,7 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { PendingLoads.clear(); else if (MI.mayLoad()) { SmallVector<const Value *, 4> Objs; - getUnderlyingObjects(&MI, Objs, MF.getDataLayout()); + ::getUnderlyingObjects(&MI, Objs); if (Objs.empty()) Objs.push_back(UnknownValue); for (auto V : Objs) { @@ -745,7 +755,7 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { } } else if (MI.mayStore()) { SmallVector<const Value *, 4> Objs; - getUnderlyingObjects(&MI, Objs, MF.getDataLayout()); + ::getUnderlyingObjects(&MI, Objs); if (Objs.empty()) Objs.push_back(UnknownValue); for (auto V : 
Objs) { @@ -803,10 +813,8 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { continue; } AliasResult AAResult = AA->alias( - MemoryLocation(MMO1->getValue(), LocationSize::unknown(), - MMO1->getAAInfo()), - MemoryLocation(MMO2->getValue(), LocationSize::unknown(), - MMO2->getAAInfo())); + MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()), + MemoryLocation::getAfter(MMO2->getValue(), MMO2->getAAInfo())); if (AAResult != NoAlias) { SDep Dep(Load, SDep::Barrier); @@ -1587,12 +1595,12 @@ static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path, SmallPtrSet<SUnit *, 8> &Visited) { if (Cur->isBoundaryNode()) return false; - if (Exclude.count(Cur) != 0) + if (Exclude.contains(Cur)) return false; - if (DestNodes.count(Cur) != 0) + if (DestNodes.contains(Cur)) return true; if (!Visited.insert(Cur).second) - return Path.count(Cur) != 0; + return Path.contains(Cur); bool FoundPath = false; for (auto &SI : Cur->Succs) FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited); @@ -1632,7 +1640,8 @@ static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, if (Register::isVirtualRegister(Reg)) Uses.insert(Reg); else if (MRI.isAllocatable(Reg)) - for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid(); + ++Units) Uses.insert(*Units); } } @@ -1645,7 +1654,8 @@ static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, LiveOutRegs.push_back(RegisterMaskPair(Reg, LaneBitmask::getNone())); } else if (MRI.isAllocatable(Reg)) { - for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid(); + ++Units) if (!Uses.count(*Units)) LiveOutRegs.push_back(RegisterMaskPair(*Units, LaneBitmask::getNone())); @@ -1741,7 +1751,6 @@ void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) { } NodeSets.clear(); LLVM_DEBUG(dbgs() << "Clear recurrence 
node-sets\n"); - return; } /// Add the nodes that do not belong to a recurrence set into groups @@ -1946,7 +1955,7 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) { for (const auto &I : maxHeight->Succs) { if (Nodes.count(I.getSUnit()) == 0) continue; - if (NodeOrder.count(I.getSUnit()) != 0) + if (NodeOrder.contains(I.getSUnit())) continue; if (ignoreDependence(I, false)) continue; @@ -1958,7 +1967,7 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) { continue; if (Nodes.count(I.getSUnit()) == 0) continue; - if (NodeOrder.count(I.getSUnit()) != 0) + if (NodeOrder.contains(I.getSUnit())) continue; R.insert(I.getSUnit()); } @@ -1997,7 +2006,7 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) { for (const auto &I : maxDepth->Preds) { if (Nodes.count(I.getSUnit()) == 0) continue; - if (NodeOrder.count(I.getSUnit()) != 0) + if (NodeOrder.contains(I.getSUnit())) continue; R.insert(I.getSUnit()); } @@ -2007,7 +2016,7 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) { continue; if (Nodes.count(I.getSUnit()) == 0) continue; - if (NodeOrder.count(I.getSUnit()) != 0) + if (NodeOrder.contains(I.getSUnit())) continue; R.insert(I.getSUnit()); } @@ -2270,7 +2279,7 @@ void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI, /// Return the instruction in the loop that defines the register. /// If the definition is a Phi, then follow the Phi operand to /// the instruction in the loop. -MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) { +MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) { SmallPtrSet<MachineInstr *, 8> Visited; MachineInstr *Def = MRI.getVRegDef(Reg); while (Def->isPHI()) { @@ -2943,7 +2952,7 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) { } // Replace the old order with the new order. 
cycleInstrs.swap(newOrderPhi); - cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end()); + llvm::append_range(cycleInstrs, newOrderI); SSD->fixupRegisterOverlaps(cycleInstrs); } diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp index 4c733738840a..5325eda9d478 100644 --- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -417,17 +417,11 @@ MachineInstr *MachineRegisterInfo::getUniqueVRegDef(Register Reg) const { } bool MachineRegisterInfo::hasOneNonDBGUse(Register RegNo) const { - use_nodbg_iterator UI = use_nodbg_begin(RegNo); - if (UI == use_nodbg_end()) - return false; - return ++UI == use_nodbg_end(); + return hasSingleElement(use_nodbg_operands(RegNo)); } bool MachineRegisterInfo::hasOneNonDBGUser(Register RegNo) const { - use_instr_nodbg_iterator UI = use_instr_nodbg_begin(RegNo); - if (UI == use_instr_nodbg_end()) - return false; - return ++UI == use_instr_nodbg_end(); + return hasSingleElement(use_nodbg_instructions(RegNo)); } /// clearKillFlags - Iterate over all the uses of the given register and @@ -532,13 +526,6 @@ bool MachineRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const { return true; } -bool -MachineRegisterInfo::isCallerPreservedOrConstPhysReg(MCRegister PhysReg) const { - const TargetRegisterInfo *TRI = getTargetRegisterInfo(); - return isConstantPhysReg(PhysReg) || - TRI->isCallerPreservedPhysReg(PhysReg, *MF); -} - /// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the /// specified register as undefined which causes the DBG_VALUE to be /// deleted during LiveDebugVariables analysis. @@ -630,8 +617,7 @@ void MachineRegisterInfo::disableCalleeSavedRegister(MCRegister Reg) { // Remove the register (and its aliases from the list). 
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - UpdatedCSRs.erase(std::remove(UpdatedCSRs.begin(), UpdatedCSRs.end(), *AI), - UpdatedCSRs.end()); + llvm::erase_value(UpdatedCSRs, *AI); } const MCPhysReg *MachineRegisterInfo::getCalleeSavedRegs() const { @@ -645,8 +631,7 @@ void MachineRegisterInfo::setCalleeSavedRegs(ArrayRef<MCPhysReg> CSRs) { if (IsUpdatedCSRsInitialized) UpdatedCSRs.clear(); - for (MCPhysReg Reg : CSRs) - UpdatedCSRs.push_back(Reg); + append_range(UpdatedCSRs, CSRs); // Zero value represents the end of the register list // (no more registers should be pushed). @@ -660,7 +645,7 @@ bool MachineRegisterInfo::isReservedRegUnit(unsigned Unit) const { bool IsRootReserved = true; for (MCSuperRegIterator Super(*Root, TRI, /*IncludeSelf=*/true); Super.isValid(); ++Super) { - unsigned Reg = *Super; + MCRegister Reg = *Super; if (!isReserved(Reg)) { IsRootReserved = false; break; diff --git a/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/llvm/lib/CodeGen/MachineSSAUpdater.cpp index b12557d6d326..462082df5d05 100644 --- a/llvm/lib/CodeGen/MachineSSAUpdater.cpp +++ b/llvm/lib/CodeGen/MachineSSAUpdater.cpp @@ -50,15 +50,18 @@ MachineSSAUpdater::~MachineSSAUpdater() { } /// Initialize - Reset this object to get ready for a new set of SSA -/// updates. ProtoValue is the value used to name PHI nodes. -void MachineSSAUpdater::Initialize(Register V) { +/// updates. 
+void MachineSSAUpdater::Initialize(const TargetRegisterClass *RC) { if (!AV) AV = new AvailableValsTy(); else getAvailableVals(AV).clear(); - VR = V; - VRC = MRI->getRegClass(VR); + VRC = RC; +} + +void MachineSSAUpdater::Initialize(Register V) { + Initialize(MRI->getRegClass(V)); } /// HasValueForBlock - Return true if the MachineSSAUpdater already has a value for diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index cf75d531deb2..8d51bb26103a 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" @@ -73,6 +74,8 @@ using namespace llvm; #define DEBUG_TYPE "machine-scheduler" +STATISTIC(NumClustered, "Number of load/store pairs clustered"); + namespace llvm { cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, @@ -126,6 +129,15 @@ static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden, static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true)); +static cl::opt<bool> + ForceFastCluster("force-fast-cluster", cl::Hidden, + cl::desc("Switch to fast cluster algorithm with the lost " + "of some fusion opportunities"), + cl::init(false)); +static cl::opt<unsigned> + FastClusterThreshold("fast-cluster-threshold", cl::Hidden, + cl::desc("The threshold for fast cluster"), + cl::init(1000)); // DAG subtrees must have at least this many nodes. 
static const unsigned MinSubtreeSize = 8; @@ -228,8 +240,13 @@ char PostMachineScheduler::ID = 0; char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID; -INITIALIZE_PASS(PostMachineScheduler, "postmisched", - "PostRA Machine Instruction Scheduler", false, false) +INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched", + "PostRA Machine Instruction Scheduler", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(PostMachineScheduler, "postmisched", + "PostRA Machine Instruction Scheduler", false, false) PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) { initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry()); @@ -1098,7 +1115,7 @@ updateScheduledPressure(const SUnit *SU, void ScheduleDAGMILive::updatePressureDiffs( ArrayRef<RegisterMaskPair> LiveUses) { for (const RegisterMaskPair &P : LiveUses) { - unsigned Reg = P.RegUnit; + Register Reg = P.RegUnit; /// FIXME: Currently assuming single-use physregs. if (!Register::isVirtualRegister(Reg)) continue; @@ -1298,7 +1315,7 @@ void ScheduleDAGMILive::computeDFSResult() { /// The cyclic path estimation identifies a def-use pair that crosses the back /// edge and considers the depth and height of the nodes. For example, consider /// the following instruction sequence where each instruction has unit latency -/// and defines an epomymous virtual register: +/// and defines an eponymous virtual register: /// /// a->b(a,c)->c(b)->d(c)->exit /// @@ -1323,7 +1340,7 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { unsigned MaxCyclicLatency = 0; // Visit each live out vreg def to find def/use pairs that cross iterations. 
for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) { - unsigned Reg = P.RegUnit; + Register Reg = P.RegUnit; if (!Register::isVirtualRegister(Reg)) continue; const LiveInterval &LI = LIS->getInterval(Reg); @@ -1527,7 +1544,12 @@ public: void apply(ScheduleDAGInstrs *DAGInstrs) override; protected: - void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG); + void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster, + ScheduleDAGInstrs *DAG); + void collectMemOpRecords(std::vector<SUnit> &SUnits, + SmallVectorImpl<MemOpInfo> &MemOpRecords); + bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG, + DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups); }; class StoreClusterMutation : public BaseMemOpClusterMutation { @@ -1563,109 +1585,179 @@ createStoreClusterDAGMutation(const TargetInstrInfo *TII, } // end namespace llvm +// Sorting all the loads/stores first, then for each load/store, checking the +// following load/store one by one, until reach the first non-dependent one and +// call target hook to see if they can cluster. +// If FastCluster is enabled, we assume that, all the loads/stores have been +// preprocessed and now, they didn't have dependencies on each other. 
void BaseMemOpClusterMutation::clusterNeighboringMemOps( - ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) { - SmallVector<MemOpInfo, 32> MemOpRecords; - for (SUnit *SU : MemOps) { - const MachineInstr &MI = *SU->getInstr(); - SmallVector<const MachineOperand *, 4> BaseOps; - int64_t Offset; - bool OffsetIsScalable; - unsigned Width; - if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, - OffsetIsScalable, Width, TRI)) { - MemOpRecords.push_back(MemOpInfo(SU, BaseOps, Offset, Width)); - - LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: " - << Offset << ", OffsetIsScalable: " << OffsetIsScalable - << ", Width: " << Width << "\n"); - } -#ifndef NDEBUG - for (auto *Op : BaseOps) - assert(Op); -#endif - } - if (MemOpRecords.size() < 2) - return; - - llvm::sort(MemOpRecords); + ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster, + ScheduleDAGInstrs *DAG) { + // Keep track of the current cluster length and bytes for each SUnit. + DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo; // At this point, `MemOpRecords` array must hold atleast two mem ops. Try to // cluster mem ops collected within `MemOpRecords` array. - unsigned ClusterLength = 1; - unsigned CurrentClusterBytes = MemOpRecords[0].Width; for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) { // Decision to cluster mem ops is taken based on target dependent logic auto MemOpa = MemOpRecords[Idx]; - auto MemOpb = MemOpRecords[Idx + 1]; - ++ClusterLength; - CurrentClusterBytes += MemOpb.Width; - if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength, - CurrentClusterBytes)) { - // Current mem ops pair could not be clustered, reset cluster length, and - // go to next pair - ClusterLength = 1; - CurrentClusterBytes = MemOpb.Width; + + // Seek for the next load/store to do the cluster. 
+ unsigned NextIdx = Idx + 1; + for (; NextIdx < End; ++NextIdx) + // Skip if MemOpb has been clustered already or has dependency with + // MemOpa. + if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) && + (FastCluster || + (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) && + !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU)))) + break; + if (NextIdx == End) continue; + + auto MemOpb = MemOpRecords[NextIdx]; + unsigned ClusterLength = 2; + unsigned CurrentClusterBytes = MemOpa.Width + MemOpb.Width; + if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) { + ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1; + CurrentClusterBytes = + SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width; } + if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength, + CurrentClusterBytes)) + continue; + SUnit *SUa = MemOpa.SU; SUnit *SUb = MemOpb.SU; if (SUa->NodeNum > SUb->NodeNum) std::swap(SUa, SUb); // FIXME: Is this check really required? - if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { - ClusterLength = 1; - CurrentClusterBytes = MemOpb.Width; + if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) continue; - } LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU(" << SUb->NodeNum << ")\n"); - - // Copy successor edges from SUa to SUb. Interleaving computation - // dependent on SUa can prevent load combining due to register reuse. - // Predecessor edges do not need to be copied from SUb to SUa since - // nearby loads should have effectively the same inputs. - for (const SDep &Succ : SUa->Succs) { - if (Succ.getSUnit() == SUb) - continue; - LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum - << ")\n"); - DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial)); + ++NumClustered; + + if (IsLoad) { + // Copy successor edges from SUa to SUb. Interleaving computation + // dependent on SUa can prevent load combining due to register reuse. 
+ // Predecessor edges do not need to be copied from SUb to SUa since + // nearby loads should have effectively the same inputs. + for (const SDep &Succ : SUa->Succs) { + if (Succ.getSUnit() == SUb) + continue; + LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum + << ")\n"); + DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial)); + } + } else { + // Copy predecessor edges from SUb to SUa to avoid the SUnits that + // SUb dependent on scheduled in-between SUb and SUa. Successor edges + // do not need to be copied from SUa to SUb since no one will depend + // on stores. + // Notice that, we don't need to care about the memory dependency as + // we won't try to cluster them if they have any memory dependency. + for (const SDep &Pred : SUb->Preds) { + if (Pred.getSUnit() == SUa) + continue; + LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum + << ")\n"); + DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial)); + } } + SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength, + CurrentClusterBytes}; + LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength << ", Curr cluster bytes: " << CurrentClusterBytes << "\n"); } } -/// Callback from DAG postProcessing to create cluster edges for loads. -void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) { - // Map DAG NodeNum to a set of dependent MemOps in store chain. 
- DenseMap<unsigned, SmallVector<SUnit *, 4>> StoreChains; - for (SUnit &SU : DAG->SUnits) { +void BaseMemOpClusterMutation::collectMemOpRecords( + std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) { + for (auto &SU : SUnits) { if ((IsLoad && !SU.getInstr()->mayLoad()) || (!IsLoad && !SU.getInstr()->mayStore())) continue; + const MachineInstr &MI = *SU.getInstr(); + SmallVector<const MachineOperand *, 4> BaseOps; + int64_t Offset; + bool OffsetIsScalable; + unsigned Width; + if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, + OffsetIsScalable, Width, TRI)) { + MemOpRecords.push_back(MemOpInfo(&SU, BaseOps, Offset, Width)); + + LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: " + << Offset << ", OffsetIsScalable: " << OffsetIsScalable + << ", Width: " << Width << "\n"); + } +#ifndef NDEBUG + for (auto *Op : BaseOps) + assert(Op); +#endif + } +} + +bool BaseMemOpClusterMutation::groupMemOps( + ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG, + DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) { + bool FastCluster = + ForceFastCluster || + MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold; + + for (const auto &MemOp : MemOps) { unsigned ChainPredID = DAG->SUnits.size(); - for (const SDep &Pred : SU.Preds) { - if (Pred.isCtrl() && !Pred.isArtificial()) { - ChainPredID = Pred.getSUnit()->NodeNum; - break; + if (FastCluster) { + for (const SDep &Pred : MemOp.SU->Preds) { + // We only want to cluster the mem ops that have the same ctrl(non-data) + // pred so that they didn't have ctrl dependency for each other. But for + // store instrs, we can still cluster them if the pred is load instr. + if ((Pred.isCtrl() && + (IsLoad || + (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) && + !Pred.isArtificial()) { + ChainPredID = Pred.getSUnit()->NodeNum; + break; + } } - } - // Insert the SU to corresponding store chain. 
- auto &Chain = StoreChains.FindAndConstruct(ChainPredID).second; - Chain.push_back(&SU); + } else + ChainPredID = 0; + + Groups[ChainPredID].push_back(MemOp); } + return FastCluster; +} - // Iterate over the store chains. - for (auto &SCD : StoreChains) - clusterNeighboringMemOps(SCD.second, DAG); +/// Callback from DAG postProcessing to create cluster edges for loads/stores. +void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) { + // Collect all the clusterable loads/stores + SmallVector<MemOpInfo, 32> MemOpRecords; + collectMemOpRecords(DAG->SUnits, MemOpRecords); + + if (MemOpRecords.size() < 2) + return; + + // Put the loads/stores without dependency into the same group with some + // heuristic if the DAG is too complex to avoid compiling time blow up. + // Notice that, some fusion pair could be lost with this. + DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups; + bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups); + + for (auto &Group : Groups) { + // Sorting the loads/stores, so that, we can stop the cluster as early as + // possible. + llvm::sort(Group.second); + + // Trying to cluster all the neighboring loads/stores. + clusterNeighboringMemOps(Group.second, FastCluster, DAG); + } } //===----------------------------------------------------------------------===// @@ -2724,7 +2816,11 @@ bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone) { if (Zone.isTop()) { - if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { + // Prefer the candidate with the lesser depth, but only if one of them has + // depth greater than the total latency scheduled so far, otherwise either + // of them could be scheduled now with no stall. 
+ if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) > + Zone.getScheduledLatency()) { if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), TryCand, Cand, GenericSchedulerBase::TopDepthReduce)) return true; @@ -2733,7 +2829,11 @@ bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, TryCand, Cand, GenericSchedulerBase::TopPathReduce)) return true; } else { - if (Cand.SU->getHeight() > Zone.getScheduledLatency()) { + // Prefer the candidate with the lesser height, but only if one of them has + // height greater than the total latency scheduled so far, otherwise either + // of them could be scheduled now with no stall. + if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) > + Zone.getScheduledLatency()) { if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), TryCand, Cand, GenericSchedulerBase::BotHeightReduce)) return true; @@ -3356,13 +3456,13 @@ ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) { return DAG; } -static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) { +static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) { return createGenericSchedLive(C); } static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", - createConveringSched); + createConvergingSched); //===----------------------------------------------------------------------===// // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy. 
@@ -3736,7 +3836,7 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { return true; } - static bool isNodeHidden(const SUnit *Node) { + static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) { if (ViewMISchedCutoff == 0) return false; return (Node->Preds.size() > ViewMISchedCutoff diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 5f958bbc31b7..378df1b75e25 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -34,6 +34,8 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -77,6 +79,18 @@ static cl::opt<unsigned> SplitEdgeProbabilityThreshold( "splitted critical edge"), cl::init(40), cl::Hidden); +static cl::opt<unsigned> SinkLoadInstsPerBlockThreshold( + "machine-sink-load-instrs-threshold", + cl::desc("Do not try to find alias store for a load if there is a in-path " + "block whose instruction number is higher than this threshold."), + cl::init(2000), cl::Hidden); + +static cl::opt<unsigned> SinkLoadBlocksThreshold( + "machine-sink-load-blocks-threshold", + cl::desc("Do not try to find alias store for a load if the block number in " + "the straight line is higher than this threshold."), + cl::init(20), cl::Hidden); + STATISTIC(NumSunk, "Number of machine instructions sunk"); STATISTIC(NumSplit, "Number of critical edges split"); STATISTIC(NumCoalesces, "Number of copies coalesced"); @@ -94,6 +108,7 @@ namespace { MachineBlockFrequencyInfo *MBFI; const MachineBranchProbabilityInfo *MBPI; AliasAnalysis *AA; + RegisterClassInfo RegClassInfo; // Remember which edges have been considered for breaking. 
SmallSet<std::pair<MachineBasicBlock*, MachineBasicBlock*>, 8> @@ -127,6 +142,15 @@ namespace { /// current block. DenseSet<DebugVariable> SeenDbgVars; + std::map<std::pair<MachineBasicBlock *, MachineBasicBlock *>, bool> + HasStoreCache; + std::map<std::pair<MachineBasicBlock *, MachineBasicBlock *>, + std::vector<MachineInstr *>> + StoreInstrCache; + + /// Cached BB's register pressure. + std::map<MachineBasicBlock *, std::vector<unsigned>> CachedRegisterPressure; + public: static char ID; // Pass identification @@ -159,6 +183,9 @@ namespace { MachineBasicBlock *From, MachineBasicBlock *To); + bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To, + MachineInstr &MI); + /// Postpone the splitting of the given critical /// edge (\p From, \p To). /// @@ -184,12 +211,12 @@ namespace { /// to the copy source. void SalvageUnsunkDebugUsersOfCopy(MachineInstr &, MachineBasicBlock *TargetBlock); - bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB, - MachineBasicBlock *DefMBB, - bool &BreakPHIEdge, bool &LocalUse) const; + bool AllUsesDominatedByBlock(Register Reg, MachineBasicBlock *MBB, + MachineBasicBlock *DefMBB, bool &BreakPHIEdge, + bool &LocalUse) const; MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, bool &BreakPHIEdge, AllSuccsCache &AllSuccessors); - bool isProfitableToSinkTo(unsigned Reg, MachineInstr &MI, + bool isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, MachineBasicBlock *SuccToSinkTo, AllSuccsCache &AllSuccessors); @@ -200,6 +227,8 @@ namespace { SmallVector<MachineBasicBlock *, 4> & GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB, AllSuccsCache &AllSuccessors) const; + + std::vector<unsigned> &getBBRegisterPressure(MachineBasicBlock &MBB); }; } // end anonymous namespace @@ -253,12 +282,11 @@ bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI, /// occur in blocks dominated by the specified block. 
If any use is in the /// definition block, then return false since it is never legal to move def /// after uses. -bool -MachineSinking::AllUsesDominatedByBlock(unsigned Reg, - MachineBasicBlock *MBB, - MachineBasicBlock *DefMBB, - bool &BreakPHIEdge, - bool &LocalUse) const { +bool MachineSinking::AllUsesDominatedByBlock(Register Reg, + MachineBasicBlock *MBB, + MachineBasicBlock *DefMBB, + bool &BreakPHIEdge, + bool &LocalUse) const { assert(Register::isVirtualRegister(Reg) && "Only makes sense for vregs"); // Ignore debug uses because debug info doesn't affect the code. @@ -327,6 +355,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr; MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + RegClassInfo.runOnMachineFunction(MF); bool EverMadeChange = false; @@ -347,11 +376,9 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { << printMBBReference(*Pair.first) << " -- " << printMBBReference(*NewSucc) << " -- " << printMBBReference(*Pair.second) << '\n'); - if (MBFI) { - auto NewSuccFreq = MBFI->getBlockFreq(Pair.first) * - MBPI->getEdgeProbability(Pair.first, NewSucc); - MBFI->setBlockFreq(NewSucc, NewSuccFreq.getFrequency()); - } + if (MBFI) + MBFI->onEdgeSplit(*Pair.first, *NewSucc, *MBPI); + MadeChange = true; ++NumSplit; } else @@ -362,6 +389,9 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { EverMadeChange = true; } + HasStoreCache.clear(); + StoreInstrCache.clear(); + // Now clear any kill flags for recorded registers. for (auto I : RegsToClearKillFlags) MRI->clearKillFlags(I); @@ -419,6 +449,8 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { SeenDbgUsers.clear(); SeenDbgVars.clear(); + // recalculate the bb register pressure after sinking one BB. 
+ CachedRegisterPressure.clear(); return MadeChange; } @@ -430,7 +462,7 @@ void MachineSinking::ProcessDbgInst(MachineInstr &MI) { DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(), MI.getDebugLoc()->getInlinedAt()); - bool SeenBefore = SeenDbgVars.count(Var) != 0; + bool SeenBefore = SeenDbgVars.contains(Var); MachineOperand &MO = MI.getDebugOperand(0); if (MO.isReg() && MO.getReg().isVirtual()) @@ -561,8 +593,44 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI, return true; } +std::vector<unsigned> & +MachineSinking::getBBRegisterPressure(MachineBasicBlock &MBB) { + // Currently to save compiling time, MBB's register pressure will not change + // in one ProcessBlock iteration because of CachedRegisterPressure. but MBB's + // register pressure is changed after sinking any instructions into it. + // FIXME: need a accurate and cheap register pressure estiminate model here. + auto RP = CachedRegisterPressure.find(&MBB); + if (RP != CachedRegisterPressure.end()) + return RP->second; + + RegionPressure Pressure; + RegPressureTracker RPTracker(Pressure); + + // Initialize the register pressure tracker. + RPTracker.init(MBB.getParent(), &RegClassInfo, nullptr, &MBB, MBB.end(), + /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true); + + for (MachineBasicBlock::iterator MII = MBB.instr_end(), + MIE = MBB.instr_begin(); + MII != MIE; --MII) { + MachineInstr &MI = *std::prev(MII); + if (MI.isDebugValue() || MI.isDebugLabel()) + continue; + RegisterOperands RegOpers; + RegOpers.collect(MI, *TRI, *MRI, false, false); + RPTracker.recedeSkipDebugValues(); + assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!"); + RPTracker.recede(RegOpers); + } + + RPTracker.closeRegion(); + auto It = CachedRegisterPressure.insert( + std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure)); + return It.first->second; +} + /// isProfitableToSinkTo - Return true if it is profitable to sink MI. 
-bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr &MI, +bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, MachineBasicBlock *SuccToSinkTo, AllSuccsCache &AllSuccessors) { @@ -598,9 +666,73 @@ bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr &MI, FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors)) return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors); - // If SuccToSinkTo is final destination and it is a post dominator of current - // block then it is not profitable to sink MI into SuccToSinkTo block. - return false; + MachineLoop *ML = LI->getLoopFor(MBB); + + // If the instruction is not inside a loop, it is not profitable to sink MI to + // a post dominate block SuccToSinkTo. + if (!ML) + return false; + + auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) { + unsigned Weight = TRI->getRegClassWeight(RC).RegWeight; + const int *PS = TRI->getRegClassPressureSets(RC); + // Get register pressure for block SuccToSinkTo. + std::vector<unsigned> BBRegisterPressure = + getBBRegisterPressure(*SuccToSinkTo); + for (; *PS != -1; PS++) + // check if any register pressure set exceeds limit in block SuccToSinkTo + // after sinking. + if (Weight + BBRegisterPressure[*PS] >= + TRI->getRegPressureSetLimit(*MBB->getParent(), *PS)) + return true; + return false; + }; + + // If this instruction is inside a loop and sinking this instruction can make + // more registers live range shorten, it is still prifitable. + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + // Ignore non-register operands. + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + if (Reg == 0) + continue; + + // Don't handle physical register. + if (Register::isPhysicalRegister(Reg)) + return false; + + // Users for the defs are all dominated by SuccToSinkTo. 
+ if (MO.isDef()) { + // This def register's live range is shortened after sinking. + bool LocalUse = false; + if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge, + LocalUse)) + return false; + } else { + MachineInstr *DefMI = MRI->getVRegDef(Reg); + // DefMI is defined outside of loop. There should be no live range + // impact for this operand. Defination outside of loop means: + // 1: defination is outside of loop. + // 2: defination is in this loop, but it is a PHI in the loop header. + if (LI->getLoopFor(DefMI->getParent()) != ML || + (DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent()))) + continue; + // The DefMI is defined inside the loop. + // If sinking this operand makes some register pressure set exceed limit, + // it is not profitable. + if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) { + LLVM_DEBUG(dbgs() << "register pressure exceed limit, not profitable."); + return false; + } + } + } + + // If MI is in loop and all its operands are alive across the whole loop or if + // no operand sinking make register pressure set exceed limit, it is + // profitable to sink MI. + return true; } /// Get the sorted sequence of successors for this MachineBasicBlock, possibly @@ -613,8 +745,7 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB, if (Succs != AllSuccessors.end()) return Succs->second; - SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->succ_begin(), - MBB->succ_end()); + SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->successors()); // Handle cases where sinking can happen but where the sink point isn't a // successor. For example: @@ -876,6 +1007,97 @@ static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, } } +/// hasStoreBetween - check if there is store betweeen straight line blocks From +/// and To. 
+bool MachineSinking::hasStoreBetween(MachineBasicBlock *From, + MachineBasicBlock *To, MachineInstr &MI) { + // Make sure From and To are in straight line which means From dominates To + // and To post dominates From. + if (!DT->dominates(From, To) || !PDT->dominates(To, From)) + return true; + + auto BlockPair = std::make_pair(From, To); + + // Does these two blocks pair be queried before and have a definite cached + // result? + if (HasStoreCache.find(BlockPair) != HasStoreCache.end()) + return HasStoreCache[BlockPair]; + + if (StoreInstrCache.find(BlockPair) != StoreInstrCache.end()) + return llvm::any_of(StoreInstrCache[BlockPair], [&](MachineInstr *I) { + return I->mayAlias(AA, MI, false); + }); + + bool SawStore = false; + bool HasAliasedStore = false; + DenseSet<MachineBasicBlock *> HandledBlocks; + DenseSet<MachineBasicBlock *> HandledDomBlocks; + // Go through all reachable blocks from From. + for (MachineBasicBlock *BB : depth_first(From)) { + // We insert the instruction at the start of block To, so no need to worry + // about stores inside To. + // Store in block From should be already considered when just enter function + // SinkInstruction. + if (BB == To || BB == From) + continue; + + // We already handle this BB in previous iteration. + if (HandledBlocks.count(BB)) + continue; + + HandledBlocks.insert(BB); + // To post dominates BB, it must be a path from block From. + if (PDT->dominates(To, BB)) { + if (!HandledDomBlocks.count(BB)) + HandledDomBlocks.insert(BB); + + // If this BB is too big or the block number in straight line between From + // and To is too big, stop searching to save compiling time. 
+ if (BB->size() > SinkLoadInstsPerBlockThreshold || + HandledDomBlocks.size() > SinkLoadBlocksThreshold) { + for (auto *DomBB : HandledDomBlocks) { + if (DomBB != BB && DT->dominates(DomBB, BB)) + HasStoreCache[std::make_pair(DomBB, To)] = true; + else if(DomBB != BB && DT->dominates(BB, DomBB)) + HasStoreCache[std::make_pair(From, DomBB)] = true; + } + HasStoreCache[BlockPair] = true; + return true; + } + + for (MachineInstr &I : *BB) { + // Treat as alias conservatively for a call or an ordered memory + // operation. + if (I.isCall() || I.hasOrderedMemoryRef()) { + for (auto *DomBB : HandledDomBlocks) { + if (DomBB != BB && DT->dominates(DomBB, BB)) + HasStoreCache[std::make_pair(DomBB, To)] = true; + else if(DomBB != BB && DT->dominates(BB, DomBB)) + HasStoreCache[std::make_pair(From, DomBB)] = true; + } + HasStoreCache[BlockPair] = true; + return true; + } + + if (I.mayStore()) { + SawStore = true; + // We still have chance to sink MI if all stores between are not + // aliased to MI. + // Cache all store instructions, so that we don't need to go through + // all From reachable blocks for next load instruction. + if (I.mayAlias(AA, MI, false)) + HasAliasedStore = true; + StoreInstrCache[BlockPair].push_back(&I); + } + } + } + } + // If there is no store at all, cache the result. + if (!SawStore) + HasStoreCache[BlockPair] = false; + return HasAliasedStore; +} + /// SinkInstruction - Determine whether it is safe to sink the specified machine /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, @@ -936,8 +1158,9 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, // We cannot sink a load across a critical edge - there may be stores in // other code paths. bool TryBreak = false; - bool store = true; - if (!MI.isSafeToMove(AA, store)) { + bool Store = + MI.mayLoad() ? 
hasStoreBetween(ParentBlock, SuccToSinkTo, MI) : true; + if (!MI.isSafeToMove(AA, Store)) { LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n"); TryBreak = true; } @@ -1268,9 +1491,9 @@ static bool hasRegisterDependency(MachineInstr *MI, return HasRegDependency; } -static SmallSet<unsigned, 4> getRegUnits(unsigned Reg, - const TargetRegisterInfo *TRI) { - SmallSet<unsigned, 4> RegUnits; +static SmallSet<MCRegister, 4> getRegUnits(MCRegister Reg, + const TargetRegisterInfo *TRI) { + SmallSet<MCRegister, 4> RegUnits; for (auto RI = MCRegUnitIterator(Reg, TRI); RI.isValid(); ++RI) RegUnits.insert(*RI); return RegUnits; @@ -1320,8 +1543,8 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, continue; // Record debug use of each reg unit. - SmallSet<unsigned, 4> Units = getRegUnits(MO.getReg(), TRI); - for (unsigned Reg : Units) + SmallSet<MCRegister, 4> Units = getRegUnits(MO.getReg(), TRI); + for (MCRegister Reg : Units) SeenDbgInstrs[Reg].push_back(MI); } continue; @@ -1365,18 +1588,17 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, // recorded which reg units that DBG_VALUEs read, if this instruction // writes any of those units then the corresponding DBG_VALUEs must sink. SetVector<MachineInstr *> DbgValsToSinkSet; - SmallVector<MachineInstr *, 4> DbgValsToSink; for (auto &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; - SmallSet<unsigned, 4> Units = getRegUnits(MO.getReg(), TRI); - for (unsigned Reg : Units) + SmallSet<MCRegister, 4> Units = getRegUnits(MO.getReg(), TRI); + for (MCRegister Reg : Units) for (auto *MI : SeenDbgInstrs.lookup(Reg)) DbgValsToSinkSet.insert(MI); } - DbgValsToSink.insert(DbgValsToSink.begin(), DbgValsToSinkSet.begin(), - DbgValsToSinkSet.end()); + SmallVector<MachineInstr *, 4> DbgValsToSink(DbgValsToSinkSet.begin(), + DbgValsToSinkSet.end()); // Clear the kill flag if SrcReg is killed between MI and the end of the // block. 
diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp new file mode 100644 index 000000000000..fb14f0a33209 --- /dev/null +++ b/llvm/lib/CodeGen/MachineStableHash.cpp @@ -0,0 +1,194 @@ +//===- lib/CodeGen/MachineStableHash.cpp ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Stable hashing for MachineInstr and MachineOperand. Useful or getting a +// hash across runs, modules, etc. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineStableHash.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/CodeGen/MIRFormatter.h" +#include "llvm/CodeGen/MIRPrinter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StableHashing.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" + +#define DEBUG_TYPE "machine-stable-hash" + +using namespace llvm; + +STATISTIC(StableHashBailingMachineBasicBlock, + "Number of encountered unsupported MachineOperands that were " + "MachineBasicBlocks while computing stable hashes"); 
+STATISTIC(StableHashBailingConstantPoolIndex, + "Number of encountered unsupported MachineOperands that were " + "ConstantPoolIndex while computing stable hashes"); +STATISTIC(StableHashBailingTargetIndexNoName, + "Number of encountered unsupported MachineOperands that were " + "TargetIndex with no name"); +STATISTIC(StableHashBailingGlobalAddress, + "Number of encountered unsupported MachineOperands that were " + "GlobalAddress while computing stable hashes"); +STATISTIC(StableHashBailingBlockAddress, + "Number of encountered unsupported MachineOperands that were " + "BlockAddress while computing stable hashes"); +STATISTIC(StableHashBailingMetadataUnsupported, + "Number of encountered unsupported MachineOperands that were " + "Metadata of an unsupported kind while computing stable hashes"); + +stable_hash llvm::stableHashValue(const MachineOperand &MO) { + switch (MO.getType()) { + case MachineOperand::MO_Register: + if (Register::isVirtualRegister(MO.getReg())) { + const MachineRegisterInfo &MRI = MO.getParent()->getMF()->getRegInfo(); + return MRI.getVRegDef(MO.getReg())->getOpcode(); + } + + // Register operands don't have target flags. + return stable_hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), + MO.isDef()); + case MachineOperand::MO_Immediate: + return stable_hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm()); + case MachineOperand::MO_CImmediate: + case MachineOperand::MO_FPImmediate: { + auto Val = MO.isCImm() ? 
MO.getCImm()->getValue() + : MO.getFPImm()->getValueAPF().bitcastToAPInt(); + auto ValHash = + stable_hash_combine_array(Val.getRawData(), Val.getNumWords()); + return hash_combine(MO.getType(), MO.getTargetFlags(), ValHash); + } + + case MachineOperand::MO_MachineBasicBlock: + StableHashBailingMachineBasicBlock++; + return 0; + case MachineOperand::MO_ConstantPoolIndex: + StableHashBailingConstantPoolIndex++; + return 0; + case MachineOperand::MO_BlockAddress: + StableHashBailingBlockAddress++; + return 0; + case MachineOperand::MO_Metadata: + StableHashBailingMetadataUnsupported++; + return 0; + case MachineOperand::MO_GlobalAddress: + StableHashBailingGlobalAddress++; + return 0; + case MachineOperand::MO_TargetIndex: { + if (const char *Name = MO.getTargetIndexName()) + return stable_hash_combine(MO.getType(), MO.getTargetFlags(), + stable_hash_combine_string(Name), + MO.getOffset()); + StableHashBailingTargetIndexNoName++; + return 0; + } + + case MachineOperand::MO_FrameIndex: + case MachineOperand::MO_JumpTableIndex: + return stable_hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getIndex()); + + case MachineOperand::MO_ExternalSymbol: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(), + stable_hash_combine_string(MO.getSymbolName())); + + case MachineOperand::MO_RegisterMask: + case MachineOperand::MO_RegisterLiveOut: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask()); + + case MachineOperand::MO_ShuffleMask: { + std::vector<llvm::stable_hash> ShuffleMaskHashes; + + llvm::transform( + MO.getShuffleMask(), std::back_inserter(ShuffleMaskHashes), + [](int S) -> llvm::stable_hash { return llvm::stable_hash(S); }); + + return hash_combine(MO.getType(), MO.getTargetFlags(), + stable_hash_combine_array(ShuffleMaskHashes.data(), + ShuffleMaskHashes.size())); + } + case MachineOperand::MO_MCSymbol: { + auto SymbolName = MO.getMCSymbol()->getName(); + return hash_combine(MO.getType(), MO.getTargetFlags(), + 
stable_hash_combine_string(SymbolName)); + } + case MachineOperand::MO_CFIIndex: + return stable_hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getCFIIndex()); + case MachineOperand::MO_IntrinsicID: + return stable_hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getIntrinsicID()); + case MachineOperand::MO_Predicate: + return stable_hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getPredicate()); + } + llvm_unreachable("Invalid machine operand type"); +} + +/// A stable hash value for machine instructions. +/// Returns 0 if no stable hash could be computed. +/// The hashing and equality testing functions ignore definitions so this is +/// useful for CSE, etc. +stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs, + bool HashConstantPoolIndices, + bool HashMemOperands) { + // Build up a buffer of hash code components. + SmallVector<stable_hash, 16> HashComponents; + HashComponents.reserve(MI.getNumOperands() + MI.getNumMemOperands() + 2); + HashComponents.push_back(MI.getOpcode()); + HashComponents.push_back(MI.getFlags()); + for (const MachineOperand &MO : MI.operands()) { + if (!HashVRegs && MO.isReg() && MO.isDef() && + Register::isVirtualRegister(MO.getReg())) + continue; // Skip virtual register defs. 
+ + if (MO.isCPI()) { + HashComponents.push_back(stable_hash_combine( + MO.getType(), MO.getTargetFlags(), MO.getIndex())); + continue; + } + + stable_hash StableHash = stableHashValue(MO); + if (!StableHash) + return 0; + HashComponents.push_back(StableHash); + } + + for (const auto *Op : MI.memoperands()) { + if (!HashMemOperands) + break; + HashComponents.push_back(static_cast<unsigned>(Op->getSize())); + HashComponents.push_back(static_cast<unsigned>(Op->getFlags())); + HashComponents.push_back(static_cast<unsigned>(Op->getOffset())); + HashComponents.push_back(static_cast<unsigned>(Op->getOrdering())); + HashComponents.push_back(static_cast<unsigned>(Op->getAddrSpace())); + HashComponents.push_back(static_cast<unsigned>(Op->getSyncScopeID())); + HashComponents.push_back(static_cast<unsigned>(Op->getBaseAlign().value())); + HashComponents.push_back(static_cast<unsigned>(Op->getFailureOrdering())); + } + + return stable_hash_combine_range(HashComponents.begin(), + HashComponents.end()); +} diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp index e6b51b7e1e56..8df23b781ffd 100644 --- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -701,17 +701,15 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI, SmallVectorImpl<DataDep> &Deps, SparseSet<LiveRegUnit> &RegUnits, const TargetRegisterInfo *TRI) { - SmallVector<unsigned, 8> Kills; + SmallVector<MCRegister, 8> Kills; SmallVector<unsigned, 8> LiveDefOps; for (MachineInstr::const_mop_iterator MI = UseMI->operands_begin(), ME = UseMI->operands_end(); MI != ME; ++MI) { const MachineOperand &MO = *MI; - if (!MO.isReg()) - continue; - Register Reg = MO.getReg(); - if (!Register::isPhysicalRegister(Reg)) + if (!MO.isReg() || !MO.getReg().isPhysical()) continue; + MCRegister Reg = MO.getReg().asMCReg(); // Track live defs and kills for updating RegUnits. 
if (MO.isDef()) { if (MO.isDead()) @@ -734,13 +732,14 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI, // Update RegUnits to reflect live registers after UseMI. // First kills. - for (unsigned Kill : Kills) + for (MCRegister Kill : Kills) for (MCRegUnitIterator Units(Kill, TRI); Units.isValid(); ++Units) RegUnits.erase(*Units); // Second, live defs. for (unsigned DefOp : LiveDefOps) { - for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI); + for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg().asMCReg(), + TRI); Units.isValid(); ++Units) { LiveRegUnit &LRU = RegUnits[*Units]; LRU.MI = UseMI; @@ -766,7 +765,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { assert(TBI.HasValidInstrHeights && "Missing height info"); unsigned MaxLen = 0; for (const LiveInReg &LIR : TBI.LiveIns) { - if (!Register::isVirtualRegister(LIR.Reg)) + if (!LIR.Reg.isVirtual()) continue; const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); // Ignore dependencies outside the current trace. @@ -912,7 +911,8 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height, continue; // This is a def of Reg. Remove corresponding entries from RegUnits, and // update MI Height to consider the physreg dependencies. - for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid(); + ++Units) { SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units); if (I == RegUnits.end()) continue; @@ -930,15 +930,15 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height, } // Now we know the height of MI. Update any regunits read. 
- for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) { - Register Reg = MI.getOperand(ReadOps[i]).getReg(); + for (size_t I = 0, E = ReadOps.size(); I != E; ++I) { + MCRegister Reg = MI.getOperand(ReadOps[I]).getReg().asMCReg(); for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { LiveRegUnit &LRU = RegUnits[*Units]; // Set the height to the highest reader of the unit. if (LRU.Cycle <= Height && LRU.MI != &MI) { LRU.Cycle = Height; LRU.MI = &MI; - LRU.Op = ReadOps[i]; + LRU.Op = ReadOps[I]; } } } @@ -979,7 +979,7 @@ void MachineTraceMetrics::Ensemble:: addLiveIns(const MachineInstr *DefMI, unsigned DefOp, ArrayRef<const MachineBasicBlock*> Trace) { assert(!Trace.empty() && "Trace should contain at least one block"); - unsigned Reg = DefMI->getOperand(DefOp).getReg(); + Register Reg = DefMI->getOperand(DefOp).getReg(); assert(Register::isVirtualRegister(Reg)); const MachineBasicBlock *DefMBB = DefMI->getParent(); @@ -1027,7 +1027,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) { if (MBB) { TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; for (LiveInReg &LI : TBI.LiveIns) { - if (Register::isVirtualRegister(LI.Reg)) { + if (LI.Reg.isVirtual()) { // For virtual registers, the def latency is included. 
unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)]; if (Height < LI.Height) diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index c1a2c4e0bc6e..0f6d9b888f47 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -86,7 +86,7 @@ namespace { struct MachineVerifier { MachineVerifier(Pass *pass, const char *b) : PASS(pass), Banner(b) {} - unsigned verify(MachineFunction &MF); + unsigned verify(const MachineFunction &MF); Pass *const PASS; const char *Banner; @@ -102,10 +102,10 @@ namespace { bool isFunctionRegBankSelected; bool isFunctionSelected; - using RegVector = SmallVector<unsigned, 16>; + using RegVector = SmallVector<Register, 16>; using RegMaskVector = SmallVector<const uint32_t *, 4>; - using RegSet = DenseSet<unsigned>; - using RegMap = DenseMap<unsigned, const MachineInstr *>; + using RegSet = DenseSet<Register>; + using RegMap = DenseMap<Register, const MachineInstr *>; using BlockSet = SmallPtrSet<const MachineBasicBlock *, 8>; const MachineInstr *FirstNonPHI; @@ -120,11 +120,10 @@ namespace { SlotIndex lastIndex; // Add Reg and any sub-registers to RV - void addRegWithSubRegs(RegVector &RV, unsigned Reg) { + void addRegWithSubRegs(RegVector &RV, Register Reg) { RV.push_back(Reg); - if (Register::isPhysicalRegister(Reg)) - for (const MCPhysReg &SubReg : TRI->subregs(Reg)) - RV.push_back(SubReg); + if (Reg.isPhysical()) + append_range(RV, TRI->subregs(Reg.asMCReg())); } struct BBInfo { @@ -132,7 +131,8 @@ namespace { bool reachable = false; // Vregs that must be live in because they are used without being - // defined. Map value is the user. + // defined. Map value is the user. vregsLiveIn doesn't include regs + // that only are used by PHI nodes. RegMap vregsLiveIn; // Regs killed in MBB. They may be defined again, and will then be in both @@ -158,8 +158,8 @@ namespace { // Add register to vregsRequired if it belongs there. Return true if // anything changed. 
- bool addRequired(unsigned Reg) { - if (!Register::isVirtualRegister(Reg)) + bool addRequired(Register Reg) { + if (!Reg.isVirtual()) return false; if (regsLiveOut.count(Reg)) return false; @@ -169,7 +169,7 @@ namespace { // Same for a full set. bool addRequired(const RegSet &RS) { bool Changed = false; - for (unsigned Reg : RS) + for (Register Reg : RS) Changed |= addRequired(Reg); return Changed; } @@ -183,7 +183,7 @@ namespace { } // Live-out registers are either in regsLiveOut or vregsPassed. - bool isLiveOut(unsigned Reg) const { + bool isLiveOut(Register Reg) const { return regsLiveOut.count(Reg) || vregsPassed.count(Reg); } }; @@ -191,13 +191,13 @@ namespace { // Extra register info per MBB. DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap; - bool isReserved(unsigned Reg) { - return Reg < regsReserved.size() && regsReserved.test(Reg); + bool isReserved(Register Reg) { + return Reg.id() < regsReserved.size() && regsReserved.test(Reg.id()); } - bool isAllocatable(unsigned Reg) const { - return Reg < TRI->getNumRegs() && TRI->isInAllocatableClass(Reg) && - !regsReserved.test(Reg); + bool isAllocatable(Register Reg) const { + return Reg.id() < TRI->getNumRegs() && TRI->isInAllocatableClass(Reg) && + !regsReserved.test(Reg.id()); } // Analysis information if available @@ -225,7 +225,7 @@ namespace { LLT MOVRegType = LLT{}); void report_context(const LiveInterval &LI) const; - void report_context(const LiveRange &LR, unsigned VRegUnit, + void report_context(const LiveRange &LR, Register VRegUnit, LaneBitmask LaneMask) const; void report_context(const LiveRange::Segment &S) const; void report_context(const VNInfo &VNI) const; @@ -233,18 +233,19 @@ namespace { void report_context(MCPhysReg PhysReg) const; void report_context_liverange(const LiveRange &LR) const; void report_context_lanemask(LaneBitmask LaneMask) const; - void report_context_vreg(unsigned VReg) const; - void report_context_vreg_regunit(unsigned VRegOrUnit) const; + void 
report_context_vreg(Register VReg) const; + void report_context_vreg_regunit(Register VRegOrUnit) const; void verifyInlineAsm(const MachineInstr *MI); void checkLiveness(const MachineOperand *MO, unsigned MONum); void checkLivenessAtUse(const MachineOperand *MO, unsigned MONum, - SlotIndex UseIdx, const LiveRange &LR, unsigned VRegOrUnit, + SlotIndex UseIdx, const LiveRange &LR, + Register VRegOrUnit, LaneBitmask LaneMask = LaneBitmask::getNone()); void checkLivenessAtDef(const MachineOperand *MO, unsigned MONum, - SlotIndex DefIdx, const LiveRange &LR, unsigned VRegOrUnit, - bool SubRangeCheck = false, + SlotIndex DefIdx, const LiveRange &LR, + Register VRegOrUnit, bool SubRangeCheck = false, LaneBitmask LaneMask = LaneBitmask::getNone()); void markReachable(const MachineBasicBlock *MBB); @@ -255,12 +256,12 @@ namespace { void verifyLiveVariables(); void verifyLiveIntervals(); void verifyLiveInterval(const LiveInterval&); - void verifyLiveRangeValue(const LiveRange&, const VNInfo*, unsigned, + void verifyLiveRangeValue(const LiveRange &, const VNInfo *, Register, LaneBitmask); - void verifyLiveRangeSegment(const LiveRange&, - const LiveRange::const_iterator I, unsigned, + void verifyLiveRangeSegment(const LiveRange &, + const LiveRange::const_iterator I, Register, LaneBitmask); - void verifyLiveRange(const LiveRange&, unsigned, + void verifyLiveRange(const LiveRange &, Register, LaneBitmask LaneMask = LaneBitmask::getNone()); void verifyStackFrame(); @@ -303,6 +304,19 @@ FunctionPass *llvm::createMachineVerifierPass(const std::string &Banner) { return new MachineVerifierPass(Banner); } +void llvm::verifyMachineFunction(MachineFunctionAnalysisManager *, + const std::string &Banner, + const MachineFunction &MF) { + // TODO: Use MFAM after porting below analyses. 
+ // LiveVariables *LiveVars; + // LiveIntervals *LiveInts; + // LiveStacks *LiveStks; + // SlotIndexes *Indexes; + unsigned FoundErrors = MachineVerifier(nullptr, Banner.c_str()).verify(MF); + if (FoundErrors) + report_fatal_error("Found " + Twine(FoundErrors) + " machine code errors."); +} + bool MachineFunction::verify(Pass *p, const char *Banner, bool AbortOnErrors) const { MachineFunction &MF = const_cast<MachineFunction&>(*this); @@ -335,7 +349,7 @@ void MachineVerifier::verifyProperties(const MachineFunction &MF) { report("Function has NoVRegs property but there are VReg operands", &MF); } -unsigned MachineVerifier::verify(MachineFunction &MF) { +unsigned MachineVerifier::verify(const MachineFunction &MF) { foundErrors = 0; this->MF = &MF; @@ -474,7 +488,7 @@ void MachineVerifier::report(const char *msg, const MachineInstr *MI) { errs() << "- instruction: "; if (Indexes && Indexes->hasIndex(*MI)) errs() << Indexes->getInstructionIndex(*MI) << '\t'; - MI->print(errs(), /*SkipOpers=*/true); + MI->print(errs(), /*IsStandalone=*/true); } void MachineVerifier::report(const char *msg, const MachineOperand *MO, @@ -494,7 +508,7 @@ void MachineVerifier::report_context(const LiveInterval &LI) const { errs() << "- interval: " << LI << '\n'; } -void MachineVerifier::report_context(const LiveRange &LR, unsigned VRegUnit, +void MachineVerifier::report_context(const LiveRange &LR, Register VRegUnit, LaneBitmask LaneMask) const { report_context_liverange(LR); report_context_vreg_regunit(VRegUnit); @@ -518,11 +532,11 @@ void MachineVerifier::report_context(MCPhysReg PReg) const { errs() << "- p. register: " << printReg(PReg, TRI) << '\n'; } -void MachineVerifier::report_context_vreg(unsigned VReg) const { +void MachineVerifier::report_context_vreg(Register VReg) const { errs() << "- v. 
register: " << printReg(VReg, TRI) << '\n'; } -void MachineVerifier::report_context_vreg_regunit(unsigned VRegOrUnit) const { +void MachineVerifier::report_context_vreg_regunit(Register VRegOrUnit) const { if (Register::isVirtualRegister(VRegOrUnit)) { report_context_vreg(VRegOrUnit); } else { @@ -776,9 +790,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) { } // Ensure non-terminators don't follow terminators. - // Ignore predicated terminators formed by if conversion. - // FIXME: If conversion shouldn't need to violate this rule. - if (MI->isTerminator() && !TII->isPredicated(*MI)) { + if (MI->isTerminator()) { if (!FirstTerminator) FirstTerminator = MI; } else if (FirstTerminator) { @@ -992,16 +1004,15 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { } case TargetOpcode::G_PHI: { LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); - if (!DstTy.isValid() || - !std::all_of(MI->operands_begin() + 1, MI->operands_end(), - [this, &DstTy](const MachineOperand &MO) { - if (!MO.isReg()) - return true; - LLT Ty = MRI->getType(MO.getReg()); - if (!Ty.isValid() || (Ty != DstTy)) - return false; - return true; - })) + if (!DstTy.isValid() || !all_of(drop_begin(MI->operands()), + [this, &DstTy](const MachineOperand &MO) { + if (!MO.isReg()) + return true; + LLT Ty = MRI->getType(MO.getReg()); + if (!Ty.isValid() || (Ty != DstTy)) + return false; + return true; + })) report("Generic Instruction G_PHI has operands with incompatible/missing " "types", MI); @@ -1343,20 +1354,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } } - switch (IntrID) { - case Intrinsic::memcpy: - if (MI->getNumOperands() != 5) - report("Expected memcpy intrinsic to have 5 operands", MI); - break; - case Intrinsic::memmove: - if (MI->getNumOperands() != 5) - report("Expected memmove intrinsic to have 5 operands", MI); - break; - case Intrinsic::memset: - if (MI->getNumOperands() != 5) - 
report("Expected memset intrinsic to have 5 operands", MI); - break; - } + break; } case TargetOpcode::G_SEXT_INREG: { @@ -1434,6 +1432,95 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { } break; } + case TargetOpcode::G_MEMCPY: + case TargetOpcode::G_MEMMOVE: { + ArrayRef<MachineMemOperand *> MMOs = MI->memoperands(); + if (MMOs.size() != 2) { + report("memcpy/memmove must have 2 memory operands", MI); + break; + } + + if ((!MMOs[0]->isStore() || MMOs[0]->isLoad()) || + (MMOs[1]->isStore() || !MMOs[1]->isLoad())) { + report("wrong memory operand types", MI); + break; + } + + if (MMOs[0]->getSize() != MMOs[1]->getSize()) + report("inconsistent memory operand sizes", MI); + + LLT DstPtrTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcPtrTy = MRI->getType(MI->getOperand(1).getReg()); + + if (!DstPtrTy.isPointer() || !SrcPtrTy.isPointer()) { + report("memory instruction operand must be a pointer", MI); + break; + } + + if (DstPtrTy.getAddressSpace() != MMOs[0]->getAddrSpace()) + report("inconsistent store address space", MI); + if (SrcPtrTy.getAddressSpace() != MMOs[1]->getAddrSpace()) + report("inconsistent load address space", MI); + + break; + } + case TargetOpcode::G_MEMSET: { + ArrayRef<MachineMemOperand *> MMOs = MI->memoperands(); + if (MMOs.size() != 1) { + report("memset must have 1 memory operand", MI); + break; + } + + if ((!MMOs[0]->isStore() || MMOs[0]->isLoad())) { + report("memset memory operand must be a store", MI); + break; + } + + LLT DstPtrTy = MRI->getType(MI->getOperand(0).getReg()); + if (!DstPtrTy.isPointer()) { + report("memset operand must be a pointer", MI); + break; + } + + if (DstPtrTy.getAddressSpace() != MMOs[0]->getAddrSpace()) + report("inconsistent memset address space", MI); + + break; + } + case TargetOpcode::G_VECREDUCE_SEQ_FADD: + case TargetOpcode::G_VECREDUCE_SEQ_FMUL: { + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT Src1Ty = MRI->getType(MI->getOperand(1).getReg()); + LLT 
Src2Ty = MRI->getType(MI->getOperand(2).getReg()); + if (!DstTy.isScalar()) + report("Vector reduction requires a scalar destination type", MI); + if (!Src1Ty.isScalar()) + report("Sequential FADD/FMUL vector reduction requires a scalar 1st operand", MI); + if (!Src2Ty.isVector()) + report("Sequential FADD/FMUL vector reduction must have a vector 2nd operand", MI); + break; + } + case TargetOpcode::G_VECREDUCE_FADD: + case TargetOpcode::G_VECREDUCE_FMUL: + case TargetOpcode::G_VECREDUCE_FMAX: + case TargetOpcode::G_VECREDUCE_FMIN: + case TargetOpcode::G_VECREDUCE_ADD: + case TargetOpcode::G_VECREDUCE_MUL: + case TargetOpcode::G_VECREDUCE_AND: + case TargetOpcode::G_VECREDUCE_OR: + case TargetOpcode::G_VECREDUCE_XOR: + case TargetOpcode::G_VECREDUCE_SMAX: + case TargetOpcode::G_VECREDUCE_SMIN: + case TargetOpcode::G_VECREDUCE_UMAX: + case TargetOpcode::G_VECREDUCE_UMIN: { + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); + if (!DstTy.isScalar()) + report("Vector reduction requires a scalar destination type", MI); + if (!SrcTy.isVector()) + report("Vector reduction requires vector source=", MI); + break; + } default: break; } @@ -1461,6 +1548,16 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { if (MI->isInlineAsm()) verifyInlineAsm(MI); + // Check that unspillable terminators define a reg and have at most one use. + if (TII->isUnspillableTerminator(MI)) { + if (!MI->getOperand(0).isReg() || !MI->getOperand(0).isDef()) + report("Unspillable Terminator does not define a reg", MI); + Register Def = MI->getOperand(0).getReg(); + if (Def.isVirtual() && + std::distance(MRI->use_nodbg_begin(Def), MRI->use_nodbg_end()) > 1) + report("Unspillable Terminator expected to have at most one use!", MI); + } + // A fully-formed DBG_VALUE must have a location. Ignore partially formed // DBG_VALUEs: these are convenient to use in tests, but should never get // generated. 
@@ -1468,6 +1565,11 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { if (!MI->getDebugLoc()) report("Missing DebugLoc for debug instruction", MI); + // Meta instructions should never be the subject of debug value tracking, + // they don't create a value in the output program at all. + if (MI->isMetaInstruction() && MI->peekDebugInstrNum()) + report("Metadata instruction should not have a value tracking number", MI); + // Check the MachineMemOperands for basic consistency. for (MachineMemOperand *Op : MI->memoperands()) { if (Op->isLoad() && !MI->mayLoad()) @@ -1543,6 +1645,10 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { } auto VerifyStackMapConstant = [&](unsigned Offset) { + if (Offset >= MI->getNumOperands()) { + report("stack map constant to STATEPOINT is out of range!", MI); + return; + } if (!MI->getOperand(Offset - 1).isImm() || MI->getOperand(Offset - 1).getImm() != StackMaps::ConstantOp || !MI->getOperand(Offset).isImm()) @@ -1551,6 +1657,25 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { VerifyStackMapConstant(SO.getCCIdx()); VerifyStackMapConstant(SO.getFlagsIdx()); VerifyStackMapConstant(SO.getNumDeoptArgsIdx()); + VerifyStackMapConstant(SO.getNumGCPtrIdx()); + VerifyStackMapConstant(SO.getNumAllocaIdx()); + VerifyStackMapConstant(SO.getNumGcMapEntriesIdx()); + + // Verify that all explicit statepoint defs are tied to gc operands as + // they are expected to be a relocation of gc operands. 
+ unsigned FirstGCPtrIdx = SO.getFirstGCPtrIdx(); + unsigned LastGCPtrIdx = SO.getNumAllocaIdx() - 2; + for (unsigned Idx = 0; Idx < MI->getNumDefs(); Idx++) { + unsigned UseOpIdx; + if (!MI->isRegTiedToUseOperand(Idx, &UseOpIdx)) { + report("STATEPOINT defs expected to be tied", MI); + break; + } + if (UseOpIdx < FirstGCPtrIdx || UseOpIdx > LastGCPtrIdx) { + report("STATEPOINT def tied to non-gc operand", MI); + break; + } + } // TODO: verify we have properly encoded deopt arguments } break; @@ -1865,8 +1990,10 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO, - unsigned MONum, SlotIndex UseIdx, const LiveRange &LR, unsigned VRegOrUnit, - LaneBitmask LaneMask) { + unsigned MONum, SlotIndex UseIdx, + const LiveRange &LR, + Register VRegOrUnit, + LaneBitmask LaneMask) { LiveQueryResult LRQ = LR.Query(UseIdx); // Check if we have a segment at the use, note however that we only need one // live subregister range, the others may be dead. @@ -1887,8 +2014,11 @@ void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO, } void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO, - unsigned MONum, SlotIndex DefIdx, const LiveRange &LR, unsigned VRegOrUnit, - bool SubRangeCheck, LaneBitmask LaneMask) { + unsigned MONum, SlotIndex DefIdx, + const LiveRange &LR, + Register VRegOrUnit, + bool SubRangeCheck, + LaneBitmask LaneMask) { if (const VNInfo *VNI = LR.getVNInfoAt(DefIdx)) { assert(VNI && "NULL valno is not allowed"); if (VNI->def != DefIdx) { @@ -1932,7 +2062,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO, void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { const MachineInstr *MI = MO->getParent(); - const unsigned Reg = MO->getReg(); + const Register Reg = MO->getReg(); // Both use and def operands can read a register. 
if (MO->readsReg()) { @@ -1950,8 +2080,9 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (LiveInts && !LiveInts->isNotInMIMap(*MI)) { SlotIndex UseIdx = LiveInts->getInstructionIndex(*MI); // Check the cached regunit intervals. - if (Register::isPhysicalRegister(Reg) && !isReserved(Reg)) { - for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + if (Reg.isPhysical() && !isReserved(Reg)) { + for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid(); + ++Units) { if (MRI->isReservedRegUnit(*Units)) continue; if (const LiveRange *LR = LiveInts->getCachedRegUnit(*Units)) @@ -2097,9 +2228,9 @@ void MachineVerifier::visitMachineBundleAfter(const MachineInstr *MI) { // Kill any masked registers. while (!regMasks.empty()) { const uint32_t *Mask = regMasks.pop_back_val(); - for (unsigned Reg : regsLive) - if (Register::isPhysicalRegister(Reg) && - MachineOperand::clobbersPhysReg(Mask, Reg)) + for (Register Reg : regsLive) + if (Reg.isPhysical() && + MachineOperand::clobbersPhysReg(Mask, Reg.asMCReg())) regsDead.push_back(Reg); } set_subtract(regsLive, regsDead); regsDead.clear(); @@ -2132,7 +2263,7 @@ struct VRegFilter { // Add elements to the filter itself. \pre Input set \p FromRegSet must have // no duplicates. Both virtual and physical registers are fine. template <typename RegSetT> void add(const RegSetT &FromRegSet) { - SmallVector<unsigned, 0> VRegsBuffer; + SmallVector<Register, 0> VRegsBuffer; filterAndAdd(FromRegSet, VRegsBuffer); } // Filter \p FromRegSet through the filter and append passed elements into \p @@ -2140,13 +2271,13 @@ struct VRegFilter { // \returns true if anything changed. 
template <typename RegSetT> bool filterAndAdd(const RegSetT &FromRegSet, - SmallVectorImpl<unsigned> &ToVRegs) { + SmallVectorImpl<Register> &ToVRegs) { unsigned SparseUniverse = Sparse.size(); unsigned NewSparseUniverse = SparseUniverse; unsigned NewDenseSize = Dense.size(); size_t Begin = ToVRegs.size(); - for (unsigned Reg : FromRegSet) { - if (!Register::isVirtualRegister(Reg)) + for (Register Reg : FromRegSet) { + if (!Reg.isVirtual()) continue; unsigned Index = Register::virtReg2Index(Reg); if (Index < SparseUniverseMax) { @@ -2170,7 +2301,7 @@ struct VRegFilter { Sparse.resize(NewSparseUniverse); Dense.reserve(NewDenseSize); for (unsigned I = Begin; I < End; ++I) { - unsigned Reg = ToVRegs[I]; + Register Reg = ToVRegs[I]; unsigned Index = Register::virtReg2Index(Reg); if (Index < SparseUniverseMax) Sparse.set(Index); @@ -2203,7 +2334,7 @@ private: // universe). filter_b implicitly contains all physical registers at all times. class FilteringVRegSet { VRegFilter Filter; - SmallVector<unsigned, 0> VRegs; + SmallVector<Register, 0> VRegs; public: // Set-up the filter_b. \pre Input register set \p RS must have no duplicates. @@ -2229,63 +2360,28 @@ public: // can pass through an MBB live, but may not be live every time. It is assumed // that all vregsPassed sets are empty before the call. void MachineVerifier::calcRegsPassed() { - // This is a forward dataflow, doing it in RPO. A standard map serves as a - // priority (sorting by RPO number) queue, deduplicating worklist, and an RPO - // number to MBB mapping all at once. - std::map<unsigned, const MachineBasicBlock *> RPOWorklist; - DenseMap<const MachineBasicBlock *, unsigned> RPONumbers; - if (MF->empty()) { + if (MF->empty()) // ReversePostOrderTraversal doesn't handle empty functions. 
return; - } - std::vector<FilteringVRegSet> VRegsPassedSets(MF->size()); - for (const MachineBasicBlock *MBB : - ReversePostOrderTraversal<const MachineFunction *>(MF)) { - // Careful with the evaluation order, fetch next number before allocating. - unsigned Number = RPONumbers.size(); - RPONumbers[MBB] = Number; - // Set-up the transfer functions for all blocks. - const BBInfo &MInfo = MBBInfoMap[MBB]; - VRegsPassedSets[Number].addToFilter(MInfo.regsKilled); - VRegsPassedSets[Number].addToFilter(MInfo.regsLiveOut); - } - // First push live-out regs to successors' vregsPassed. Remember the MBBs that - // have any vregsPassed. - for (const MachineBasicBlock &MBB : *MF) { - const BBInfo &MInfo = MBBInfoMap[&MBB]; - if (!MInfo.reachable) - continue; - for (const MachineBasicBlock *Succ : MBB.successors()) { - unsigned SuccNumber = RPONumbers[Succ]; - FilteringVRegSet &SuccSet = VRegsPassedSets[SuccNumber]; - if (SuccSet.add(MInfo.regsLiveOut)) - RPOWorklist.emplace(SuccNumber, Succ); - } - } - // Iteratively push vregsPassed to successors. 
- while (!RPOWorklist.empty()) { - auto Next = RPOWorklist.begin(); - const MachineBasicBlock *MBB = Next->second; - RPOWorklist.erase(Next); - FilteringVRegSet &MSet = VRegsPassedSets[RPONumbers[MBB]]; - for (const MachineBasicBlock *Succ : MBB->successors()) { - if (Succ == MBB) + for (const MachineBasicBlock *MB : + ReversePostOrderTraversal<const MachineFunction *>(MF)) { + FilteringVRegSet VRegs; + BBInfo &Info = MBBInfoMap[MB]; + assert(Info.reachable); + + VRegs.addToFilter(Info.regsKilled); + VRegs.addToFilter(Info.regsLiveOut); + for (const MachineBasicBlock *Pred : MB->predecessors()) { + const BBInfo &PredInfo = MBBInfoMap[Pred]; + if (!PredInfo.reachable) continue; - unsigned SuccNumber = RPONumbers[Succ]; - FilteringVRegSet &SuccSet = VRegsPassedSets[SuccNumber]; - if (SuccSet.add(MSet)) - RPOWorklist.emplace(SuccNumber, Succ); + + VRegs.add(PredInfo.regsLiveOut); + VRegs.add(PredInfo.vregsPassed); } - } - // Copy the results back to BBInfos. - for (const MachineBasicBlock &MBB : *MF) { - BBInfo &MInfo = MBBInfoMap[&MBB]; - if (!MInfo.reachable) - continue; - const FilteringVRegSet &MSet = VRegsPassedSets[RPONumbers[&MBB]]; - MInfo.vregsPassed.reserve(MSet.size()); - MInfo.vregsPassed.insert(MSet.begin(), MSet.end()); + Info.vregsPassed.reserve(VRegs.size()); + Info.vregsPassed.insert(VRegs.begin(), VRegs.end()); } } @@ -2302,6 +2398,23 @@ void MachineVerifier::calcRegsRequired() { if (PInfo.addRequired(MInfo.vregsLiveIn)) todo.insert(Pred); } + + // Handle the PHI node. + for (const MachineInstr &MI : MBB.phis()) { + for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { + // Skip those Operands which are undef regs or not regs. + if (!MI.getOperand(i).isReg() || !MI.getOperand(i).readsReg()) + continue; + + // Get register and predecessor for one PHI edge. 
+ Register Reg = MI.getOperand(i).getReg(); + const MachineBasicBlock *Pred = MI.getOperand(i + 1).getMBB(); + + BBInfo &PInfo = MBBInfoMap[Pred]; + if (PInfo.addRequired(Reg)) + todo.insert(Pred); + } + } } // Iteratively push vregsRequired to predecessors. This will converge to the @@ -2399,7 +2512,7 @@ void MachineVerifier::visitMachineFunctionAfter() { // Check for killed virtual registers that should be live out. for (const auto &MBB : *MF) { BBInfo &MInfo = MBBInfoMap[&MBB]; - for (unsigned VReg : MInfo.vregsRequired) + for (Register VReg : MInfo.vregsRequired) if (MInfo.regsKilled.count(VReg)) { report("Virtual register killed in block, but needed live out.", &MBB); errs() << "Virtual register " << printReg(VReg) @@ -2409,7 +2522,7 @@ void MachineVerifier::visitMachineFunctionAfter() { if (!MF->empty()) { BBInfo &MInfo = MBBInfoMap[&MF->front()]; - for (unsigned VReg : MInfo.vregsRequired) { + for (Register VReg : MInfo.vregsRequired) { report("Virtual register defs don't dominate all uses.", MF); report_context_vreg(VReg); } @@ -2449,12 +2562,27 @@ void MachineVerifier::visitMachineFunctionAfter() { for (auto CSInfo : MF->getCallSitesInfo()) if (!CSInfo.first->isCall()) report("Call site info referencing instruction that is not call", MF); + + // If there's debug-info, check that we don't have any duplicate value + // tracking numbers. 
+ if (MF->getFunction().getSubprogram()) { + DenseSet<unsigned> SeenNumbers; + for (auto &MBB : *MF) { + for (auto &MI : MBB) { + if (auto Num = MI.peekDebugInstrNum()) { + auto Result = SeenNumbers.insert((unsigned)Num); + if (!Result.second) + report("Instruction has a duplicated value tracking number", &MI); + } + } + } + } } void MachineVerifier::verifyLiveVariables() { assert(LiveVars && "Don't call verifyLiveVariables without LiveVars"); - for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = Register::index2VirtReg(i); + for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) { + Register Reg = Register::index2VirtReg(I); LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); for (const auto &MBB : *MF) { BBInfo &MInfo = MBBInfoMap[&MBB]; @@ -2479,8 +2607,8 @@ void MachineVerifier::verifyLiveVariables() { void MachineVerifier::verifyLiveIntervals() { assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts"); - for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = Register::index2VirtReg(i); + for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) { + Register Reg = Register::index2VirtReg(I); // Spilling and splitting may leave unused registers around. Skip them. 
if (MRI->reg_nodbg_empty(Reg)) @@ -2493,7 +2621,7 @@ void MachineVerifier::verifyLiveIntervals() { } const LiveInterval &LI = LiveInts->getInterval(Reg); - assert(Reg == LI.reg && "Invalid reg to interval mapping"); + assert(Reg == LI.reg() && "Invalid reg to interval mapping"); verifyLiveInterval(LI); } @@ -2504,7 +2632,7 @@ void MachineVerifier::verifyLiveIntervals() { } void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, - const VNInfo *VNI, unsigned Reg, + const VNInfo *VNI, Register Reg, LaneBitmask LaneMask) { if (VNI->isUnused()) return; @@ -2597,8 +2725,8 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, const LiveRange::const_iterator I, - unsigned Reg, LaneBitmask LaneMask) -{ + Register Reg, + LaneBitmask LaneMask) { const LiveRange::Segment &S = *I; const VNInfo *VNI = S.valno; assert(VNI && "Live segment has no valno"); @@ -2809,7 +2937,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, } } -void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg, +void MachineVerifier::verifyLiveRange(const LiveRange &LR, Register Reg, LaneBitmask LaneMask) { for (const VNInfo *VNI : LR.valnos) verifyLiveRangeValue(LR, VNI, Reg, LaneMask); @@ -2819,7 +2947,7 @@ void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg, } void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { - unsigned Reg = LI.reg; + Register Reg = LI.reg(); assert(Register::isVirtualRegister(Reg)); verifyLiveRange(LI, Reg); @@ -2836,10 +2964,10 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { } if (SR.empty()) { report("Subrange must not be empty", MF); - report_context(SR, LI.reg, SR.LaneMask); + report_context(SR, LI.reg(), SR.LaneMask); } Mask |= SR.LaneMask; - verifyLiveRange(SR, LI.reg, SR.LaneMask); + verifyLiveRange(SR, LI.reg(), SR.LaneMask); if (!LI.covers(SR)) { report("A Subrange is not covered by the main range", 
MF); report_context(LI); diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index d85b1b7988ce..095da09ea82b 100644 --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -11,9 +11,7 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopUtils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/InitializePasses.h" #include "llvm/MC/MCContext.h" #include "llvm/Support/Debug.h" diff --git a/llvm/lib/CodeGen/MultiHazardRecognizer.cpp b/llvm/lib/CodeGen/MultiHazardRecognizer.cpp new file mode 100644 index 000000000000..e4cd92ac4868 --- /dev/null +++ b/llvm/lib/CodeGen/MultiHazardRecognizer.cpp @@ -0,0 +1,92 @@ +//===- MultiHazardRecognizer.cpp - Scheduler Support ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the MultiHazardRecognizer class, which is a wrapper +// for a set of ScheduleHazardRecognizer instances +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MultiHazardRecognizer.h" +#include "llvm/ADT/STLExtras.h" +#include <algorithm> +#include <functional> +#include <numeric> + +using namespace llvm; + +void MultiHazardRecognizer::AddHazardRecognizer( + std::unique_ptr<ScheduleHazardRecognizer> &&R) { + MaxLookAhead = std::max(MaxLookAhead, R->getMaxLookAhead()); + Recognizers.push_back(std::move(R)); +} + +bool MultiHazardRecognizer::atIssueLimit() const { + return llvm::any_of(Recognizers, + std::mem_fn(&ScheduleHazardRecognizer::atIssueLimit)); +} + +ScheduleHazardRecognizer::HazardType +MultiHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { + for (auto &R : Recognizers) { + auto res = R->getHazardType(SU, Stalls); + if (res != NoHazard) + return res; + } + return NoHazard; +} + +void MultiHazardRecognizer::Reset() { + for (auto &R : Recognizers) + R->Reset(); +} + +void MultiHazardRecognizer::EmitInstruction(SUnit *SU) { + for (auto &R : Recognizers) + R->EmitInstruction(SU); +} + +void MultiHazardRecognizer::EmitInstruction(MachineInstr *MI) { + for (auto &R : Recognizers) + R->EmitInstruction(MI); +} + +unsigned MultiHazardRecognizer::PreEmitNoops(SUnit *SU) { + auto MN = [=](unsigned a, std::unique_ptr<ScheduleHazardRecognizer> &R) { + return std::max(a, R->PreEmitNoops(SU)); + }; + return std::accumulate(Recognizers.begin(), Recognizers.end(), 0u, MN); +} + +unsigned MultiHazardRecognizer::PreEmitNoops(MachineInstr *MI) { + auto MN = [=](unsigned a, std::unique_ptr<ScheduleHazardRecognizer> &R) { + return std::max(a, R->PreEmitNoops(MI)); + }; + return std::accumulate(Recognizers.begin(), Recognizers.end(), 0u, MN); 
+} + +bool MultiHazardRecognizer::ShouldPreferAnother(SUnit *SU) { + auto SPA = [=](std::unique_ptr<ScheduleHazardRecognizer> &R) { + return R->ShouldPreferAnother(SU); + }; + return llvm::any_of(Recognizers, SPA); +} + +void MultiHazardRecognizer::AdvanceCycle() { + for (auto &R : Recognizers) + R->AdvanceCycle(); +} + +void MultiHazardRecognizer::RecedeCycle() { + for (auto &R : Recognizers) + R->RecedeCycle(); +} + +void MultiHazardRecognizer::EmitNoop() { + for (auto &R : Recognizers) + R->EmitNoop(); +} diff --git a/llvm/lib/CodeGen/PHIElimination.cpp b/llvm/lib/CodeGen/PHIElimination.cpp index 311b87fa9e3b..8148b64d8443 100644 --- a/llvm/lib/CodeGen/PHIElimination.cpp +++ b/llvm/lib/CodeGen/PHIElimination.cpp @@ -101,10 +101,10 @@ namespace { // These functions are temporary abstractions around LiveVariables and // LiveIntervals, so they can go away when LiveVariables does. - bool isLiveIn(unsigned Reg, const MachineBasicBlock *MBB); - bool isLiveOutPastPHIs(unsigned Reg, const MachineBasicBlock *MBB); + bool isLiveIn(Register Reg, const MachineBasicBlock *MBB); + bool isLiveOutPastPHIs(Register Reg, const MachineBasicBlock *MBB); - using BBVRegPair = std::pair<unsigned, unsigned>; + using BBVRegPair = std::pair<unsigned, Register>; using VRegPHIUse = DenseMap<BBVRegPair, unsigned>; VRegPHIUse VRegPHIUseCount; @@ -324,21 +324,43 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // Increment use count of the newly created virtual register. LV->setPHIJoin(IncomingReg); - // When we are reusing the incoming register, it may already have been - // killed in this block. The old kill will also have been inserted at - // AfterPHIsIt, so it appears before the current PHICopy. 
- if (reusedIncoming) - if (MachineInstr *OldKill = VI.findKill(&MBB)) { - LLVM_DEBUG(dbgs() << "Remove old kill from " << *OldKill); - LV->removeVirtualRegisterKilled(IncomingReg, *OldKill); - LLVM_DEBUG(MBB.dump()); + MachineInstr *OldKill = nullptr; + bool IsPHICopyAfterOldKill = false; + + if (reusedIncoming && (OldKill = VI.findKill(&MBB))) { + // Calculate whether the PHICopy is after the OldKill. + // In general, the PHICopy is inserted as the first non-phi instruction + // by default, so it's before the OldKill. But some Target hooks for + // createPHIDestinationCopy() may modify the default insert position of + // PHICopy. + for (auto I = MBB.SkipPHIsAndLabels(MBB.begin()), E = MBB.end(); + I != E; ++I) { + if (I == PHICopy) + break; + + if (I == OldKill) { + IsPHICopyAfterOldKill = true; + break; + } } + } - // Add information to LiveVariables to know that the incoming value is - // killed. Note that because the value is defined in several places (once - // each for each incoming block), the "def" block and instruction fields - // for the VarInfo is not filled in. - LV->addVirtualRegisterKilled(IncomingReg, *PHICopy); + // When we are reusing the incoming register and it has been marked killed + // by OldKill, if the PHICopy is after the OldKill, we should remove the + // killed flag from OldKill. + if (IsPHICopyAfterOldKill) { + LLVM_DEBUG(dbgs() << "Remove old kill from " << *OldKill); + LV->removeVirtualRegisterKilled(IncomingReg, *OldKill); + LLVM_DEBUG(MBB.dump()); + } + + // Add information to LiveVariables to know that the first used incoming + // value or the resued incoming value whose PHICopy is after the OldKIll + // is killed. Note that because the value is defined in several places + // (once each for each incoming block), the "def" block and instruction + // fields for the VarInfo is not filled in. 
+ if (!OldKill || IsPHICopyAfterOldKill) + LV->addVirtualRegisterKilled(IncomingReg, *PHICopy); } // Since we are going to be deleting the PHI node, if it is the last use of @@ -372,8 +394,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, } LiveInterval &DestLI = LIS->getInterval(DestReg); - assert(DestLI.begin() != DestLI.end() && - "PHIs should have nonempty LiveIntervals."); + assert(!DestLI.empty() && "PHIs should have nonempty LiveIntervals."); if (DestLI.endIndex().isDead()) { // A dead PHI's live range begins and ends at the start of the MBB, but // the lowered copy, which will still be dead, needs to begin and end at @@ -420,6 +441,19 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, if (!MBBsInsertedInto.insert(&opBlock).second) continue; // If the copy has already been emitted, we're done. + MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg); + if (SrcRegDef && TII->isUnspillableTerminator(SrcRegDef)) { + assert(SrcRegDef->getOperand(0).isReg() && + SrcRegDef->getOperand(0).isDef() && + "Expected operand 0 to be a reg def!"); + // Now that the PHI's use has been removed (as the instruction was + // removed) there should be no other uses of the SrcReg. + assert(MRI->use_empty(SrcReg) && + "Expected a single use from UnspillableTerminator"); + SrcRegDef->getOperand(0).setReg(IncomingReg); + continue; + } + // Find a safe location to insert the copy, this may be the first terminator // in the block (or end()). 
MachineBasicBlock::iterator InsertPos = @@ -670,7 +704,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, return Changed; } -bool PHIElimination::isLiveIn(unsigned Reg, const MachineBasicBlock *MBB) { +bool PHIElimination::isLiveIn(Register Reg, const MachineBasicBlock *MBB) { assert((LV || LIS) && "isLiveIn() requires either LiveVariables or LiveIntervals"); if (LIS) @@ -679,7 +713,7 @@ bool PHIElimination::isLiveIn(unsigned Reg, const MachineBasicBlock *MBB) { return LV->isLiveIn(Reg, *MBB); } -bool PHIElimination::isLiveOutPastPHIs(unsigned Reg, +bool PHIElimination::isLiveOutPastPHIs(Register Reg, const MachineBasicBlock *MBB) { assert((LV || LIS) && "isLiveOutPastPHIs() requires either LiveVariables or LiveIntervals"); diff --git a/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/llvm/lib/CodeGen/PHIEliminationUtils.cpp index bae96eb84521..016335f420d3 100644 --- a/llvm/lib/CodeGen/PHIEliminationUtils.cpp +++ b/llvm/lib/CodeGen/PHIEliminationUtils.cpp @@ -8,9 +8,9 @@ #include "PHIEliminationUtils.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" + using namespace llvm; // findCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg @@ -27,31 +27,35 @@ llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB, // Usually, we just want to insert the copy before the first terminator // instruction. However, for the edge going to a landing pad, we must insert // the copy before the call/invoke instruction. Similarly for an INLINEASM_BR - // going to an indirect target. - if (!SuccMBB->isEHPad() && !SuccMBB->isInlineAsmBrIndirectTarget()) + // going to an indirect target. This is similar to SplitKit.cpp's + // computeLastInsertPoint, and similarly assumes that there cannot be multiple + // instructions that are Calls with EHPad successors or INLINEASM_BR in a + // block. 
+ bool EHPadSuccessor = SuccMBB->isEHPad(); + if (!EHPadSuccessor && !SuccMBB->isInlineAsmBrIndirectTarget()) return MBB->getFirstTerminator(); - // Discover any defs/uses in this basic block. - SmallPtrSet<MachineInstr*, 8> DefUsesInMBB; + // Discover any defs in this basic block. + SmallPtrSet<MachineInstr *, 8> DefsInMBB; MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo(); - for (MachineInstr &RI : MRI.reg_instructions(SrcReg)) { + for (MachineInstr &RI : MRI.def_instructions(SrcReg)) if (RI.getParent() == MBB) - DefUsesInMBB.insert(&RI); - } + DefsInMBB.insert(&RI); - MachineBasicBlock::iterator InsertPoint; - if (DefUsesInMBB.empty()) { - // No defs. Insert the copy at the start of the basic block. - InsertPoint = MBB->begin(); - } else if (DefUsesInMBB.size() == 1) { - // Insert the copy immediately after the def/use. - InsertPoint = *DefUsesInMBB.begin(); - ++InsertPoint; - } else { - // Insert the copy immediately after the last def/use. - InsertPoint = MBB->end(); - while (!DefUsesInMBB.count(&*--InsertPoint)) {} - ++InsertPoint; + MachineBasicBlock::iterator InsertPoint = MBB->begin(); + // Insert the copy at the _latest_ point of: + // 1. Immediately AFTER the last def + // 2. Immediately BEFORE a call/inlineasm_br. 
+ for (auto I = MBB->rbegin(), E = MBB->rend(); I != E; ++I) { + if (DefsInMBB.contains(&*I)) { + InsertPoint = std::next(I.getReverse()); + break; + } + if ((EHPadSuccessor && I->isCall()) || + I->getOpcode() == TargetOpcode::INLINEASM_BR) { + InsertPoint = I.getReverse(); + break; + } } // Make sure the copy goes after any phi nodes but before diff --git a/llvm/lib/CodeGen/ParallelCG.cpp b/llvm/lib/CodeGen/ParallelCG.cpp index c19ed1f8f71d..849b667254bd 100644 --- a/llvm/lib/CodeGen/ParallelCG.cpp +++ b/llvm/lib/CodeGen/ParallelCG.cpp @@ -28,6 +28,8 @@ static void codegen(Module *M, llvm::raw_pwrite_stream &OS, function_ref<std::unique_ptr<TargetMachine>()> TMFactory, CodeGenFileType FileType) { std::unique_ptr<TargetMachine> TM = TMFactory(); + assert(TM && "Failed to create target machine!"); + legacy::PassManager CodeGenPasses; if (TM->addPassesToEmitFile(CodeGenPasses, OS, nullptr, FileType)) report_fatal_error("Failed to setup codegen"); diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp index 4a66863ea803..34ac396c0471 100644 --- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -178,6 +178,11 @@ namespace { } } + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties() + .set(MachineFunctionProperties::Property::IsSSA); + } + /// Track Def -> Use info used for rewriting copies. 
using RewriteMapTy = SmallDenseMap<RegSubRegPair, ValueTrackerResult>; @@ -196,41 +201,39 @@ namespace { SmallPtrSetImpl<MachineInstr *> &LocalMIs); bool optimizeRecurrence(MachineInstr &PHI); bool findNextSource(RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap); - bool isMoveImmediate(MachineInstr &MI, - SmallSet<unsigned, 4> &ImmDefRegs, - DenseMap<unsigned, MachineInstr*> &ImmDefMIs); - bool foldImmediate(MachineInstr &MI, SmallSet<unsigned, 4> &ImmDefRegs, - DenseMap<unsigned, MachineInstr*> &ImmDefMIs); + bool isMoveImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs, + DenseMap<Register, MachineInstr *> &ImmDefMIs); + bool foldImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs, + DenseMap<Register, MachineInstr *> &ImmDefMIs); /// Finds recurrence cycles, but only ones that formulated around /// a def operand and a use operand that are tied. If there is a use /// operand commutable with the tied use operand, find recurrence cycle /// along that operand as well. - bool findTargetRecurrence(unsigned Reg, - const SmallSet<unsigned, 2> &TargetReg, + bool findTargetRecurrence(Register Reg, + const SmallSet<Register, 2> &TargetReg, RecurrenceCycle &RC); /// If copy instruction \p MI is a virtual register copy, track it in - /// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was - /// previously seen as a copy, replace the uses of this copy with the - /// previously seen copy's destination register. + /// the set \p CopyMIs. If this virtual register was previously seen as a + /// copy, replace the uses of this copy with the previously seen copy's + /// destination register. bool foldRedundantCopy(MachineInstr &MI, - SmallSet<unsigned, 4> &CopySrcRegs, - DenseMap<unsigned, MachineInstr *> &CopyMIs); + DenseMap<RegSubRegPair, MachineInstr *> &CopyMIs); /// Is the register \p Reg a non-allocatable physical register? 
- bool isNAPhysCopy(unsigned Reg); + bool isNAPhysCopy(Register Reg); /// If copy instruction \p MI is a non-allocatable virtual<->physical /// register copy, track it in the \p NAPhysToVirtMIs map. If this /// non-allocatable physical register was previously copied to a virtual /// registered and hasn't been clobbered, the virt->phys copy can be /// deleted. - bool foldRedundantNAPhysCopy(MachineInstr &MI, - DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs); + bool foldRedundantNAPhysCopy( + MachineInstr &MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs); bool isLoadFoldable(MachineInstr &MI, - SmallSet<unsigned, 16> &FoldAsLoadDefCandidates); + SmallSet<Register, 16> &FoldAsLoadDefCandidates); /// Check whether \p MI is understood by the register coalescer /// but may require some rewriting. @@ -291,7 +294,7 @@ namespace { public: ValueTrackerResult() = default; - ValueTrackerResult(unsigned Reg, unsigned SubReg) { + ValueTrackerResult(Register Reg, unsigned SubReg) { addSource(Reg, SubReg); } @@ -305,11 +308,11 @@ namespace { Inst = nullptr; } - void addSource(unsigned SrcReg, unsigned SrcSubReg) { + void addSource(Register SrcReg, unsigned SrcSubReg) { RegSrcs.push_back(RegSubRegPair(SrcReg, SrcSubReg)); } - void setSource(int Idx, unsigned SrcReg, unsigned SrcSubReg) { + void setSource(int Idx, Register SrcReg, unsigned SrcSubReg) { assert(Idx < getNumSources() && "Reg pair source out of index"); RegSrcs[Idx] = RegSubRegPair(SrcReg, SrcSubReg); } @@ -320,7 +323,7 @@ namespace { return RegSrcs[Idx]; } - unsigned getSrcReg(int Idx) const { + Register getSrcReg(int Idx) const { assert(Idx < getNumSources() && "Reg source out of index"); return RegSrcs[Idx].Reg; } @@ -330,7 +333,7 @@ namespace { return RegSrcs[Idx].SubReg; } - bool operator==(const ValueTrackerResult &Other) { + bool operator==(const ValueTrackerResult &Other) const { if (Other.getInst() != getInst()) return false; @@ -373,7 +376,7 @@ namespace { unsigned DefSubReg; /// The register 
where the value can be found. - unsigned Reg; + Register Reg; /// MachineRegisterInfo used to perform tracking. const MachineRegisterInfo &MRI; @@ -415,11 +418,11 @@ namespace { /// Indeed, when \p Reg is a physical register that constructor does not /// know which definition of \p Reg it should track. /// Use the next constructor to track a physical register. - ValueTracker(unsigned Reg, unsigned DefSubReg, + ValueTracker(Register Reg, unsigned DefSubReg, const MachineRegisterInfo &MRI, const TargetInstrInfo *TII = nullptr) : DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) { - if (!Register::isPhysicalRegister(Reg)) { + if (!Reg.isPhysical()) { Def = MRI.getVRegDef(Reg); DefIdx = MRI.def_begin(Reg).getOperandNo(); } @@ -824,7 +827,7 @@ public: /// Rewrite the current source with \p NewReg and \p NewSubReg if possible. /// \return True if the rewriting was possible, false otherwise. - virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) = 0; + virtual bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) = 0; }; /// Rewriter for COPY instructions. @@ -852,7 +855,7 @@ public: return true; } - bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { + bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { if (CurrentSrcIdx != 1) return false; MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx); @@ -897,7 +900,7 @@ public: return true; } - bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { + bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { return false; } }; @@ -941,7 +944,7 @@ public: return true; } - bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { + bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { if (CurrentSrcIdx != 2) return false; // We are rewriting the inserted reg. 
@@ -988,7 +991,7 @@ public: return true; } - bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { + bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { // The only source we can rewrite is the input register. if (CurrentSrcIdx != 1) return false; @@ -1066,7 +1069,7 @@ public: return MODef.getSubReg() == 0; } - bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { + bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { // We cannot rewrite out of bound operands. // Moreover, rewritable sources are at odd positions. if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands()) @@ -1312,7 +1315,7 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy( /// We only fold loads to virtual registers and the virtual register defined /// has a single user. bool PeepholeOptimizer::isLoadFoldable( - MachineInstr &MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) { + MachineInstr &MI, SmallSet<Register, 16> &FoldAsLoadDefCandidates) { if (!MI.canFoldAsLoad() || !MI.mayLoad()) return false; const MCInstrDesc &MCID = MI.getDesc(); @@ -1323,7 +1326,7 @@ bool PeepholeOptimizer::isLoadFoldable( // To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting // loads. It should be checked when processing uses of the load, since // uses can be removed during peephole. 
- if (!MI.getOperand(0).getSubReg() && Register::isVirtualRegister(Reg) && + if (Reg.isVirtual() && !MI.getOperand(0).getSubReg() && MRI->hasOneNonDBGUser(Reg)) { FoldAsLoadDefCandidates.insert(Reg); return true; @@ -1332,15 +1335,15 @@ bool PeepholeOptimizer::isLoadFoldable( } bool PeepholeOptimizer::isMoveImmediate( - MachineInstr &MI, SmallSet<unsigned, 4> &ImmDefRegs, - DenseMap<unsigned, MachineInstr *> &ImmDefMIs) { + MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs, + DenseMap<Register, MachineInstr *> &ImmDefMIs) { const MCInstrDesc &MCID = MI.getDesc(); if (!MI.isMoveImmediate()) return false; if (MCID.getNumDefs() != 1) return false; Register Reg = MI.getOperand(0).getReg(); - if (Register::isVirtualRegister(Reg)) { + if (Reg.isVirtual()) { ImmDefMIs.insert(std::make_pair(Reg, &MI)); ImmDefRegs.insert(Reg); return true; @@ -1352,22 +1355,19 @@ bool PeepholeOptimizer::isMoveImmediate( /// Try folding register operands that are defined by move immediate /// instructions, i.e. a trivial constant folding optimization, if /// and only if the def and use are in the same BB. -bool PeepholeOptimizer::foldImmediate(MachineInstr &MI, - SmallSet<unsigned, 4> &ImmDefRegs, - DenseMap<unsigned, MachineInstr *> &ImmDefMIs) { +bool PeepholeOptimizer::foldImmediate( + MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs, + DenseMap<Register, MachineInstr *> &ImmDefMIs) { for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || MO.isDef()) continue; - // Ignore dead implicit defs. 
- if (MO.isImplicit() && MO.isDead()) - continue; Register Reg = MO.getReg(); - if (!Register::isVirtualRegister(Reg)) + if (!Reg.isVirtual()) continue; if (ImmDefRegs.count(Reg) == 0) continue; - DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg); + DenseMap<Register, MachineInstr *>::iterator II = ImmDefMIs.find(Reg); assert(II != ImmDefMIs.end() && "couldn't find immediate definition"); if (TII->FoldImmediate(MI, *II->second, Reg, MRI)) { ++NumImmFold; @@ -1391,33 +1391,30 @@ bool PeepholeOptimizer::foldImmediate(MachineInstr &MI, // %2 = COPY %0:sub1 // // Should replace %2 uses with %1:sub1 -bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI, - SmallSet<unsigned, 4> &CopySrcRegs, - DenseMap<unsigned, MachineInstr *> &CopyMIs) { +bool PeepholeOptimizer::foldRedundantCopy( + MachineInstr &MI, DenseMap<RegSubRegPair, MachineInstr *> &CopyMIs) { assert(MI.isCopy() && "expected a COPY machine instruction"); Register SrcReg = MI.getOperand(1).getReg(); - if (!Register::isVirtualRegister(SrcReg)) + unsigned SrcSubReg = MI.getOperand(1).getSubReg(); + if (!SrcReg.isVirtual()) return false; Register DstReg = MI.getOperand(0).getReg(); - if (!Register::isVirtualRegister(DstReg)) + if (!DstReg.isVirtual()) return false; - if (CopySrcRegs.insert(SrcReg).second) { + RegSubRegPair SrcPair(SrcReg, SrcSubReg); + + if (CopyMIs.insert(std::make_pair(SrcPair, &MI)).second) { // First copy of this reg seen. - CopyMIs.insert(std::make_pair(SrcReg, &MI)); return false; } - MachineInstr *PrevCopy = CopyMIs.find(SrcReg)->second; + MachineInstr *PrevCopy = CopyMIs.find(SrcPair)->second; - unsigned SrcSubReg = MI.getOperand(1).getSubReg(); - unsigned PrevSrcSubReg = PrevCopy->getOperand(1).getSubReg(); - - // Can't replace different subregister extracts. 
- if (SrcSubReg != PrevSrcSubReg) - return false; + assert(SrcSubReg == PrevCopy->getOperand(1).getSubReg() && + "Unexpected mismatching subreg!"); Register PrevDstReg = PrevCopy->getOperand(0).getReg(); @@ -1435,12 +1432,12 @@ bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI, return true; } -bool PeepholeOptimizer::isNAPhysCopy(unsigned Reg) { - return Register::isPhysicalRegister(Reg) && !MRI->isAllocatable(Reg); +bool PeepholeOptimizer::isNAPhysCopy(Register Reg) { + return Reg.isPhysical() && !MRI->isAllocatable(Reg); } bool PeepholeOptimizer::foldRedundantNAPhysCopy( - MachineInstr &MI, DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs) { + MachineInstr &MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs) { assert(MI.isCopy() && "expected a COPY machine instruction"); if (DisableNAPhysCopyOpt) @@ -1449,17 +1446,17 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); if (isNAPhysCopy(SrcReg) && Register::isVirtualRegister(DstReg)) { - // %vreg = COPY %physreg + // %vreg = COPY $physreg // Avoid using a datastructure which can track multiple live non-allocatable // phys->virt copies since LLVM doesn't seem to do this. NAPhysToVirtMIs.insert({SrcReg, &MI}); return false; } - if (!(Register::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg))) + if (!(SrcReg.isVirtual() && isNAPhysCopy(DstReg))) return false; - // %physreg = COPY %vreg + // $physreg = COPY %vreg auto PrevCopy = NAPhysToVirtMIs.find(DstReg); if (PrevCopy == NAPhysToVirtMIs.end()) { // We can't remove the copy: there was an intervening clobber of the @@ -1489,13 +1486,11 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( /// \bried Returns true if \p MO is a virtual register operand. 
static bool isVirtualRegisterOperand(MachineOperand &MO) { - if (!MO.isReg()) - return false; - return Register::isVirtualRegister(MO.getReg()); + return MO.isReg() && MO.getReg().isVirtual(); } bool PeepholeOptimizer::findTargetRecurrence( - unsigned Reg, const SmallSet<unsigned, 2> &TargetRegs, + Register Reg, const SmallSet<Register, 2> &TargetRegs, RecurrenceCycle &RC) { // Recurrence found if Reg is in TargetRegs. if (TargetRegs.count(Reg)) @@ -1566,7 +1561,7 @@ bool PeepholeOptimizer::findTargetRecurrence( /// %1 of ADD instruction, the redundant move instruction can be /// avoided. bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) { - SmallSet<unsigned, 2> TargetRegs; + SmallSet<Register, 2> TargetRegs; for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) { MachineOperand &MO = PHI.getOperand(Idx); assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction"); @@ -1622,20 +1617,20 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { // during the scan, if a MI is not in the set, it is assumed to be located // after. Newly created MIs have to be inserted in the set as well. SmallPtrSet<MachineInstr*, 16> LocalMIs; - SmallSet<unsigned, 4> ImmDefRegs; - DenseMap<unsigned, MachineInstr*> ImmDefMIs; - SmallSet<unsigned, 16> FoldAsLoadDefCandidates; + SmallSet<Register, 4> ImmDefRegs; + DenseMap<Register, MachineInstr *> ImmDefMIs; + SmallSet<Register, 16> FoldAsLoadDefCandidates; // Track when a non-allocatable physical register is copied to a virtual // register so that useless moves can be removed. // - // %physreg is the map index; MI is the last valid `%vreg = COPY %physreg` - // without any intervening re-definition of %physreg. - DenseMap<unsigned, MachineInstr *> NAPhysToVirtMIs; + // $physreg is the map index; MI is the last valid `%vreg = COPY $physreg` + // without any intervening re-definition of $physreg. + DenseMap<Register, MachineInstr *> NAPhysToVirtMIs; - // Set of virtual registers that are copied from. 
- SmallSet<unsigned, 4> CopySrcRegs; - DenseMap<unsigned, MachineInstr *> CopySrcMIs; + // Set of pairs of virtual registers and their subregs that are copied + // from. + DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs; bool IsLoopHeader = MLI->isLoopHeader(&MBB); @@ -1646,9 +1641,10 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { ++MII; LocalMIs.insert(MI); - // Skip debug instructions. They should not affect this peephole optimization. + // Skip debug instructions. They should not affect this peephole + // optimization. if (MI->isDebugInstr()) - continue; + continue; if (MI->isPosition()) continue; @@ -1678,7 +1674,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { } else if (MO.isRegMask()) { const uint32_t *RegMask = MO.getRegMask(); for (auto &RegMI : NAPhysToVirtMIs) { - unsigned Def = RegMI.first; + Register Def = RegMI.first; if (MachineOperand::clobbersPhysReg(RegMask, Def)) { LLVM_DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI); @@ -1723,9 +1719,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { continue; } - if (MI->isCopy() && - (foldRedundantCopy(*MI, CopySrcRegs, CopySrcMIs) || - foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) { + if (MI->isCopy() && (foldRedundantCopy(*MI, CopySrcMIs) || + foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) { LocalMIs.erase(MI); LLVM_DEBUG(dbgs() << "Deleting redundant copy: " << *MI << "\n"); MI->eraseFromParent(); @@ -1763,13 +1758,13 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { const MachineOperand &MOp = MI->getOperand(i); if (!MOp.isReg()) continue; - unsigned FoldAsLoadDefReg = MOp.getReg(); + Register FoldAsLoadDefReg = MOp.getReg(); if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) { // We need to fold load after optimizeCmpInstr, since // optimizeCmpInstr can enable folding by converting SUB to CMP. 
// Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and // we need it for markUsesInDebugValueAsUndef(). - unsigned FoldedReg = FoldAsLoadDefReg; + Register FoldedReg = FoldAsLoadDefReg; MachineInstr *DefMI = nullptr; if (MachineInstr *FoldMI = TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) { diff --git a/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp b/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp index 4f88f4d3dd6a..82ed386db827 100644 --- a/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp +++ b/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp @@ -82,11 +82,9 @@ bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) { for (MachineInstr &MI : MBB) { // If we need to emit noops prior to this instruction, then do so. unsigned NumPreNoops = HazardRec->PreEmitNoops(&MI); - for (unsigned i = 0; i != NumPreNoops; ++i) { - HazardRec->EmitNoop(); - TII->insertNoop(MBB, MachineBasicBlock::iterator(MI)); - ++NumNoops; - } + HazardRec->EmitNoops(NumPreNoops); + TII->insertNoops(MBB, MachineBasicBlock::iterator(MI), NumPreNoops); + NumNoops += NumPreNoops; HazardRec->EmitInstruction(&MI); if (HazardRec->atIssueLimit()) { diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index 1be9544848ec..80c38f3ec341 100644 --- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -96,7 +96,7 @@ static bool lowerObjCCall(Function &F, const char *NewFn, ++I; IRBuilder<> Builder(CI->getParent(), CI->getIterator()); - SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end()); + SmallVector<Value *, 8> Args(CI->args()); CallInst *NewCI = Builder.CreateCall(FCache, Args); NewCI->setName(CI->getName()); diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index a489f493d5ee..378aaba2a65f 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -620,12 +620,12 @@ void 
PEI::spillCalleeSavedRegs(MachineFunction &MF) { if (!MFI.hasCalls()) NumLeafFuncWithSpills++; - for (MachineBasicBlock *SaveBlock : SaveBlocks) { + for (MachineBasicBlock *SaveBlock : SaveBlocks) insertCSRSaves(*SaveBlock, CSI); - // Update the live-in information of all the blocks up to the save - // point. - updateLiveness(MF); - } + + // Update the live-in information of all the blocks up to the save point. + updateLiveness(MF); + for (MachineBasicBlock *RestoreBlock : RestoreBlocks) insertCSRRestores(*RestoreBlock, CSI); } @@ -1077,7 +1077,26 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // If the frame pointer is eliminated, all frame offsets will be relative to // SP not FP. Align to MaxAlign so this works. StackAlign = std::max(StackAlign, MaxAlign); + int64_t OffsetBeforeAlignment = Offset; Offset = alignTo(Offset, StackAlign, Skew); + + // If we have increased the offset to fulfill the alignment constrants, + // then the scavenging spill slots may become harder to reach from the + // stack pointer, float them so they stay close. + if (OffsetBeforeAlignment != Offset && RS && !EarlyScavengingSlots) { + SmallVector<int, 2> SFIs; + RS->getScavengingFrameIndices(SFIs); + LLVM_DEBUG(if (!SFIs.empty()) llvm::dbgs() + << "Adjusting emergency spill slots!\n";); + int64_t Delta = Offset - OffsetBeforeAlignment; + for (SmallVectorImpl<int>::iterator I = SFIs.begin(), IE = SFIs.end(); + I != IE; ++I) { + LLVM_DEBUG(llvm::dbgs() << "Adjusting offset of emergency spill slot #" + << *I << " from " << MFI.getObjectOffset(*I);); + MFI.setObjectOffset(*I, MFI.getObjectOffset(*I) - Delta); + LLVM_DEBUG(llvm::dbgs() << " to " << MFI.getObjectOffset(*I) << "\n";); + } + } } // Update frame info to pretend that this is part of the stack... 
@@ -1209,7 +1228,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, unsigned FrameIdx = MI.getOperand(0).getIndex(); unsigned Size = MF.getFrameInfo().getObjectSize(FrameIdx); - int64_t Offset = + StackOffset Offset = TFI->getFrameIndexReference(MF, FrameIdx, Reg); MI.getOperand(0).ChangeToRegister(Reg, false /*isDef*/); MI.getOperand(0).setIsDebug(); @@ -1236,7 +1255,8 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, // Make the DBG_VALUE direct. MI.getDebugOffset().ChangeToRegister(0, false); } - DIExpr = DIExpression::prepend(DIExpr, PrependFlags, Offset); + + DIExpr = TRI.prependOffsetExpression(DIExpr, PrependFlags, Offset); MI.getDebugExpressionOp().setMetadata(DIExpr); continue; } @@ -1252,9 +1272,11 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, "DBG_VALUE machine instruction"); Register Reg; MachineOperand &Offset = MI.getOperand(i + 1); - int refOffset = TFI->getFrameIndexReferencePreferSP( + StackOffset refOffset = TFI->getFrameIndexReferencePreferSP( MF, MI.getOperand(i).getIndex(), Reg, /*IgnoreSPUpdates*/ false); - Offset.setImm(Offset.getImm() + refOffset + SPAdj); + assert(!refOffset.getScalable() && + "Frame offsets with a scalable component are not supported"); + Offset.setImm(Offset.getImm() + refOffset.getFixed() + SPAdj); MI.getOperand(i).ChangeToRegister(Reg, false /*isDef*/); continue; } diff --git a/llvm/lib/CodeGen/PseudoProbeInserter.cpp b/llvm/lib/CodeGen/PseudoProbeInserter.cpp new file mode 100644 index 000000000000..9c716a5a37ea --- /dev/null +++ b/llvm/lib/CodeGen/PseudoProbeInserter.cpp @@ -0,0 +1,95 @@ +//===- PseudoProbeInserter.cpp - Insert annotation for callsite profiling -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements PseudoProbeInserter pass, which inserts pseudo probe +// annotations for call instructions with a pseudo-probe-specific dwarf +// discriminator. such discriminator indicates that the call instruction comes +// with a pseudo probe, and the discriminator value holds information to +// identify the corresponding counter. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/PseudoProbe.h" +#include "llvm/InitializePasses.h" +#include "llvm/Target/TargetMachine.h" +#include <unordered_map> + +#define DEBUG_TYPE "pseudo-probe-inserter" + +using namespace llvm; + +namespace { +class PseudoProbeInserter : public MachineFunctionPass { +public: + static char ID; + + PseudoProbeInserter() : MachineFunctionPass(ID) { + initializePseudoProbeInserterPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return "Pseudo Probe Inserter"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override { + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + bool Changed = false; + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (MI.isCall()) { + if (DILocation *DL = MI.getDebugLoc()) { + auto Value = DL->getDiscriminator(); + if (DILocation::isPseudoProbeDiscriminator(Value)) { + BuildMI(MBB, MI, DL, TII->get(TargetOpcode::PSEUDO_PROBE)) + .addImm(getFuncGUID(MF.getFunction().getParent(), DL)) + .addImm( + 
PseudoProbeDwarfDiscriminator::extractProbeIndex(Value)) + .addImm( + PseudoProbeDwarfDiscriminator::extractProbeType(Value)) + .addImm(PseudoProbeDwarfDiscriminator::extractProbeAttributes( + Value)); + Changed = true; + } + } + } + } + } + + return Changed; + } + +private: + uint64_t getFuncGUID(Module *M, DILocation *DL) { + auto *SP = DL->getScope()->getSubprogram(); + auto Name = SP->getLinkageName(); + if (Name.empty()) + Name = SP->getName(); + return Function::getGUID(Name); + } +}; +} // namespace + +char PseudoProbeInserter::ID = 0; +INITIALIZE_PASS_BEGIN(PseudoProbeInserter, DEBUG_TYPE, + "Insert pseudo probe annotations for value profiling", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(PseudoProbeInserter, DEBUG_TYPE, + "Insert pseudo probe annotations for value profiling", + false, false) + +FunctionPass *llvm::createPseudoProbeInserter() { + return new PseudoProbeInserter(); +} diff --git a/llvm/lib/CodeGen/RDFGraph.cpp b/llvm/lib/CodeGen/RDFGraph.cpp index 437a6b030096..cebb902f0a4a 100644 --- a/llvm/lib/CodeGen/RDFGraph.cpp +++ b/llvm/lib/CodeGen/RDFGraph.cpp @@ -984,11 +984,6 @@ RegisterRef DataFlowGraph::restrictRef(RegisterRef AR, RegisterRef BR) const { LaneBitmask M = AR.Mask & BR.Mask; return M.any() ? RegisterRef(AR.Reg, M) : RegisterRef(); } -#ifndef NDEBUG -// RegisterRef NAR = PRI.normalize(AR); -// RegisterRef NBR = PRI.normalize(BR); -// assert(NAR.Reg != NBR.Reg); -#endif // This isn't strictly correct, because the overlap may happen in the // part masked out. if (PRI.alias(AR, BR)) diff --git a/llvm/lib/CodeGen/RDFLiveness.cpp b/llvm/lib/CodeGen/RDFLiveness.cpp index 0bcd27f8ea45..76bf0c280970 100644 --- a/llvm/lib/CodeGen/RDFLiveness.cpp +++ b/llvm/lib/CodeGen/RDFLiveness.cpp @@ -23,8 +23,10 @@ // <10.1145/2086696.2086706>. 
<hal-00647369> // #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominanceFrontier.h" #include "llvm/CodeGen/MachineDominators.h" @@ -45,6 +47,7 @@ #include <cstdint> #include <iterator> #include <map> +#include <unordered_map> #include <utility> #include <vector> @@ -108,7 +111,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR, const RegisterAggr &DefRRs) { NodeList RDefs; // Return value. SetVector<NodeId> DefQ; - SetVector<NodeId> Owners; + DenseMap<MachineInstr*, uint32_t> OrdMap; // Dead defs will be treated as if they were live, since they are actually // on the data-flow path. They cannot be ignored because even though they @@ -151,18 +154,9 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR, for (auto S : DFG.getRelatedRefs(TA.Addr->getOwner(DFG), TA)) if (NodeId RD = NodeAddr<RefNode*>(S).Addr->getReachingDef()) DefQ.insert(RD); - } - - // Remove all non-phi defs that are not aliased to RefRR, and collect - // the owners of the remaining defs. - SetVector<NodeId> Defs; - for (NodeId N : DefQ) { - auto TA = DFG.addr<DefNode*>(N); - bool IsPhi = TA.Addr->getFlags() & NodeAttrs::PhiRef; - if (!IsPhi && !PRI.alias(RefRR, TA.Addr->getRegRef(DFG))) - continue; - Defs.insert(TA.Id); - Owners.insert(TA.Addr->getOwner(DFG).Id); + // Don't visit sibling defs. They share the same reaching def (which + // will be visited anyway), but they define something not aliased to + // this ref. } // Return the MachineBasicBlock containing a given instruction. @@ -174,38 +168,80 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR, NodeAddr<BlockNode*> BA = PA.Addr->getOwner(DFG); return BA.Addr->getCode(); }; - // Less(A,B) iff instruction A is further down in the dominator tree than B. 
- auto Less = [&Block,this] (NodeId A, NodeId B) -> bool { + + SmallSet<NodeId,32> Defs; + + // Remove all non-phi defs that are not aliased to RefRR, and segregate + // the the remaining defs into buckets for containing blocks. + std::map<NodeId, NodeAddr<InstrNode*>> Owners; + std::map<MachineBasicBlock*, SmallVector<NodeId,32>> Blocks; + for (NodeId N : DefQ) { + auto TA = DFG.addr<DefNode*>(N); + bool IsPhi = TA.Addr->getFlags() & NodeAttrs::PhiRef; + if (!IsPhi && !PRI.alias(RefRR, TA.Addr->getRegRef(DFG))) + continue; + Defs.insert(TA.Id); + NodeAddr<InstrNode*> IA = TA.Addr->getOwner(DFG); + Owners[TA.Id] = IA; + Blocks[Block(IA)].push_back(IA.Id); + } + + auto Precedes = [this,&OrdMap] (NodeId A, NodeId B) { if (A == B) return false; - auto OA = DFG.addr<InstrNode*>(A), OB = DFG.addr<InstrNode*>(B); - MachineBasicBlock *BA = Block(OA), *BB = Block(OB); - if (BA != BB) - return MDT.dominates(BB, BA); - // They are in the same block. + NodeAddr<InstrNode*> OA = DFG.addr<InstrNode*>(A); + NodeAddr<InstrNode*> OB = DFG.addr<InstrNode*>(B); bool StmtA = OA.Addr->getKind() == NodeAttrs::Stmt; bool StmtB = OB.Addr->getKind() == NodeAttrs::Stmt; - if (StmtA) { - if (!StmtB) // OB is a phi and phis dominate statements. - return true; - MachineInstr *CA = NodeAddr<StmtNode*>(OA).Addr->getCode(); - MachineInstr *CB = NodeAddr<StmtNode*>(OB).Addr->getCode(); - // The order must be linear, so tie-break such equalities. - if (CA == CB) - return A < B; - return MDT.dominates(CB, CA); - } else { - // OA is a phi. - if (StmtB) - return false; - // Both are phis. There is no ordering between phis (in terms of - // the data-flow), so tie-break this via node id comparison. 
+ if (StmtA && StmtB) { + const MachineInstr *InA = NodeAddr<StmtNode*>(OA).Addr->getCode(); + const MachineInstr *InB = NodeAddr<StmtNode*>(OB).Addr->getCode(); + assert(InA->getParent() == InB->getParent()); + auto FA = OrdMap.find(InA); + if (FA != OrdMap.end()) + return FA->second < OrdMap.find(InB)->second; + const MachineBasicBlock *BB = InA->getParent(); + for (auto It = BB->begin(), E = BB->end(); It != E; ++It) { + if (It == InA->getIterator()) + return true; + if (It == InB->getIterator()) + return false; + } + llvm_unreachable("InA and InB should be in the same block"); + } + // One of them is a phi node. + if (!StmtA && !StmtB) { + // Both are phis, which are unordered. Break the tie by id numbers. return A < B; } + // Only one of them is a phi. Phis always precede statements. + return !StmtA; }; - std::vector<NodeId> Tmp(Owners.begin(), Owners.end()); - llvm::sort(Tmp, Less); + auto GetOrder = [&OrdMap] (MachineBasicBlock &B) { + uint32_t Pos = 0; + for (MachineInstr &In : B) + OrdMap.insert({&In, ++Pos}); + }; + + // For each block, sort the nodes in it. + std::vector<MachineBasicBlock*> TmpBB; + for (auto &Bucket : Blocks) { + TmpBB.push_back(Bucket.first); + if (Bucket.second.size() > 2) + GetOrder(*Bucket.first); + llvm::sort(Bucket.second, Precedes); + } + + // Sort the blocks with respect to dominance. + llvm::sort(TmpBB, + [this](auto A, auto B) { return MDT.properlyDominates(A, B); }); + + std::vector<NodeId> TmpInst; + for (auto I = TmpBB.rbegin(), E = TmpBB.rend(); I != E; ++I) { + auto &Bucket = Blocks[*I]; + TmpInst.insert(TmpInst.end(), Bucket.rbegin(), Bucket.rend()); + } // The vector is a list of instructions, so that defs coming from // the same instruction don't need to be artificially ordered. @@ -220,6 +256,9 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR, // *d3<C> If A \incl BuC, and B \incl AuC, then *d2 would be // covered if we added A first, and A would be covered // if we added B first. 
+ // In this example we want both A and B, because we don't want to give + // either one priority over the other, since they belong to the same + // statement. RegisterAggr RRs(DefRRs); @@ -227,7 +266,8 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR, return TA.Addr->getKind() == NodeAttrs::Def && Defs.count(TA.Id); }; - for (NodeId T : Tmp) { + + for (NodeId T : TmpInst) { if (!FullChain && RRs.hasCoverOf(RefRR)) break; auto TA = DFG.addr<InstrNode*>(T); @@ -246,7 +286,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR, if (FullChain || IsPhi || !RRs.hasCoverOf(QR)) Ds.push_back(DA); } - RDefs.insert(RDefs.end(), Ds.begin(), Ds.end()); + llvm::append_range(RDefs, Ds); for (NodeAddr<DefNode*> DA : Ds) { // When collecting a full chain of definitions, do not consider phi // defs to actually define a register. @@ -260,7 +300,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR, auto DeadP = [](const NodeAddr<DefNode*> DA) -> bool { return DA.Addr->getFlags() & NodeAttrs::Dead; }; - RDefs.resize(std::distance(RDefs.begin(), llvm::remove_if(RDefs, DeadP))); + llvm::erase_if(RDefs, DeadP); return RDefs; } @@ -430,13 +470,13 @@ void Liveness::computePhiInfo() { NodeList Blocks = FA.Addr->members(DFG); for (NodeAddr<BlockNode*> BA : Blocks) { auto Ps = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG); - Phis.insert(Phis.end(), Ps.begin(), Ps.end()); + llvm::append_range(Phis, Ps); } // phi use -> (map: reaching phi -> set of registers defined in between) std::map<NodeId,std::map<NodeId,RegisterAggr>> PhiUp; std::vector<NodeId> PhiUQ; // Work list of phis for upward propagation. - std::map<NodeId,RegisterAggr> PhiDRs; // Phi -> registers defined by it. + std::unordered_map<NodeId,RegisterAggr> PhiDRs; // Phi -> registers defined by it. // Go over all phis. 
for (NodeAddr<PhiNode*> PhiA : Phis) { @@ -474,7 +514,7 @@ void Liveness::computePhiInfo() { NodeAddr<UseNode*> A = DFG.addr<UseNode*>(UN); uint16_t F = A.Addr->getFlags(); if ((F & (NodeAttrs::Undef | NodeAttrs::PhiRef)) == 0) { - RegisterRef R = PRI.normalize(A.Addr->getRegRef(DFG)); + RegisterRef R = A.Addr->getRegRef(DFG); RealUses[R.Reg].insert({A.Id,R.Mask}); } UN = A.Addr->getSibling(); @@ -612,6 +652,23 @@ void Liveness::computePhiInfo() { // is covered, or until reaching the final phi. Only assume that the // reference reaches the phi in the latter case. + // The operation "clearIn" can be expensive. For a given set of intervening + // defs, cache the result of subtracting these defs from a given register + // ref. + using SubMap = std::unordered_map<RegisterRef, RegisterRef>; + std::unordered_map<RegisterAggr, SubMap> Subs; + auto ClearIn = [] (RegisterRef RR, const RegisterAggr &Mid, SubMap &SM) { + if (Mid.empty()) + return RR; + auto F = SM.find(RR); + if (F != SM.end()) + return F->second; + RegisterRef S = Mid.clearIn(RR); + SM.insert({RR, S}); + return S; + }; + + // Go over all phis. for (unsigned i = 0; i < PhiUQ.size(); ++i) { auto PA = DFG.addr<PhiNode*>(PhiUQ[i]); NodeList PUs = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG); @@ -619,17 +676,17 @@ void Liveness::computePhiInfo() { for (NodeAddr<UseNode*> UA : PUs) { std::map<NodeId,RegisterAggr> &PUM = PhiUp[UA.Id]; - RegisterRef UR = PRI.normalize(UA.Addr->getRegRef(DFG)); + RegisterRef UR = UA.Addr->getRegRef(DFG); for (const std::pair<const NodeId, RegisterAggr> &P : PUM) { bool Changed = false; const RegisterAggr &MidDefs = P.second; - // Collect the set PropUp of uses that are reached by the current // phi PA, and are not covered by any intervening def between the // currently visited use UA and the upward phi P. 
if (MidDefs.hasCoverOf(UR)) continue; + SubMap &SM = Subs[MidDefs]; // General algorithm: // for each (R,U) : U is use node of R, U is reached by PA @@ -649,7 +706,7 @@ void Liveness::computePhiInfo() { LaneBitmask M = R.Mask & V.second; if (M.none()) continue; - if (RegisterRef SS = MidDefs.clearIn(RegisterRef(R.Reg, M))) { + if (RegisterRef SS = ClearIn(RegisterRef(R.Reg, M), MidDefs, SM)) { NodeRefSet &RS = RealUseMap[P.first][SS.Reg]; Changed |= RS.insert({V.first,SS.Mask}).second; } @@ -1073,7 +1130,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) { for (NodeAddr<UseNode*> UA : IA.Addr->members_if(DFG.IsUse, DFG)) { if (UA.Addr->getFlags() & NodeAttrs::Undef) continue; - RegisterRef RR = PRI.normalize(UA.Addr->getRegRef(DFG)); + RegisterRef RR = UA.Addr->getRegRef(DFG); for (NodeAddr<DefNode*> D : getAllReachingDefs(UA)) if (getBlockWithRef(D.Id) != B) LiveIn[RR.Reg].insert({D.Id,RR.Mask}); diff --git a/llvm/lib/CodeGen/RDFRegisters.cpp b/llvm/lib/CodeGen/RDFRegisters.cpp index bd8661816e71..8760ba118934 100644 --- a/llvm/lib/CodeGen/RDFRegisters.cpp +++ b/llvm/lib/CodeGen/RDFRegisters.cpp @@ -84,18 +84,23 @@ PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri, for (uint32_t M = 1, NM = RegMasks.size(); M <= NM; ++M) { BitVector PU(TRI.getNumRegUnits()); const uint32_t *MB = RegMasks.get(M); - for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i) { - if (!(MB[i/32] & (1u << (i%32)))) + for (unsigned I = 1, E = TRI.getNumRegs(); I != E; ++I) { + if (!(MB[I / 32] & (1u << (I % 32)))) continue; - for (MCRegUnitIterator U(i, &TRI); U.isValid(); ++U) + for (MCRegUnitIterator U(MCRegister::from(I), &TRI); U.isValid(); ++U) PU.set(*U); } MaskInfos[M].Units = PU.flip(); } -} -RegisterRef PhysicalRegisterInfo::normalize(RegisterRef RR) const { - return RR; + AliasInfos.resize(TRI.getNumRegUnits()); + for (uint32_t U = 0, NU = TRI.getNumRegUnits(); U != NU; ++U) { + BitVector AS(TRI.getNumRegs()); + for (MCRegUnitRootIterator 
R(U, &TRI); R.isValid(); ++R) + for (MCSuperRegIterator S(*R, &TRI, true); S.isValid(); ++S) + AS.set(*S); + AliasInfos[U].Regs = AS; + } } std::set<RegisterId> PhysicalRegisterInfo::getAliasSet(RegisterId Reg) const { @@ -321,26 +326,17 @@ RegisterRef RegisterAggr::makeRegRef() const { if (U < 0) return RegisterRef(); - auto AliasedRegs = [this] (uint32_t Unit, BitVector &Regs) { - for (MCRegUnitRootIterator R(Unit, &PRI.getTRI()); R.isValid(); ++R) - for (MCSuperRegIterator S(*R, &PRI.getTRI(), true); S.isValid(); ++S) - Regs.set(*S); - }; - // Find the set of all registers that are aliased to all the units // in this aggregate. // Get all the registers aliased to the first unit in the bit vector. - BitVector Regs(PRI.getTRI().getNumRegs()); - AliasedRegs(U, Regs); + BitVector Regs = PRI.getUnitAliases(U); U = Units.find_next(U); // For each other unit, intersect it with the set of all registers // aliased that unit. while (U >= 0) { - BitVector AR(PRI.getTRI().getNumRegs()); - AliasedRegs(U, AR); - Regs &= AR; + Regs &= PRI.getUnitAliases(U); U = Units.find_next(U); } @@ -378,3 +374,8 @@ RegisterAggr::rr_iterator::rr_iterator(const RegisterAggr &RG, Pos = End ? Masks.end() : Masks.begin(); Index = End ? 
Masks.size() : 0; } + +raw_ostream &rdf::operator<<(raw_ostream &OS, const RegisterAggr &A) { + A.print(OS); + return OS; +} diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index 5bd8b4b8e27f..d16e90a7e0b4 100644 --- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -29,7 +29,7 @@ static bool isValidRegUse(const MachineOperand &MO) { return isValidReg(MO) && MO.isUse(); } -static bool isValidRegUseOf(const MachineOperand &MO, int PhysReg) { +static bool isValidRegUseOf(const MachineOperand &MO, MCRegister PhysReg) { return isValidRegUse(MO) && MO.getReg() == PhysReg; } @@ -37,7 +37,7 @@ static bool isValidRegDef(const MachineOperand &MO) { return isValidReg(MO) && MO.isDef(); } -static bool isValidRegDefOf(const MachineOperand &MO, int PhysReg) { +static bool isValidRegDefOf(const MachineOperand &MO, MCRegister PhysReg) { return isValidRegDef(MO) && MO.getReg() == PhysReg; } @@ -121,7 +121,8 @@ void ReachingDefAnalysis::processDefs(MachineInstr *MI) { for (auto &MO : MI->operands()) { if (!isValidRegDef(MO)) continue; - for (MCRegUnitIterator Unit(MO.getReg(), TRI); Unit.isValid(); ++Unit) { + for (MCRegUnitIterator Unit(MO.getReg().asMCReg(), TRI); Unit.isValid(); + ++Unit) { // This instruction explicitly defines the current reg unit. LLVM_DEBUG(dbgs() << printReg(*Unit, TRI) << ":\t" << CurInstr << '\t' << *MI); @@ -143,10 +144,9 @@ void ReachingDefAnalysis::reprocessBasicBlock(MachineBasicBlock *MBB) { "Unexpected basic block number."); // Count number of non-debug instructions for end of block adjustment. 
- int NumInsts = 0; - for (const MachineInstr &MI : *MBB) - if (!MI.isDebugInstr()) - NumInsts++; + auto NonDbgInsts = + instructionsWithoutDebug(MBB->instr_begin(), MBB->instr_end()); + int NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end()); // When reprocessing a block, the only thing we need to do is check whether // there is now a more recent incoming reaching definition from a predecessor. @@ -197,10 +197,9 @@ void ReachingDefAnalysis::processBasicBlock( } enterBasicBlock(MBB); - for (MachineInstr &MI : *MBB) { - if (!MI.isDebugInstr()) - processDefs(&MI); - } + for (MachineInstr &MI : + instructionsWithoutDebug(MBB->instr_begin(), MBB->instr_end())) + processDefs(&MI); leaveBasicBlock(MBB); } @@ -254,7 +253,8 @@ void ReachingDefAnalysis::traverse() { #endif } -int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) const { +int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, + MCRegister PhysReg) const { assert(InstIds.count(MI) && "Unexpected machine instuction."); int InstId = InstIds.lookup(MI); int DefRes = ReachingDefDefaultVal; @@ -273,13 +273,16 @@ int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) const { return LatestDef; } -MachineInstr* ReachingDefAnalysis::getReachingLocalMIDef(MachineInstr *MI, - int PhysReg) const { - return getInstFromId(MI->getParent(), getReachingDef(MI, PhysReg)); +MachineInstr * +ReachingDefAnalysis::getReachingLocalMIDef(MachineInstr *MI, + MCRegister PhysReg) const { + return hasLocalDefBefore(MI, PhysReg) + ? 
getInstFromId(MI->getParent(), getReachingDef(MI, PhysReg)) + : nullptr; } bool ReachingDefAnalysis::hasSameReachingDef(MachineInstr *A, MachineInstr *B, - int PhysReg) const { + MCRegister PhysReg) const { MachineBasicBlock *ParentA = A->getParent(); MachineBasicBlock *ParentB = B->getParent(); if (ParentA != ParentB) @@ -307,18 +310,19 @@ MachineInstr *ReachingDefAnalysis::getInstFromId(MachineBasicBlock *MBB, return nullptr; } -int -ReachingDefAnalysis::getClearance(MachineInstr *MI, MCPhysReg PhysReg) const { +int ReachingDefAnalysis::getClearance(MachineInstr *MI, + MCRegister PhysReg) const { assert(InstIds.count(MI) && "Unexpected machine instuction."); return InstIds.lookup(MI) - getReachingDef(MI, PhysReg); } -bool -ReachingDefAnalysis::hasLocalDefBefore(MachineInstr *MI, int PhysReg) const { +bool ReachingDefAnalysis::hasLocalDefBefore(MachineInstr *MI, + MCRegister PhysReg) const { return getReachingDef(MI, PhysReg) >= 0; } -void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def, int PhysReg, +void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def, + MCRegister PhysReg, InstSet &Uses) const { MachineBasicBlock *MBB = Def->getParent(); MachineBasicBlock::iterator MI = MachineBasicBlock::iterator(Def); @@ -342,12 +346,11 @@ void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def, int PhysReg, } } -bool -ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB, int PhysReg, - InstSet &Uses) const { - for (auto &MI : *MBB) { - if (MI.isDebugInstr()) - continue; +bool ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB, + MCRegister PhysReg, + InstSet &Uses) const { + for (MachineInstr &MI : + instructionsWithoutDebug(MBB->instr_begin(), MBB->instr_end())) { for (auto &MO : MI.operands()) { if (!isValidRegUseOf(MO, PhysReg)) continue; @@ -356,12 +359,14 @@ ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB, int PhysReg, Uses.insert(&MI); } } - return isReachingDefLiveOut(&MBB->back(), PhysReg); + auto Last = 
MBB->getLastNonDebugInstr(); + if (Last == MBB->end()) + return true; + return isReachingDefLiveOut(&*Last, PhysReg); } -void -ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, int PhysReg, - InstSet &Uses) const { +void ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, MCRegister PhysReg, + InstSet &Uses) const { MachineBasicBlock *MBB = MI->getParent(); // Collect the uses that each def touches within the block. @@ -372,9 +377,7 @@ ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, int PhysReg, if (LiveOut != MI) return; - SmallVector<MachineBasicBlock*, 4> ToVisit; - ToVisit.insert(ToVisit.begin(), MBB->successors().begin(), - MBB->successors().end()); + SmallVector<MachineBasicBlock *, 4> ToVisit(MBB->successors()); SmallPtrSet<MachineBasicBlock*, 4>Visited; while (!ToVisit.empty()) { MachineBasicBlock *MBB = ToVisit.back(); @@ -382,22 +385,33 @@ ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, int PhysReg, if (Visited.count(MBB) || !MBB->isLiveIn(PhysReg)) continue; if (getLiveInUses(MBB, PhysReg, Uses)) - ToVisit.insert(ToVisit.end(), MBB->successors().begin(), - MBB->successors().end()); + llvm::append_range(ToVisit, MBB->successors()); Visited.insert(MBB); } } } -void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, int PhysReg, - InstSet &Defs) const { +void ReachingDefAnalysis::getGlobalReachingDefs(MachineInstr *MI, + MCRegister PhysReg, + InstSet &Defs) const { + if (auto *Def = getUniqueReachingMIDef(MI, PhysReg)) { + Defs.insert(Def); + return; + } + + for (auto *MBB : MI->getParent()->predecessors()) + getLiveOuts(MBB, PhysReg, Defs); +} + +void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, + MCRegister PhysReg, InstSet &Defs) const { SmallPtrSet<MachineBasicBlock*, 2> VisitedBBs; getLiveOuts(MBB, PhysReg, Defs, VisitedBBs); } -void -ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, int PhysReg, - InstSet &Defs, BlockSet &VisitedBBs) const { +void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, + 
MCRegister PhysReg, InstSet &Defs, + BlockSet &VisitedBBs) const { if (VisitedBBs.count(MBB)) return; @@ -414,26 +428,25 @@ ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, int PhysReg, getLiveOuts(Pred, PhysReg, Defs, VisitedBBs); } -MachineInstr *ReachingDefAnalysis::getUniqueReachingMIDef(MachineInstr *MI, - int PhysReg) const { +MachineInstr * +ReachingDefAnalysis::getUniqueReachingMIDef(MachineInstr *MI, + MCRegister PhysReg) const { // If there's a local def before MI, return it. MachineInstr *LocalDef = getReachingLocalMIDef(MI, PhysReg); if (LocalDef && InstIds.lookup(LocalDef) < InstIds.lookup(MI)) return LocalDef; - SmallPtrSet<MachineBasicBlock*, 4> VisitedBBs; SmallPtrSet<MachineInstr*, 2> Incoming; - for (auto *Pred : MI->getParent()->predecessors()) - getLiveOuts(Pred, PhysReg, Incoming, VisitedBBs); - - // If we have a local def and an incoming instruction, then there's not a - // unique instruction def. - if (!Incoming.empty() && LocalDef) - return nullptr; - else if (Incoming.size() == 1) + MachineBasicBlock *Parent = MI->getParent(); + for (auto *Pred : Parent->predecessors()) + getLiveOuts(Pred, PhysReg, Incoming); + + // Check that we have a single incoming value and that it does not + // come from the same block as MI - since it would mean that the def + // is executed after MI. 
+ if (Incoming.size() == 1 && (*Incoming.begin())->getParent() != Parent) return *Incoming.begin(); - else - return LocalDef; + return nullptr; } MachineInstr *ReachingDefAnalysis::getMIOperand(MachineInstr *MI, @@ -448,7 +461,8 @@ MachineInstr *ReachingDefAnalysis::getMIOperand(MachineInstr *MI, return getUniqueReachingMIDef(MI, MO.getReg()); } -bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) const { +bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, + MCRegister PhysReg) const { MachineBasicBlock *MBB = MI->getParent(); LivePhysRegs LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); @@ -459,18 +473,21 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) const { // Walk backwards through the block to see if the register is live at some // point. - for (auto Last = MBB->rbegin(), End = MBB->rend(); Last != End; ++Last) { - LiveRegs.stepBackward(*Last); + for (MachineInstr &Last : + instructionsWithoutDebug(MBB->instr_rbegin(), MBB->instr_rend())) { + LiveRegs.stepBackward(Last); if (LiveRegs.contains(PhysReg)) - return InstIds.lookup(&*Last) > InstIds.lookup(MI); + return InstIds.lookup(&Last) > InstIds.lookup(MI); } return false; } bool ReachingDefAnalysis::isRegDefinedAfter(MachineInstr *MI, - int PhysReg) const { + MCRegister PhysReg) const { MachineBasicBlock *MBB = MI->getParent(); - if (getReachingDef(MI, PhysReg) != getReachingDef(&MBB->back(), PhysReg)) + auto Last = MBB->getLastNonDebugInstr(); + if (Last != MBB->end() && + getReachingDef(MI, PhysReg) != getReachingDef(&*Last, PhysReg)) return true; if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg)) @@ -479,17 +496,17 @@ bool ReachingDefAnalysis::isRegDefinedAfter(MachineInstr *MI, return false; } -bool -ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, int PhysReg) const { +bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, + MCRegister PhysReg) const { MachineBasicBlock *MBB = MI->getParent(); LivePhysRegs LiveRegs(*TRI); 
LiveRegs.addLiveOuts(*MBB); if (!LiveRegs.contains(PhysReg)) return false; - MachineInstr *Last = &MBB->back(); + auto Last = MBB->getLastNonDebugInstr(); int Def = getReachingDef(MI, PhysReg); - if (getReachingDef(Last, PhysReg) != Def) + if (Last != MBB->end() && getReachingDef(&*Last, PhysReg) != Def) return false; // Finally check that the last instruction doesn't redefine the register. @@ -500,18 +517,22 @@ ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, int PhysReg) const { return true; } -MachineInstr* ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB, - int PhysReg) const { +MachineInstr * +ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB, + MCRegister PhysReg) const { LivePhysRegs LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); if (!LiveRegs.contains(PhysReg)) return nullptr; - MachineInstr *Last = &MBB->back(); - int Def = getReachingDef(Last, PhysReg); + auto Last = MBB->getLastNonDebugInstr(); + if (Last == MBB->end()) + return nullptr; + + int Def = getReachingDef(&*Last, PhysReg); for (auto &MO : Last->operands()) if (isValidRegDefOf(MO, PhysReg)) - return Last; + return &*Last; return Def < 0 ? nullptr : getInstFromId(MBB, Def); } @@ -528,7 +549,7 @@ static bool mayHaveSideEffects(MachineInstr &MI) { template<typename Iterator> bool ReachingDefAnalysis::isSafeToMove(MachineInstr *From, MachineInstr *To) const { - if (From->getParent() != To->getParent()) + if (From->getParent() != To->getParent() || From == To) return false; SmallSet<int, 2> Defs; @@ -557,12 +578,22 @@ bool ReachingDefAnalysis::isSafeToMove(MachineInstr *From, bool ReachingDefAnalysis::isSafeToMoveForwards(MachineInstr *From, MachineInstr *To) const { - return isSafeToMove<MachineBasicBlock::reverse_iterator>(From, To); + using Iterator = MachineBasicBlock::iterator; + // Walk forwards until we find the instruction. 
+ for (auto I = Iterator(From), E = From->getParent()->end(); I != E; ++I) + if (&*I == To) + return isSafeToMove<Iterator>(From, To); + return false; } bool ReachingDefAnalysis::isSafeToMoveBackwards(MachineInstr *From, MachineInstr *To) const { - return isSafeToMove<MachineBasicBlock::iterator>(From, To); + using Iterator = MachineBasicBlock::reverse_iterator; + // Walk backwards until we find the instruction. + for (auto I = Iterator(From), E = From->getParent()->rend(); I != E; ++I) + if (&*I == To) + return isSafeToMove<Iterator>(From, To); + return false; } bool ReachingDefAnalysis::isSafeToRemove(MachineInstr *MI, @@ -612,7 +643,10 @@ ReachingDefAnalysis::isSafeToRemove(MachineInstr *MI, InstSet &Visited, void ReachingDefAnalysis::collectKilledOperands(MachineInstr *MI, InstSet &Dead) const { Dead.insert(MI); - auto IsDead = [this, &Dead](MachineInstr *Def, int PhysReg) { + auto IsDead = [this, &Dead](MachineInstr *Def, MCRegister PhysReg) { + if (mayHaveSideEffects(*Def)) + return false; + unsigned LiveDefs = 0; for (auto &MO : Def->operands()) { if (!isValidRegDef(MO)) @@ -642,18 +676,18 @@ void ReachingDefAnalysis::collectKilledOperands(MachineInstr *MI, } bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, - int PhysReg) const { + MCRegister PhysReg) const { SmallPtrSet<MachineInstr*, 1> Ignore; return isSafeToDefRegAt(MI, PhysReg, Ignore); } -bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, int PhysReg, +bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg, InstSet &Ignore) const { // Check for any uses of the register after MI. 
if (isRegUsedAfter(MI, PhysReg)) { if (auto *Def = getReachingLocalMIDef(MI, PhysReg)) { SmallPtrSet<MachineInstr*, 2> Uses; - getReachingLocalUses(Def, PhysReg, Uses); + getGlobalUses(Def, PhysReg, Uses); for (auto *Use : Uses) if (!Ignore.count(Use)) return false; diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp index d22826853672..aa749ca43e74 100644 --- a/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/llvm/lib/CodeGen/RegAllocBase.cpp @@ -73,7 +73,7 @@ void RegAllocBase::seedLiveRegs() { NamedRegionTimer T("seed", "Seed Live Regs", TimerGroupName, TimerGroupDescription, TimePassesIsEnabled); for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = Register::index2VirtReg(i); + Register Reg = Register::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; enqueue(&LIS->getInterval(Reg)); @@ -87,13 +87,13 @@ void RegAllocBase::allocatePhysRegs() { // Continue assigning vregs one at a time to available physical registers. while (LiveInterval *VirtReg = dequeue()) { - assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned"); + assert(!VRM->hasPhys(VirtReg->reg()) && "Register already assigned"); // Unused registers can appear when the spiller coalesces snippets. - if (MRI->reg_nodbg_empty(VirtReg->reg)) { + if (MRI->reg_nodbg_empty(VirtReg->reg())) { LLVM_DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n'); aboutToRemoveInterval(*VirtReg); - LIS->removeInterval(VirtReg->reg); + LIS->removeInterval(VirtReg->reg()); continue; } @@ -104,21 +104,22 @@ void RegAllocBase::allocatePhysRegs() { // register if possible and populate a list of new live intervals that // result from splitting. 
LLVM_DEBUG(dbgs() << "\nselectOrSplit " - << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg)) - << ':' << *VirtReg << " w=" << VirtReg->weight << '\n'); + << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg())) + << ':' << *VirtReg << " w=" << VirtReg->weight() << '\n'); using VirtRegVec = SmallVector<Register, 4>; VirtRegVec SplitVRegs; - unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); + MCRegister AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); if (AvailablePhysReg == ~0u) { // selectOrSplit failed to find a register! // Probably caused by an inline asm. MachineInstr *MI = nullptr; for (MachineRegisterInfo::reg_instr_iterator - I = MRI->reg_instr_begin(VirtReg->reg), E = MRI->reg_instr_end(); - I != E; ) { + I = MRI->reg_instr_begin(VirtReg->reg()), + E = MRI->reg_instr_end(); + I != E;) { MI = &*(I++); if (MI->isInlineAsm()) break; @@ -133,28 +134,29 @@ void RegAllocBase::allocatePhysRegs() { report_fatal_error("ran out of registers during register allocation"); } // Keep going after reporting the error. 
- VRM->assignVirt2Phys(VirtReg->reg, - RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front()); + VRM->assignVirt2Phys( + VirtReg->reg(), + RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg())).front()); continue; } if (AvailablePhysReg) Matrix->assign(*VirtReg, AvailablePhysReg); - for (unsigned Reg : SplitVRegs) { + for (Register Reg : SplitVRegs) { assert(LIS->hasInterval(Reg)); LiveInterval *SplitVirtReg = &LIS->getInterval(Reg); - assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned"); - if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) { + assert(!VRM->hasPhys(SplitVirtReg->reg()) && "Register already assigned"); + if (MRI->reg_nodbg_empty(SplitVirtReg->reg())) { assert(SplitVirtReg->empty() && "Non-empty but used interval"); LLVM_DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n'); aboutToRemoveInterval(*SplitVirtReg); - LIS->removeInterval(SplitVirtReg->reg); + LIS->removeInterval(SplitVirtReg->reg()); continue; } LLVM_DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); - assert(Register::isVirtualRegister(SplitVirtReg->reg) && + assert(Register::isVirtualRegister(SplitVirtReg->reg()) && "expect split value in virtual register"); enqueue(SplitVirtReg); ++NumNewQueued; diff --git a/llvm/lib/CodeGen/RegAllocBase.h b/llvm/lib/CodeGen/RegAllocBase.h index 8e931eaae99a..3144605345e9 100644 --- a/llvm/lib/CodeGen/RegAllocBase.h +++ b/llvm/lib/CodeGen/RegAllocBase.h @@ -101,8 +101,8 @@ protected: // Each call must guarantee forward progess by returning an available PhysReg // or new set of split live virtual registers. It is up to the splitter to // converge quickly toward fully spilled live ranges. - virtual Register selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl<Register> &splitLVRs) = 0; + virtual MCRegister selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<Register> &splitLVRs) = 0; // Use this group name for NamedRegionTimer. 
static const char TimerGroupName[]; diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp index 5009bcc0a397..8f2cb48c5d69 100644 --- a/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -46,7 +46,7 @@ static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator", namespace { struct CompSpillWeight { bool operator()(LiveInterval *A, LiveInterval *B) const { - return A->weight < B->weight; + return A->weight() < B->weight(); } }; } @@ -72,8 +72,8 @@ class RABasic : public MachineFunctionPass, // selectOrSplit(). BitVector UsableRegs; - bool LRE_CanEraseVirtReg(unsigned) override; - void LRE_WillShrinkVirtReg(unsigned) override; + bool LRE_CanEraseVirtReg(Register) override; + void LRE_WillShrinkVirtReg(Register) override; public: RABasic(); @@ -100,8 +100,8 @@ public: return LI; } - Register selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl<Register> &SplitVRegs) override; + MCRegister selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<Register> &SplitVRegs) override; /// Perform register allocation. bool runOnMachineFunction(MachineFunction &mf) override; @@ -111,10 +111,15 @@ public: MachineFunctionProperties::Property::NoPHIs); } + MachineFunctionProperties getClearedProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::IsSSA); + } + // Helper for spilling all live virtual registers currently unified under preg // that interfere with the most recently queried lvr. Return true if spilling // was successful, and append any new spilled/split intervals to splitLVRs. 
- bool spillInterferences(LiveInterval &VirtReg, Register PhysReg, + bool spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg, SmallVectorImpl<Register> &SplitVRegs); static char ID; @@ -141,7 +146,7 @@ INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) INITIALIZE_PASS_END(RABasic, "regallocbasic", "Basic Register Allocator", false, false) -bool RABasic::LRE_CanEraseVirtReg(unsigned VirtReg) { +bool RABasic::LRE_CanEraseVirtReg(Register VirtReg) { LiveInterval &LI = LIS->getInterval(VirtReg); if (VRM->hasPhys(VirtReg)) { Matrix->unassign(LI); @@ -156,7 +161,7 @@ bool RABasic::LRE_CanEraseVirtReg(unsigned VirtReg) { return false; } -void RABasic::LRE_WillShrinkVirtReg(unsigned VirtReg) { +void RABasic::LRE_WillShrinkVirtReg(Register VirtReg) { if (!VRM->hasPhys(VirtReg)) return; @@ -201,7 +206,7 @@ void RABasic::releaseMemory() { // Spill or split all live virtual registers currently unified under PhysReg // that interfere with VirtReg. The newly spilled or split live intervals are // returned by appending them to SplitVRegs. -bool RABasic::spillInterferences(LiveInterval &VirtReg, Register PhysReg, +bool RABasic::spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg, SmallVectorImpl<Register> &SplitVRegs) { // Record each interference and determine if all are spillable before mutating // either the union or live intervals. @@ -213,7 +218,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, Register PhysReg, Q.collectInterferingVRegs(); for (unsigned i = Q.interferingVRegs().size(); i; --i) { LiveInterval *Intf = Q.interferingVRegs()[i - 1]; - if (!Intf->isSpillable() || Intf->weight > VirtReg.weight) + if (!Intf->isSpillable() || Intf->weight() > VirtReg.weight()) return false; Intfs.push_back(Intf); } @@ -227,7 +232,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, Register PhysReg, LiveInterval &Spill = *Intfs[i]; // Skip duplicates. 
- if (!VRM->hasPhys(Spill.reg)) + if (!VRM->hasPhys(Spill.reg())) continue; // Deallocate the interfering vreg by removing it from the union. @@ -253,14 +258,16 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, Register PhysReg, // |vregs| * |machineregs|. And since the number of interference tests is // minimal, there is no value in caching them outside the scope of // selectOrSplit(). -Register RABasic::selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl<Register> &SplitVRegs) { +MCRegister RABasic::selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<Register> &SplitVRegs) { // Populate a list of physical register spill candidates. - SmallVector<Register, 8> PhysRegSpillCands; + SmallVector<MCRegister, 8> PhysRegSpillCands; // Check for an available register in this class. - AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); - while (Register PhysReg = Order.next()) { + auto Order = + AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix); + for (MCRegister PhysReg : Order) { + assert(PhysReg.isValid()); // Check for interference in PhysReg switch (Matrix->checkInterference(VirtReg, PhysReg)) { case LiveRegMatrix::IK_Free: @@ -279,8 +286,9 @@ Register RABasic::selectOrSplit(LiveInterval &VirtReg, } // Try to spill another interfering reg with less spill weight. 
- for (SmallVectorImpl<Register>::iterator PhysRegI = PhysRegSpillCands.begin(), - PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) { + for (auto PhysRegI = PhysRegSpillCands.begin(), + PhysRegE = PhysRegSpillCands.end(); + PhysRegI != PhysRegE; ++PhysRegI) { if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) continue; @@ -310,10 +318,9 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>(), getAnalysis<LiveRegMatrix>()); - - calculateSpillWeightsAndHints(*LIS, *MF, VRM, - getAnalysis<MachineLoopInfo>(), - getAnalysis<MachineBlockFrequencyInfo>()); + VirtRegAuxInfo VRAI(*MF, *LIS, *VRM, getAnalysis<MachineLoopInfo>(), + getAnalysis<MachineBlockFrequencyInfo>()); + VRAI.calculateSpillWeightsAndHints(); SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 5396f9f3a143..6e548d4a93c8 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -56,6 +56,10 @@ STATISTIC(NumStores, "Number of stores added"); STATISTIC(NumLoads , "Number of loads added"); STATISTIC(NumCoalesced, "Number of copies coalesced"); +// FIXME: Remove this switch when all testcases are fixed! +static cl::opt<bool> IgnoreMissingDefs("rafast-ignore-missing-defs", + cl::Hidden); + static RegisterRegAlloc fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator); @@ -85,8 +89,9 @@ namespace { MachineInstr *LastUse = nullptr; ///< Last instr to use reg. Register VirtReg; ///< Virtual register number. MCPhysReg PhysReg = 0; ///< Currently held here. - unsigned short LastOpNum = 0; ///< OpNum on LastUse. - bool Dirty = false; ///< Register needs spill. + bool LiveOut = false; ///< Register is possibly live out. + bool Reloaded = false; ///< Register was reloaded. + bool Error = false; ///< Could not allocate. 
explicit LiveReg(Register VirtReg) : VirtReg(VirtReg) {} @@ -100,7 +105,13 @@ namespace { /// available in a physical register. LiveRegMap LiveVirtRegs; + /// Stores assigned virtual registers present in the bundle MI. + DenseMap<Register, MCPhysReg> BundleVirtRegsMap; + DenseMap<unsigned, SmallVector<MachineInstr *, 2>> LiveDbgValueMap; + /// List of DBG_VALUE that we encountered without the vreg being assigned + /// because they were placed after the last use of the vreg. + DenseMap<unsigned, SmallVector<MachineInstr *, 1>> DanglingDbgValues; /// Has a bit set for every virtual register for which it was determined /// that it is alive across blocks. @@ -112,9 +123,13 @@ namespace { /// immediately without checking aliases. regFree, - /// A reserved register has been assigned explicitly (e.g., setting up a - /// call parameter), and it remains reserved until it is used. - regReserved + /// A pre-assigned register has been assigned before register allocation + /// (e.g., setting up a call parameter). + regPreAssigned, + + /// Used temporarily in reloadAtBegin() to mark register units that are + /// live-in to the basic block. + regLiveIn, /// A register state may also be a virtual register number, indication /// that the physical register is currently allocated to a virtual @@ -124,15 +139,17 @@ namespace { /// Maps each physical register to a RegUnitState enum or virtual register. std::vector<unsigned> RegUnitStates; - SmallVector<Register, 16> VirtDead; SmallVector<MachineInstr *, 32> Coalesced; using RegUnitSet = SparseSet<uint16_t, identity<uint16_t>>; /// Set of register units that are used in the current instruction, and so /// cannot be allocated. RegUnitSet UsedInInstr; + RegUnitSet PhysRegUses; + SmallVector<uint16_t, 8> DefOperandIndexes; void setPhysRegState(MCPhysReg PhysReg, unsigned NewState); + bool isPhysRegFree(MCPhysReg PhysReg) const; /// Mark a physreg as used in this instruction. 
void markRegUsedInInstr(MCPhysReg PhysReg) { @@ -141,13 +158,29 @@ namespace { } /// Check if a physreg or any of its aliases are used in this instruction. - bool isRegUsedInInstr(MCPhysReg PhysReg) const { - for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + bool isRegUsedInInstr(MCPhysReg PhysReg, bool LookAtPhysRegUses) const { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { if (UsedInInstr.count(*Units)) return true; + if (LookAtPhysRegUses && PhysRegUses.count(*Units)) + return true; + } return false; } + /// Mark physical register as being used in a register use operand. + /// This is only used by the special livethrough handling code. + void markPhysRegUsedInInstr(MCPhysReg PhysReg) { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + PhysRegUses.insert(*Units); + } + + /// Remove mark of physical register being used in the instruction. + void unmarkRegUsedInInstr(MCPhysReg PhysReg) { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + UsedInInstr.erase(*Units); + } + enum : unsigned { spillClean = 50, spillDirty = 100, @@ -173,31 +206,29 @@ namespace { MachineFunctionProperties::Property::NoVRegs); } + MachineFunctionProperties getClearedProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::IsSSA); + } + private: bool runOnMachineFunction(MachineFunction &MF) override; void allocateBasicBlock(MachineBasicBlock &MBB); + + void addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts, + Register Reg) const; + void allocateInstruction(MachineInstr &MI); void handleDebugValue(MachineInstr &MI); - void handleThroughOperands(MachineInstr &MI, - SmallVectorImpl<Register> &VirtDead); - bool isLastUseOfLocalReg(const MachineOperand &MO) const; - - void addKillFlag(const LiveReg &LRI); -#ifndef NDEBUG - bool verifyRegStateMapping(const LiveReg &LR) const; -#endif + void handleBundle(MachineInstr &MI); - void 
killVirtReg(LiveReg &LR); - void killVirtReg(Register VirtReg); - void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR); - void spillVirtReg(MachineBasicBlock::iterator MI, Register VirtReg); + bool usePhysReg(MachineInstr &MI, MCPhysReg PhysReg); + bool definePhysReg(MachineInstr &MI, MCPhysReg PhysReg); + bool displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg); + void freePhysReg(MCPhysReg PhysReg); - void usePhysReg(MachineOperand &MO); - void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg, - unsigned NewState); unsigned calcSpillCost(MCPhysReg PhysReg) const; - void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg); LiveRegMap::iterator findLiveVirtReg(Register VirtReg) { return LiveVirtRegs.find(Register::virtReg2Index(VirtReg)); @@ -207,21 +238,31 @@ namespace { return LiveVirtRegs.find(Register::virtReg2Index(VirtReg)); } - void allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint); + void assignVirtToPhysReg(MachineInstr &MI, LiveReg &, MCPhysReg PhysReg); + void allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint, + bool LookAtPhysRegUses = false); void allocVirtRegUndef(MachineOperand &MO); - MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg, - Register Hint); - LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg, - Register Hint); - void spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut); - bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg); + void assignDanglingDebugValues(MachineInstr &Def, Register VirtReg, + MCPhysReg Reg); + void defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum, + Register VirtReg); + void defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg, + bool LookAtPhysRegUses = false); + void useVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg); + + MachineBasicBlock::iterator + getMBBBeginInsertionPoint(MachineBasicBlock &MBB, + SmallSet<Register, 2> &PrologLiveIns) const; + + void 
reloadAtBegin(MachineBasicBlock &MBB); + void setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg); Register traceCopies(Register VirtReg) const; Register traceCopyChain(Register Reg) const; int getStackSpaceFor(Register VirtReg); void spill(MachineBasicBlock::iterator Before, Register VirtReg, - MCPhysReg AssignedReg, bool Kill); + MCPhysReg AssignedReg, bool Kill, bool LiveOut); void reload(MachineBasicBlock::iterator Before, Register VirtReg, MCPhysReg PhysReg); @@ -243,6 +284,14 @@ void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) { RegUnitStates[*UI] = NewState; } +bool RegAllocFast::isPhysRegFree(MCPhysReg PhysReg) const { + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { + if (RegUnitStates[*UI] != regFree) + return false; + } + return true; +} + /// This allocates space for the specified virtual register to be held on the /// stack. int RegAllocFast::getStackSpaceFor(Register VirtReg) { @@ -263,6 +312,20 @@ int RegAllocFast::getStackSpaceFor(Register VirtReg) { return FrameIdx; } +static bool dominates(MachineBasicBlock &MBB, + MachineBasicBlock::const_iterator A, + MachineBasicBlock::const_iterator B) { + auto MBBEnd = MBB.end(); + if (B == MBBEnd) + return true; + + MachineBasicBlock::const_iterator I = MBB.begin(); + for (; &*I != A && &*I != B; ++I) + ; + + return &*I == A; +} + /// Returns false if \p VirtReg is known to not live out of the current block. bool RegAllocFast::mayLiveOut(Register VirtReg) { if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) { @@ -270,23 +333,38 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) { return !MBB->succ_empty(); } - // If this block loops back to itself, it would be necessary to check whether - // the use comes after the def. + const MachineInstr *SelfLoopDef = nullptr; + + // If this block loops back to itself, it is necessary to check whether the + // use comes after the def. 
if (MBB->isSuccessor(MBB)) { - MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); - return true; + SelfLoopDef = MRI->getUniqueVRegDef(VirtReg); + if (!SelfLoopDef) { + MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); + return true; + } } // See if the first \p Limit uses of the register are all in the current // block. static const unsigned Limit = 8; unsigned C = 0; - for (const MachineInstr &UseInst : MRI->reg_nodbg_instructions(VirtReg)) { + for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) { if (UseInst.getParent() != MBB || ++C >= Limit) { MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); // Cannot be live-out if there are no successors. return !MBB->succ_empty(); } + + if (SelfLoopDef) { + // Try to handle some simple cases to avoid spilling and reloading every + // value inside a self looping block. + if (SelfLoopDef == &UseInst || + !dominates(*MBB, SelfLoopDef->getIterator(), UseInst.getIterator())) { + MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); + return true; + } + } } return false; @@ -313,7 +391,7 @@ bool RegAllocFast::mayLiveIn(Register VirtReg) { /// Insert spill instruction for \p AssignedReg before \p Before. Update /// DBG_VALUEs with \p VirtReg operands with the stack slot. void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg, - MCPhysReg AssignedReg, bool Kill) { + MCPhysReg AssignedReg, bool Kill, bool LiveOut) { LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " in " << printReg(AssignedReg, TRI)); int FI = getStackSpaceFor(VirtReg); @@ -323,15 +401,32 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg, TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI); ++NumStores; - // If this register is used by DBG_VALUE then insert new DBG_VALUE to - // identify spilled location as the place to find corresponding variable's - // value. 
+ MachineBasicBlock::iterator FirstTerm = MBB->getFirstTerminator(); + + // When we spill a virtual register, we will have spill instructions behind + // every definition of it, meaning we can switch all the DBG_VALUEs over + // to just reference the stack slot. SmallVectorImpl<MachineInstr *> &LRIDbgValues = LiveDbgValueMap[VirtReg]; for (MachineInstr *DBG : LRIDbgValues) { MachineInstr *NewDV = buildDbgValueForSpill(*MBB, Before, *DBG, FI); assert(NewDV->getParent() == MBB && "dangling parent pointer"); (void)NewDV; LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:\n" << *NewDV); + + if (LiveOut) { + // We need to insert a DBG_VALUE at the end of the block if the spill slot + // is live out, but there is another use of the value after the + // spill. This will allow LiveDebugValues to see the correct live out + // value to propagate to the successors. + MachineInstr *ClonedDV = MBB->getParent()->CloneMachineInstr(NewDV); + MBB->insert(FirstTerm, ClonedDV); + LLVM_DEBUG(dbgs() << "Cloning debug info due to live out spill\n"); + } + + // Rewrite unassigned dbg_values to use the stack slot. + MachineOperand &MO = DBG->getOperand(0); + if (MO.isReg() && MO.getReg() == 0) + updateDbgValueForSpill(*DBG, FI); } // Now this register is spilled there is should not be any DBG_VALUE // pointing to this register because they are all pointing to spilled value @@ -350,113 +445,75 @@ void RegAllocFast::reload(MachineBasicBlock::iterator Before, Register VirtReg, ++NumLoads; } -/// Return true if MO is the only remaining reference to its virtual register, -/// and it is guaranteed to be a block-local register. -bool RegAllocFast::isLastUseOfLocalReg(const MachineOperand &MO) const { - // If the register has ever been spilled or reloaded, we conservatively assume - // it is a global register used in multiple blocks. - if (StackSlotForVirtReg[MO.getReg()] != -1) - return false; - - // Check that the use/def chain has exactly one operand - MO. 
- MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg()); - if (&*I != &MO) - return false; - return ++I == MRI->reg_nodbg_end(); -} - -/// Set kill flags on last use of a virtual register. -void RegAllocFast::addKillFlag(const LiveReg &LR) { - if (!LR.LastUse) return; - MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum); - if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) { - if (MO.getReg() == LR.PhysReg) - MO.setIsKill(); - // else, don't do anything we are problably redefining a - // subreg of this register and given we don't track which - // lanes are actually dead, we cannot insert a kill flag here. - // Otherwise we may end up in a situation like this: - // ... = (MO) physreg:sub1, implicit killed physreg - // ... <== Here we would allow later pass to reuse physreg:sub1 - // which is potentially wrong. - // LR:sub0 = ... - // ... = LR.sub1 <== This is going to use physreg:sub1 - } -} - -#ifndef NDEBUG -bool RegAllocFast::verifyRegStateMapping(const LiveReg &LR) const { - for (MCRegUnitIterator UI(LR.PhysReg, TRI); UI.isValid(); ++UI) { - if (RegUnitStates[*UI] != LR.VirtReg) - return false; - } +/// Get basic block begin insertion point. +/// This is not just MBB.begin() because surprisingly we have EH_LABEL +/// instructions marking the begin of a basic block. This means we must insert +/// new instructions after such labels... +MachineBasicBlock::iterator +RegAllocFast::getMBBBeginInsertionPoint( + MachineBasicBlock &MBB, SmallSet<Register, 2> &PrologLiveIns) const { + MachineBasicBlock::iterator I = MBB.begin(); + while (I != MBB.end()) { + if (I->isLabel()) { + ++I; + continue; + } - return true; -} -#endif + // Most reloads should be inserted after prolog instructions. + if (!TII->isBasicBlockPrologue(*I)) + break; -/// Mark virtreg as no longer available. 
-void RegAllocFast::killVirtReg(LiveReg &LR) { - assert(verifyRegStateMapping(LR) && "Broken RegState mapping"); - addKillFlag(LR); - MCPhysReg PhysReg = LR.PhysReg; - setPhysRegState(PhysReg, regFree); - LR.PhysReg = 0; -} + // However if a prolog instruction reads a register that needs to be + // reloaded, the reload should be inserted before the prolog. + for (MachineOperand &MO : I->operands()) { + if (MO.isReg()) + PrologLiveIns.insert(MO.getReg()); + } -/// Mark virtreg as no longer available. -void RegAllocFast::killVirtReg(Register VirtReg) { - assert(Register::isVirtualRegister(VirtReg) && - "killVirtReg needs a virtual register"); - LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); - if (LRI != LiveVirtRegs.end() && LRI->PhysReg) - killVirtReg(*LRI); -} + ++I; + } -/// This method spills the value specified by VirtReg into the corresponding -/// stack slot if needed. -void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, - Register VirtReg) { - assert(Register::isVirtualRegister(VirtReg) && - "Spilling a physical register is illegal!"); - LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); - assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && - "Spilling unmapped virtual register"); - spillVirtReg(MI, *LRI); + return I; } -/// Do the actual work of spilling. -void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) { - assert(verifyRegStateMapping(LR) && "Broken RegState mapping"); - - MCPhysReg PhysReg = LR.PhysReg; +/// Reload all currently assigned virtual registers. +void RegAllocFast::reloadAtBegin(MachineBasicBlock &MBB) { + if (LiveVirtRegs.empty()) + return; - if (LR.Dirty) { - // If this physreg is used by the instruction, we want to kill it on the - // instruction, not on the spill. - bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI; - LR.Dirty = false; + for (MachineBasicBlock::RegisterMaskPair P : MBB.liveins()) { + MCPhysReg Reg = P.PhysReg; + // Set state to live-in. 
This possibly overrides mappings to virtual + // registers but we don't care anymore at this point. + setPhysRegState(Reg, regLiveIn); + } - spill(MI, LR.VirtReg, PhysReg, SpillKill); - if (SpillKill) - LR.LastUse = nullptr; // Don't kill register again - } - killVirtReg(LR); -} + SmallSet<Register, 2> PrologLiveIns; -/// Spill all dirty virtregs without killing them. -void RegAllocFast::spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut) { - if (LiveVirtRegs.empty()) - return; // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order // of spilling here is deterministic, if arbitrary. - for (LiveReg &LR : LiveVirtRegs) { - if (!LR.PhysReg) + MachineBasicBlock::iterator InsertBefore + = getMBBBeginInsertionPoint(MBB, PrologLiveIns); + for (const LiveReg &LR : LiveVirtRegs) { + MCPhysReg PhysReg = LR.PhysReg; + if (PhysReg == 0) continue; - if (OnlyLiveOut && !mayLiveOut(LR.VirtReg)) + + MCRegister FirstUnit = *MCRegUnitIterator(PhysReg, TRI); + if (RegUnitStates[FirstUnit] == regLiveIn) continue; - spillVirtReg(MI, LR); + + assert((&MBB != &MBB.getParent()->front() || IgnoreMissingDefs) && + "no reload in start block. Missing vreg def?"); + + if (PrologLiveIns.count(PhysReg)) { + // FIXME: Theoretically this should use an insert point skipping labels + // but I'm not sure how labels should interact with prolog instruction + // that need reloads. + reload(MBB.begin(), LR.VirtReg, PhysReg); + } else + reload(InsertBefore, LR.VirtReg, PhysReg); } LiveVirtRegs.clear(); } @@ -464,51 +521,74 @@ void RegAllocFast::spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut) { /// Handle the direct use of a physical register. Check that the register is /// not used by a virtreg. Kill the physreg, marking it free. This may add /// implicit kills to MO->getParent() and invalidate MO. -void RegAllocFast::usePhysReg(MachineOperand &MO) { - // Ignore undef uses. 
- if (MO.isUndef()) - return; - - Register PhysReg = MO.getReg(); - assert(PhysReg.isPhysical() && "Bad usePhysReg operand"); - - markRegUsedInInstr(PhysReg); - - for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { - switch (RegUnitStates[*UI]) { - case regReserved: - RegUnitStates[*UI] = regFree; - LLVM_FALLTHROUGH; - case regFree: - break; - default: - llvm_unreachable("Unexpected reg unit state"); - } - } +bool RegAllocFast::usePhysReg(MachineInstr &MI, MCPhysReg Reg) { + assert(Register::isPhysicalRegister(Reg) && "expected physreg"); + bool displacedAny = displacePhysReg(MI, Reg); + setPhysRegState(Reg, regPreAssigned); + markRegUsedInInstr(Reg); + return displacedAny; +} - // All aliases are disabled, bring register into working set. - setPhysRegState(PhysReg, regFree); - MO.setIsKill(); +bool RegAllocFast::definePhysReg(MachineInstr &MI, MCPhysReg Reg) { + bool displacedAny = displacePhysReg(MI, Reg); + setPhysRegState(Reg, regPreAssigned); + return displacedAny; } /// Mark PhysReg as reserved or free after spilling any virtregs. This is very /// similar to defineVirtReg except the physreg is reserved instead of /// allocated. 
-void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI, - MCPhysReg PhysReg, unsigned NewState) { +bool RegAllocFast::displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg) { + bool displacedAny = false; + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { - switch (unsigned VirtReg = RegUnitStates[*UI]) { - default: - spillVirtReg(MI, VirtReg); + unsigned Unit = *UI; + switch (unsigned VirtReg = RegUnitStates[Unit]) { + default: { + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); + assert(LRI != LiveVirtRegs.end() && "datastructures in sync"); + MachineBasicBlock::iterator ReloadBefore = + std::next((MachineBasicBlock::iterator)MI.getIterator()); + reload(ReloadBefore, VirtReg, LRI->PhysReg); + + setPhysRegState(LRI->PhysReg, regFree); + LRI->PhysReg = 0; + LRI->Reloaded = true; + displacedAny = true; + break; + } + case regPreAssigned: + RegUnitStates[Unit] = regFree; + displacedAny = true; break; case regFree: - case regReserved: break; } } + return displacedAny; +} - markRegUsedInInstr(PhysReg); - setPhysRegState(PhysReg, NewState); +void RegAllocFast::freePhysReg(MCPhysReg PhysReg) { + LLVM_DEBUG(dbgs() << "Freeing " << printReg(PhysReg, TRI) << ':'); + + MCRegister FirstUnit = *MCRegUnitIterator(PhysReg, TRI); + switch (unsigned VirtReg = RegUnitStates[FirstUnit]) { + case regFree: + LLVM_DEBUG(dbgs() << '\n'); + return; + case regPreAssigned: + LLVM_DEBUG(dbgs() << '\n'); + setPhysRegState(PhysReg, regFree); + return; + default: { + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); + assert(LRI != LiveVirtRegs.end()); + LLVM_DEBUG(dbgs() << ' ' << printReg(LRI->VirtReg, TRI) << '\n'); + setPhysRegState(LRI->PhysReg, regFree); + LRI->PhysReg = 0; + } + return; + } } /// Return the cost of spilling clearing out PhysReg and aliases so it is free @@ -516,35 +596,61 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI, /// disabled - it can be allocated directly. 
/// \returns spillImpossible when PhysReg or an alias can't be spilled. unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { - if (isRegUsedInInstr(PhysReg)) { - LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) - << " is already used in instr.\n"); - return spillImpossible; - } - for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { switch (unsigned VirtReg = RegUnitStates[*UI]) { case regFree: break; - case regReserved: - LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding " - << printReg(PhysReg, TRI) << " is reserved already.\n"); + case regPreAssigned: + LLVM_DEBUG(dbgs() << "Cannot spill pre-assigned " + << printReg(PhysReg, TRI) << '\n'); return spillImpossible; default: { - LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); - assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && - "Missing VirtReg entry"); - return LRI->Dirty ? spillDirty : spillClean; + bool SureSpill = StackSlotForVirtReg[VirtReg] != -1 || + findLiveVirtReg(VirtReg)->LiveOut; + return SureSpill ? spillClean : spillDirty; } } } return 0; } +void RegAllocFast::assignDanglingDebugValues(MachineInstr &Definition, + Register VirtReg, MCPhysReg Reg) { + auto UDBGValIter = DanglingDbgValues.find(VirtReg); + if (UDBGValIter == DanglingDbgValues.end()) + return; + + SmallVectorImpl<MachineInstr*> &Dangling = UDBGValIter->second; + for (MachineInstr *DbgValue : Dangling) { + assert(DbgValue->isDebugValue()); + MachineOperand &MO = DbgValue->getOperand(0); + if (!MO.isReg()) + continue; + + // Test whether the physreg survives from the definition to the DBG_VALUE. 
+ MCPhysReg SetToReg = Reg; + unsigned Limit = 20; + for (MachineBasicBlock::iterator I = std::next(Definition.getIterator()), + E = DbgValue->getIterator(); I != E; ++I) { + if (I->modifiesRegister(Reg, TRI) || --Limit == 0) { + LLVM_DEBUG(dbgs() << "Register did not survive for " << *DbgValue + << '\n'); + SetToReg = 0; + break; + } + } + MO.setReg(SetToReg); + if (SetToReg != 0) + MO.setIsRenamable(); + } + Dangling.clear(); +} + /// This method updates local state so that we know that PhysReg is the /// proper container for VirtReg now. The physical register must not be used /// for anything else when this is called. -void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) { +void RegAllocFast::assignVirtToPhysReg(MachineInstr &AtMI, LiveReg &LR, + MCPhysReg PhysReg) { Register VirtReg = LR.VirtReg; LLVM_DEBUG(dbgs() << "Assigning " << printReg(VirtReg, TRI) << " to " << printReg(PhysReg, TRI) << '\n'); @@ -552,6 +658,8 @@ void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) { assert(PhysReg != 0 && "Trying to assign no register"); LR.PhysReg = PhysReg; setPhysRegState(PhysReg, VirtReg); + + assignDanglingDebugValues(AtMI, VirtReg, PhysReg); } static bool isCoalescable(const MachineInstr &MI) { @@ -595,11 +703,10 @@ Register RegAllocFast::traceCopies(Register VirtReg) const { } /// Allocates a physical register for VirtReg. 
-void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) { +void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, + Register Hint0, bool LookAtPhysRegUses) { const Register VirtReg = LR.VirtReg; - - assert(Register::isVirtualRegister(VirtReg) && - "Can only allocate virtual registers"); + assert(LR.PhysReg == 0); const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); LLVM_DEBUG(dbgs() << "Search register for " << printReg(VirtReg) @@ -607,41 +714,36 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) { << " with hint " << printReg(Hint0, TRI) << '\n'); // Take hint when possible. - if (Hint0.isPhysical() && MRI->isAllocatable(Hint0) && - RC.contains(Hint0)) { - // Ignore the hint if we would have to spill a dirty register. - unsigned Cost = calcSpillCost(Hint0); - if (Cost < spillDirty) { + if (Hint0.isPhysical() && MRI->isAllocatable(Hint0) && RC.contains(Hint0) && + !isRegUsedInInstr(Hint0, LookAtPhysRegUses)) { + // Take hint if the register is currently free. + if (isPhysRegFree(Hint0)) { LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI) << '\n'); - if (Cost) - definePhysReg(MI, Hint0, regFree); - assignVirtToPhysReg(LR, Hint0); + assignVirtToPhysReg(MI, LR, Hint0); return; } else { - LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI) - << "occupied\n"); + LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint0, TRI) + << " occupied\n"); } } else { Hint0 = Register(); } + // Try other hint. Register Hint1 = traceCopies(VirtReg); - if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) && - RC.contains(Hint1) && !isRegUsedInInstr(Hint1)) { - // Ignore the hint if we would have to spill a dirty register. - unsigned Cost = calcSpillCost(Hint1); - if (Cost < spillDirty) { + if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) && RC.contains(Hint1) && + !isRegUsedInInstr(Hint1, LookAtPhysRegUses)) { + // Take hint if the register is currently free. 
+ if (isPhysRegFree(Hint1)) { LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI) - << '\n'); - if (Cost) - definePhysReg(MI, Hint1, regFree); - assignVirtToPhysReg(LR, Hint1); + << '\n'); + assignVirtToPhysReg(MI, LR, Hint1); return; } else { - LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI) - << "occupied\n"); + LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint1, TRI) + << " occupied\n"); } } else { Hint1 = Register(); @@ -652,15 +754,20 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) { ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC); for (MCPhysReg PhysReg : AllocationOrder) { LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << ' '); + if (isRegUsedInInstr(PhysReg, LookAtPhysRegUses)) { + LLVM_DEBUG(dbgs() << "already used in instr.\n"); + continue; + } + unsigned Cost = calcSpillCost(PhysReg); LLVM_DEBUG(dbgs() << "Cost: " << Cost << " BestCost: " << BestCost << '\n'); // Immediate take a register with cost 0. 
if (Cost == 0) { - assignVirtToPhysReg(LR, PhysReg); + assignVirtToPhysReg(MI, LR, PhysReg); return; } - if (PhysReg == Hint1 || PhysReg == Hint0) + if (PhysReg == Hint0 || PhysReg == Hint1) Cost -= spillPrefBonus; if (Cost < BestCost) { @@ -676,13 +783,14 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) { MI.emitError("inline assembly requires more registers than available"); else MI.emitError("ran out of registers during register allocation"); - definePhysReg(MI, *AllocationOrder.begin(), regFree); - assignVirtToPhysReg(LR, *AllocationOrder.begin()); + + LR.Error = true; + LR.PhysReg = 0; return; } - definePhysReg(MI, BestReg, regFree); - assignVirtToPhysReg(LR, BestReg); + displacePhysReg(MI, BestReg); + assignVirtToPhysReg(MI, LR, BestReg); } void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) { @@ -710,212 +818,173 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) { MO.setIsRenamable(true); } -/// Allocates a register for VirtReg and mark it as dirty. -MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, - Register VirtReg, Register Hint) { - assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register"); +/// Variation of defineVirtReg() with special handling for livethrough regs +/// (tied or earlyclobber) that may interfere with preassigned uses. 
+void RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum, + Register VirtReg) { + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); + if (LRI != LiveVirtRegs.end()) { + MCPhysReg PrevReg = LRI->PhysReg; + if (PrevReg != 0 && isRegUsedInInstr(PrevReg, true)) { + LLVM_DEBUG(dbgs() << "Need new assignment for " << printReg(PrevReg, TRI) + << " (tied/earlyclobber resolution)\n"); + freePhysReg(PrevReg); + LRI->PhysReg = 0; + allocVirtReg(MI, *LRI, 0, true); + MachineBasicBlock::iterator InsertBefore = + std::next((MachineBasicBlock::iterator)MI.getIterator()); + LLVM_DEBUG(dbgs() << "Copy " << printReg(LRI->PhysReg, TRI) << " to " + << printReg(PrevReg, TRI) << '\n'); + BuildMI(*MBB, InsertBefore, MI.getDebugLoc(), + TII->get(TargetOpcode::COPY), PrevReg) + .addReg(LRI->PhysReg, llvm::RegState::Kill); + } + MachineOperand &MO = MI.getOperand(OpNum); + if (MO.getSubReg() && !MO.isUndef()) { + LRI->LastUse = &MI; + } + } + return defineVirtReg(MI, OpNum, VirtReg, true); +} + +/// Allocates a register for VirtReg definition. Typically the register is +/// already assigned from a use of the virtreg, however we still need to +/// perform an allocation if: +/// - It is a dead definition without any uses. +/// - The value is live out and all uses are in different basic blocks. +void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, + Register VirtReg, bool LookAtPhysRegUses) { + assert(VirtReg.isVirtual() && "Not a virtual register"); + MachineOperand &MO = MI.getOperand(OpNum); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); - if (!LRI->PhysReg) { - // If there is no hint, peek at the only use of this register. - if ((!Hint || !Hint.isPhysical()) && - MRI->hasOneNonDBGUse(VirtReg)) { - const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg); - // It's a copy, use the destination register as a hint. 
- if (UseMI.isCopyLike()) - Hint = UseMI.getOperand(0).getReg(); + if (New) { + if (!MO.isDead()) { + if (mayLiveOut(VirtReg)) { + LRI->LiveOut = true; + } else { + // It is a dead def without the dead flag; add the flag now. + MO.setIsDead(true); + } } - allocVirtReg(MI, *LRI, Hint); - } else if (LRI->LastUse) { - // Redefining a live register - kill at the last use, unless it is this - // instruction defining VirtReg multiple times. - if (LRI->LastUse != &MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse()) - addKillFlag(*LRI); } - assert(LRI->PhysReg && "Register not assigned"); - LRI->LastUse = &MI; - LRI->LastOpNum = OpNum; - LRI->Dirty = true; - markRegUsedInInstr(LRI->PhysReg); - return LRI->PhysReg; + if (LRI->PhysReg == 0) + allocVirtReg(MI, *LRI, 0, LookAtPhysRegUses); + else { + assert(!isRegUsedInInstr(LRI->PhysReg, LookAtPhysRegUses) && + "TODO: preassign mismatch"); + LLVM_DEBUG(dbgs() << "In def of " << printReg(VirtReg, TRI) + << " use existing assignment to " + << printReg(LRI->PhysReg, TRI) << '\n'); + } + + MCPhysReg PhysReg = LRI->PhysReg; + assert(PhysReg != 0 && "Register not assigned"); + if (LRI->Reloaded || LRI->LiveOut) { + if (!MI.isImplicitDef()) { + MachineBasicBlock::iterator SpillBefore = + std::next((MachineBasicBlock::iterator)MI.getIterator()); + LLVM_DEBUG(dbgs() << "Spill Reason: LO: " << LRI->LiveOut << " RL: " + << LRI->Reloaded << '\n'); + bool Kill = LRI->LastUse == nullptr; + spill(SpillBefore, VirtReg, PhysReg, Kill, LRI->LiveOut); + LRI->LastUse = nullptr; + } + LRI->LiveOut = false; + LRI->Reloaded = false; + } + if (MI.getOpcode() == TargetOpcode::BUNDLE) { + BundleVirtRegsMap[VirtReg] = PhysReg; + } + markRegUsedInInstr(PhysReg); + setPhysReg(MI, MO, PhysReg); } -/// Make sure VirtReg is available in a physreg and return it. 
-RegAllocFast::LiveReg &RegAllocFast::reloadVirtReg(MachineInstr &MI, - unsigned OpNum, - Register VirtReg, - Register Hint) { - assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register"); +/// Allocates a register for a VirtReg use. +void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum, + Register VirtReg) { + assert(VirtReg.isVirtual() && "Not a virtual register"); + MachineOperand &MO = MI.getOperand(OpNum); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); - MachineOperand &MO = MI.getOperand(OpNum); - if (!LRI->PhysReg) { - allocVirtReg(MI, *LRI, Hint); - reload(MI, VirtReg, LRI->PhysReg); - } else if (LRI->Dirty) { - if (isLastUseOfLocalReg(MO)) { - LLVM_DEBUG(dbgs() << "Killing last use: " << MO << '\n'); - if (MO.isUse()) - MO.setIsKill(); - else - MO.setIsDead(); - } else if (MO.isKill()) { - LLVM_DEBUG(dbgs() << "Clearing dubious kill: " << MO << '\n'); - MO.setIsKill(false); - } else if (MO.isDead()) { - LLVM_DEBUG(dbgs() << "Clearing dubious dead: " << MO << '\n'); - MO.setIsDead(false); + if (New) { + MachineOperand &MO = MI.getOperand(OpNum); + if (!MO.isKill()) { + if (mayLiveOut(VirtReg)) { + LRI->LiveOut = true; + } else { + // It is a last (killing) use without the kill flag; add the flag now. + MO.setIsKill(true); + } } - } else if (MO.isKill()) { - // We must remove kill flags from uses of reloaded registers because the - // register would be killed immediately, and there might be a second use: - // %foo = OR killed %x, %x - // This would cause a second reload of %x into a different register. - LLVM_DEBUG(dbgs() << "Clearing clean kill: " << MO << '\n'); - MO.setIsKill(false); - } else if (MO.isDead()) { - LLVM_DEBUG(dbgs() << "Clearing clean dead: " << MO << '\n'); - MO.setIsDead(false); + } else { + assert((!MO.isKill() || LRI->LastUse == &MI) && "Invalid kill flag"); } - assert(LRI->PhysReg && "Register not assigned"); + + // If necessary allocate a register. 
+ if (LRI->PhysReg == 0) { + assert(!MO.isTied() && "tied op should be allocated"); + Register Hint; + if (MI.isCopy() && MI.getOperand(1).getSubReg() == 0) { + Hint = MI.getOperand(0).getReg(); + assert(Hint.isPhysical() && + "Copy destination should already be assigned"); + } + allocVirtReg(MI, *LRI, Hint, false); + if (LRI->Error) { + const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); + ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC); + setPhysReg(MI, MO, *AllocationOrder.begin()); + return; + } + } + LRI->LastUse = &MI; - LRI->LastOpNum = OpNum; + + if (MI.getOpcode() == TargetOpcode::BUNDLE) { + BundleVirtRegsMap[VirtReg] = LRI->PhysReg; + } markRegUsedInInstr(LRI->PhysReg); - return *LRI; + setPhysReg(MI, MO, LRI->PhysReg); } /// Changes operand OpNum in MI the refer the PhysReg, considering subregs. This /// may invalidate any operand pointers. Return true if the operand kills its /// register. -bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO, +void RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg) { - bool Dead = MO.isDead(); if (!MO.getSubReg()) { MO.setReg(PhysReg); MO.setIsRenamable(true); - return MO.isKill() || Dead; + return; } // Handle subregister index. - MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : Register()); + MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : MCRegister()); MO.setIsRenamable(true); - MO.setSubReg(0); + // Note: We leave the subreg number around a little longer in case of defs. + // This is so that the register freeing logic in allocateInstruction can still + // recognize this as subregister defs. The code there will clear the number. + if (!MO.isDef()) + MO.setSubReg(0); // A kill flag implies killing the full register. Add corresponding super // register kill. 
if (MO.isKill()) { MI.addRegisterKilled(PhysReg, TRI, true); - return true; + return; } // A <def,read-undef> of a sub-register requires an implicit def of the full // register. - if (MO.isDef() && MO.isUndef()) - MI.addRegisterDefined(PhysReg, TRI); - - return Dead; -} - -// Handles special instruction operand like early clobbers and tied ops when -// there are additional physreg defines. -void RegAllocFast::handleThroughOperands(MachineInstr &MI, - SmallVectorImpl<Register> &VirtDead) { - LLVM_DEBUG(dbgs() << "Scanning for through registers:"); - SmallSet<Register, 8> ThroughRegs; - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg()) continue; - Register Reg = MO.getReg(); - if (!Reg.isVirtual()) - continue; - if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) || - (MO.getSubReg() && MI.readsVirtualRegister(Reg))) { - if (ThroughRegs.insert(Reg).second) - LLVM_DEBUG(dbgs() << ' ' << printReg(Reg)); - } - } - - // If any physreg defines collide with preallocated through registers, - // we must spill and reallocate. - LLVM_DEBUG(dbgs() << "\nChecking for physdef collisions.\n"); - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || !MO.isDef()) continue; - Register Reg = MO.getReg(); - if (!Reg || !Reg.isPhysical()) - continue; - markRegUsedInInstr(Reg); - - for (MCRegUnitIterator UI(Reg, TRI); UI.isValid(); ++UI) { - if (!ThroughRegs.count(RegUnitStates[*UI])) - continue; - - // Need to spill any aliasing registers. 
- for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) { - for (MCSuperRegIterator SI(*RI, TRI, true); SI.isValid(); ++SI) { - definePhysReg(MI, *SI, regFree); - } - } - } - } - - SmallVector<Register, 8> PartialDefs; - LLVM_DEBUG(dbgs() << "Allocating tied uses.\n"); - for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { - MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg()) continue; - Register Reg = MO.getReg(); - if (!Register::isVirtualRegister(Reg)) - continue; - if (MO.isUse()) { - if (!MO.isTied()) continue; - LLVM_DEBUG(dbgs() << "Operand " << I << "(" << MO - << ") is tied to operand " << MI.findTiedOperandIdx(I) - << ".\n"); - LiveReg &LR = reloadVirtReg(MI, I, Reg, 0); - MCPhysReg PhysReg = LR.PhysReg; - setPhysReg(MI, MO, PhysReg); - // Note: we don't update the def operand yet. That would cause the normal - // def-scan to attempt spilling. - } else if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) { - LLVM_DEBUG(dbgs() << "Partial redefine: " << MO << '\n'); - // Reload the register, but don't assign to the operand just yet. - // That would confuse the later phys-def processing pass. - LiveReg &LR = reloadVirtReg(MI, I, Reg, 0); - PartialDefs.push_back(LR.PhysReg); - } - } - - LLVM_DEBUG(dbgs() << "Allocating early clobbers.\n"); - for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { - const MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg()) continue; - Register Reg = MO.getReg(); - if (!Register::isVirtualRegister(Reg)) - continue; - if (!MO.isEarlyClobber()) - continue; - // Note: defineVirtReg may invalidate MO. - MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, 0); - if (setPhysReg(MI, MI.getOperand(I), PhysReg)) - VirtDead.push_back(Reg); - } - - // Restore UsedInInstr to a state usable for allocating normal virtual uses. 
- UsedInInstr.clear(); - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; - Register Reg = MO.getReg(); - if (!Reg || !Reg.isPhysical()) - continue; - LLVM_DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI) - << " as used in instr\n"); - markRegUsedInInstr(Reg); + if (MO.isDef() && MO.isUndef()) { + if (MO.isDead()) + MI.addRegisterDead(PhysReg, TRI, true); + else + MI.addRegisterDefined(PhysReg, TRI); } - - // Also mark PartialDefs as used to avoid reallocation. - for (Register PartialDef : PartialDefs) - markRegUsedInInstr(PartialDef); } #ifndef NDEBUG @@ -926,15 +995,21 @@ void RegAllocFast::dumpState() const { switch (unsigned VirtReg = RegUnitStates[Unit]) { case regFree: break; - case regReserved: + case regPreAssigned: dbgs() << " " << printRegUnit(Unit, TRI) << "[P]"; break; + case regLiveIn: + llvm_unreachable("Should not have regLiveIn in map"); default: { dbgs() << ' ' << printRegUnit(Unit, TRI) << '=' << printReg(VirtReg); LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); assert(I != LiveVirtRegs.end() && "have LiveVirtRegs entry"); - if (I->Dirty) - dbgs() << "[D]"; + if (I->LiveOut || I->Reloaded) { + dbgs() << '['; + if (I->LiveOut) dbgs() << 'O'; + if (I->Reloaded) dbgs() << 'R'; + dbgs() << ']'; + } assert(TRI->hasRegUnit(I->PhysReg, Unit) && "inverse mapping present"); break; } @@ -957,111 +1032,277 @@ void RegAllocFast::dumpState() const { } #endif -void RegAllocFast::allocateInstruction(MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - // If this is a copy, we may be able to coalesce. 
- Register CopySrcReg; - Register CopyDstReg; - unsigned CopySrcSub = 0; - unsigned CopyDstSub = 0; - if (MI.isCopy()) { - CopyDstReg = MI.getOperand(0).getReg(); - CopySrcReg = MI.getOperand(1).getReg(); - CopyDstSub = MI.getOperand(0).getSubReg(); - CopySrcSub = MI.getOperand(1).getSubReg(); +/// Count number of defs consumed from each register class by \p Reg +void RegAllocFast::addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts, + Register Reg) const { + assert(RegClassDefCounts.size() == TRI->getNumRegClasses()); + + if (Reg.isVirtual()) { + const TargetRegisterClass *OpRC = MRI->getRegClass(Reg); + for (unsigned RCIdx = 0, RCIdxEnd = TRI->getNumRegClasses(); + RCIdx != RCIdxEnd; ++RCIdx) { + const TargetRegisterClass *IdxRC = TRI->getRegClass(RCIdx); + // FIXME: Consider aliasing sub/super registers. + if (OpRC->hasSubClassEq(IdxRC)) + ++RegClassDefCounts[RCIdx]; + } + + return; } - // Track registers used by instruction. - UsedInInstr.clear(); + for (unsigned RCIdx = 0, RCIdxEnd = TRI->getNumRegClasses(); + RCIdx != RCIdxEnd; ++RCIdx) { + const TargetRegisterClass *IdxRC = TRI->getRegClass(RCIdx); + for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) { + if (IdxRC->contains(*Alias)) { + ++RegClassDefCounts[RCIdx]; + break; + } + } + } +} - // First scan. - // Mark physreg uses and early clobbers as used. - // Find the end of the virtreg operands - unsigned VirtOpEnd = 0; - bool hasTiedOps = false; - bool hasEarlyClobbers = false; - bool hasPartialRedefs = false; - bool hasPhysDefs = false; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - // Make sure MRI knows about registers clobbered by regmasks. - if (MO.isRegMask()) { - MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); - continue; +void RegAllocFast::allocateInstruction(MachineInstr &MI) { + // The basic algorithm here is: + // 1. Mark registers of def operands as free + // 2. 
Allocate registers to use operands and place reload instructions for + // registers displaced by the allocation. + // + // However we need to handle some corner cases: + // - pre-assigned defs and uses need to be handled before the other def/use + // operands are processed to avoid the allocation heuristics clashing with + // the pre-assignment. + // - The "free def operands" step has to come last instead of first for tied + // operands and early-clobbers. + + UsedInInstr.clear(); + BundleVirtRegsMap.clear(); + + // Scan for special cases; Apply pre-assigned register defs to state. + bool HasPhysRegUse = false; + bool HasRegMask = false; + bool HasVRegDef = false; + bool HasDef = false; + bool HasEarlyClobber = false; + bool NeedToAssignLiveThroughs = false; + for (MachineOperand &MO : MI.operands()) { + if (MO.isReg()) { + Register Reg = MO.getReg(); + if (Reg.isVirtual()) { + if (MO.isDef()) { + HasDef = true; + HasVRegDef = true; + if (MO.isEarlyClobber()) { + HasEarlyClobber = true; + NeedToAssignLiveThroughs = true; + } + if (MO.isTied() || (MO.getSubReg() != 0 && !MO.isUndef())) + NeedToAssignLiveThroughs = true; + } + } else if (Reg.isPhysical()) { + if (!MRI->isReserved(Reg)) { + if (MO.isDef()) { + HasDef = true; + bool displacedAny = definePhysReg(MI, Reg); + if (MO.isEarlyClobber()) + HasEarlyClobber = true; + if (!displacedAny) + MO.setIsDead(true); + } + if (MO.readsReg()) + HasPhysRegUse = true; + } + } + } else if (MO.isRegMask()) { + HasRegMask = true; } - if (!MO.isReg()) continue; - Register Reg = MO.getReg(); - if (!Reg) continue; - if (Register::isVirtualRegister(Reg)) { - VirtOpEnd = i+1; - if (MO.isUse()) { - hasTiedOps = hasTiedOps || - MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1; + } + + // Allocate virtreg defs. + if (HasDef) { + if (HasVRegDef) { + // Special handling for early clobbers, tied operands or subregister defs: + // Compared to "normal" defs these: + // - Must not use a register that is pre-assigned for a use operand. 
+ // - In order to solve tricky inline assembly constraints we change the + // heuristic to figure out a good operand order before doing + // assignments. + if (NeedToAssignLiveThroughs) { + DefOperandIndexes.clear(); + PhysRegUses.clear(); + + // Track number of defs which may consume a register from the class. + std::vector<unsigned> RegClassDefCounts(TRI->getNumRegClasses(), 0); + assert(RegClassDefCounts[0] == 0); + + LLVM_DEBUG(dbgs() << "Need to assign livethroughs\n"); + for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) { + const MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + if (MO.readsReg()) { + if (Reg.isPhysical()) { + LLVM_DEBUG(dbgs() << "mark extra used: " << printReg(Reg, TRI) + << '\n'); + markPhysRegUsedInInstr(Reg); + } + } + + if (MO.isDef()) { + if (Reg.isVirtual()) + DefOperandIndexes.push_back(I); + + addRegClassDefCounts(RegClassDefCounts, Reg); + } + } + + llvm::sort(DefOperandIndexes, [&](uint16_t I0, uint16_t I1) { + const MachineOperand &MO0 = MI.getOperand(I0); + const MachineOperand &MO1 = MI.getOperand(I1); + Register Reg0 = MO0.getReg(); + Register Reg1 = MO1.getReg(); + const TargetRegisterClass &RC0 = *MRI->getRegClass(Reg0); + const TargetRegisterClass &RC1 = *MRI->getRegClass(Reg1); + + // Identify regclass that are easy to use up completely just in this + // instruction. + unsigned ClassSize0 = RegClassInfo.getOrder(&RC0).size(); + unsigned ClassSize1 = RegClassInfo.getOrder(&RC1).size(); + + bool SmallClass0 = ClassSize0 < RegClassDefCounts[RC0.getID()]; + bool SmallClass1 = ClassSize1 < RegClassDefCounts[RC1.getID()]; + if (SmallClass0 > SmallClass1) + return true; + if (SmallClass0 < SmallClass1) + return false; + + // Allocate early clobbers and livethrough operands first. 
+ bool Livethrough0 = MO0.isEarlyClobber() || MO0.isTied() || + (MO0.getSubReg() == 0 && !MO0.isUndef()); + bool Livethrough1 = MO1.isEarlyClobber() || MO1.isTied() || + (MO1.getSubReg() == 0 && !MO1.isUndef()); + if (Livethrough0 > Livethrough1) + return true; + if (Livethrough0 < Livethrough1) + return false; + + // Tie-break rule: operand index. + return I0 < I1; + }); + + for (uint16_t OpIdx : DefOperandIndexes) { + MachineOperand &MO = MI.getOperand(OpIdx); + LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n'); + unsigned Reg = MO.getReg(); + if (MO.isEarlyClobber() || MO.isTied() || + (MO.getSubReg() && !MO.isUndef())) { + defineLiveThroughVirtReg(MI, OpIdx, Reg); + } else { + defineVirtReg(MI, OpIdx, Reg); + } + } } else { - if (MO.isEarlyClobber()) - hasEarlyClobbers = true; - if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) - hasPartialRedefs = true; + // Assign virtual register defs. + for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) { + MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg() || !MO.isDef()) + continue; + Register Reg = MO.getReg(); + if (Reg.isVirtual()) + defineVirtReg(MI, I, Reg); + } } - continue; } - if (!MRI->isAllocatable(Reg)) continue; - if (MO.isUse()) { - usePhysReg(MO); - } else if (MO.isEarlyClobber()) { - definePhysReg(MI, Reg, - (MO.isImplicit() || MO.isDead()) ? regFree : regReserved); - hasEarlyClobbers = true; - } else - hasPhysDefs = true; + + // Free registers occupied by defs. + // Iterate operands in reverse order, so we see the implicit super register + // defs first (we added them earlier in case of <def,read-undef>). + for (unsigned I = MI.getNumOperands(); I-- > 0;) { + MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg() || !MO.isDef()) + continue; + + // subreg defs don't free the full register. We left the subreg number + // around as a marker in setPhysReg() to recognize this case here. 
+ if (MO.getSubReg() != 0) { + MO.setSubReg(0); + continue; + } + + // Do not free tied operands and early clobbers. + if (MO.isTied() || MO.isEarlyClobber()) + continue; + Register Reg = MO.getReg(); + if (!Reg) + continue; + assert(Reg.isPhysical()); + if (MRI->isReserved(Reg)) + continue; + freePhysReg(Reg); + unmarkRegUsedInInstr(Reg); + } } - // The instruction may have virtual register operands that must be allocated - // the same register at use-time and def-time: early clobbers and tied - // operands. If there are also physical defs, these registers must avoid - // both physical defs and uses, making them more constrained than normal - // operands. - // Similarly, if there are multiple defs and tied operands, we must make - // sure the same register is allocated to uses and defs. - // We didn't detect inline asm tied operands above, so just make this extra - // pass for all inline asm. - if (MI.isInlineAsm() || hasEarlyClobbers || hasPartialRedefs || - (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) { - handleThroughOperands(MI, VirtDead); - // Don't attempt coalescing when we have funny stuff going on. - CopyDstReg = Register(); - // Pretend we have early clobbers so the use operands get marked below. - // This is not necessary for the common case of a single tied use. - hasEarlyClobbers = true; + // Displace clobbered registers. + if (HasRegMask) { + for (const MachineOperand &MO : MI.operands()) { + if (MO.isRegMask()) { + // MRI bookkeeping. + MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); + + // Displace clobbered registers. + const uint32_t *Mask = MO.getRegMask(); + for (LiveRegMap::iterator LRI = LiveVirtRegs.begin(), + LRIE = LiveVirtRegs.end(); LRI != LRIE; ++LRI) { + MCPhysReg PhysReg = LRI->PhysReg; + if (PhysReg != 0 && MachineOperand::clobbersPhysReg(Mask, PhysReg)) + displacePhysReg(MI, PhysReg); + } + } + } } - // Second scan. - // Allocate virtreg uses. + // Apply pre-assigned register uses to state. 
+ if (HasPhysRegUse) { + for (MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + Register Reg = MO.getReg(); + if (!Reg.isPhysical()) + continue; + if (MRI->isReserved(Reg)) + continue; + bool displacedAny = usePhysReg(MI, Reg); + if (!displacedAny && !MRI->isReserved(Reg)) + MO.setIsKill(true); + } + } + + // Allocate virtreg uses and insert reloads as necessary. bool HasUndefUse = false; - for (unsigned I = 0; I != VirtOpEnd; ++I) { + for (unsigned I = 0; I < MI.getNumOperands(); ++I) { MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg()) continue; + if (!MO.isReg() || !MO.isUse()) + continue; Register Reg = MO.getReg(); if (!Reg.isVirtual()) continue; - if (MO.isUse()) { - if (MO.isUndef()) { - HasUndefUse = true; - // There is no need to allocate a register for an undef use. - continue; - } - // Populate MayLiveAcrossBlocks in case the use block is allocated before - // the def block (removing the vreg uses). - mayLiveIn(Reg); - - LiveReg &LR = reloadVirtReg(MI, I, Reg, CopyDstReg); - MCPhysReg PhysReg = LR.PhysReg; - CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0; - if (setPhysReg(MI, MO, PhysReg)) - killVirtReg(LR); + if (MO.isUndef()) { + HasUndefUse = true; + continue; } + + + // Populate MayLiveAcrossBlocks in case the use block is allocated before + // the def block (removing the vreg uses). + mayLiveIn(Reg); + + + assert(!MO.isInternalRead() && "Bundles not supported"); + assert(MO.readsReg() && "reading use"); + useVirtReg(MI, I, Reg); } // Allocate undef operands. This is a separate step because in a situation @@ -1080,76 +1321,40 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { } } - // Track registers defined by instruction - early clobbers and tied uses at - // this point. 
- UsedInInstr.clear(); - if (hasEarlyClobbers) { - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg()) continue; - Register Reg = MO.getReg(); - if (!Reg || !Reg.isPhysical()) + // Free early clobbers. + if (HasEarlyClobber) { + for (unsigned I = MI.getNumOperands(); I-- > 0; ) { + MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber()) continue; - // Look for physreg defs and tied uses. - if (!MO.isDef() && !MO.isTied()) continue; - markRegUsedInInstr(Reg); - } - } - - unsigned DefOpEnd = MI.getNumOperands(); - if (MI.isCall()) { - // Spill all virtregs before a call. This serves one purpose: If an - // exception is thrown, the landing pad is going to expect to find - // registers in their spill slots. - // Note: although this is appealing to just consider all definitions - // as call-clobbered, this is not correct because some of those - // definitions may be used later on and we do not want to reuse - // those for virtual registers in between. - LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n"); - spillAll(MI, /*OnlyLiveOut*/ false); - } - - // Third scan. - // Mark all physreg defs as used before allocating virtreg defs. - for (unsigned I = 0; I != DefOpEnd; ++I) { - const MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber()) - continue; - Register Reg = MO.getReg(); - - if (!Reg || !Reg.isPhysical() || !MRI->isAllocatable(Reg)) - continue; - definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); - } + // subreg defs don't free the full register. We left the subreg number + // around as a marker in setPhysReg() to recognize this case here. + if (MO.getSubReg() != 0) { + MO.setSubReg(0); + continue; + } - // Fourth scan. - // Allocate defs and collect dead defs. 
- for (unsigned I = 0; I != DefOpEnd; ++I) { - const MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber()) - continue; - Register Reg = MO.getReg(); + Register Reg = MO.getReg(); + if (!Reg) + continue; + assert(Reg.isPhysical() && "should have register assigned"); + + // We sometimes get odd situations like: + // early-clobber %x0 = INSTRUCTION %x0 + // which is semantically questionable as the early-clobber should + // apply before the use. But in practice we consider the use to + // happen before the early clobber now. Don't free the early clobber + // register in this case. + if (MI.readsRegister(Reg, TRI)) + continue; - // We have already dealt with phys regs in the previous scan. - if (Reg.isPhysical()) - continue; - MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg); - if (setPhysReg(MI, MI.getOperand(I), PhysReg)) { - VirtDead.push_back(Reg); - CopyDstReg = Register(); // cancel coalescing; - } else - CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0; + freePhysReg(Reg); + } } - // Kill dead defs after the scan to ensure that multiple defs of the same - // register are allocated identically. We didn't need to do this for uses - // because we are crerating our own kill flags, and they are always at the - // last use. - for (Register VirtReg : VirtDead) - killVirtReg(VirtReg); - VirtDead.clear(); - LLVM_DEBUG(dbgs() << "<< " << MI); - if (CopyDstReg && CopyDstReg == CopySrcReg && CopyDstSub == CopySrcSub) { + if (MI.isCopy() && MI.getOperand(0).getReg() == MI.getOperand(1).getReg() && + MI.getNumOperands() == 2) { LLVM_DEBUG(dbgs() << "Mark identity copy for removal\n"); Coalesced.push_back(&MI); } @@ -1166,23 +1371,22 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) { if (!Register::isVirtualRegister(Reg)) return; + // Already spilled to a stackslot? + int SS = StackSlotForVirtReg[Reg]; + if (SS != -1) { + // Modify DBG_VALUE now that the value is in a spill slot. 
+ updateDbgValueForSpill(MI, SS); + LLVM_DEBUG(dbgs() << "Rewrite DBG_VALUE for spilled memory: " << MI); + return; + } + // See if this virtual register has already been allocated to a physical // register or spilled to a stack slot. LiveRegMap::iterator LRI = findLiveVirtReg(Reg); if (LRI != LiveVirtRegs.end() && LRI->PhysReg) { setPhysReg(MI, MO, LRI->PhysReg); } else { - int SS = StackSlotForVirtReg[Reg]; - if (SS != -1) { - // Modify DBG_VALUE now that the value is in a spill slot. - updateDbgValueForSpill(MI, SS); - LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << MI); - return; - } - - // We can't allocate a physreg for a DebugValue, sorry! - LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); - MO.setReg(Register()); + DanglingDbgValues[Reg].push_back(&MI); } // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so @@ -1190,6 +1394,30 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) { LiveDbgValueMap[Reg].push_back(&MI); } +void RegAllocFast::handleBundle(MachineInstr &MI) { + MachineBasicBlock::instr_iterator BundledMI = MI.getIterator(); + ++BundledMI; + while (BundledMI->isBundledWithPred()) { + for (unsigned I = 0; I < BundledMI->getNumOperands(); ++I) { + MachineOperand &MO = BundledMI->getOperand(I); + if (!MO.isReg()) + continue; + + Register Reg = MO.getReg(); + if (!Reg.isVirtual()) + continue; + + DenseMap<Register, MCPhysReg>::iterator DI; + DI = BundleVirtRegsMap.find(Reg); + assert(DI != BundleVirtRegsMap.end() && "Unassigned virtual register"); + + setPhysReg(MI, MO, DI->second); + } + + ++BundledMI; + } +} + void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { this->MBB = &MBB; LLVM_DEBUG(dbgs() << "\nAllocating " << MBB); @@ -1197,18 +1425,15 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { RegUnitStates.assign(TRI->getNumRegUnits(), regFree); assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); - MachineBasicBlock::iterator MII = 
MBB.begin(); - - // Add live-in registers as live. - for (const MachineBasicBlock::RegisterMaskPair &LI : MBB.liveins()) - if (MRI->isAllocatable(LI.PhysReg)) - definePhysReg(MII, LI.PhysReg, regReserved); + for (MachineBasicBlock *Succ : MBB.successors()) { + for (const MachineBasicBlock::RegisterMaskPair &LI : Succ->liveins()) + setPhysRegState(LI.PhysReg, regPreAssigned); + } - VirtDead.clear(); Coalesced.clear(); - // Otherwise, sequentially allocate each instruction in the MBB. - for (MachineInstr &MI : MBB) { + // Traverse block in reverse order allocating instructions one by one. + for (MachineInstr &MI : reverse(MBB)) { LLVM_DEBUG( dbgs() << "\n>> " << MI << "Regs:"; dumpState() @@ -1222,11 +1447,22 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { } allocateInstruction(MI); + + // Once BUNDLE header is assigned registers, same assignments need to be + // done for bundled MIs. + if (MI.getOpcode() == TargetOpcode::BUNDLE) { + handleBundle(MI); + } } + LLVM_DEBUG( + dbgs() << "Begin Regs:"; + dumpState() + ); + // Spill all physical registers holding virtual registers now. - LLVM_DEBUG(dbgs() << "Spilling live registers at end of block.\n"); - spillAll(MBB.getFirstTerminator(), /*OnlyLiveOut*/ true); + LLVM_DEBUG(dbgs() << "Loading live registers at begin of block.\n"); + reloadAtBegin(MBB); // Erase all the coalesced copies. We are delaying it until now because // LiveVirtRegs might refer to the instrs. @@ -1234,6 +1470,20 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { MBB.erase(MI); NumCoalesced += Coalesced.size(); + for (auto &UDBGPair : DanglingDbgValues) { + for (MachineInstr *DbgValue : UDBGPair.second) { + assert(DbgValue->isDebugValue() && "expected DBG_VALUE"); + MachineOperand &MO = DbgValue->getOperand(0); + // Nothing to do if the vreg was spilled in the meantime. 
+ if (!MO.isReg()) + continue; + LLVM_DEBUG(dbgs() << "Register did not survive for " << *DbgValue + << '\n'); + MO.setReg(0); + } + } + DanglingDbgValues.clear(); + LLVM_DEBUG(MBB.dump()); } @@ -1247,8 +1497,11 @@ bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) { MFI = &MF.getFrameInfo(); MRI->freezeReservedRegs(MF); RegClassInfo.runOnMachineFunction(MF); + unsigned NumRegUnits = TRI->getNumRegUnits(); UsedInInstr.clear(); - UsedInInstr.setUniverse(TRI->getNumRegUnits()); + UsedInInstr.setUniverse(NumRegUnits); + PhysRegUses.clear(); + PhysRegUses.setUniverse(NumRegUnits); // initialize the virtual->physical register map to have a 'null' // mapping for all virtual registers diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 41cf00261265..166414e4ffa1 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -147,7 +147,7 @@ class RAGreedy : public MachineFunctionPass, // Convenient shortcuts. using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>; using SmallLISet = SmallPtrSet<LiveInterval *, 4>; - using SmallVirtRegSet = SmallSet<unsigned, 16>; + using SmallVirtRegSet = SmallSet<Register, 16>; // context MachineFunction *MF; @@ -172,6 +172,7 @@ class RAGreedy : public MachineFunctionPass, std::unique_ptr<Spiller> SpillerInstance; PQueue Queue; unsigned NextCascade; + std::unique_ptr<VirtRegAuxInfo> VRAI; // Live ranges pass through a number of stages as we try to allocate them. 
// Some of the stages may also create new live ranges: @@ -247,19 +248,19 @@ class RAGreedy : public MachineFunctionPass, IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo; LiveRangeStage getStage(const LiveInterval &VirtReg) const { - return ExtraRegInfo[VirtReg.reg].Stage; + return ExtraRegInfo[VirtReg.reg()].Stage; } void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) { ExtraRegInfo.resize(MRI->getNumVirtRegs()); - ExtraRegInfo[VirtReg.reg].Stage = Stage; + ExtraRegInfo[VirtReg.reg()].Stage = Stage; } template<typename Iterator> void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) { ExtraRegInfo.resize(MRI->getNumVirtRegs()); for (;Begin != End; ++Begin) { - unsigned Reg = *Begin; + Register Reg = *Begin; if (ExtraRegInfo[Reg].Stage == RS_New) ExtraRegInfo[Reg].Stage = NewStage; } @@ -290,8 +291,8 @@ class RAGreedy : public MachineFunctionPass, public: using EvictorInfo = - std::pair<unsigned /* evictor */, unsigned /* physreg */>; - using EvicteeInfo = llvm::DenseMap<unsigned /* evictee */, EvictorInfo>; + std::pair<Register /* evictor */, MCRegister /* physreg */>; + using EvicteeInfo = llvm::DenseMap<Register /* evictee */, EvictorInfo>; private: /// Each Vreg that has been evicted in the last stage of selectOrSplit will @@ -307,14 +308,14 @@ class RAGreedy : public MachineFunctionPass, /// longer relevant. /// \param Evictee The evictee Vreg for whom we want to clear collected /// eviction info. - void clearEvicteeInfo(unsigned Evictee) { Evictees.erase(Evictee); } + void clearEvicteeInfo(Register Evictee) { Evictees.erase(Evictee); } /// Track new eviction. /// The Evictor vreg has evicted the Evictee vreg from Physreg. /// \param PhysReg The physical register Evictee was evicted from. /// \param Evictor The evictor Vreg that evicted Evictee. /// \param Evictee The evictee Vreg. 
- void addEviction(unsigned PhysReg, unsigned Evictor, unsigned Evictee) { + void addEviction(MCRegister PhysReg, Register Evictor, Register Evictee) { Evictees[Evictee].first = Evictor; Evictees[Evictee].second = PhysReg; } @@ -323,7 +324,7 @@ class RAGreedy : public MachineFunctionPass, /// \param Evictee The evictee vreg. /// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if /// nobody has evicted Evictee from PhysReg. - EvictorInfo getEvictor(unsigned Evictee) { + EvictorInfo getEvictor(Register Evictee) { if (Evictees.count(Evictee)) { return Evictees[Evictee]; } @@ -348,7 +349,7 @@ class RAGreedy : public MachineFunctionPass, /// Global live range splitting candidate info. struct GlobalSplitCandidate { // Register intended for assignment, or 0. - unsigned PhysReg; + MCRegister PhysReg; // SplitKit interval index for this candidate. unsigned IntvIdx; @@ -360,7 +361,7 @@ class RAGreedy : public MachineFunctionPass, BitVector LiveBundles; SmallVector<unsigned, 8> ActiveBlocks; - void reset(InterferenceCache &Cache, unsigned Reg) { + void reset(InterferenceCache &Cache, MCRegister Reg) { PhysReg = Reg; IntvIdx = 0; Intf.setPhysReg(Cache, Reg); @@ -368,12 +369,12 @@ class RAGreedy : public MachineFunctionPass, ActiveBlocks.clear(); } - // Set B[i] = C for every live bundle where B[i] was NoCand. + // Set B[I] = C for every live bundle where B[I] was NoCand. 
unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) { unsigned Count = 0; - for (unsigned i : LiveBundles.set_bits()) - if (B[i] == NoCand) { - B[i] = C; + for (unsigned I : LiveBundles.set_bits()) + if (B[I] == NoCand) { + B[I] = C; Count++; } return Count; @@ -417,7 +418,8 @@ public: Spiller &spiller() override { return *SpillerInstance; } void enqueue(LiveInterval *LI) override; LiveInterval *dequeue() override; - Register selectOrSplit(LiveInterval&, SmallVectorImpl<Register>&) override; + MCRegister selectOrSplit(LiveInterval &, + SmallVectorImpl<Register> &) override; void aboutToRemoveInterval(LiveInterval &) override; /// Perform register allocation. @@ -428,15 +430,20 @@ public: MachineFunctionProperties::Property::NoPHIs); } + MachineFunctionProperties getClearedProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::IsSSA); + } + static char ID; private: - Register selectOrSplitImpl(LiveInterval &, SmallVectorImpl<Register> &, - SmallVirtRegSet &, unsigned = 0); + MCRegister selectOrSplitImpl(LiveInterval &, SmallVectorImpl<Register> &, + SmallVirtRegSet &, unsigned = 0); - bool LRE_CanEraseVirtReg(unsigned) override; - void LRE_WillShrinkVirtReg(unsigned) override; - void LRE_DidCloneVirtReg(unsigned, unsigned) override; + bool LRE_CanEraseVirtReg(Register) override; + void LRE_WillShrinkVirtReg(Register) override; + void LRE_DidCloneVirtReg(Register, Register) override; void enqueue(PQueue &CurQueue, LiveInterval *LI); LiveInterval *dequeue(PQueue &CurQueue); @@ -444,7 +451,7 @@ private: bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); bool growRegion(GlobalSplitCandidate &Cand); - bool splitCanCauseEvictionChain(unsigned Evictee, GlobalSplitCandidate &Cand, + bool splitCanCauseEvictionChain(Register Evictee, GlobalSplitCandidate &Cand, unsigned BBNumber, const AllocationOrder &Order); bool 
splitCanCauseLocalSpill(unsigned VirtRegToSplit, @@ -455,20 +462,20 @@ private: bool *CanCauseEvictionChain); bool calcCompactRegion(GlobalSplitCandidate&); void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>); - void calcGapWeights(unsigned, SmallVectorImpl<float>&); + void calcGapWeights(MCRegister, SmallVectorImpl<float> &); Register canReassign(LiveInterval &VirtReg, Register PrevReg); bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); - bool canEvictInterference(LiveInterval&, Register, bool, EvictionCost&, - const SmallVirtRegSet&); - bool canEvictInterferenceInRange(LiveInterval &VirtReg, Register oPhysReg, + bool canEvictInterference(LiveInterval &, MCRegister, bool, EvictionCost &, + const SmallVirtRegSet &); + bool canEvictInterferenceInRange(LiveInterval &VirtReg, MCRegister PhysReg, SlotIndex Start, SlotIndex End, EvictionCost &MaxCost); - unsigned getCheapestEvicteeWeight(const AllocationOrder &Order, - LiveInterval &VirtReg, SlotIndex Start, - SlotIndex End, float *BestEvictWeight); - void evictInterference(LiveInterval&, Register, - SmallVectorImpl<Register>&); - bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, + MCRegister getCheapestEvicteeWeight(const AllocationOrder &Order, + LiveInterval &VirtReg, SlotIndex Start, + SlotIndex End, float *BestEvictWeight); + void evictInterference(LiveInterval &, MCRegister, + SmallVectorImpl<Register> &); + bool mayRecolorAllInterferences(MCRegister PhysReg, LiveInterval &VirtReg, SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters); @@ -478,8 +485,8 @@ private: unsigned tryEvict(LiveInterval&, AllocationOrder&, SmallVectorImpl<Register>&, unsigned, const SmallVirtRegSet&); - unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl<Register>&); + MCRegister tryRegionSplit(LiveInterval &, AllocationOrder &, + SmallVectorImpl<Register> &); /// Calculate cost of region splitting. 
unsigned calculateRegionSplitCost(LiveInterval &VirtReg, AllocationOrder &Order, @@ -492,9 +499,10 @@ private: SmallVectorImpl<Register> &NewVRegs); /// Check other options before using a callee-saved register for the first /// time. - unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, - Register PhysReg, unsigned &CostPerUseLimit, - SmallVectorImpl<Register> &NewVRegs); + MCRegister tryAssignCSRFirstTime(LiveInterval &VirtReg, + AllocationOrder &Order, MCRegister PhysReg, + unsigned &CostPerUseLimit, + SmallVectorImpl<Register> &NewVRegs); void initializeCSRCost(); unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl<Register>&); @@ -528,8 +536,8 @@ private: }; using HintsInfo = SmallVector<HintInfo, 4>; - BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned); - void collectHintInfo(unsigned, HintsInfo &); + BlockFrequency getBrokenHintFreq(const HintsInfo &, MCRegister); + void collectHintInfo(Register, HintsInfo &); bool isUnusedCalleeSavedReg(MCRegister PhysReg) const; @@ -626,7 +634,7 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { // LiveRangeEdit delegate methods //===----------------------------------------------------------------------===// -bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { +bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) { LiveInterval &LI = LIS->getInterval(VirtReg); if (VRM->hasPhys(VirtReg)) { Matrix->unassign(LI); @@ -641,7 +649,7 @@ bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { return false; } -void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) { +void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) { if (!VRM->hasPhys(VirtReg)) return; @@ -651,7 +659,7 @@ void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) { enqueue(&LI); } -void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { +void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) { // Cloning a register we haven't even heard about yet? Just ignore it. 
if (!ExtraRegInfo.inBounds(Old)) return; @@ -677,9 +685,8 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Prioritize live ranges by size, assigning larger ranges first. // The queue holds (size, reg) pairs. const unsigned Size = LI->getSize(); - const unsigned Reg = LI->reg; - assert(Register::isVirtualRegister(Reg) && - "Can only enqueue virtual registers"); + const Register Reg = LI->reg(); + assert(Reg.isVirtual() && "Can only enqueue virtual registers"); unsigned Prio; ExtraRegInfo.grow(Reg); @@ -756,26 +763,33 @@ Register RAGreedy::tryAssign(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<Register> &NewVRegs, const SmallVirtRegSet &FixedRegisters) { - Order.rewind(); Register PhysReg; - while ((PhysReg = Order.next())) - if (!Matrix->checkInterference(VirtReg, PhysReg)) - break; - if (!PhysReg || Order.isHint()) + for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) { + assert(*I); + if (!Matrix->checkInterference(VirtReg, *I)) { + if (I.isHint()) + return *I; + else + PhysReg = *I; + } + } + if (!PhysReg.isValid()) return PhysReg; // PhysReg is available, but there may be a better choice. // If we missed a simple hint, try to cheaply evict interference from the // preferred register. 
- if (Register Hint = MRI->getSimpleHint(VirtReg.reg)) + if (Register Hint = MRI->getSimpleHint(VirtReg.reg())) if (Order.isHint(Hint)) { - LLVM_DEBUG(dbgs() << "missed hint " << printReg(Hint, TRI) << '\n'); + MCRegister PhysHint = Hint.asMCReg(); + LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n'); EvictionCost MaxCost; MaxCost.setBrokenHints(1); - if (canEvictInterference(VirtReg, Hint, true, MaxCost, FixedRegisters)) { - evictInterference(VirtReg, Hint, NewVRegs); - return Hint; + if (canEvictInterference(VirtReg, PhysHint, true, MaxCost, + FixedRegisters)) { + evictInterference(VirtReg, PhysHint, NewVRegs); + return PhysHint; } // Record the missed hint, we may be able to recover // at the end if the surrounding allocation changed. @@ -800,13 +814,14 @@ Register RAGreedy::tryAssign(LiveInterval &VirtReg, //===----------------------------------------------------------------------===// Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) { - AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); - Register PhysReg; - while ((PhysReg = Order.next())) { - if (PhysReg == PrevReg) + auto Order = + AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix); + MCRegister PhysReg; + for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) { + if ((*I).id() == PrevReg.id()) continue; - MCRegUnitIterator Units(PhysReg, TRI); + MCRegUnitIterator Units(*I, TRI); for (; Units.isValid(); ++Units) { // Instantiate a "subquery", not to be confused with the Queries array. LiveIntervalUnion::Query subQ(VirtReg, Matrix->getLiveUnions()[*Units]); @@ -815,7 +830,7 @@ Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) { } // If no units have interference, break out with the current PhysReg. 
if (!Units.isValid()) - break; + PhysReg = *I; } if (PhysReg) LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from " @@ -846,8 +861,8 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, if (CanSplit && IsHint && !BreaksHint) return true; - if (A.weight > B.weight) { - LLVM_DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n'); + if (A.weight() > B.weight()) { + LLVM_DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight() << '\n'); return true; } return false; @@ -862,7 +877,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, /// @param MaxCost Only look for cheaper candidates and update with new cost /// when returning true. /// @returns True when interference can be evicted cheaper than MaxCost. -bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg, +bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) { // It is only possible to evict virtual register interference. @@ -878,7 +893,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg, // // This works out so a register without a cascade number is allowed to evict // anything, and it can be evicted by anything. - unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade; + unsigned Cascade = ExtraRegInfo[VirtReg.reg()].Cascade; if (!Cascade) Cascade = NextCascade; @@ -890,15 +905,14 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg, return false; // Check if any interfering live range is heavier than MaxWeight. 
- for (unsigned i = Q.interferingVRegs().size(); i; --i) { - LiveInterval *Intf = Q.interferingVRegs()[i - 1]; - assert(Register::isVirtualRegister(Intf->reg) && + for (LiveInterval *Intf : reverse(Q.interferingVRegs())) { + assert(Register::isVirtualRegister(Intf->reg()) && "Only expecting virtual register interference from query"); // Do not allow eviction of a virtual register if we are in the middle // of last-chance recoloring and this virtual register is one that we // have scavenged a physical register for. - if (FixedRegisters.count(Intf->reg)) + if (FixedRegisters.count(Intf->reg())) return false; // Never evict spill products. They cannot split or spill. @@ -910,12 +924,14 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg, // // Also allow urgent evictions of unspillable ranges from a strictly // larger allocation order. - bool Urgent = !VirtReg.isSpillable() && - (Intf->isSpillable() || - RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) < - RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg))); + bool Urgent = + !VirtReg.isSpillable() && + (Intf->isSpillable() || + RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg())) < + RegClassInfo.getNumAllocatableRegs( + MRI->getRegClass(Intf->reg()))); // Only evict older cascades or live ranges without a cascade. - unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade; + unsigned IntfCascade = ExtraRegInfo[Intf->reg()].Cascade; if (Cascade <= IntfCascade) { if (!Urgent) return false; @@ -924,10 +940,10 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg, Cost.BrokenHints += 10; } // Would this break a satisfied hint? - bool BreaksHint = VRM->hasPreferredPhys(Intf->reg); + bool BreaksHint = VRM->hasPreferredPhys(Intf->reg()); // Update eviction cost. 
Cost.BrokenHints += BreaksHint; - Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight); + Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight()); // Abort if this would be too expensive. if (!(Cost < MaxCost)) return false; @@ -960,7 +976,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg, /// when returning true. /// \return True when interference can be evicted cheaper than MaxCost. bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg, - Register PhysReg, SlotIndex Start, + MCRegister PhysReg, SlotIndex Start, SlotIndex End, EvictionCost &MaxCost) { EvictionCost Cost; @@ -969,25 +985,23 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg, LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); // Check if any interfering live range is heavier than MaxWeight. - for (unsigned i = Q.interferingVRegs().size(); i; --i) { - LiveInterval *Intf = Q.interferingVRegs()[i - 1]; - + for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) { // Check if interference overlast the segment in interest. if (!Intf->overlaps(Start, End)) continue; // Cannot evict non virtual reg interference. - if (!Register::isVirtualRegister(Intf->reg)) + if (!Register::isVirtualRegister(Intf->reg())) return false; // Never evict spill products. They cannot split or spill. if (getStage(*Intf) == RS_Done) return false; // Would this break a satisfied hint? - bool BreaksHint = VRM->hasPreferredPhys(Intf->reg); + bool BreaksHint = VRM->hasPreferredPhys(Intf->reg()); // Update eviction cost. Cost.BrokenHints += BreaksHint; - Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight); + Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight()); // Abort if this would be too expensive. 
if (!(Cost < MaxCost)) return false; @@ -1012,17 +1026,17 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg, /// \param BestEvictweight The eviction cost of that eviction /// \return The PhysReg which is the best candidate for eviction and the /// eviction cost in BestEvictweight -unsigned RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order, - LiveInterval &VirtReg, - SlotIndex Start, SlotIndex End, - float *BestEvictweight) { +MCRegister RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order, + LiveInterval &VirtReg, + SlotIndex Start, SlotIndex End, + float *BestEvictweight) { EvictionCost BestEvictCost; BestEvictCost.setMax(); - BestEvictCost.MaxWeight = VirtReg.weight; - unsigned BestEvicteePhys = 0; + BestEvictCost.MaxWeight = VirtReg.weight(); + MCRegister BestEvicteePhys; // Go over all physical registers and find the best candidate for eviction - for (auto PhysReg : Order.getOrder()) { + for (MCRegister PhysReg : Order.getOrder()) { if (!canEvictInterferenceInRange(VirtReg, PhysReg, Start, End, BestEvictCost)) @@ -1038,14 +1052,14 @@ unsigned RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order, /// evictInterference - Evict any interferring registers that prevent VirtReg /// from being assigned to Physreg. This assumes that canEvictInterference /// returned true. -void RAGreedy::evictInterference(LiveInterval &VirtReg, Register PhysReg, +void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg, SmallVectorImpl<Register> &NewVRegs) { // Make sure that VirtReg has a cascade number, and assign that cascade // number to every evicted register. These live ranges than then only be // evicted by a newer cascade, preventing infinite loops. 
- unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade; + unsigned Cascade = ExtraRegInfo[VirtReg.reg()].Cascade; if (!Cascade) - Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++; + Cascade = ExtraRegInfo[VirtReg.reg()].Cascade = NextCascade++; LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI) << " interference: Cascade " << Cascade << '\n'); @@ -1064,21 +1078,20 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, Register PhysReg, } // Evict them second. This will invalidate the queries. - for (unsigned i = 0, e = Intfs.size(); i != e; ++i) { - LiveInterval *Intf = Intfs[i]; + for (LiveInterval *Intf : Intfs) { // The same VirtReg may be present in multiple RegUnits. Skip duplicates. - if (!VRM->hasPhys(Intf->reg)) + if (!VRM->hasPhys(Intf->reg())) continue; - LastEvicted.addEviction(PhysReg, VirtReg.reg, Intf->reg); + LastEvicted.addEviction(PhysReg, VirtReg.reg(), Intf->reg()); Matrix->unassign(*Intf); - assert((ExtraRegInfo[Intf->reg].Cascade < Cascade || + assert((ExtraRegInfo[Intf->reg()].Cascade < Cascade || VirtReg.isSpillable() < Intf->isSpillable()) && "Cannot decrease cascade number, illegal eviction"); - ExtraRegInfo[Intf->reg].Cascade = Cascade; + ExtraRegInfo[Intf->reg()].Cascade = Cascade; ++NumEvicted; - NewVRegs.push_back(Intf->reg); + NewVRegs.push_back(Intf->reg()); } } @@ -1107,17 +1120,17 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, // Keep track of the cheapest interference seen so far. EvictionCost BestCost; BestCost.setMax(); - unsigned BestPhys = 0; + MCRegister BestPhys; unsigned OrderLimit = Order.getOrder().size(); // When we are just looking for a reduced cost per use, don't break any // hints, and only evict smaller spill weights. if (CostPerUseLimit < ~0u) { BestCost.BrokenHints = 0; - BestCost.MaxWeight = VirtReg.weight; + BestCost.MaxWeight = VirtReg.weight(); // Check of any registers in RC are below CostPerUseLimit. 
- const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg); + const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg()); unsigned MinCost = RegClassInfo.getMinCost(RC); if (MinCost >= CostPerUseLimit) { LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " @@ -1134,8 +1147,10 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, } } - Order.rewind(); - while (MCRegister PhysReg = Order.next(OrderLimit)) { + for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E; + ++I) { + MCRegister PhysReg = *I; + assert(PhysReg); if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) continue; // The first use of a callee-saved register in a function has cost 1. @@ -1156,7 +1171,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, BestPhys = PhysReg; // Stop if the hint can be used. - if (Order.isHint()) + if (I.isHint()) break; } @@ -1183,9 +1198,9 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, // Reset interference dependent info. 
SplitConstraints.resize(UseBlocks.size()); BlockFrequency StaticCost = 0; - for (unsigned i = 0; i != UseBlocks.size(); ++i) { - const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; - SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; + for (unsigned I = 0; I != UseBlocks.size(); ++I) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[I]; + SpillPlacement::BlockConstraint &BC = SplitConstraints[I]; BC.Number = BI.MBB->getNumber(); Intf.moveToBlock(BC.Number); @@ -1256,8 +1271,7 @@ bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, unsigned TBS[GroupSize]; unsigned B = 0, T = 0; - for (unsigned i = 0; i != Blocks.size(); ++i) { - unsigned Number = Blocks[i]; + for (unsigned Number : Blocks) { Intf.moveToBlock(Number); if (!Intf.hasInterference()) { @@ -1314,8 +1328,7 @@ bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) { while (true) { ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive(); // Find new through blocks in the periphery of PrefRegBundles. - for (int i = 0, e = NewBundles.size(); i != e; ++i) { - unsigned Bundle = NewBundles[i]; + for (unsigned Bundle : NewBundles) { // Look at all blocks connected to Bundle in the full graph. ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle); for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end(); @@ -1367,7 +1380,7 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { return false; // Compact regions don't correspond to any physreg. 
- Cand.reset(IntfCache, 0); + Cand.reset(IntfCache, MCRegister::NoRegister); LLVM_DEBUG(dbgs() << "Compact region bundles"); @@ -1395,8 +1408,8 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { } LLVM_DEBUG({ - for (int i : Cand.LiveBundles.set_bits()) - dbgs() << " EB#" << i; + for (int I : Cand.LiveBundles.set_bits()) + dbgs() << " EB#" << I; dbgs() << ".\n"; }); return true; @@ -1407,8 +1420,7 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { BlockFrequency RAGreedy::calcSpillCost() { BlockFrequency Cost = 0; ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); - for (unsigned i = 0; i != UseBlocks.size(); ++i) { - const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + for (const SplitAnalysis::BlockInfo &BI : UseBlocks) { unsigned Number = BI.MBB->getNumber(); // We normally only need one spill instruction - a load or a store. Cost += SpillPlacer->getBlockFrequency(Number); @@ -1473,20 +1485,20 @@ BlockFrequency RAGreedy::calcSpillCost() { /// artifact of Evictee. /// \return True if splitting Evictee may cause a bad eviction chain, false /// otherwise. -bool RAGreedy::splitCanCauseEvictionChain(unsigned Evictee, +bool RAGreedy::splitCanCauseEvictionChain(Register Evictee, GlobalSplitCandidate &Cand, unsigned BBNumber, const AllocationOrder &Order) { EvictionTrack::EvictorInfo VregEvictorInfo = LastEvicted.getEvictor(Evictee); unsigned Evictor = VregEvictorInfo.first; - unsigned PhysReg = VregEvictorInfo.second; + MCRegister PhysReg = VregEvictorInfo.second; // No actual evictor. 
if (!Evictor || !PhysReg) return false; float MaxWeight = 0; - unsigned FutureEvictedPhysReg = + MCRegister FutureEvictedPhysReg = getCheapestEvicteeWeight(Order, LIS->getInterval(Evictee), Cand.Intf.first(), Cand.Intf.last(), &MaxWeight); @@ -1511,10 +1523,9 @@ bool RAGreedy::splitCanCauseEvictionChain(unsigned Evictee, // Now, check to see if the local interval we will create is going to be // expensive enough to evict somebody If so, this may cause a bad eviction // chain. - VirtRegAuxInfo VRAI(*MF, *LIS, VRM, getAnalysis<MachineLoopInfo>(), *MBFI); float splitArtifactWeight = - VRAI.futureWeight(LIS->getInterval(Evictee), - Cand.Intf.first().getPrevIndex(), Cand.Intf.last()); + VRAI->futureWeight(LIS->getInterval(Evictee), + Cand.Intf.first().getPrevIndex(), Cand.Intf.last()); if (splitArtifactWeight >= 0 && splitArtifactWeight < MaxWeight) return false; @@ -1548,16 +1559,15 @@ bool RAGreedy::splitCanCauseLocalSpill(unsigned VirtRegToSplit, // Check if the local interval will evict a cheaper interval. float CheapestEvictWeight = 0; - unsigned FutureEvictedPhysReg = getCheapestEvicteeWeight( + MCRegister FutureEvictedPhysReg = getCheapestEvicteeWeight( Order, LIS->getInterval(VirtRegToSplit), Cand.Intf.first(), Cand.Intf.last(), &CheapestEvictWeight); // Have we found an interval that can be evicted? if (FutureEvictedPhysReg) { - VirtRegAuxInfo VRAI(*MF, *LIS, VRM, getAnalysis<MachineLoopInfo>(), *MBFI); float splitArtifactWeight = - VRAI.futureWeight(LIS->getInterval(VirtRegToSplit), - Cand.Intf.first().getPrevIndex(), Cand.Intf.last()); + VRAI->futureWeight(LIS->getInterval(VirtRegToSplit), + Cand.Intf.first().getPrevIndex(), Cand.Intf.last()); // Will the weight of the local interval be higher than the cheapest evictee // weight? If so it will evict it and will not cause a spill. 
if (splitArtifactWeight >= 0 && splitArtifactWeight > CheapestEvictWeight) @@ -1578,11 +1588,11 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, bool *CanCauseEvictionChain) { BlockFrequency GlobalCost = 0; const BitVector &LiveBundles = Cand.LiveBundles; - unsigned VirtRegToSplit = SA->getParent().reg; + Register VirtRegToSplit = SA->getParent().reg(); ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); - for (unsigned i = 0; i != UseBlocks.size(); ++i) { - const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; - SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; + for (unsigned I = 0; I != UseBlocks.size(); ++I) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[I]; + SpillPlacement::BlockConstraint &BC = SplitConstraints[I]; bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)]; bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)]; unsigned Ins = 0; @@ -1620,8 +1630,7 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); } - for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { - unsigned Number = Cand.ActiveBlocks[i]; + for (unsigned Number : Cand.ActiveBlocks) { bool RegIn = LiveBundles[Bundles->getBundle(Number, false)]; bool RegOut = LiveBundles[Bundles->getBundle(Number, true)]; if (!RegIn && !RegOut) @@ -1679,13 +1688,12 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, // Isolate even single instructions when dealing with a proper sub-class. // That guarantees register class inflation for the stack interval because it // is all copies. - unsigned Reg = SA->getParent().reg; + Register Reg = SA->getParent().reg(); bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); // First handle all the blocks with uses. 
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); - for (unsigned i = 0; i != UseBlocks.size(); ++i) { - const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + for (const SplitAnalysis::BlockInfo &BI : UseBlocks) { unsigned Number = BI.MBB->getNumber(); unsigned IntvIn = 0, IntvOut = 0; SlotIndex IntfIn, IntfOut; @@ -1730,8 +1738,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, BitVector Todo = SA->getThroughBlocks(); for (unsigned c = 0; c != UsedCands.size(); ++c) { ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks; - for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { - unsigned Number = Blocks[i]; + for (unsigned Number : Blocks) { if (!Todo.test(Number)) continue; Todo.reset(Number); @@ -1774,8 +1781,8 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, // - Candidate intervals can be assigned to Cand.PhysReg. // - Block-local splits are candidates for local splitting. // - DCE leftovers should go back on the queue. - for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { - LiveInterval &Reg = LIS->getInterval(LREdit.get(i)); + for (unsigned I = 0, E = LREdit.size(); I != E; ++I) { + LiveInterval &Reg = LIS->getInterval(LREdit.get(I)); // Ignore old intervals from DCE. if (getStage(Reg) != RS_New) @@ -1783,14 +1790,14 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, // Remainder interval. Don't try splitting again, spill if it doesn't // allocate. - if (IntvMap[i] == 0) { + if (IntvMap[I] == 0) { setStage(Reg, RS_Spill); continue; } // Global intervals. Allow repeated splitting as long as the number of live // blocks is strictly decreasing. 
- if (IntvMap[i] < NumGlobalIntvs) { + if (IntvMap[I] < NumGlobalIntvs) { if (SA->countLiveBlocks(&Reg) >= OrigBlocks) { LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks << " blocks as original.\n"); @@ -1808,10 +1815,11 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, MF->verify(this, "After splitting live range around region"); } -unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<Register> &NewVRegs) { +MCRegister RAGreedy::tryRegionSplit(LiveInterval &VirtReg, + AllocationOrder &Order, + SmallVectorImpl<Register> &NewVRegs) { if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg)) - return 0; + return MCRegister::NoRegister; unsigned NumCands = 0; BlockFrequency SpillCost = calcSpillCost(); BlockFrequency BestCost; @@ -1841,12 +1849,12 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, // current max frequency. if (HasCompact && (BestCost > SpillCost) && (BestCand != NoCand) && CanCauseEvictionChain) { - return 0; + return MCRegister::NoRegister; } // No solutions found, fall back to single block splitting. 
if (!HasCompact && BestCand == NoCand) - return 0; + return MCRegister::NoRegister; return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs); } @@ -1857,8 +1865,8 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, unsigned &NumCands, bool IgnoreCSR, bool *CanCauseEvictionChain) { unsigned BestCand = NoCand; - Order.rewind(); - while (unsigned PhysReg = Order.next()) { + for (MCPhysReg PhysReg : Order) { + assert(PhysReg); if (IgnoreCSR && isUnusedCalleeSavedReg(PhysReg)) continue; @@ -1867,12 +1875,12 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, if (NumCands == IntfCache.getMaxCursors()) { unsigned WorstCount = ~0u; unsigned Worst = 0; - for (unsigned i = 0; i != NumCands; ++i) { - if (i == BestCand || !GlobalCand[i].PhysReg) + for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) { + if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg) continue; - unsigned Count = GlobalCand[i].LiveBundles.count(); + unsigned Count = GlobalCand[CandIndex].LiveBundles.count(); if (Count < WorstCount) { - Worst = i; + Worst = CandIndex; WorstCount = Count; } } @@ -1923,8 +1931,8 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, LLVM_DEBUG({ dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost) << " with bundles"; - for (int i : Cand.LiveBundles.set_bits()) - dbgs() << " EB#" << i; + for (int I : Cand.LiveBundles.set_bits()) + dbgs() << " EB#" << I; dbgs() << ".\n"; }); if (Cost < BestCost) { @@ -1942,7 +1950,7 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, // See splitCanCauseEvictionChain for detailed description of bad // eviction chain scenarios. 
LLVM_DEBUG(dbgs() << "Best split candidate of vreg " - << printReg(VirtReg.reg, TRI) << " may "); + << printReg(VirtReg.reg(), TRI) << " may "); if (!(*CanCauseEvictionChain)) LLVM_DEBUG(dbgs() << "not "); LLVM_DEBUG(dbgs() << "cause bad eviction chain\n"); @@ -2001,13 +2009,12 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<Register> &NewVRegs) { assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); - Register Reg = VirtReg.reg; + Register Reg = VirtReg.reg(); bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); SE->reset(LREdit, SplitSpillMode); ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); - for (unsigned i = 0; i != UseBlocks.size(); ++i) { - const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + for (const SplitAnalysis::BlockInfo &BI : UseBlocks) { if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) SE->splitSingleBlock(BI); } @@ -2026,9 +2033,9 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Sort out the new intervals created by splitting. The remainder interval // goes straight to spilling, the new local ranges get to stay RS_New. - for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { - LiveInterval &LI = LIS->getInterval(LREdit.get(i)); - if (getStage(LI) == RS_New && IntvMap[i] == 0) + for (unsigned I = 0, E = LREdit.size(); I != E; ++I) { + LiveInterval &LI = LIS->getInterval(LREdit.get(I)); + if (getStage(LI) == RS_New && IntvMap[I] == 0) setStage(LI, RS_Spill); } @@ -2044,7 +2051,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// Get the number of allocatable registers that match the constraints of \p Reg /// on \p MI and that are also in \p SuperRC. 
static unsigned getNumAllocatableRegsForConstraints( - const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC, + const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, const RegisterClassInfo &RCI) { assert(SuperRC && "Invalid register class"); @@ -2067,7 +2074,7 @@ static unsigned getNumAllocatableRegsForConstraints( unsigned RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<Register> &NewVRegs) { - const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); + const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg()); // There is no point to this if there are no larger sub-classes. if (!RegClassInfo.isProperSubClass(CurRC)) return 0; @@ -2091,18 +2098,18 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, // the constraints on the virtual register. // Otherwise, splitting just inserts uncoalescable copies that do not help // the allocation. 
- for (unsigned i = 0; i != Uses.size(); ++i) { - if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i])) + for (const auto &Use : Uses) { + if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) if (MI->isFullCopy() || SuperRCNumAllocatableRegs == - getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII, - TRI, RCI)) { - LLVM_DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI); + getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC, + TII, TRI, RCI)) { + LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI); continue; } SE->openIntv(); - SlotIndex SegStart = SE->enterIntvBefore(Uses[i]); - SlotIndex SegStop = SE->leaveIntvAfter(Uses[i]); + SlotIndex SegStart = SE->enterIntvBefore(Use); + SlotIndex SegStop = SE->leaveIntvAfter(Use); SE->useIntv(SegStart, SegStop); } @@ -2113,7 +2120,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVector<unsigned, 8> IntvMap; SE->finish(&IntvMap); - DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS); + DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS); ExtraRegInfo.resize(MRI->getNumVirtRegs()); // Assign all new registers to RS_Spill. This was the last chance. @@ -2128,9 +2135,9 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// calcGapWeights - Compute the maximum spill weight that needs to be evicted /// in order to use PhysReg between two entries in SA->UseSlots. /// -/// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1]. +/// GapWeight[I] represents the gap between UseSlots[I] and UseSlots[I + 1]. 
/// -void RAGreedy::calcGapWeights(unsigned PhysReg, +void RAGreedy::calcGapWeights(MCRegister PhysReg, SmallVectorImpl<float> &GapWeight) { assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); @@ -2169,7 +2176,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, break; // Update the gaps covered by IntI. - const float weight = IntI.value()->weight; + const float weight = IntI.value()->weight(); for (; Gap != NumGaps; ++Gap) { GapWeight[Gap] = std::max(GapWeight[Gap], weight); if (Uses[Gap+1].getBaseIndex() >= IntI.stop()) @@ -2231,8 +2238,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, LLVM_DEBUG({ dbgs() << "tryLocalSplit: "; - for (unsigned i = 0, e = Uses.size(); i != e; ++i) - dbgs() << ' ' << Uses[i]; + for (const auto &Use : Uses) + dbgs() << ' ' << Use; dbgs() << '\n'; }); @@ -2244,25 +2251,25 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber()); LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:"); // Constrain to VirtReg's live range. - unsigned ri = + unsigned RI = llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin(); - unsigned re = RMS.size(); - for (unsigned i = 0; i != NumGaps && ri != re; ++i) { - // Look for Uses[i] <= RMS <= Uses[i+1]. - assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i])); - if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri])) + unsigned RE = RMS.size(); + for (unsigned I = 0; I != NumGaps && RI != RE; ++I) { + // Look for Uses[I] <= RMS <= Uses[I + 1]. + assert(!SlotIndex::isEarlierInstr(RMS[RI], Uses[I])); + if (SlotIndex::isEarlierInstr(Uses[I + 1], RMS[RI])) continue; // Skip a regmask on the same instruction as the last use. It doesn't // overlap the live range. 
- if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps) + if (SlotIndex::isSameInstr(Uses[I + 1], RMS[RI]) && I + 1 == NumGaps) break; - LLVM_DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' - << Uses[i + 1]); - RegMaskGaps.push_back(i); + LLVM_DEBUG(dbgs() << ' ' << RMS[RI] << ':' << Uses[I] << '-' + << Uses[I + 1]); + RegMaskGaps.push_back(I); // Advance ri to the next gap. A regmask on one of the uses counts in // both gaps. - while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1])) - ++ri; + while (RI != RE && SlotIndex::isEarlierInstr(RMS[RI], Uses[I + 1])) + ++RI; } LLVM_DEBUG(dbgs() << '\n'); } @@ -2297,16 +2304,16 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, (1.0f / MBFI->getEntryFreq()); SmallVector<float, 8> GapWeight; - Order.rewind(); - while (unsigned PhysReg = Order.next()) { + for (MCPhysReg PhysReg : Order) { + assert(PhysReg); // Keep track of the largest spill weight that would need to be evicted in - // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1]. + // order to make use of PhysReg between UseSlots[I] and UseSlots[I + 1]. calcGapWeights(PhysReg, GapWeight); // Remove any gaps with regmask clobbers. if (Matrix->checkRegMaskInterference(VirtReg, PhysReg)) - for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i) - GapWeight[RegMaskGaps[i]] = huge_valf; + for (unsigned I = 0, E = RegMaskGaps.size(); I != E; ++I) + GapWeight[RegMaskGaps[I]] = huge_valf; // Try to find the best sequence of gaps to close. // The new spill weight must be larger than any gap interference. 
@@ -2324,7 +2331,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut; LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore] - << '-' << Uses[SplitAfter] << " i=" << MaxGap); + << '-' << Uses[SplitAfter] << " I=" << MaxGap); // Stop before the interval gets so big we wouldn't be making progress. if (!LiveBefore && !LiveAfter) { @@ -2373,8 +2380,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Recompute the max when necessary. if (GapWeight[SplitBefore - 1] >= MaxGap) { MaxGap = GapWeight[SplitBefore]; - for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i) - MaxGap = std::max(MaxGap, GapWeight[i]); + for (unsigned I = SplitBefore + 1; I != SplitAfter; ++I) + MaxGap = std::max(MaxGap, GapWeight[I]); } continue; } @@ -2409,7 +2416,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, SE->useIntv(SegStart, SegStop); SmallVector<unsigned, 8> IntvMap; SE->finish(&IntvMap); - DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS); + DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS); // If the new range has the same number of instructions as before, mark it as // RS_Split2 so the next split will be forced to make progress. 
Otherwise, @@ -2420,10 +2427,10 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, if (NewGaps >= NumGaps) { LLVM_DEBUG(dbgs() << "Tagging non-progress ranges: "); assert(!ProgressRequired && "Didn't make progress when it was required."); - for (unsigned i = 0, e = IntvMap.size(); i != e; ++i) - if (IntvMap[i] == 1) { - setStage(LIS->getInterval(LREdit.get(i)), RS_Split2); - LLVM_DEBUG(dbgs() << printReg(LREdit.get(i))); + for (unsigned I = 0, E = IntvMap.size(); I != E; ++I) + if (IntvMap[I] == 1) { + setStage(LIS->getInterval(LREdit.get(I)), RS_Split2); + LLVM_DEBUG(dbgs() << printReg(LREdit.get(I))); } LLVM_DEBUG(dbgs() << '\n'); } @@ -2477,7 +2484,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, // ranges already made dubious progress with region splitting, so they go // straight to single block splitting. if (getStage(VirtReg) < RS_Split2) { - unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); + MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); if (PhysReg || !NewVRegs.empty()) return PhysReg; } @@ -2507,11 +2514,10 @@ static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) { /// for \p VirtReg. /// \p FixedRegisters contains all the virtual registers that cannot be /// recolored. 
-bool -RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, - SmallLISet &RecoloringCandidates, - const SmallVirtRegSet &FixedRegisters) { - const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); +bool RAGreedy::mayRecolorAllInterferences( + MCRegister PhysReg, LiveInterval &VirtReg, SmallLISet &RecoloringCandidates, + const SmallVirtRegSet &FixedRegisters) { + const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg()); for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); @@ -2523,16 +2529,16 @@ RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, CutOffInfo |= CO_Interf; return false; } - for (unsigned i = Q.interferingVRegs().size(); i; --i) { - LiveInterval *Intf = Q.interferingVRegs()[i - 1]; + for (LiveInterval *Intf : reverse(Q.interferingVRegs())) { // If Intf is done and sit on the same register class as VirtReg, // it would not be recolorable as it is in the same state as VirtReg. // However, if VirtReg has tied defs and Intf doesn't, then // there is still a point in examining if it can be recolorable. if (((getStage(*Intf) == RS_Done && - MRI->getRegClass(Intf->reg) == CurRC) && - !(hasTiedDef(MRI, VirtReg.reg) && !hasTiedDef(MRI, Intf->reg))) || - FixedRegisters.count(Intf->reg)) { + MRI->getRegClass(Intf->reg()) == CurRC) && + !(hasTiedDef(MRI, VirtReg.reg()) && + !hasTiedDef(MRI, Intf->reg()))) || + FixedRegisters.count(Intf->reg())) { LLVM_DEBUG( dbgs() << "Early abort: the interference is not recolorable.\n"); return false; @@ -2587,6 +2593,9 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, SmallVectorImpl<Register> &NewVRegs, SmallVirtRegSet &FixedRegisters, unsigned Depth) { + if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg)) + return ~0u; + LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n'); // Ranges must be Done. 
assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) && @@ -2605,15 +2614,15 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, SmallLISet RecoloringCandidates; // Record the original mapping virtual register to physical register in case // the recoloring fails. - DenseMap<Register, Register> VirtRegToPhysReg; + DenseMap<Register, MCRegister> VirtRegToPhysReg; // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in // this recoloring "session". - assert(!FixedRegisters.count(VirtReg.reg)); - FixedRegisters.insert(VirtReg.reg); + assert(!FixedRegisters.count(VirtReg.reg())); + FixedRegisters.insert(VirtReg.reg()); SmallVector<Register, 4> CurrentNewVRegs; - Order.rewind(); - while (Register PhysReg = Order.next()) { + for (MCRegister PhysReg : Order) { + assert(PhysReg.isValid()); LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to " << printReg(PhysReg, TRI) << '\n'); RecoloringCandidates.clear(); @@ -2644,7 +2653,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, for (SmallLISet::iterator It = RecoloringCandidates.begin(), EndIt = RecoloringCandidates.end(); It != EndIt; ++It) { - Register ItVirtReg = (*It)->reg; + Register ItVirtReg = (*It)->reg(); enqueue(RecoloringQueue, *It); assert(VRM->hasPhys(ItVirtReg) && "Interferences are supposed to be with allocated variables"); @@ -2697,10 +2706,10 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, for (SmallLISet::iterator It = RecoloringCandidates.begin(), EndIt = RecoloringCandidates.end(); It != EndIt; ++It) { - Register ItVirtReg = (*It)->reg; + Register ItVirtReg = (*It)->reg(); if (VRM->hasPhys(ItVirtReg)) Matrix->unassign(**It); - Register ItPhysReg = VirtRegToPhysReg[ItVirtReg]; + MCRegister ItPhysReg = VirtRegToPhysReg[ItVirtReg]; Matrix->assign(**It, ItPhysReg); } } @@ -2724,8 +2733,8 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, while (!RecoloringQueue.empty()) { LiveInterval *LI = 
dequeue(RecoloringQueue); LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n'); - Register PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, - Depth + 1); + MCRegister PhysReg = + selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1); // When splitting happens, the live-range may actually be empty. // In that case, this is okay to continue the recoloring even // if we did not find an alternative color for it. Indeed, @@ -2743,7 +2752,7 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, << " succeeded with: " << printReg(PhysReg, TRI) << '\n'); Matrix->assign(*LI, PhysReg); - FixedRegisters.insert(LI->reg); + FixedRegisters.insert(LI->reg()); } return true; } @@ -2752,12 +2761,12 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, // Main Entry Point //===----------------------------------------------------------------------===// -Register RAGreedy::selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl<Register> &NewVRegs) { +MCRegister RAGreedy::selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<Register> &NewVRegs) { CutOffInfo = CO_None; LLVMContext &Ctx = MF->getFunction().getContext(); SmallVirtRegSet FixedRegisters; - Register Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); + MCRegister Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); if (Reg == ~0U && (CutOffInfo != CO_None)) { uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf); if (CutOffEncountered == CO_Depth) @@ -2782,11 +2791,10 @@ Register RAGreedy::selectOrSplit(LiveInterval &VirtReg, /// Spilling a live range in the cold path can have lower cost than using /// the CSR for the first time. Returns the physical register if we decide /// to use the CSR; otherwise return 0. 
-unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, - AllocationOrder &Order, - Register PhysReg, - unsigned &CostPerUseLimit, - SmallVectorImpl<Register> &NewVRegs) { +MCRegister +RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, + MCRegister PhysReg, unsigned &CostPerUseLimit, + SmallVectorImpl<Register> &NewVRegs) { if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) { // We choose spill over using the CSR for the first time if the spill cost // is lower than CSRCost. @@ -2851,7 +2859,7 @@ void RAGreedy::initializeCSRCost() { /// Collect the hint info for \p Reg. /// The results are stored into \p Out. /// \p Out is not cleared before being populated. -void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) { +void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) { for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) { if (!Instr.isFullCopy()) continue; @@ -2863,9 +2871,8 @@ void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) { continue; } // Get the current assignment. - Register OtherPhysReg = Register::isPhysicalRegister(OtherReg) - ? OtherReg - : VRM->getPhys(OtherReg); + MCRegister OtherPhysReg = + OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg); // Push the collected information. Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg, OtherPhysReg)); @@ -2876,7 +2883,7 @@ void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) { /// \p PhysReg was used. /// \return The cost of \p List for \p PhysReg. BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List, - unsigned PhysReg) { + MCRegister PhysReg) { BlockFrequency Cost = 0; for (const HintInfo &Info : List) { if (Info.PhysReg != PhysReg) @@ -2897,11 +2904,11 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { // We have a broken hint, check if it is possible to fix it by // reusing PhysReg for the copy-related live-ranges. 
Indeed, we evicted // some register and PhysReg may be available for the other live-ranges. - SmallSet<unsigned, 4> Visited; + SmallSet<Register, 4> Visited; SmallVector<unsigned, 2> RecoloringCandidates; HintsInfo Info; - unsigned Reg = VirtReg.reg; - Register PhysReg = VRM->getPhys(Reg); + Register Reg = VirtReg.reg(); + MCRegister PhysReg = VRM->getPhys(Reg); // Start the recoloring algorithm from the input live-interval, then // it will propagate to the ones that are copy-related with it. Visited.insert(Reg); @@ -2922,7 +2929,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { // Get the live interval mapped with this virtual register to be able // to check for the interference with the new color. LiveInterval &LI = LIS->getInterval(Reg); - Register CurrPhys = VRM->getPhys(Reg); + MCRegister CurrPhys = VRM->getPhys(Reg); // Check that the new color matches the register class constraints and // that it is free for this live range. if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) || @@ -3003,33 +3010,35 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { /// getting rid of 2 copies. void RAGreedy::tryHintsRecoloring() { for (LiveInterval *LI : SetOfBrokenHints) { - assert(Register::isVirtualRegister(LI->reg) && + assert(Register::isVirtualRegister(LI->reg()) && "Recoloring is possible only for virtual registers"); // Some dead defs may be around (e.g., because of debug uses). // Ignore those. - if (!VRM->hasPhys(LI->reg)) + if (!VRM->hasPhys(LI->reg())) continue; tryHintRecoloring(*LI); } } -Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, - SmallVectorImpl<Register> &NewVRegs, - SmallVirtRegSet &FixedRegisters, - unsigned Depth) { +MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, + SmallVectorImpl<Register> &NewVRegs, + SmallVirtRegSet &FixedRegisters, + unsigned Depth) { unsigned CostPerUseLimit = ~0u; // First try assigning a free register. 
- AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); - if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) { + auto Order = + AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix); + if (MCRegister PhysReg = + tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) { // If VirtReg got an assignment, the eviction info is no longre relevant. - LastEvicted.clearEvicteeInfo(VirtReg.reg); + LastEvicted.clearEvicteeInfo(VirtReg.reg()); // When NewVRegs is not empty, we may have made decisions such as evicting // a virtual register, go with the earlier decisions and use the physical // register. if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) { - Register CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, - CostPerUseLimit, NewVRegs); + MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, + CostPerUseLimit, NewVRegs); if (CSRReg || !NewVRegs.empty()) // Return now if we decide to use a CSR or create new vregs due to // pre-splitting. @@ -3040,7 +3049,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, LiveRangeStage Stage = getStage(VirtReg); LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade " - << ExtraRegInfo[VirtReg.reg].Cascade << '\n'); + << ExtraRegInfo[VirtReg.reg()].Cascade << '\n'); // Try to evict a less worthy live range, but only for ranges from the primary // queue. The RS_Split ranges already failed to do this, and they should not @@ -3049,7 +3058,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, if (Register PhysReg = tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit, FixedRegisters)) { - Register Hint = MRI->getSimpleHint(VirtReg.reg); + Register Hint = MRI->getSimpleHint(VirtReg.reg()); // If VirtReg has a hint and that hint is broken record this // virtual register as a recoloring candidate for broken hint. 
// Indeed, since we evicted a variable in its neighborhood it is @@ -3059,7 +3068,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, SetOfBrokenHints.insert(&VirtReg); // If VirtReg eviction someone, the eviction info for it as an evictee is // no longre relevant. - LastEvicted.clearEvicteeInfo(VirtReg.reg); + LastEvicted.clearEvicteeInfo(VirtReg.reg()); return PhysReg; } @@ -3071,7 +3080,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, if (Stage < RS_Split) { setStage(VirtReg, RS_Split); LLVM_DEBUG(dbgs() << "wait for second round\n"); - NewVRegs.push_back(VirtReg.reg); + NewVRegs.push_back(VirtReg.reg()); return 0; } @@ -3081,7 +3090,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters); if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) { // If VirtReg got split, the eviction info is no longer relevant. - LastEvicted.clearEvicteeInfo(VirtReg.reg); + LastEvicted.clearEvicteeInfo(VirtReg.reg()); return PhysReg; } } @@ -3093,14 +3102,16 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, Depth); // Finally spill VirtReg itself. - if (EnableDeferredSpilling && getStage(VirtReg) < RS_Memory) { + if ((EnableDeferredSpilling || + TRI->shouldUseDeferredSpillingForVirtReg(*MF, VirtReg)) && + getStage(VirtReg) < RS_Memory) { // TODO: This is experimental and in particular, we do not model // the live range splitting done by spilling correctly. // We would need a deep integration with the spiller to do the // right thing here. Anyway, that is still good for early testing. 
setStage(VirtReg, RS_Memory); LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n"); - NewVRegs.push_back(VirtReg.reg); + NewVRegs.push_back(VirtReg.reg()); } else { NamedRegionTimer T("spill", "Spiller", TimerGroupName, TimerGroupDescription, TimePassesIsEnabled); @@ -3111,7 +3122,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // Tell LiveDebugVariables about the new ranges. Ranges not being covered by // the new regs are kept in LDV (still mapping to the old register), until // we rewrite spilled locations in LDV at a later stage. - DebugVars->splitRegister(VirtReg.reg, LRE.regs(), *LIS); + DebugVars->splitRegister(VirtReg.reg(), LRE.regs(), *LIS); if (VerifyEnabled) MF->verify(this, "After spilling"); @@ -3230,7 +3241,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { initializeCSRCost(); - calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI); + VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI); + + VRAI->calculateSpillWeightsAndHints(); LLVM_DEBUG(LIS->dump()); diff --git a/llvm/lib/CodeGen/RegAllocPBQP.cpp b/llvm/lib/CodeGen/RegAllocPBQP.cpp index 7590dbf1b977..7c5af1a0c56e 100644 --- a/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -140,14 +140,13 @@ public: MachineFunctionProperties::Property::NoPHIs); } + MachineFunctionProperties getClearedProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::IsSSA); + } + private: - using LI2NodeMap = std::map<const LiveInterval *, unsigned>; - using Node2LIMap = std::vector<const LiveInterval *>; - using AllowedSet = std::vector<unsigned>; - using AllowedSetMap = std::vector<AllowedSet>; - using RegPair = std::pair<unsigned, unsigned>; - using CoalesceMap = std::map<RegPair, PBQP::PBQPNum>; - using RegSet = std::set<unsigned>; + using RegSet = std::set<Register>; char *customPassID; @@ -199,7 +198,7 @@ public: for (auto NId : G.nodeIds()) { PBQP::PBQPNum SpillCost = - 
LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight; + LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight(); if (SpillCost == 0.0) SpillCost = std::numeric_limits<PBQP::PBQPNum>::min(); else @@ -231,9 +230,9 @@ private: return false; if (NRegs < MRegs) - return D.count(IKey(NRegs, MRegs)) > 0; + return D.contains(IKey(NRegs, MRegs)); - return D.count(IKey(MRegs, NRegs)) > 0; + return D.contains(IKey(MRegs, NRegs)); } void setDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId, @@ -290,7 +289,7 @@ private: // If two intervals end at the same point, we need a way to break the tie or // the set will assume they're actually equal and refuse to insert a // "duplicate". Just compare the vregs - fast and guaranteed unique. - return std::get<0>(I1)->reg < std::get<0>(I2)->reg; + return std::get<0>(I1)->reg() < std::get<0>(I2)->reg(); } static bool isAtLastSegment(const IntervalInfo &I) { @@ -331,7 +330,7 @@ public: // Start by building the inactive set. for (auto NId : G.nodeIds()) { - unsigned VReg = G.getNodeMetadata(NId).getVReg(); + Register VReg = G.getNodeMetadata(NId).getVReg(); LiveInterval &LI = LIS.getInterval(VReg); assert(!LI.empty() && "PBQP graph contains node for empty interval"); Inactive.push(std::make_tuple(&LI, 0, NId)); @@ -413,9 +412,9 @@ private: PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0); bool NodesInterfere = false; for (unsigned I = 0; I != NRegs.size(); ++I) { - unsigned PRegN = NRegs[I]; + MCRegister PRegN = NRegs[I]; for (unsigned J = 0; J != MRegs.size(); ++J) { - unsigned PRegM = MRegs[J]; + MCRegister PRegM = MRegs[J]; if (TRI.regsOverlap(PRegN, PRegM)) { M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity(); NodesInterfere = true; @@ -448,11 +447,10 @@ public: if (!CP.setRegisters(&MI) || CP.getSrcReg() == CP.getDstReg()) continue; - unsigned DstReg = CP.getDstReg(); - unsigned SrcReg = CP.getSrcReg(); + Register DstReg = CP.getDstReg(); + Register SrcReg = CP.getSrcReg(); - const 
float Scale = 1.0f / MBFI.getEntryFreq(); - PBQP::PBQPNum CBenefit = MBFI.getBlockFreq(&MBB).getFrequency() * Scale; + PBQP::PBQPNum CBenefit = MBFI.getBlockFreqRelativeToEntryBlock(&MBB); if (CP.isPhys()) { if (!MF.getRegInfo().isAllocatable(DstReg)) @@ -464,7 +462,7 @@ public: G.getNodeMetadata(NId).getAllowedRegs(); unsigned PRegOpt = 0; - while (PRegOpt < Allowed.size() && Allowed[PRegOpt] != DstReg) + while (PRegOpt < Allowed.size() && Allowed[PRegOpt].id() != DstReg) ++PRegOpt; if (PRegOpt < Allowed.size()) { @@ -509,9 +507,9 @@ private: assert(CostMat.getRows() == Allowed1.size() + 1 && "Size mismatch."); assert(CostMat.getCols() == Allowed2.size() + 1 && "Size mismatch."); for (unsigned I = 0; I != Allowed1.size(); ++I) { - unsigned PReg1 = Allowed1[I]; + MCRegister PReg1 = Allowed1[I]; for (unsigned J = 0; J != Allowed2.size(); ++J) { - unsigned PReg2 = Allowed2[J]; + MCRegister PReg2 = Allowed2[J]; if (PReg1 == PReg2) CostMat[I + 1][J + 1] -= Benefit; } @@ -519,6 +517,20 @@ private: } }; +/// PBQP-specific implementation of weight normalization. +class PBQPVirtRegAuxInfo final : public VirtRegAuxInfo { + float normalize(float UseDefFreq, unsigned Size, unsigned NumInstr) override { + // All intervals have a spill weight that is mostly proportional to the + // number of uses, with uses in loops having a bigger weight. + return NumInstr * VirtRegAuxInfo::normalize(UseDefFreq, Size, 1); + } + +public: + PBQPVirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM, + const MachineLoopInfo &Loops, + const MachineBlockFrequencyInfo &MBFI) + : VirtRegAuxInfo(MF, LIS, VRM, Loops, MBFI) {} +}; } // end anonymous namespace // Out-of-line destructor/anchor for PBQPRAConstraint. @@ -558,18 +570,19 @@ void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF, // Iterate over all live ranges. 
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - unsigned Reg = Register::index2VirtReg(I); + Register Reg = Register::index2VirtReg(I); if (MRI.reg_nodbg_empty(Reg)) continue; VRegsToAlloc.insert(Reg); } } -static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI, +static bool isACalleeSavedRegister(MCRegister Reg, + const TargetRegisterInfo &TRI, const MachineFunction &MF) { const MCPhysReg *CSR = MF.getRegInfo().getCalleeSavedRegs(); for (unsigned i = 0; CSR[i] != 0; ++i) - if (TRI.regsOverlap(reg, CSR[i])) + if (TRI.regsOverlap(Reg, CSR[i])) return true; return false; } @@ -583,12 +596,12 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, const TargetRegisterInfo &TRI = *G.getMetadata().MF.getSubtarget().getRegisterInfo(); - std::vector<unsigned> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end()); + std::vector<Register> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end()); - std::map<unsigned, std::vector<unsigned>> VRegAllowedMap; + std::map<Register, std::vector<MCRegister>> VRegAllowedMap; while (!Worklist.empty()) { - unsigned VReg = Worklist.back(); + Register VReg = Worklist.back(); Worklist.pop_back(); LiveInterval &VRegLI = LIS.getInterval(VReg); @@ -596,8 +609,8 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, // If this is an empty interval move it to the EmptyIntervalVRegs set then // continue. if (VRegLI.empty()) { - EmptyIntervalVRegs.insert(VRegLI.reg); - VRegsToAlloc.erase(VRegLI.reg); + EmptyIntervalVRegs.insert(VRegLI.reg()); + VRegsToAlloc.erase(VRegLI.reg()); continue; } @@ -608,10 +621,10 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps); // Compute an initial allowed set for the current vreg. 
- std::vector<unsigned> VRegAllowed; + std::vector<MCRegister> VRegAllowed; ArrayRef<MCPhysReg> RawPRegOrder = TRC->getRawAllocationOrder(MF); for (unsigned I = 0; I != RawPRegOrder.size(); ++I) { - unsigned PReg = RawPRegOrder[I]; + MCRegister PReg(RawPRegOrder[I]); if (MRI.isReserved(PReg)) continue; @@ -639,10 +652,11 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, if (VRegAllowed.empty()) { SmallVector<Register, 8> NewVRegs; spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller); - Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end()); + llvm::append_range(Worklist, NewVRegs); continue; - } else - VRegAllowedMap[VReg] = std::move(VRegAllowed); + } + + VRegAllowedMap[VReg.id()] = std::move(VRegAllowed); } for (auto &KV : VRegAllowedMap) { @@ -685,7 +699,7 @@ void RegAllocPBQP::spillVReg(Register VReg, const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); (void)TRI; LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> SPILLED (Cost: " - << LRE.getParent().weight << ", New vregs: "); + << LRE.getParent().weight() << ", New vregs: "); // Copy any newly inserted live intervals into the list of regs to // allocate. @@ -693,8 +707,8 @@ void RegAllocPBQP::spillVReg(Register VReg, I != E; ++I) { const LiveInterval &LI = LIS.getInterval(*I); assert(!LI.empty() && "Empty spill range."); - LLVM_DEBUG(dbgs() << printReg(LI.reg, &TRI) << " "); - VRegsToAlloc.insert(LI.reg); + LLVM_DEBUG(dbgs() << printReg(LI.reg(), &TRI) << " "); + VRegsToAlloc.insert(LI.reg()); } LLVM_DEBUG(dbgs() << ")\n"); @@ -718,11 +732,11 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G, // Iterate over the nodes mapping the PBQP solution to a register // assignment. 
for (auto NId : G.nodeIds()) { - unsigned VReg = G.getNodeMetadata(NId).getVReg(); - unsigned AllocOption = Solution.getSelection(NId); + Register VReg = G.getNodeMetadata(NId).getVReg(); + unsigned AllocOpt = Solution.getSelection(NId); - if (AllocOption != PBQP::RegAlloc::getSpillOptionIdx()) { - unsigned PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOption - 1]; + if (AllocOpt != PBQP::RegAlloc::getSpillOptionIdx()) { + MCRegister PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOpt - 1]; LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> " << TRI.getName(PReg) << "\n"); assert(PReg != 0 && "Invalid preg selected."); @@ -750,12 +764,12 @@ void RegAllocPBQP::finalizeAlloc(MachineFunction &MF, I != E; ++I) { LiveInterval &LI = LIS.getInterval(*I); - unsigned PReg = MRI.getSimpleHint(LI.reg); + Register PReg = MRI.getSimpleHint(LI.reg()); if (PReg == 0) { - const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg); + const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg()); const ArrayRef<MCPhysReg> RawPRegOrder = RC.getRawAllocationOrder(MF); - for (unsigned CandidateReg : RawPRegOrder) { + for (MCRegister CandidateReg : RawPRegOrder) { if (!VRM.getRegInfo().isReserved(CandidateReg)) { PReg = CandidateReg; break; @@ -765,7 +779,7 @@ void RegAllocPBQP::finalizeAlloc(MachineFunction &MF, "No un-reserved physical registers in this register class"); } - VRM.assignVirt2Phys(LI.reg, PReg); + VRM.assignVirt2Phys(LI.reg(), PReg); } } @@ -779,13 +793,6 @@ void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) { DeadRemats.clear(); } -static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size, - unsigned NumInstr) { - // All intervals have a spill weight that is mostly proportional to the number - // of uses, with uses in loops having a bigger weight. 
- return NumInstr * normalizeSpillWeight(UseDefFreq, Size, 1); -} - bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { LiveIntervals &LIS = getAnalysis<LiveIntervals>(); MachineBlockFrequencyInfo &MBFI = @@ -793,8 +800,8 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { VirtRegMap &VRM = getAnalysis<VirtRegMap>(); - calculateSpillWeightsAndHints(LIS, MF, &VRM, getAnalysis<MachineLoopInfo>(), - MBFI, normalizePBQPSpillWeight); + PBQPVirtRegAuxInfo VRAI(MF, LIS, VRM, getAnalysis<MachineLoopInfo>(), MBFI); + VRAI.calculateSpillWeightsAndHints(); std::unique_ptr<Spiller> VRegSpiller(createInlineSpiller(*this, MF, VRM)); @@ -878,7 +885,7 @@ static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId, return Printable([NId, &G](raw_ostream &OS) { const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo(); const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); - unsigned VReg = G.getNodeMetadata(NId).getVReg(); + Register VReg = G.getNodeMetadata(NId).getVReg(); const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg)); OS << NId << " (" << RegClassName << ':' << printReg(VReg, TRI) << ')'; }); diff --git a/llvm/lib/CodeGen/RegisterClassInfo.cpp b/llvm/lib/CodeGen/RegisterClassInfo.cpp index 1523bd4d1649..0488db3d09cb 100644 --- a/llvm/lib/CodeGen/RegisterClassInfo.cpp +++ b/llvm/lib/CodeGen/RegisterClassInfo.cpp @@ -188,7 +188,14 @@ unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const { } assert(RC && "Failed to find register class"); compute(RC); - unsigned NReserved = RC->getNumRegs() - getNumAllocatableRegs(RC); - return TRI->getRegPressureSetLimit(*MF, Idx) - - TRI->getRegClassWeight(RC).RegWeight * NReserved; + unsigned NAllocatableRegs = getNumAllocatableRegs(RC); + unsigned RegPressureSetLimit = TRI->getRegPressureSetLimit(*MF, Idx); + // If all the regs are reserved, return raw RegPressureSetLimit. + // One example is VRSAVERC in PowerPC. 
+ // Avoid returning zero, getRegPressureSetLimit(Idx) assumes computePSetLimit + // return non-zero value. + if (NAllocatableRegs == 0) + return RegPressureSetLimit; + unsigned NReserved = RC->getNumRegs() - NAllocatableRegs; + return RegPressureSetLimit - TRI->getRegClassWeight(RC).RegWeight * NReserved; } diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 17160a9f42cd..7fdc85a6e444 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -137,13 +137,13 @@ namespace { /// ordered-by-slot-index set of DBG_VALUEs, to help quick /// identification of whether coalescing may change location validity. using DbgValueLoc = std::pair<SlotIndex, MachineInstr*>; - DenseMap<unsigned, std::vector<DbgValueLoc>> DbgVRegToValues; + DenseMap<Register, std::vector<DbgValueLoc>> DbgVRegToValues; /// VRegs may be repeatedly coalesced, and have many DBG_VALUEs attached. /// To avoid repeatedly merging sets of DbgValueLocs, instead record /// which vregs have been coalesced, and where to. This map is from /// vreg => {set of vregs merged in}. - DenseMap<unsigned, SmallVector<unsigned, 4>> DbgMergedVRegNums; + DenseMap<Register, SmallVector<Register, 4>> DbgMergedVRegNums; /// A LaneMask to remember on which subregister live ranges we need to call /// shrinkToUses() later. @@ -173,16 +173,16 @@ namespace { SmallVector<MachineInstr*, 8> DeadDefs; /// Virtual registers to be considered for register class inflation. - SmallVector<unsigned, 8> InflateRegs; + SmallVector<Register, 8> InflateRegs; /// The collection of live intervals which should have been updated /// immediately after rematerialiation but delayed until /// lateLiveIntervalUpdate is called. - DenseSet<unsigned> ToBeUpdated; + DenseSet<Register> ToBeUpdated; /// Record how many times the large live interval with many valnos /// has been tried to join with other live interval. 
- DenseMap<unsigned, unsigned long> LargeLIVisitCounter; + DenseMap<Register, unsigned long> LargeLIVisitCounter; /// Recursively eliminate dead defs in DeadDefs. void eliminateDeadDefs(); @@ -211,6 +211,18 @@ namespace { /// live interval update is costly. void lateLiveIntervalUpdate(); + /// Check if the incoming value defined by a COPY at \p SLRQ in the subrange + /// has no value defined in the predecessors. If the incoming value is the + /// same as defined by the copy itself, the value is considered undefined. + bool copyValueUndefInPredecessors(LiveRange &S, + const MachineBasicBlock *MBB, + LiveQueryResult SLRQ); + + /// Set necessary undef flags on subregister uses after pruning out undef + /// lane segments from the subrange. + void setUndefOnPrunedSubRegUses(LiveInterval &LI, Register Reg, + LaneBitmask PrunedLanes); + /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the /// src/dst of the copy instruction CopyMI. This returns true if the copy /// was successfully coalesced away. If it is not currently possible to @@ -285,7 +297,7 @@ namespace { /// number if it is not zero. If DstReg is a physical register and the /// existing subregister number of the def / use being updated is not zero, /// make sure to set it to the correct physical subregister. - void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); + void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx); /// If the given machine operand reads only undefined lanes add an undef /// flag. 
@@ -351,7 +363,7 @@ namespace { JoinVals &LHSVals, LiveRange &RHS, JoinVals &RHSVals); - void checkMergingChangesDbgValuesImpl(unsigned Reg, LiveRange &OtherRange, + void checkMergingChangesDbgValuesImpl(Register Reg, LiveRange &OtherRange, LiveRange &RegRange, JoinVals &Vals2); public: @@ -388,8 +400,8 @@ INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", "Simple Register Coalescing", false, false) LLVM_NODISCARD static bool isMoveInstr(const TargetRegisterInfo &tri, - const MachineInstr *MI, unsigned &Src, - unsigned &Dst, unsigned &SrcSub, + const MachineInstr *MI, Register &Src, + Register &Dst, unsigned &SrcSub, unsigned &DstSub) { if (MI->isCopy()) { Dst = MI->getOperand(0).getReg(); @@ -424,12 +436,13 @@ static bool isSplitEdge(const MachineBasicBlock *MBB) { } bool CoalescerPair::setRegisters(const MachineInstr *MI) { - SrcReg = DstReg = 0; + SrcReg = DstReg = Register(); SrcIdx = DstIdx = 0; NewRC = nullptr; Flipped = CrossClass = false; - unsigned Src, Dst, SrcSub, DstSub; + Register Src, Dst; + unsigned SrcSub = 0, DstSub = 0; if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub)) return false; Partial = SrcSub || DstSub; @@ -523,7 +536,8 @@ bool CoalescerPair::flip() { bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { if (!MI) return false; - unsigned Src, Dst, SrcSub, DstSub; + Register Src, Dst; + unsigned SrcSub = 0, DstSub = 0; if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub)) return false; @@ -536,8 +550,8 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { } // Now check that Dst matches DstReg. - if (Register::isPhysicalRegister(DstReg)) { - if (!Register::isPhysicalRegister(Dst)) + if (DstReg.isPhysical()) { + if (!Dst.isPhysical()) return false; assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state."); // DstSub could be set for a physreg from INSERT_SUBREG. 
@@ -547,7 +561,7 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { if (!SrcSub) return DstReg == Dst; // This is a partial register copy. Check that the parts match. - return TRI.getSubReg(DstReg, SrcSub) == Dst; + return Register(TRI.getSubReg(DstReg, SrcSub)) == Dst; } else { // DstReg is virtual. if (DstReg != Dst) @@ -649,7 +663,7 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, // in IntB, we can merge them. if (ValS+1 != BS) return false; - LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg, TRI)); + LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg(), TRI)); SlotIndex FillerStart = ValS->end, FillerEnd = BS->start; // We are about to delete CopyMI, so need to remove it as the 'instruction @@ -692,13 +706,13 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, // If the source instruction was killing the source register before the // merge, unset the isKill marker given the live range has been extended. - int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg, true); + int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg(), true); if (UIdx != -1) { ValSEndInst->getOperand(UIdx).setIsKill(false); } // Rewrite the copy. - CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI); + CopyMI->substituteRegister(IntA.reg(), IntB.reg(), 0, *TRI); // If the copy instruction was killing the destination register or any // subrange before the merge trim the live range. bool RecomputeLiveRange = AS->end == CopyIdx; @@ -817,7 +831,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, return { false, false }; // If DefMI is a two-address instruction then commuting it will change the // destination register. 
- int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg); + int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg()); assert(DefIdx != -1); unsigned UseOpIdx; if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) @@ -838,7 +852,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); Register NewReg = NewDstMO.getReg(); - if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill()) + if (NewReg != IntB.reg() || !IntB.Query(AValNo->def).isKill()) return { false, false }; // Make sure there are no other definitions of IntB that would reach the @@ -848,7 +862,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // If some of the uses of IntA.reg is already coalesced away, return false. // It's not possible to determine whether it's safe to perform the coalescing. - for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg)) { + for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg())) { MachineInstr *UseMI = MO.getParent(); unsigned OpNo = &MO - &UseMI->getOperand(0); SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI); @@ -870,9 +884,9 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx); if (!NewMI) return { false, false }; - if (Register::isVirtualRegister(IntA.reg) && - Register::isVirtualRegister(IntB.reg) && - !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg))) + if (Register::isVirtualRegister(IntA.reg()) && + Register::isVirtualRegister(IntB.reg()) && + !MRI->constrainRegClass(IntB.reg(), MRI->getRegClass(IntA.reg()))) return { false, false }; if (NewMI != DefMI) { LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI); @@ -891,9 +905,10 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // = B // Update uses of IntA of the specific Val# with IntB. 
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg), + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg()), UE = MRI->use_end(); - UI != UE; /* ++UI is below because of possible MI removal */) { + UI != UE; + /* ++UI is below because of possible MI removal */) { MachineOperand &UseMO = *UI; ++UI; if (UseMO.isUndef()) @@ -920,7 +935,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, continue; if (!UseMI->isCopy()) continue; - if (UseMI->getOperand(0).getReg() != IntB.reg || + if (UseMI->getOperand(0).getReg() != IntB.reg() || UseMI->getOperand(0).getSubReg()) continue; @@ -951,10 +966,10 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); if (IntA.hasSubRanges() || IntB.hasSubRanges()) { if (!IntA.hasSubRanges()) { - LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg); + LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg()); IntA.createSubRangeFrom(Allocator, Mask, IntA); } else if (!IntB.hasSubRanges()) { - LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg); + LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg()); IntB.createSubRangeFrom(Allocator, Mask, IntB); } SlotIndex AIdx = CopyIdx.getRegSlot(true); @@ -1100,8 +1115,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, continue; } // Check DefMI is a reverse copy and it is in BB Pred. - if (DefMI->getOperand(0).getReg() != IntA.reg || - DefMI->getOperand(1).getReg() != IntB.reg || + if (DefMI->getOperand(0).getReg() != IntA.reg() || + DefMI->getOperand(1).getReg() != IntB.reg() || DefMI->getParent() != Pred) { CopyLeftBB = Pred; continue; @@ -1158,8 +1173,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, // Insert new copy to CopyLeftBB. 
MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(), - TII->get(TargetOpcode::COPY), IntB.reg) - .addReg(IntA.reg); + TII->get(TargetOpcode::COPY), IntB.reg()) + .addReg(IntA.reg()); SlotIndex NewCopyIdx = LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot(); IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator()); @@ -1212,7 +1227,10 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, } ++I; } - LIS->extendToIndices(SR, EndPoints); + SmallVector<SlotIndex, 8> Undefs; + IntB.computeSubRangeUndefs(Undefs, SR.LaneMask, *MRI, + *LIS->getSlotIndexes()); + LIS->extendToIndices(SR, EndPoints, Undefs); } // If any dead defs were extended, truncate them. shrinkToUses(&IntB); @@ -1224,9 +1242,9 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, /// Returns true if @p MI defines the full vreg @p Reg, as opposed to just /// defining a subregister. -static bool definesFullReg(const MachineInstr &MI, unsigned Reg) { - assert(!Register::isPhysicalRegister(Reg) && - "This code cannot handle physreg aliasing"); +static bool definesFullReg(const MachineInstr &MI, Register Reg) { + assert(!Reg.isPhysical() && "This code cannot handle physreg aliasing"); + for (const MachineOperand &Op : MI.operands()) { if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) continue; @@ -1242,9 +1260,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI, bool &IsDefCopy) { IsDefCopy = false; - unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg(); + Register SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg(); unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx(); - unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg(); + Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg(); unsigned DstIdx = CP.isFlipped() ? 
CP.getSrcIdx() : CP.getDstIdx(); if (Register::isPhysicalRegister(SrcReg)) return false; @@ -1291,8 +1309,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF); if (!DefMI->isImplicitDef()) { - if (Register::isPhysicalRegister(DstReg)) { - unsigned NewDstReg = DstReg; + if (DstReg.isPhysical()) { + Register NewDstReg = DstReg; unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), DefMI->getOperand(0).getSubReg()); @@ -1366,7 +1384,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86). // We need to remember these so we can add intervals once we insert // NewMI into SlotIndexes. - SmallVector<unsigned, 4> NewMIImplDefs; + SmallVector<MCRegister, 4> NewMIImplDefs; for (unsigned i = NewMI.getDesc().getNumOperands(), e = NewMI.getNumOperands(); i != e; ++i) { @@ -1374,11 +1392,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, if (MO.isReg() && MO.isDef()) { assert(MO.isImplicit() && MO.isDead() && Register::isPhysicalRegister(MO.getReg())); - NewMIImplDefs.push_back(MO.getReg()); + NewMIImplDefs.push_back(MO.getReg().asMCReg()); } } - if (Register::isVirtualRegister(DstReg)) { + if (DstReg.isVirtual()) { unsigned NewIdx = NewMI.getOperand(0).getSubReg(); if (DefRC != nullptr) { @@ -1513,7 +1531,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) { - unsigned Reg = NewMIImplDefs[i]; + MCRegister Reg = NewMIImplDefs[i]; for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) if (LiveRange *LR = LIS->getCachedRegUnit(*Units)) LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); @@ -1571,7 +1589,8 @@ MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { // Note 
that we do not query CoalescerPair here but redo isMoveInstr as the // CoalescerPair may have a new register class with adjusted subreg indices // at this point. - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + Register SrcReg, DstReg; + unsigned SrcSubIdx = 0, DstSubIdx = 0; if(!isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) return nullptr; @@ -1696,7 +1715,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, } } -void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg, +void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx) { bool DstIsPhys = Register::isPhysicalRegister(DstReg); LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg); @@ -1752,7 +1771,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg, if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) { if (!DstInt->hasSubRanges()) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); - LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg); + LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg()); LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx); LaneBitmask UnusedLanes = FullMask & ~UsedLanes; DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt); @@ -1802,6 +1821,49 @@ bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) { return false; } +bool RegisterCoalescer::copyValueUndefInPredecessors( + LiveRange &S, const MachineBasicBlock *MBB, LiveQueryResult SLRQ) { + for (const MachineBasicBlock *Pred : MBB->predecessors()) { + SlotIndex PredEnd = LIS->getMBBEndIdx(Pred); + if (VNInfo *V = S.getVNInfoAt(PredEnd.getPrevSlot())) { + // If this is a self loop, we may be reading the same value. 
+ if (V->id != SLRQ.valueOutOrDead()->id) + return false; + } + } + + return true; +} + +void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI, + Register Reg, + LaneBitmask PrunedLanes) { + // If we had other instructions in the segment reading the undef sublane + // value, we need to mark them with undef. + for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { + unsigned SubRegIdx = MO.getSubReg(); + if (SubRegIdx == 0 || MO.isUndef()) + continue; + + LaneBitmask SubRegMask = TRI->getSubRegIndexLaneMask(SubRegIdx); + SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent()); + for (LiveInterval::SubRange &S : LI.subranges()) { + if (!S.liveAt(Pos) && (PrunedLanes & SubRegMask).any()) { + MO.setIsUndef(); + break; + } + } + } + + LI.removeEmptySubRanges(); + + // A def of a subregister may be a use of other register lanes. Replacing + // such a def with a def of a different register will eliminate the use, + // and may cause the recorded live range to be larger than the actual + // liveness in the program IR. + LIS->shrinkToUses(&LI); +} + bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { Again = false; LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI); @@ -1861,16 +1923,35 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { VNInfo *ReadVNI = LRQ.valueIn(); assert(ReadVNI && "No value before copy and no <undef> flag."); assert(ReadVNI != DefVNI && "Cannot read and define the same value."); - LI.MergeValueNumberInto(DefVNI, ReadVNI); + + // Track incoming undef lanes we need to eliminate from the subrange. + LaneBitmask PrunedLanes; + MachineBasicBlock *MBB = CopyMI->getParent(); // Process subregister liveranges. 
for (LiveInterval::SubRange &S : LI.subranges()) { LiveQueryResult SLRQ = S.Query(CopyIdx); if (VNInfo *SDefVNI = SLRQ.valueDefined()) { - VNInfo *SReadVNI = SLRQ.valueIn(); - S.MergeValueNumberInto(SDefVNI, SReadVNI); + if (VNInfo *SReadVNI = SLRQ.valueIn()) + SDefVNI = S.MergeValueNumberInto(SDefVNI, SReadVNI); + + // If this copy introduced an undef subrange from an incoming value, + // we need to eliminate the undef live in values from the subrange. + if (copyValueUndefInPredecessors(S, MBB, SLRQ)) { + LLVM_DEBUG(dbgs() << "Incoming sublane value is undef at copy\n"); + PrunedLanes |= S.LaneMask; + S.removeValNo(SDefVNI); + } } } + + LI.MergeValueNumberInto(DefVNI, ReadVNI); + if (PrunedLanes.any()) { + LLVM_DEBUG(dbgs() << "Pruning undef incoming lanes: " + << PrunedLanes << '\n'); + setUndefOnPrunedSubRegUses(LI, CP.getSrcReg(), PrunedLanes); + } + LLVM_DEBUG(dbgs() << "\tMerged values: " << LI << '\n'); } deleteInstr(CopyMI); @@ -1885,7 +1966,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { if (!canJoinPhys(CP)) { // Before giving up coalescing, if definition of source is defined by // trivial computation, try rematerializing it. - bool IsDefCopy; + bool IsDefCopy = false; if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy)) return true; if (IsDefCopy) @@ -1924,7 +2005,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { // If definition of source is defined by trivial computation, try // rematerializing it. - bool IsDefCopy; + bool IsDefCopy = false; if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy)) return true; @@ -1938,7 +2019,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { if (Changed) { deleteInstr(CopyMI); if (Shrink) { - unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg(); + Register DstReg = CP.isFlipped() ? 
CP.getSrcReg() : CP.getDstReg(); LiveInterval &DstLI = LIS->getInterval(DstReg); shrinkToUses(&DstLI); LLVM_DEBUG(dbgs() << "\t\tshrunk: " << DstLI << '\n'); @@ -1991,7 +2072,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { continue; LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask) << ")\n"); - LIS->shrinkToUses(S, LI.reg); + LIS->shrinkToUses(S, LI.reg()); } LI.removeEmptySubRanges(); } @@ -2030,8 +2111,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { } bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { - unsigned DstReg = CP.getDstReg(); - unsigned SrcReg = CP.getSrcReg(); + Register DstReg = CP.getDstReg(); + Register SrcReg = CP.getSrcReg(); assert(CP.isPhys() && "Must be a physreg copy"); assert(MRI->isReserved(DstReg) && "Not a reserved register"); LiveInterval &RHS = LIS->getInterval(SrcReg); @@ -2128,7 +2209,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { LLVM_DEBUG(dbgs() << "\t\tRemoving phys reg def of " << printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n"); - LIS->removePhysRegDefAt(DstReg, CopyRegIdx); + LIS->removePhysRegDefAt(DstReg.asMCReg(), CopyRegIdx); // Create a new dead def at the new def location. for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) { LiveRange &LR = LIS->getRegUnit(*UI); @@ -2219,7 +2300,7 @@ class JoinVals { LiveRange &LR; /// (Main) register we work on. - const unsigned Reg; + const Register Reg; /// Reg (and therefore the values in this liverange) will end up as /// subregister SubIdx in the coalesced register. Either CP.DstIdx or @@ -2339,7 +2420,7 @@ class JoinVals { LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const; /// Find the ultimate value that VNI was copied from. 
- std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const; + std::pair<const VNInfo *, Register> followCopyChain(const VNInfo *VNI) const; bool valuesIdentical(VNInfo *Value0, VNInfo *Value1, const JoinVals &Other) const; @@ -2378,7 +2459,7 @@ class JoinVals { /// Return true if MI uses any of the given Lanes from Reg. /// This does not include partial redefinitions of Reg. - bool usesLanes(const MachineInstr &MI, unsigned, unsigned, LaneBitmask) const; + bool usesLanes(const MachineInstr &MI, Register, unsigned, LaneBitmask) const; /// Determine if ValNo is a copy of a value number in LR or Other.LR that will /// be pruned: @@ -2389,14 +2470,15 @@ class JoinVals { bool isPrunedValue(unsigned ValNo, JoinVals &Other); public: - JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, LaneBitmask LaneMask, - SmallVectorImpl<VNInfo*> &newVNInfo, const CoalescerPair &cp, + JoinVals(LiveRange &LR, Register Reg, unsigned SubIdx, LaneBitmask LaneMask, + SmallVectorImpl<VNInfo *> &newVNInfo, const CoalescerPair &cp, LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin, bool TrackSubRegLiveness) - : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask), - SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness), - NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()), - TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums()) {} + : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask), + SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness), + NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()), + TRI(TRI), Assignments(LR.getNumValNums(), -1), + Vals(LR.getNumValNums()) {} /// Analyze defs in LR and compute a value mapping in NewVNInfo. /// Returns false if any conflicts were impossible to resolve. 
@@ -2462,9 +2544,9 @@ LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) return L; } -std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain( - const VNInfo *VNI) const { - unsigned TrackReg = Reg; +std::pair<const VNInfo *, Register> +JoinVals::followCopyChain(const VNInfo *VNI) const { + Register TrackReg = Reg; while (!VNI->isPHIDef()) { SlotIndex Def = VNI->def; @@ -2473,7 +2555,7 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain( if (!MI->isFullCopy()) return std::make_pair(VNI, TrackReg); Register SrcReg = MI->getOperand(1).getReg(); - if (!Register::isVirtualRegister(SrcReg)) + if (!SrcReg.isVirtual()) return std::make_pair(VNI, TrackReg); const LiveInterval &LI = LIS->getInterval(SrcReg); @@ -2518,13 +2600,13 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain( bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1, const JoinVals &Other) const { const VNInfo *Orig0; - unsigned Reg0; + Register Reg0; std::tie(Orig0, Reg0) = followCopyChain(Value0); if (Orig0 == Value1 && Reg0 == Other.Reg) return true; const VNInfo *Orig1; - unsigned Reg1; + Register Reg1; std::tie(Orig1, Reg1) = Other.followCopyChain(Value1); // If both values are undefined, and the source registers are the same // register, the values are identical. Filter out cases where only one @@ -2685,14 +2767,8 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { return CR_Replace; // Check for simple erasable conflicts. - if (DefMI->isImplicitDef()) { - // We need the def for the subregister if there is nothing else live at the - // subrange at this point. - if (TrackSubRegLiveness - && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)).none()) - return CR_Replace; + if (DefMI->isImplicitDef()) return CR_Erase; - } // Include the non-conflict where DefMI is a coalescable copy that kills // OtherVNI. We still want the copy erased and value numbers merged. 
@@ -2881,7 +2957,7 @@ taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other, return true; } -bool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx, +bool JoinVals::usesLanes(const MachineInstr &MI, Register Reg, unsigned SubIdx, LaneBitmask Lanes) const { if (MI.isDebugInstr()) return false; @@ -3353,7 +3429,7 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) { if (LI.valnos.size() < LargeIntervalSizeThreshold) return false; - auto &Counter = LargeLIVisitCounter[LI.reg]; + auto &Counter = LargeLIVisitCounter[LI.reg()]; if (Counter < LargeIntervalFreqThreshold) { Counter++; return false; @@ -3456,8 +3532,8 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { // Kill flags are going to be wrong if the live ranges were overlapping. // Eventually, we should simply clear all kill flags when computing live // ranges. They are reinserted after register allocation. - MRI->clearKillFlags(LHS.reg); - MRI->clearKillFlags(RHS.reg); + MRI->clearKillFlags(LHS.reg()); + MRI->clearKillFlags(RHS.reg()); if (!EndPoints.empty()) { // Recompute the parts of the live range we had to remove because of @@ -3525,20 +3601,20 @@ void RegisterCoalescer::checkMergingChangesDbgValues(CoalescerPair &CP, JoinVals &LHSVals, LiveRange &RHS, JoinVals &RHSVals) { - auto ScanForDstReg = [&](unsigned Reg) { + auto ScanForDstReg = [&](Register Reg) { checkMergingChangesDbgValuesImpl(Reg, RHS, LHS, LHSVals); }; - auto ScanForSrcReg = [&](unsigned Reg) { + auto ScanForSrcReg = [&](Register Reg) { checkMergingChangesDbgValuesImpl(Reg, LHS, RHS, RHSVals); }; // Scan for potentially unsound DBG_VALUEs: examine first the register number // Reg, and then any other vregs that may have been merged into it. 
- auto PerformScan = [this](unsigned Reg, std::function<void(unsigned)> Func) { + auto PerformScan = [this](Register Reg, std::function<void(Register)> Func) { Func(Reg); if (DbgMergedVRegNums.count(Reg)) - for (unsigned X : DbgMergedVRegNums[Reg]) + for (Register X : DbgMergedVRegNums[Reg]) Func(X); }; @@ -3547,7 +3623,7 @@ void RegisterCoalescer::checkMergingChangesDbgValues(CoalescerPair &CP, PerformScan(CP.getDstReg(), ScanForDstReg); } -void RegisterCoalescer::checkMergingChangesDbgValuesImpl(unsigned Reg, +void RegisterCoalescer::checkMergingChangesDbgValuesImpl(Register Reg, LiveRange &OtherLR, LiveRange &RegLR, JoinVals &RegVals) { @@ -3673,7 +3749,7 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) { } void RegisterCoalescer::lateLiveIntervalUpdate() { - for (unsigned reg : ToBeUpdated) { + for (Register reg : ToBeUpdated) { if (!LIS->hasInterval(reg)) continue; LiveInterval &LI = LIS->getInterval(reg); @@ -3707,7 +3783,7 @@ copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) { /// Check if DstReg is a terminal node. /// I.e., it does not have any affinity other than \p Copy. -static bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy, +static bool isTerminalReg(Register DstReg, const MachineInstr &Copy, const MachineRegisterInfo *MRI) { assert(Copy.isCopyLike()); // Check if the destination of this copy as any other affinity. @@ -3721,15 +3797,16 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { assert(Copy.isCopyLike()); if (!UseTerminalRule) return false; - unsigned DstReg, DstSubReg, SrcReg, SrcSubReg; + Register SrcReg, DstReg; + unsigned SrcSubReg = 0, DstSubReg = 0; if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg)) return false; // Check if the destination of this copy has any other affinity. - if (Register::isPhysicalRegister(DstReg) || + if (DstReg.isPhysical() || // If SrcReg is a physical register, the copy won't be coalesced. 
// Ignoring it may have other side effect (like missing // rematerialization). So keep it. - Register::isPhysicalRegister(SrcReg) || !isTerminalReg(DstReg, Copy, MRI)) + SrcReg.isPhysical() || !isTerminalReg(DstReg, Copy, MRI)) return false; // DstReg is a terminal node. Check if it interferes with any other @@ -3745,7 +3822,8 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { // For now, just consider the copies that are in the same block. if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB) continue; - unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg; + Register OtherSrcReg, OtherReg; + unsigned OtherSrcSubReg = 0, OtherSubReg = 0; if (!isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg, OtherSubReg)) return false; @@ -3930,7 +4008,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n"); for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) { - unsigned Reg = InflateRegs[i]; + Register Reg = InflateRegs[i]; if (MRI->reg_nodbg_empty(Reg)) continue; if (MRI->recomputeRegClass(Reg)) { diff --git a/llvm/lib/CodeGen/RegisterCoalescer.h b/llvm/lib/CodeGen/RegisterCoalescer.h index f505d46cd338..f265d93fb0d6 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.h +++ b/llvm/lib/CodeGen/RegisterCoalescer.h @@ -14,6 +14,8 @@ #ifndef LLVM_LIB_CODEGEN_REGISTERCOALESCER_H #define LLVM_LIB_CODEGEN_REGISTERCOALESCER_H +#include "llvm/CodeGen/Register.h" + namespace llvm { class MachineInstr; @@ -28,10 +30,10 @@ class TargetRegisterInfo; /// The register that will be left after coalescing. It can be a /// virtual or physical register. - unsigned DstReg = 0; + Register DstReg; /// The virtual register that will be coalesced into dstReg. - unsigned SrcReg = 0; + Register SrcReg; /// The sub-register index of the old DstReg in the new coalesced register. 
unsigned DstIdx = 0; @@ -59,9 +61,9 @@ class TargetRegisterInfo; /// Create a CoalescerPair representing a virtreg-to-physreg copy. /// No need to call setRegisters(). - CoalescerPair(unsigned VirtReg, unsigned PhysReg, + CoalescerPair(Register VirtReg, MCRegister PhysReg, const TargetRegisterInfo &tri) - : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg) {} + : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg) {} /// Set registers to match the copy instruction MI. Return /// false if MI is not a coalescable copy instruction. @@ -92,10 +94,10 @@ class TargetRegisterInfo; /// Return the register (virtual or physical) that will remain /// after coalescing. - unsigned getDstReg() const { return DstReg; } + Register getDstReg() const { return DstReg; } /// Return the virtual register that will be coalesced away. - unsigned getSrcReg() const { return SrcReg; } + Register getSrcReg() const { return SrcReg; } /// Return the subregister index that DstReg will be coalesced into, or 0. unsigned getDstIdx() const { return DstIdx; } diff --git a/llvm/lib/CodeGen/RegisterPressure.cpp b/llvm/lib/CodeGen/RegisterPressure.cpp index ecbc4ed63ef6..8f1fc103e869 100644 --- a/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/llvm/lib/CodeGen/RegisterPressure.cpp @@ -62,7 +62,7 @@ static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure, /// Decrease pressure for each pressure set provided by TargetRegisterInfo. 
static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure, - const MachineRegisterInfo &MRI, unsigned Reg, + const MachineRegisterInfo &MRI, Register Reg, LaneBitmask PrevMask, LaneBitmask NewMask) { //assert((NewMask & !PrevMask) == 0 && "Must not add bits"); if (NewMask.any() || PrevMask.none()) @@ -152,7 +152,7 @@ void RegPressureDelta::dump() const { #endif -void RegPressureTracker::increaseRegPressure(unsigned RegUnit, +void RegPressureTracker::increaseRegPressure(Register RegUnit, LaneBitmask PreviousMask, LaneBitmask NewMask) { if (PreviousMask.any() || NewMask.none()) @@ -167,7 +167,7 @@ void RegPressureTracker::increaseRegPressure(unsigned RegUnit, } } -void RegPressureTracker::decreaseRegPressure(unsigned RegUnit, +void RegPressureTracker::decreaseRegPressure(Register RegUnit, LaneBitmask PreviousMask, LaneBitmask NewMask) { decreaseSetPressure(CurrSetPressure, *MRI, RegUnit, PreviousMask, NewMask); @@ -360,7 +360,7 @@ void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) { LiveThruPressure.assign(TRI->getNumRegPressureSets(), 0); assert(isBottomClosed() && "need bottom-up tracking to intialize."); for (const RegisterMaskPair &Pair : P.LiveOutRegs) { - unsigned RegUnit = Pair.RegUnit; + Register RegUnit = Pair.RegUnit; if (Register::isVirtualRegister(RegUnit) && !RPTracker.hasUntiedDef(RegUnit)) increaseSetPressure(LiveThruPressure, *MRI, RegUnit, @@ -369,7 +369,7 @@ void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) { } static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits, - unsigned RegUnit) { + Register RegUnit) { auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) { return Other.RegUnit == RegUnit; }); @@ -380,7 +380,7 @@ static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits, static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits, RegisterMaskPair Pair) { - unsigned RegUnit = Pair.RegUnit; + Register RegUnit = Pair.RegUnit; 
assert(Pair.LaneMask.any()); auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) { return Other.RegUnit == RegUnit; @@ -393,7 +393,7 @@ static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits, } static void setRegZero(SmallVectorImpl<RegisterMaskPair> &RegUnits, - unsigned RegUnit) { + Register RegUnit) { auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) { return Other.RegUnit == RegUnit; }); @@ -406,7 +406,7 @@ static void setRegZero(SmallVectorImpl<RegisterMaskPair> &RegUnits, static void removeRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits, RegisterMaskPair Pair) { - unsigned RegUnit = Pair.RegUnit; + Register RegUnit = Pair.RegUnit; assert(Pair.LaneMask.any()); auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) { return Other.RegUnit == RegUnit; @@ -418,11 +418,12 @@ static void removeRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits, } } -static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS, - const MachineRegisterInfo &MRI, bool TrackLaneMasks, unsigned RegUnit, - SlotIndex Pos, LaneBitmask SafeDefault, - bool(*Property)(const LiveRange &LR, SlotIndex Pos)) { - if (Register::isVirtualRegister(RegUnit)) { +static LaneBitmask +getLanesWithProperty(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, + bool TrackLaneMasks, Register RegUnit, SlotIndex Pos, + LaneBitmask SafeDefault, + bool (*Property)(const LiveRange &LR, SlotIndex Pos)) { + if (RegUnit.isVirtual()) { const LiveInterval &LI = LIS.getInterval(RegUnit); LaneBitmask Result; if (TrackLaneMasks && LI.hasSubRanges()) { @@ -448,7 +449,7 @@ static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS, static LaneBitmask getLiveLanesAt(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, - bool TrackLaneMasks, unsigned RegUnit, + bool TrackLaneMasks, Register RegUnit, SlotIndex Pos) { return getLanesWithProperty(LIS, MRI, TrackLaneMasks, RegUnit, Pos, LaneBitmask::getAll(), @@ -457,7 +458,6 @@ 
static LaneBitmask getLiveLanesAt(const LiveIntervals &LIS, }); } - namespace { /// Collect this instruction's unique uses and defs into SmallVectors for @@ -517,12 +517,13 @@ class RegisterOperandsCollector { } } - void pushReg(unsigned Reg, + void pushReg(Register Reg, SmallVectorImpl<RegisterMaskPair> &RegUnits) const { - if (Register::isVirtualRegister(Reg)) { + if (Reg.isVirtual()) { addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneBitmask::getAll())); } else if (MRI.isAllocatable(Reg)) { - for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) + for (MCRegUnitIterator Units(Reg.asMCReg(), &TRI); Units.isValid(); + ++Units) addRegLanes(RegUnits, RegisterMaskPair(*Units, LaneBitmask::getAll())); } } @@ -549,15 +550,16 @@ class RegisterOperandsCollector { } } - void pushRegLanes(unsigned Reg, unsigned SubRegIdx, + void pushRegLanes(Register Reg, unsigned SubRegIdx, SmallVectorImpl<RegisterMaskPair> &RegUnits) const { - if (Register::isVirtualRegister(Reg)) { + if (Reg.isVirtual()) { LaneBitmask LaneMask = SubRegIdx != 0 ? 
TRI.getSubRegIndexLaneMask(SubRegIdx) : MRI.getMaxLaneMaskForVReg(Reg); addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneMask)); } else if (MRI.isAllocatable(Reg)) { - for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) + for (MCRegUnitIterator Units(Reg.asMCReg(), &TRI); Units.isValid(); + ++Units) addRegLanes(RegUnits, RegisterMaskPair(*Units, LaneBitmask::getAll())); } } @@ -580,7 +582,7 @@ void RegisterOperands::detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS) { SlotIndex SlotIdx = LIS.getInstructionIndex(MI); for (auto RI = Defs.begin(); RI != Defs.end(); /*empty*/) { - unsigned Reg = RI->RegUnit; + Register Reg = RI->RegUnit; const LiveRange *LR = getLiveRange(LIS, Reg); if (LR != nullptr) { LiveQueryResult LRQ = LR->Query(SlotIdx); @@ -605,7 +607,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, Pos.getDeadSlot()); // If the def is all that is live after the instruction, then in case // of a subregister def we need a read-undef flag. - unsigned RegUnit = I->RegUnit; + Register RegUnit = I->RegUnit; if (Register::isVirtualRegister(RegUnit) && AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask).none()) AddFlagsMI->setRegisterDefReadUndef(RegUnit); @@ -631,7 +633,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, } if (AddFlagsMI != nullptr) { for (const RegisterMaskPair &P : DeadDefs) { - unsigned RegUnit = P.RegUnit; + Register RegUnit = P.RegUnit; if (!Register::isVirtualRegister(RegUnit)) continue; LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, RegUnit, @@ -667,7 +669,7 @@ void PressureDiffs::addInstruction(unsigned Idx, } /// Add a change in pressure to the pressure diff of a given instruction. -void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec, +void PressureDiff::addPressureChange(Register RegUnit, bool IsDec, const MachineRegisterInfo *MRI) { PSetIterator PSetI = MRI->getPressureSets(RegUnit); int Weight = IsDec ? 
-PSetI.getWeight() : PSetI.getWeight(); @@ -714,7 +716,7 @@ void RegPressureTracker::discoverLiveInOrOut(RegisterMaskPair Pair, SmallVectorImpl<RegisterMaskPair> &LiveInOrOut) { assert(Pair.LaneMask.any()); - unsigned RegUnit = Pair.RegUnit; + Register RegUnit = Pair.RegUnit; auto I = llvm::find_if(LiveInOrOut, [RegUnit](const RegisterMaskPair &Other) { return Other.RegUnit == RegUnit; }); @@ -742,13 +744,13 @@ void RegPressureTracker::discoverLiveOut(RegisterMaskPair Pair) { void RegPressureTracker::bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs) { for (const RegisterMaskPair &P : DeadDefs) { - unsigned Reg = P.RegUnit; + Register Reg = P.RegUnit; LaneBitmask LiveMask = LiveRegs.contains(Reg); LaneBitmask BumpedMask = LiveMask | P.LaneMask; increaseRegPressure(Reg, LiveMask, BumpedMask); } for (const RegisterMaskPair &P : DeadDefs) { - unsigned Reg = P.RegUnit; + Register Reg = P.RegUnit; LaneBitmask LiveMask = LiveRegs.contains(Reg); LaneBitmask BumpedMask = LiveMask | P.LaneMask; decreaseRegPressure(Reg, BumpedMask, LiveMask); @@ -770,7 +772,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers, // Kill liveness at live defs. // TODO: consider earlyclobbers? for (const RegisterMaskPair &Def : RegOpers.Defs) { - unsigned Reg = Def.RegUnit; + Register Reg = Def.RegUnit; LaneBitmask PreviousMask = LiveRegs.erase(Def); LaneBitmask NewMask = PreviousMask & ~Def.LaneMask; @@ -800,7 +802,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers, // Generate liveness for uses. 
for (const RegisterMaskPair &Use : RegOpers.Uses) { - unsigned Reg = Use.RegUnit; + Register Reg = Use.RegUnit; assert(Use.LaneMask.any()); LaneBitmask PreviousMask = LiveRegs.insert(Use); LaneBitmask NewMask = PreviousMask | Use.LaneMask; @@ -840,7 +842,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers, } if (TrackUntiedDefs) { for (const RegisterMaskPair &Def : RegOpers.Defs) { - unsigned RegUnit = Def.RegUnit; + Register RegUnit = Def.RegUnit; if (Register::isVirtualRegister(RegUnit) && (LiveRegs.contains(RegUnit) & Def.LaneMask).none()) UntiedDefs.insert(RegUnit); @@ -911,7 +913,7 @@ void RegPressureTracker::advance(const RegisterOperands &RegOpers) { } for (const RegisterMaskPair &Use : RegOpers.Uses) { - unsigned Reg = Use.RegUnit; + Register Reg = Use.RegUnit; LaneBitmask LiveMask = LiveRegs.contains(Reg); LaneBitmask LiveIn = Use.LaneMask & ~LiveMask; if (LiveIn.any()) { @@ -1060,7 +1062,7 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { // Kill liveness at live defs. for (const RegisterMaskPair &P : RegOpers.Defs) { - unsigned Reg = P.RegUnit; + Register Reg = P.RegUnit; LaneBitmask LiveLanes = LiveRegs.contains(Reg); LaneBitmask UseLanes = getRegLanes(RegOpers.Uses, Reg); LaneBitmask DefLanes = P.LaneMask; @@ -1069,7 +1071,7 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { } // Generate liveness for uses. 
for (const RegisterMaskPair &P : RegOpers.Uses) { - unsigned Reg = P.RegUnit; + Register Reg = P.RegUnit; LaneBitmask LiveLanes = LiveRegs.contains(Reg); LaneBitmask LiveAfter = LiveLanes | P.LaneMask; increaseRegPressure(Reg, LiveLanes, LiveAfter); @@ -1240,7 +1242,7 @@ static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask, return LastUseMask; } -LaneBitmask RegPressureTracker::getLiveLanesAt(unsigned RegUnit, +LaneBitmask RegPressureTracker::getLiveLanesAt(Register RegUnit, SlotIndex Pos) const { assert(RequireIntervals); return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos, @@ -1250,7 +1252,7 @@ LaneBitmask RegPressureTracker::getLiveLanesAt(unsigned RegUnit, }); } -LaneBitmask RegPressureTracker::getLastUsedLanes(unsigned RegUnit, +LaneBitmask RegPressureTracker::getLastUsedLanes(Register RegUnit, SlotIndex Pos) const { assert(RequireIntervals); return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, @@ -1261,7 +1263,7 @@ LaneBitmask RegPressureTracker::getLastUsedLanes(unsigned RegUnit, }); } -LaneBitmask RegPressureTracker::getLiveThroughAt(unsigned RegUnit, +LaneBitmask RegPressureTracker::getLiveThroughAt(Register RegUnit, SlotIndex Pos) const { assert(RequireIntervals); return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos, @@ -1294,7 +1296,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { if (RequireIntervals) { for (const RegisterMaskPair &Use : RegOpers.Uses) { - unsigned Reg = Use.RegUnit; + Register Reg = Use.RegUnit; LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx); if (LastUseMask.none()) continue; @@ -1317,7 +1319,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { // Generate liveness for defs. 
for (const RegisterMaskPair &Def : RegOpers.Defs) { - unsigned Reg = Def.RegUnit; + Register Reg = Def.RegUnit; LaneBitmask LiveMask = LiveRegs.contains(Reg); LaneBitmask NewMask = LiveMask | Def.LaneMask; increaseRegPressure(Reg, LiveMask, NewMask); diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp index 41b6de1441d7..a833895c115d 100644 --- a/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -91,18 +91,18 @@ void RegScavenger::enterBasicBlockEnd(MachineBasicBlock &MBB) { LiveUnits.addLiveOuts(MBB); // Move internal iterator at the last instruction of the block. - if (MBB.begin() != MBB.end()) { + if (!MBB.empty()) { MBBI = std::prev(MBB.end()); Tracking = true; } } -void RegScavenger::addRegUnits(BitVector &BV, Register Reg) { +void RegScavenger::addRegUnits(BitVector &BV, MCRegister Reg) { for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) BV.set(*RUI); } -void RegScavenger::removeRegUnits(BitVector &BV, Register Reg) { +void RegScavenger::removeRegUnits(BitVector &BV, MCRegister Reg) { for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) BV.reset(*RUI); } @@ -134,9 +134,9 @@ void RegScavenger::determineKillsAndDefs() { } if (!MO.isReg()) continue; - Register Reg = MO.getReg(); - if (!Register::isPhysicalRegister(Reg) || isReserved(Reg)) + if (!MO.getReg().isPhysical() || isReserved(MO.getReg())) continue; + MCRegister Reg = MO.getReg().asMCReg(); if (MO.isUse()) { // Ignore undef uses. @@ -154,25 +154,6 @@ void RegScavenger::determineKillsAndDefs() { } } -void RegScavenger::unprocess() { - assert(Tracking && "Cannot unprocess because we're not tracking"); - - MachineInstr &MI = *MBBI; - if (!MI.isDebugInstr()) { - determineKillsAndDefs(); - - // Commit the changes. 
- setUnused(DefRegUnits); - setUsed(KillRegUnits); - } - - if (MBBI == MBB->begin()) { - MBBI = MachineBasicBlock::iterator(nullptr); - Tracking = false; - } else - --MBBI; -} - void RegScavenger::forward() { // Move ptr forward. if (!Tracking) { @@ -592,9 +573,8 @@ Register RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, RestoreAfter); MCPhysReg Reg = P.first; MachineBasicBlock::iterator SpillBefore = P.second; - assert(Reg != 0 && "No register left to scavenge!"); // Found an available register? - if (SpillBefore == MBB.end()) { + if (Reg != 0 && SpillBefore == MBB.end()) { LLVM_DEBUG(dbgs() << "Scavenged free register: " << printReg(Reg, TRI) << '\n'); return Reg; @@ -603,6 +583,8 @@ Register RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, if (!AllowSpill) return 0; + assert(Reg != 0 && "No register left to scavenge!"); + MachineBasicBlock::iterator ReloadAfter = RestoreAfter ? std::next(MBBI) : MBBI; MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter); @@ -652,11 +634,10 @@ static Register scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS, // we get a single contiguous lifetime. // // Definitions in MRI.def_begin() are unordered, search for the first. 
- MachineRegisterInfo::def_iterator FirstDef = - std::find_if(MRI.def_begin(VReg), MRI.def_end(), - [VReg, &TRI](const MachineOperand &MO) { - return !MO.getParent()->readsRegister(VReg, &TRI); - }); + MachineRegisterInfo::def_iterator FirstDef = llvm::find_if( + MRI.def_operands(VReg), [VReg, &TRI](const MachineOperand &MO) { + return !MO.getParent()->readsRegister(VReg, &TRI); + }); assert(FirstDef != MRI.def_end() && "Must have one definition that does not redefine vreg"); MachineInstr &DefMI = *FirstDef->getParent(); diff --git a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp index 4ee28d6bbb46..0872ec303460 100644 --- a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp @@ -130,7 +130,7 @@ bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const { return false; // Create a new VReg for each class. - unsigned Reg = LI.reg; + unsigned Reg = LI.reg(); const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); SmallVector<LiveInterval*, 4> Intervals; Intervals.push_back(&LI); @@ -175,7 +175,7 @@ bool RenameIndependentSubregs::findComponents(IntEqClasses &Classes, // across subranges when they are affected by the same MachineOperand. 
const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); Classes.grow(NumComponents); - unsigned Reg = LI.reg; + unsigned Reg = LI.reg(); for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) { if (!MO.isDef() && !MO.readsReg()) continue; @@ -212,7 +212,7 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes, const SmallVectorImpl<SubRangeInfo> &SubRangeInfos, const SmallVectorImpl<LiveInterval*> &Intervals) const { const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); - unsigned Reg = Intervals[0]->reg; + unsigned Reg = Intervals[0]->reg(); for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg), E = MRI->reg_nodbg_end(); I != E; ) { MachineOperand &MO = *I++; @@ -242,7 +242,7 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes, break; } - unsigned VReg = Intervals[ID]->reg; + unsigned VReg = Intervals[ID]->reg(); MO.setReg(VReg); if (MO.isTied() && Reg != VReg) { @@ -304,7 +304,7 @@ void RenameIndependentSubregs::computeMainRangesFixFlags( const SlotIndexes &Indexes = *LIS->getSlotIndexes(); for (size_t I = 0, E = Intervals.size(); I < E; ++I) { LiveInterval &LI = *Intervals[I]; - unsigned Reg = LI.reg; + unsigned Reg = LI.reg(); LI.removeEmptySubRanges(); diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp index 55478c232dd7..31797631c97b 100644 --- a/llvm/lib/CodeGen/SafeStack.cpp +++ b/llvm/lib/CodeGen/SafeStack.cpp @@ -151,7 +151,7 @@ class SafeStack { Value *getStackGuard(IRBuilder<> &IRB, Function &F); /// Load stack guard from the frame and check if it has changed. 
- void checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI, + void checkStackGuard(IRBuilder<> &IRB, Function &F, Instruction &RI, AllocaInst *StackGuardSlot, Value *StackGuard); /// Find all static allocas, dynamic allocas, return instructions and @@ -160,7 +160,7 @@ class SafeStack { void findInsts(Function &F, SmallVectorImpl<AllocaInst *> &StaticAllocas, SmallVectorImpl<AllocaInst *> &DynamicAllocas, SmallVectorImpl<Argument *> &ByValArguments, - SmallVectorImpl<ReturnInst *> &Returns, + SmallVectorImpl<Instruction *> &Returns, SmallVectorImpl<Instruction *> &StackRestorePoints); /// Calculate the allocation size of a given alloca. Returns 0 if the @@ -168,15 +168,13 @@ class SafeStack { uint64_t getStaticAllocaAllocationSize(const AllocaInst* AI); /// Allocate space for all static allocas in \p StaticAllocas, - /// replace allocas with pointers into the unsafe stack and generate code to - /// restore the stack pointer before all return instructions in \p Returns. + /// replace allocas with pointers into the unsafe stack. /// /// \returns A pointer to the top of the unsafe stack after all unsafe static /// allocas are allocated. 
Value *moveStaticAllocasToUnsafeStack(IRBuilder<> &IRB, Function &F, ArrayRef<AllocaInst *> StaticAllocas, ArrayRef<Argument *> ByValArguments, - ArrayRef<ReturnInst *> Returns, Instruction *BasePointer, AllocaInst *StackGuardSlot); @@ -383,7 +381,7 @@ void SafeStack::findInsts(Function &F, SmallVectorImpl<AllocaInst *> &StaticAllocas, SmallVectorImpl<AllocaInst *> &DynamicAllocas, SmallVectorImpl<Argument *> &ByValArguments, - SmallVectorImpl<ReturnInst *> &Returns, + SmallVectorImpl<Instruction *> &Returns, SmallVectorImpl<Instruction *> &StackRestorePoints) { for (Instruction &I : instructions(&F)) { if (auto AI = dyn_cast<AllocaInst>(&I)) { @@ -401,7 +399,10 @@ void SafeStack::findInsts(Function &F, DynamicAllocas.push_back(AI); } } else if (auto RI = dyn_cast<ReturnInst>(&I)) { - Returns.push_back(RI); + if (CallInst *CI = I.getParent()->getTerminatingMustTailCall()) + Returns.push_back(CI); + else + Returns.push_back(RI); } else if (auto CI = dyn_cast<CallInst>(&I)) { // setjmps require stack restore. if (CI->getCalledFunction() && CI->canReturnTwice()) @@ -465,7 +466,7 @@ SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F, return DynamicTop; } -void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI, +void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, Instruction &RI, AllocaInst *StackGuardSlot, Value *StackGuard) { Value *V = IRB.CreateLoad(StackPtrTy, StackGuardSlot); Value *Cmp = IRB.CreateICmpNE(StackGuard, V); @@ -490,8 +491,8 @@ void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI, /// prologue into a local variable and restore it in the epilogue. 
Value *SafeStack::moveStaticAllocasToUnsafeStack( IRBuilder<> &IRB, Function &F, ArrayRef<AllocaInst *> StaticAllocas, - ArrayRef<Argument *> ByValArguments, ArrayRef<ReturnInst *> Returns, - Instruction *BasePointer, AllocaInst *StackGuardSlot) { + ArrayRef<Argument *> ByValArguments, Instruction *BasePointer, + AllocaInst *StackGuardSlot) { if (StaticAllocas.empty() && ByValArguments.empty()) return BasePointer; @@ -759,7 +760,7 @@ bool SafeStack::run() { SmallVector<AllocaInst *, 16> StaticAllocas; SmallVector<AllocaInst *, 4> DynamicAllocas; SmallVector<Argument *, 4> ByValArguments; - SmallVector<ReturnInst *, 4> Returns; + SmallVector<Instruction *, 4> Returns; // Collect all points where stack gets unwound and needs to be restored // This is only necessary because the runtime (setjmp and unwind code) is @@ -788,7 +789,8 @@ bool SafeStack::run() { // Calls must always have a debug location, or else inlining breaks. So // we explicitly set a artificial debug location here. if (DISubprogram *SP = F.getSubprogram()) - IRB.SetCurrentDebugLocation(DebugLoc::get(SP->getScopeLine(), 0, SP)); + IRB.SetCurrentDebugLocation( + DILocation::get(SP->getContext(), SP->getScopeLine(), 0, SP)); if (SafeStackUsePointerAddress) { FunctionCallee Fn = F.getParent()->getOrInsertFunction( "__safestack_pointer_address", StackPtrTy->getPointerTo(0)); @@ -812,7 +814,7 @@ bool SafeStack::run() { StackGuardSlot = IRB.CreateAlloca(StackPtrTy, nullptr); IRB.CreateStore(StackGuard, StackGuardSlot); - for (ReturnInst *RI : Returns) { + for (Instruction *RI : Returns) { IRBuilder<> IRBRet(RI); checkStackGuard(IRBRet, F, *RI, StackGuardSlot, StackGuard); } @@ -820,9 +822,8 @@ bool SafeStack::run() { // The top of the unsafe stack after all unsafe static allocas are // allocated. 
- Value *StaticTop = - moveStaticAllocasToUnsafeStack(IRB, F, StaticAllocas, ByValArguments, - Returns, BasePointer, StackGuardSlot); + Value *StaticTop = moveStaticAllocasToUnsafeStack( + IRB, F, StaticAllocas, ByValArguments, BasePointer, StackGuardSlot); // Safe stack object that stores the current unsafe stack top. It is updated // as unsafe dynamic (non-constant-sized) allocas are allocated and freed. @@ -838,7 +839,7 @@ bool SafeStack::run() { DynamicAllocas); // Restore the unsafe stack pointer before each return. - for (ReturnInst *RI : Returns) { + for (Instruction *RI : Returns) { IRB.SetInsertPoint(RI); IRB.CreateStore(BasePointer, UnsafeStackPtr); } diff --git a/llvm/lib/CodeGen/SafeStackLayout.cpp b/llvm/lib/CodeGen/SafeStackLayout.cpp index c823454f825c..5d61b3a146b4 100644 --- a/llvm/lib/CodeGen/SafeStackLayout.cpp +++ b/llvm/lib/CodeGen/SafeStackLayout.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "SafeStackLayout.h" -#include "llvm/Analysis/StackLifetime.h" #include "llvm/IR/Value.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" @@ -141,10 +140,10 @@ void StackLayout::computeLayout() { // Sort objects by size (largest first) to reduce fragmentation. 
if (StackObjects.size() > 2) - std::stable_sort(StackObjects.begin() + 1, StackObjects.end(), - [](const StackObject &a, const StackObject &b) { - return a.Size > b.Size; - }); + llvm::stable_sort(drop_begin(StackObjects), + [](const StackObject &a, const StackObject &b) { + return a.Size > b.Size; + }); for (auto &Obj : StackObjects) layoutObject(Obj); diff --git a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp deleted file mode 100644 index c93b29617438..000000000000 --- a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp +++ /dev/null @@ -1,911 +0,0 @@ -//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===// -// instrinsics -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass replaces masked memory intrinsics - when unsupported by the target -// - with a chain of basic blocks, that deal with the elements one-by-one if the -// appropriate mask bit is set. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/Twine.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Constant.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Value.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" -#include "llvm/Support/Casting.h" -#include <algorithm> -#include <cassert> - -using namespace llvm; - -#define DEBUG_TYPE "scalarize-masked-mem-intrin" - -namespace { - -class ScalarizeMaskedMemIntrin : public FunctionPass { - const TargetTransformInfo *TTI = nullptr; - const DataLayout *DL = nullptr; - -public: - static char ID; // Pass identification, replacement for typeid - - explicit ScalarizeMaskedMemIntrin() : FunctionPass(ID) { - initializeScalarizeMaskedMemIntrinPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - - StringRef getPassName() const override { - return "Scalarize Masked Memory Intrinsics"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<TargetTransformInfoWrapperPass>(); - } - -private: - bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT); - bool optimizeCallInst(CallInst *CI, bool &ModifiedDT); -}; - -} // end anonymous namespace - -char ScalarizeMaskedMemIntrin::ID = 0; - -INITIALIZE_PASS(ScalarizeMaskedMemIntrin, DEBUG_TYPE, - "Scalarize unsupported masked memory intrinsics", false, false) - -FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() { - return new ScalarizeMaskedMemIntrin(); -} - -static bool isConstantIntVector(Value *Mask) { - Constant *C = dyn_cast<Constant>(Mask); - 
if (!C) - return false; - - unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements(); - for (unsigned i = 0; i != NumElts; ++i) { - Constant *CElt = C->getAggregateElement(i); - if (!CElt || !isa<ConstantInt>(CElt)) - return false; - } - - return true; -} - -// Translate a masked load intrinsic like -// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align, -// <16 x i1> %mask, <16 x i32> %passthru) -// to a chain of basic blocks, with loading element one-by-one if -// the appropriate mask bit is set -// -// %1 = bitcast i8* %addr to i32* -// %2 = extractelement <16 x i1> %mask, i32 0 -// br i1 %2, label %cond.load, label %else -// -// cond.load: ; preds = %0 -// %3 = getelementptr i32* %1, i32 0 -// %4 = load i32* %3 -// %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0 -// br label %else -// -// else: ; preds = %0, %cond.load -// %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ] -// %6 = extractelement <16 x i1> %mask, i32 1 -// br i1 %6, label %cond.load1, label %else2 -// -// cond.load1: ; preds = %else -// %7 = getelementptr i32* %1, i32 1 -// %8 = load i32* %7 -// %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1 -// br label %else2 -// -// else2: ; preds = %else, %cond.load1 -// %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ] -// %10 = extractelement <16 x i1> %mask, i32 2 -// br i1 %10, label %cond.load4, label %else5 -// -static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) { - Value *Ptr = CI->getArgOperand(0); - Value *Alignment = CI->getArgOperand(1); - Value *Mask = CI->getArgOperand(2); - Value *Src0 = CI->getArgOperand(3); - - const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue(); - VectorType *VecType = cast<FixedVectorType>(CI->getType()); - - Type *EltTy = VecType->getElementType(); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - - Builder.SetInsertPoint(InsertPt); - 
Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - // Short-cut if the mask is all-true. - if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) { - Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal); - CI->replaceAllUsesWith(NewI); - CI->eraseFromParent(); - return; - } - - // Adjust alignment for the scalar instruction. - const Align AdjustedAlignVal = - commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8); - // Bitcast %addr from i8* to EltTy* - Type *NewPtrType = - EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace()); - Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); - unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements(); - - // The result vector - Value *VResult = Src0; - - if (isConstantIntVector(Mask)) { - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) - continue; - Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); - LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal); - VResult = Builder.CreateInsertElement(VResult, Load, Idx); - } - CI->replaceAllUsesWith(VResult); - CI->eraseFromParent(); - return; - } - - // If the mask is not v1i1, use scalar bit test operations. This generates - // better results on X86 at least. 
- Value *SclrMask; - if (VectorWidth != 1) { - Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); - SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); - } - - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - // Fill the "else" block, created in the previous iteration - // - // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] - // %mask_1 = and i16 %scalar_mask, i32 1 << Idx - // %cond = icmp ne i16 %mask_1, 0 - // br i1 %mask_1, label %cond.load, label %else - // - Value *Predicate; - if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); - Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), - Builder.getIntN(VectorWidth, 0)); - } else { - Predicate = Builder.CreateExtractElement(Mask, Idx); - } - - // Create "cond" block - // - // %EltAddr = getelementptr i32* %1, i32 0 - // %Elt = load i32* %EltAddr - // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx - // - BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), - "cond.load"); - Builder.SetInsertPoint(InsertPt); - - Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); - LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal); - Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = - CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); - OldBr->eraseFromParent(); - BasicBlock *PrevIfBlock = IfBlock; - IfBlock = NewIfBlock; - - // Create the phi to join the new and previous value. 
- PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); - Phi->addIncoming(NewVResult, CondBlock); - Phi->addIncoming(VResult, PrevIfBlock); - VResult = Phi; - } - - CI->replaceAllUsesWith(VResult); - CI->eraseFromParent(); - - ModifiedDT = true; -} - -// Translate a masked store intrinsic, like -// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align, -// <16 x i1> %mask) -// to a chain of basic blocks, that stores element one-by-one if -// the appropriate mask bit is set -// -// %1 = bitcast i8* %addr to i32* -// %2 = extractelement <16 x i1> %mask, i32 0 -// br i1 %2, label %cond.store, label %else -// -// cond.store: ; preds = %0 -// %3 = extractelement <16 x i32> %val, i32 0 -// %4 = getelementptr i32* %1, i32 0 -// store i32 %3, i32* %4 -// br label %else -// -// else: ; preds = %0, %cond.store -// %5 = extractelement <16 x i1> %mask, i32 1 -// br i1 %5, label %cond.store1, label %else2 -// -// cond.store1: ; preds = %else -// %6 = extractelement <16 x i32> %val, i32 1 -// %7 = getelementptr i32* %1, i32 1 -// store i32 %6, i32* %7 -// br label %else2 -// . . . -static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) { - Value *Src = CI->getArgOperand(0); - Value *Ptr = CI->getArgOperand(1); - Value *Alignment = CI->getArgOperand(2); - Value *Mask = CI->getArgOperand(3); - - const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue(); - auto *VecType = cast<VectorType>(Src->getType()); - - Type *EltTy = VecType->getElementType(); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - Builder.SetInsertPoint(InsertPt); - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - // Short-cut if the mask is all-true. - if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) { - Builder.CreateAlignedStore(Src, Ptr, AlignVal); - CI->eraseFromParent(); - return; - } - - // Adjust alignment for the scalar instruction. 
- const Align AdjustedAlignVal = - commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8); - // Bitcast %addr from i8* to EltTy* - Type *NewPtrType = - EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace()); - Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); - unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements(); - - if (isConstantIntVector(Mask)) { - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) - continue; - Value *OneElt = Builder.CreateExtractElement(Src, Idx); - Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); - Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal); - } - CI->eraseFromParent(); - return; - } - - // If the mask is not v1i1, use scalar bit test operations. This generates - // better results on X86 at least. - Value *SclrMask; - if (VectorWidth != 1) { - Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); - SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); - } - - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - // Fill the "else" block, created in the previous iteration - // - // %mask_1 = and i16 %scalar_mask, i32 1 << Idx - // %cond = icmp ne i16 %mask_1, 0 - // br i1 %mask_1, label %cond.store, label %else - // - Value *Predicate; - if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); - Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), - Builder.getIntN(VectorWidth, 0)); - } else { - Predicate = Builder.CreateExtractElement(Mask, Idx); - } - - // Create "cond" block - // - // %OneElt = extractelement <16 x i32> %Src, i32 Idx - // %EltAddr = getelementptr i32* %1, i32 0 - // %store i32 %OneElt, i32* %EltAddr - // - BasicBlock *CondBlock = - IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store"); - Builder.SetInsertPoint(InsertPt); - - Value *OneElt = Builder.CreateExtractElement(Src, Idx); - Value *Gep = 
Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); - Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = - CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); - OldBr->eraseFromParent(); - IfBlock = NewIfBlock; - } - CI->eraseFromParent(); - - ModifiedDT = true; -} - -// Translate a masked gather intrinsic like -// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4, -// <16 x i1> %Mask, <16 x i32> %Src) -// to a chain of basic blocks, with loading element one-by-one if -// the appropriate mask bit is set -// -// %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind -// %Mask0 = extractelement <16 x i1> %Mask, i32 0 -// br i1 %Mask0, label %cond.load, label %else -// -// cond.load: -// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 -// %Load0 = load i32, i32* %Ptr0, align 4 -// %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0 -// br label %else -// -// else: -// %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0] -// %Mask1 = extractelement <16 x i1> %Mask, i32 1 -// br i1 %Mask1, label %cond.load1, label %else2 -// -// cond.load1: -// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 -// %Load1 = load i32, i32* %Ptr1, align 4 -// %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1 -// br label %else2 -// . . . 
-// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src -// ret <16 x i32> %Result -static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) { - Value *Ptrs = CI->getArgOperand(0); - Value *Alignment = CI->getArgOperand(1); - Value *Mask = CI->getArgOperand(2); - Value *Src0 = CI->getArgOperand(3); - - auto *VecType = cast<FixedVectorType>(CI->getType()); - Type *EltTy = VecType->getElementType(); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - Builder.SetInsertPoint(InsertPt); - MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue(); - - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - // The result vector - Value *VResult = Src0; - unsigned VectorWidth = VecType->getNumElements(); - - // Shorten the way if the mask is a vector of constants. - if (isConstantIntVector(Mask)) { - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) - continue; - Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); - LoadInst *Load = - Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx)); - VResult = - Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx)); - } - CI->replaceAllUsesWith(VResult); - CI->eraseFromParent(); - return; - } - - // If the mask is not v1i1, use scalar bit test operations. This generates - // better results on X86 at least. 
- Value *SclrMask; - if (VectorWidth != 1) { - Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); - SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); - } - - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - // Fill the "else" block, created in the previous iteration - // - // %Mask1 = and i16 %scalar_mask, i32 1 << Idx - // %cond = icmp ne i16 %mask_1, 0 - // br i1 %Mask1, label %cond.load, label %else - // - - Value *Predicate; - if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); - Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), - Builder.getIntN(VectorWidth, 0)); - } else { - Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); - } - - // Create "cond" block - // - // %EltAddr = getelementptr i32* %1, i32 0 - // %Elt = load i32* %EltAddr - // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx - // - BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); - Builder.SetInsertPoint(InsertPt); - - Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); - LoadInst *Load = - Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx)); - Value *NewVResult = - Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx)); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); - OldBr->eraseFromParent(); - BasicBlock *PrevIfBlock = IfBlock; - IfBlock = NewIfBlock; - - PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); - Phi->addIncoming(NewVResult, CondBlock); - Phi->addIncoming(VResult, PrevIfBlock); - VResult = Phi; - } - - CI->replaceAllUsesWith(VResult); - CI->eraseFromParent(); - - ModifiedDT = true; -} - -// Translate a masked scatter intrinsic, like -// void 
@llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4, -// <16 x i1> %Mask) -// to a chain of basic blocks, that stores element one-by-one if -// the appropriate mask bit is set. -// -// %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind -// %Mask0 = extractelement <16 x i1> %Mask, i32 0 -// br i1 %Mask0, label %cond.store, label %else -// -// cond.store: -// %Elt0 = extractelement <16 x i32> %Src, i32 0 -// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 -// store i32 %Elt0, i32* %Ptr0, align 4 -// br label %else -// -// else: -// %Mask1 = extractelement <16 x i1> %Mask, i32 1 -// br i1 %Mask1, label %cond.store1, label %else2 -// -// cond.store1: -// %Elt1 = extractelement <16 x i32> %Src, i32 1 -// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 -// store i32 %Elt1, i32* %Ptr1, align 4 -// br label %else2 -// . . . -static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) { - Value *Src = CI->getArgOperand(0); - Value *Ptrs = CI->getArgOperand(1); - Value *Alignment = CI->getArgOperand(2); - Value *Mask = CI->getArgOperand(3); - - auto *SrcFVTy = cast<FixedVectorType>(Src->getType()); - - assert( - isa<VectorType>(Ptrs->getType()) && - isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) && - "Vector of pointers is expected in masked scatter intrinsic"); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - Builder.SetInsertPoint(InsertPt); - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue(); - unsigned VectorWidth = SrcFVTy->getNumElements(); - - // Shorten the way if the mask is a vector of constants. 
- if (isConstantIntVector(Mask)) { - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) - continue; - Value *OneElt = - Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); - Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); - Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); - } - CI->eraseFromParent(); - return; - } - - // If the mask is not v1i1, use scalar bit test operations. This generates - // better results on X86 at least. - Value *SclrMask; - if (VectorWidth != 1) { - Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); - SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); - } - - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - // Fill the "else" block, created in the previous iteration - // - // %Mask1 = and i16 %scalar_mask, i32 1 << Idx - // %cond = icmp ne i16 %mask_1, 0 - // br i1 %Mask1, label %cond.store, label %else - // - Value *Predicate; - if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); - Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), - Builder.getIntN(VectorWidth, 0)); - } else { - Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); - } - - // Create "cond" block - // - // %Elt1 = extractelement <16 x i32> %Src, i32 1 - // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 - // %store i32 %Elt1, i32* %Ptr1 - // - BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); - Builder.SetInsertPoint(InsertPt); - - Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); - Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx)); - Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - 
BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); - OldBr->eraseFromParent(); - IfBlock = NewIfBlock; - } - CI->eraseFromParent(); - - ModifiedDT = true; -} - -static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) { - Value *Ptr = CI->getArgOperand(0); - Value *Mask = CI->getArgOperand(1); - Value *PassThru = CI->getArgOperand(2); - - auto *VecType = cast<FixedVectorType>(CI->getType()); - - Type *EltTy = VecType->getElementType(); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - - Builder.SetInsertPoint(InsertPt); - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - unsigned VectorWidth = VecType->getNumElements(); - - // The result vector - Value *VResult = PassThru; - - // Shorten the way if the mask is a vector of constants. - if (isConstantIntVector(Mask)) { - unsigned MemIndex = 0; - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) - continue; - Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex); - LoadInst *Load = Builder.CreateAlignedLoad(EltTy, NewPtr, Align(1), - "Load" + Twine(Idx)); - VResult = - Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx)); - ++MemIndex; - } - CI->replaceAllUsesWith(VResult); - CI->eraseFromParent(); - return; - } - - // If the mask is not v1i1, use scalar bit test operations. This generates - // better results on X86 at least. 
- Value *SclrMask; - if (VectorWidth != 1) { - Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); - SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); - } - - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - // Fill the "else" block, created in the previous iteration - // - // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] - // %mask_1 = extractelement <16 x i1> %mask, i32 Idx - // br i1 %mask_1, label %cond.load, label %else - // - - Value *Predicate; - if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); - Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), - Builder.getIntN(VectorWidth, 0)); - } else { - Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); - } - - // Create "cond" block - // - // %EltAddr = getelementptr i32* %1, i32 0 - // %Elt = load i32* %EltAddr - // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx - // - BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), - "cond.load"); - Builder.SetInsertPoint(InsertPt); - - LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, Align(1)); - Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx); - - // Move the pointer if there are more blocks to come. - Value *NewPtr; - if ((Idx + 1) != VectorWidth) - NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = - CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); - OldBr->eraseFromParent(); - BasicBlock *PrevIfBlock = IfBlock; - IfBlock = NewIfBlock; - - // Create the phi to join the new and previous value. 
- PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else"); - ResultPhi->addIncoming(NewVResult, CondBlock); - ResultPhi->addIncoming(VResult, PrevIfBlock); - VResult = ResultPhi; - - // Add a PHI for the pointer if this isn't the last iteration. - if ((Idx + 1) != VectorWidth) { - PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else"); - PtrPhi->addIncoming(NewPtr, CondBlock); - PtrPhi->addIncoming(Ptr, PrevIfBlock); - Ptr = PtrPhi; - } - } - - CI->replaceAllUsesWith(VResult); - CI->eraseFromParent(); - - ModifiedDT = true; -} - -static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) { - Value *Src = CI->getArgOperand(0); - Value *Ptr = CI->getArgOperand(1); - Value *Mask = CI->getArgOperand(2); - - auto *VecType = cast<FixedVectorType>(Src->getType()); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - - Builder.SetInsertPoint(InsertPt); - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - Type *EltTy = VecType->getElementType(); - - unsigned VectorWidth = VecType->getNumElements(); - - // Shorten the way if the mask is a vector of constants. - if (isConstantIntVector(Mask)) { - unsigned MemIndex = 0; - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) - continue; - Value *OneElt = - Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); - Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex); - Builder.CreateAlignedStore(OneElt, NewPtr, Align(1)); - ++MemIndex; - } - CI->eraseFromParent(); - return; - } - - // If the mask is not v1i1, use scalar bit test operations. This generates - // better results on X86 at least. 
- Value *SclrMask; - if (VectorWidth != 1) { - Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); - SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); - } - - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - // Fill the "else" block, created in the previous iteration - // - // %mask_1 = extractelement <16 x i1> %mask, i32 Idx - // br i1 %mask_1, label %cond.store, label %else - // - Value *Predicate; - if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); - Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), - Builder.getIntN(VectorWidth, 0)); - } else { - Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); - } - - // Create "cond" block - // - // %OneElt = extractelement <16 x i32> %Src, i32 Idx - // %EltAddr = getelementptr i32* %1, i32 0 - // %store i32 %OneElt, i32* %EltAddr - // - BasicBlock *CondBlock = - IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store"); - Builder.SetInsertPoint(InsertPt); - - Value *OneElt = Builder.CreateExtractElement(Src, Idx); - Builder.CreateAlignedStore(OneElt, Ptr, Align(1)); - - // Move the pointer if there are more blocks to come. - Value *NewPtr; - if ((Idx + 1) != VectorWidth) - NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = - CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr); - OldBr->eraseFromParent(); - BasicBlock *PrevIfBlock = IfBlock; - IfBlock = NewIfBlock; - - // Add a PHI for the pointer if this isn't the last iteration. 
- if ((Idx + 1) != VectorWidth) { - PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else"); - PtrPhi->addIncoming(NewPtr, CondBlock); - PtrPhi->addIncoming(Ptr, PrevIfBlock); - Ptr = PtrPhi; - } - } - CI->eraseFromParent(); - - ModifiedDT = true; -} - -bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) { - bool EverMadeChange = false; - - TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - DL = &F.getParent()->getDataLayout(); - - bool MadeChange = true; - while (MadeChange) { - MadeChange = false; - for (Function::iterator I = F.begin(); I != F.end();) { - BasicBlock *BB = &*I++; - bool ModifiedDTOnIteration = false; - MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration); - - // Restart BB iteration if the dominator tree of the Function was changed - if (ModifiedDTOnIteration) - break; - } - - EverMadeChange |= MadeChange; - } - - return EverMadeChange; -} - -bool ScalarizeMaskedMemIntrin::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) { - bool MadeChange = false; - - BasicBlock::iterator CurInstIterator = BB.begin(); - while (CurInstIterator != BB.end()) { - if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++)) - MadeChange |= optimizeCallInst(CI, ModifiedDT); - if (ModifiedDT) - return true; - } - - return MadeChange; -} - -bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI, - bool &ModifiedDT) { - IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); - if (II) { - switch (II->getIntrinsicID()) { - default: - break; - case Intrinsic::masked_load: - // Scalarize unsupported vector masked load - if (TTI->isLegalMaskedLoad( - CI->getType(), - cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue())) - return false; - scalarizeMaskedLoad(CI, ModifiedDT); - return true; - case Intrinsic::masked_store: - if (TTI->isLegalMaskedStore( - CI->getArgOperand(0)->getType(), - cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue())) - return false; - scalarizeMaskedStore(CI, ModifiedDT); - return true; - case 
Intrinsic::masked_gather: { - unsigned AlignmentInt = - cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); - Type *LoadTy = CI->getType(); - Align Alignment = - DL->getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), LoadTy); - if (TTI->isLegalMaskedGather(LoadTy, Alignment)) - return false; - scalarizeMaskedGather(CI, ModifiedDT); - return true; - } - case Intrinsic::masked_scatter: { - unsigned AlignmentInt = - cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); - Type *StoreTy = CI->getArgOperand(0)->getType(); - Align Alignment = - DL->getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), StoreTy); - if (TTI->isLegalMaskedScatter(StoreTy, Alignment)) - return false; - scalarizeMaskedScatter(CI, ModifiedDT); - return true; - } - case Intrinsic::masked_expandload: - if (TTI->isLegalMaskedExpandLoad(CI->getType())) - return false; - scalarizeMaskedExpandLoad(CI, ModifiedDT); - return true; - case Intrinsic::masked_compressstore: - if (TTI->isLegalMaskedCompressStore(CI->getArgOperand(0)->getType())) - return false; - scalarizeMaskedCompressStore(CI, ModifiedDT); - return true; - } - } - - return false; -} diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 10da2d421797..5899da777fe9 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -154,7 +154,7 @@ static bool getUnderlyingObjectsForInstr(const MachineInstr *MI, Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias)); } else if (const Value *V = MMO->getValue()) { SmallVector<Value *, 4> Objs; - if (!getUnderlyingObjectsForCodeGen(V, Objs, DL)) + if (!getUnderlyingObjectsForCodeGen(V, Objs)) return false; for (Value *V : Objs) { @@ -199,7 +199,10 @@ void ScheduleDAGInstrs::exitRegion() { } void ScheduleDAGInstrs::addSchedBarrierDeps() { - MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr; + MachineInstr *ExitMI = + RegionEnd != BB->end() + ? 
&*skipDebugInstructionsBackward(RegionEnd, RegionBegin) + : nullptr; ExitSU.setInstr(ExitMI); // Add dependencies on the defs and uses of the instruction. if (ExitMI) { @@ -241,8 +244,6 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { !DefMIDesc->hasImplicitDefOfPhysReg(MO.getReg())); for (MCRegAliasIterator Alias(MO.getReg(), TRI, true); Alias.isValid(); ++Alias) { - if (!Uses.contains(*Alias)) - continue; for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) { SUnit *UseSU = I->SU; if (UseSU == SU) @@ -513,6 +514,8 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { /// TODO: Handle ExitSU "uses" properly. void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { const MachineInstr *MI = SU->getInstr(); + assert(!MI->isDebugInstr()); + const MachineOperand &MO = MI->getOperand(OperIdx); Register Reg = MO.getReg(); @@ -804,7 +807,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA, DbgMI = nullptr; } - if (MI.isDebugValue()) { + if (MI.isDebugValue() || MI.isDebugRef()) { DbgMI = &MI; continue; } @@ -1184,7 +1187,7 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { else if (SU == &ExitSU) oss << "<exit>"; else - SU->getInstr()->print(oss, /*SkipOpers=*/true); + SU->getInstr()->print(oss, /*IsStandalone=*/true); return oss.str(); } diff --git a/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp index a113c30f851b..05b2a3764cca 100644 --- a/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -35,7 +35,7 @@ namespace llvm { return true; } - static bool isNodeHidden(const SUnit *Node) { + static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) { return (Node->NumPreds > 10 || Node->NumSuccs > 10); } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index f14b3dba4f31..615bea2a4905 100644 --- 
a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -24,12 +24,14 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -410,9 +412,11 @@ namespace { SDValue visitSUBO(SDNode *N); SDValue visitADDE(SDNode *N); SDValue visitADDCARRY(SDNode *N); + SDValue visitSADDO_CARRY(SDNode *N); SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N); SDValue visitSUBE(SDNode *N); SDValue visitSUBCARRY(SDNode *N); + SDValue visitSSUBO_CARRY(SDNode *N); SDValue visitMUL(SDNode *N); SDValue visitMULFIX(SDNode *N); SDValue useDivRem(SDNode *N); @@ -464,6 +468,7 @@ namespace { SDValue visitFREEZE(SDNode *N); SDValue visitBUILD_PAIR(SDNode *N); SDValue visitFADD(SDNode *N); + SDValue visitSTRICT_FADD(SDNode *N); SDValue visitFSUB(SDNode *N); SDValue visitFMUL(SDNode *N); SDValue visitFMA(SDNode *N); @@ -539,6 +544,7 @@ namespace { SDValue convertSelectOfFPConstantsToLoadOffset( const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC); + SDValue foldSignChangeInBitcast(SDNode *N); SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC); SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, @@ -586,7 +592,7 @@ namespace { const SDLoc &DL); SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); SDValue MatchLoadCombine(SDNode *N); - SDValue MatchStoreCombine(StoreSDNode *N); + SDValue mergeTruncStores(StoreSDNode *N); SDValue ReduceLoadWidth(SDNode *N); SDValue 
ReduceLoadOpStoreWidth(SDNode *N); SDValue splitMergedValStore(StoreSDNode *ST); @@ -641,14 +647,18 @@ namespace { // Classify the origin of a stored value. enum class StoreSource { Unknown, Constant, Extract, Load }; StoreSource getStoreSource(SDValue StoreVal) { - if (isa<ConstantSDNode>(StoreVal) || isa<ConstantFPSDNode>(StoreVal)) + switch (StoreVal.getOpcode()) { + case ISD::Constant: + case ISD::ConstantFP: return StoreSource::Constant; - if (StoreVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT || - StoreVal.getOpcode() == ISD::EXTRACT_SUBVECTOR) + case ISD::EXTRACT_VECTOR_ELT: + case ISD::EXTRACT_SUBVECTOR: return StoreSource::Extract; - if (isa<LoadSDNode>(StoreVal)) + case ISD::LOAD: return StoreSource::Load; - return StoreSource::Unknown; + default: + return StoreSource::Unknown; + } } /// This is a helper function for visitMUL to check the profitability @@ -752,9 +762,7 @@ namespace { /// is legal or custom before legalizing operations, and whether is /// legal (but not custom) after legalization. bool hasOperation(unsigned Opcode, EVT VT) { - if (LegalOperations) - return TLI.isOperationLegal(Opcode, VT); - return TLI.isOperationLegalOrCustom(Opcode, VT); + return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations); } public: @@ -924,23 +932,40 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const { return false; } -// Returns the SDNode if it is a constant float BuildVector -// or constant float. 
-static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { - if (isa<ConstantFPSDNode>(N)) - return N.getNode(); - if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) - return N.getNode(); - return nullptr; +static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) { + if (!ScalarTy.isSimple()) + return false; + + uint64_t MaskForTy = 0ULL; + switch (ScalarTy.getSimpleVT().SimpleTy) { + case MVT::i8: + MaskForTy = 0xFFULL; + break; + case MVT::i16: + MaskForTy = 0xFFFFULL; + break; + case MVT::i32: + MaskForTy = 0xFFFFFFFFULL; + break; + default: + return false; + break; + } + + APInt Val; + if (ISD::isConstantSplatVector(N, Val)) + return Val.getLimitedValue() == MaskForTy; + + return false; } -// Determines if it is a constant integer or a build vector of constant +// Determines if it is a constant integer or a splat/build vector of constant // integers (and undefs). // Do not permit build vector implicit truncation. static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) { if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N)) return !(Const->isOpaque() && NoOpaques); - if (N.getOpcode() != ISD::BUILD_VECTOR) + if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR) return false; unsigned BitWidth = N.getScalarValueSizeInBits(); for (const SDValue &Op : N->op_values()) { @@ -1554,9 +1579,15 @@ void DAGCombiner::Run(CombineLevel AtLevel) { DAG.ReplaceAllUsesWith(N, &RV); } - // Push the new node and any users onto the worklist - AddToWorklist(RV.getNode()); - AddUsersToWorklist(RV.getNode()); + // Push the new node and any users onto the worklist. Omit this if the + // new node is the EntryToken (e.g. if a store managed to get optimized + // out), because re-visiting the EntryToken and its users will not uncover + // any additional opportunities, but there may be a large number of such + // users, potentially causing compile time explosion. 
+ if (RV.getOpcode() != ISD::EntryToken) { + AddToWorklist(RV.getNode()); + AddUsersToWorklist(RV.getNode()); + } // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to @@ -1589,8 +1620,10 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::USUBO: return visitSUBO(N); case ISD::ADDE: return visitADDE(N); case ISD::ADDCARRY: return visitADDCARRY(N); + case ISD::SADDO_CARRY: return visitSADDO_CARRY(N); case ISD::SUBE: return visitSUBE(N); case ISD::SUBCARRY: return visitSUBCARRY(N); + case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N); case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: @@ -1646,6 +1679,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::BITCAST: return visitBITCAST(N); case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); case ISD::FADD: return visitFADD(N); + case ISD::STRICT_FADD: return visitSTRICT_FADD(N); case ISD::FSUB: return visitFSUB(N); case ISD::FMUL: return visitFMUL(N); case ISD::FMA: return visitFMA(N); @@ -1805,6 +1839,10 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { if (OptLevel == CodeGenOpt::None) return SDValue(); + // Don't simplify the token factor if the node itself has too many operands. + if (N->getNumOperands() > TokenFactorInlineLimit) + return SDValue(); + // If the sole user is a token factor, we should make sure we have a // chance to merge them together. This prevents TF chains from inhibiting // optimizations. @@ -1890,7 +1928,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) { // If this is an Op, we can remove the op from the list. Remark any // search associated with it as from the current OpNumber. 
- if (SeenOps.count(Op) != 0) { + if (SeenOps.contains(Op)) { Changed = true; DidPruneOps = true; unsigned OrigOpNumber = 0; @@ -2002,6 +2040,62 @@ static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { return Const != nullptr && !Const->isOpaque() ? Const : nullptr; } +/// Return true if 'Use' is a load or a store that uses N as its base pointer +/// and that N may be folded in the load / store addressing mode. +static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, + const TargetLowering &TLI) { + EVT VT; + unsigned AS; + + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { + if (LD->isIndexed() || LD->getBasePtr().getNode() != N) + return false; + VT = LD->getMemoryVT(); + AS = LD->getAddressSpace(); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { + if (ST->isIndexed() || ST->getBasePtr().getNode() != N) + return false; + VT = ST->getMemoryVT(); + AS = ST->getAddressSpace(); + } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) { + if (LD->isIndexed() || LD->getBasePtr().getNode() != N) + return false; + VT = LD->getMemoryVT(); + AS = LD->getAddressSpace(); + } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) { + if (ST->isIndexed() || ST->getBasePtr().getNode() != N) + return false; + VT = ST->getMemoryVT(); + AS = ST->getAddressSpace(); + } else + return false; + + TargetLowering::AddrMode AM; + if (N->getOpcode() == ISD::ADD) { + AM.HasBaseReg = true; + ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (Offset) + // [reg +/- imm] + AM.BaseOffs = Offset->getSExtValue(); + else + // [reg +/- reg] + AM.Scale = 1; + } else if (N->getOpcode() == ISD::SUB) { + AM.HasBaseReg = true; + ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (Offset) + // [reg +/- imm] + AM.BaseOffs = -Offset->getSExtValue(); + else + // [reg +/- reg] + AM.Scale = 1; + } else + return false; + + return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, + 
VT.getTypeForEVT(*DAG.getContext()), AS); +} + SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 && "Unexpected binary operator"); @@ -2021,12 +2115,12 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { SDValue CT = Sel.getOperand(1); if (!isConstantOrConstantVector(CT, true) && - !isConstantFPBuildVectorOrConstantFP(CT)) + !DAG.isConstantFPBuildVectorOrConstantFP(CT)) return SDValue(); SDValue CF = Sel.getOperand(2); if (!isConstantOrConstantVector(CF, true) && - !isConstantFPBuildVectorOrConstantFP(CF)) + !DAG.isConstantFPBuildVectorOrConstantFP(CF)) return SDValue(); // Bail out if any constants are opaque because we can't constant fold those. @@ -2043,19 +2137,10 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { SDValue CBO = BO->getOperand(SelOpNo ^ 1); if (!CanFoldNonConst && !isConstantOrConstantVector(CBO, true) && - !isConstantFPBuildVectorOrConstantFP(CBO)) + !DAG.isConstantFPBuildVectorOrConstantFP(CBO)) return SDValue(); - EVT VT = Sel.getValueType(); - - // In case of shift value and shift amount may have different VT. For instance - // on x86 shift amount is i8 regardles of LHS type. Bail out if we have - // swapped operands and value types do not match. NB: x86 is fine if operands - // are not swapped with shift amount VT being not bigger than shifted value. - // TODO: that is possible to check for a shift operation, correct VTs and - // still perform optimization on x86 if needed. - if (SelOpNo && VT != CBO.getValueType()) - return SDValue(); + EVT VT = BO->getValueType(0); // We have a select-of-constants followed by a binary operator with a // constant. Eliminate the binop by pulling the constant math into the select. 
@@ -2065,14 +2150,14 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { : DAG.getNode(BinOpcode, DL, VT, CT, CBO); if (!CanFoldNonConst && !NewCT.isUndef() && !isConstantOrConstantVector(NewCT, true) && - !isConstantFPBuildVectorOrConstantFP(NewCT)) + !DAG.isConstantFPBuildVectorOrConstantFP(NewCT)) return SDValue(); SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF) : DAG.getNode(BinOpcode, DL, VT, CF, CBO); if (!CanFoldNonConst && !NewCF.isUndef() && !isConstantOrConstantVector(NewCF, true) && - !isConstantFPBuildVectorOrConstantFP(NewCF)) + !DAG.isConstantFPBuildVectorOrConstantFP(NewCF)) return SDValue(); SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF); @@ -2402,8 +2487,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)). if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) { - APInt C0 = N0->getConstantOperandAPInt(0); - APInt C1 = N1->getConstantOperandAPInt(0); + const APInt &C0 = N0->getConstantOperandAPInt(0); + const APInt &C1 = N1->getConstantOperandAPInt(0); return DAG.getVScale(DL, VT, C0 + C1); } @@ -2411,9 +2496,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if ((N0.getOpcode() == ISD::ADD) && (N0.getOperand(1).getOpcode() == ISD::VSCALE) && (N1.getOpcode() == ISD::VSCALE)) { - auto VS0 = N0.getOperand(1)->getConstantOperandAPInt(0); - auto VS1 = N1->getConstantOperandAPInt(0); - auto VS = DAG.getVScale(DL, VT, VS0 + VS1); + const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0); + const APInt &VS1 = N1->getConstantOperandAPInt(0); + SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1); return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS); } @@ -2631,36 +2716,18 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { return SDValue(); } -static SDValue flipBoolean(SDValue V, const SDLoc &DL, - SelectionDAG &DAG, const TargetLowering &TLI) { - EVT VT = V.getValueType(); - - SDValue Cst; - switch (TLI.getBooleanContents(VT)) { - 
case TargetLowering::ZeroOrOneBooleanContent: - case TargetLowering::UndefinedBooleanContent: - Cst = DAG.getConstant(1, DL, VT); - break; - case TargetLowering::ZeroOrNegativeOneBooleanContent: - Cst = DAG.getAllOnesConstant(DL, VT); - break; - } - - return DAG.getNode(ISD::XOR, DL, VT, V, Cst); -} - /** * Flips a boolean if it is cheaper to compute. If the Force parameters is set, * then the flip also occurs if computing the inverse is the same cost. * This function returns an empty SDValue in case it cannot flip the boolean * without increasing the cost of the computation. If you want to flip a boolean - * no matter what, use flipBoolean. + * no matter what, use DAG.getLogicalNOT. */ static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force) { if (Force && isa<ConstantSDNode>(V)) - return flipBoolean(V, SDLoc(V), DAG, TLI); + return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType()); if (V.getOpcode() != ISD::XOR) return SDValue(); @@ -2687,7 +2754,7 @@ static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, if (IsFlip) return V.getOperand(0); if (Force) - return flipBoolean(V, SDLoc(V), DAG, TLI); + return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType()); return SDValue(); } @@ -2724,8 +2791,8 @@ SDValue DAGCombiner::visitADDO(SDNode *N) { if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) { SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(), DAG.getConstant(0, DL, VT), N0.getOperand(0)); - return CombineTo(N, Sub, - flipBoolean(Sub.getValue(1), DL, DAG, TLI)); + return CombineTo( + N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1))); } if (SDValue Combined = visitUADDOLike(N0, N1, N)) @@ -2820,6 +2887,28 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + SDLoc DL(N); + + // canonicalize constant to RHS + ConstantSDNode 
*N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + if (N0C && !N1C) + return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn); + + // fold (saddo_carry x, y, false) -> (saddo x, y) + if (isNullConstant(CarryIn)) { + if (!LegalOperations || + TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0))) + return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1); + } + + return SDValue(); +} + /** * If we are facing some sort of diamond carry propapagtion pattern try to * break it up to generate something like: @@ -3005,8 +3094,8 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDLoc DL(N); SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1, N0.getOperand(0), NotC); - return CombineTo(N, Sub, - flipBoolean(Sub.getValue(1), DL, DAG, TLI)); + return CombineTo( + N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1))); } // Iff the flag result is dead: @@ -3111,6 +3200,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // 0 - X --> X if X is 0 or the minimum signed value. return N1; } + + // Convert 0 - abs(x). + SDValue Result; + if (N1->getOpcode() == ISD::ABS && + !TLI.isOperationLegalOrCustom(ISD::ABS, VT) && + TLI.expandABS(N1.getNode(), Result, DAG, true)) + return Result; } // Canonicalize (sub -1, x) -> ~x, i.e. 
(xor x, -1) @@ -3306,12 +3402,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) { SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1); SDValue S0 = N1.getOperand(0); - if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) { - unsigned OpSizeInBits = VT.getScalarSizeInBits(); + if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1))) - if (C->getAPIntValue() == (OpSizeInBits - 1)) + if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1)) return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0); - } } } @@ -3342,7 +3436,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C)) if (N1.getOpcode() == ISD::VSCALE) { - APInt IntVal = N1.getConstantOperandAPInt(0); + const APInt &IntVal = N1.getConstantOperandAPInt(0); return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal)); } @@ -3501,6 +3595,21 @@ SDValue DAGCombiner::visitSUBCARRY(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + + // fold (ssubo_carry x, y, false) -> (ssubo x, y) + if (isNullConstant(CarryIn)) { + if (!LegalOperations || + TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0))) + return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1); + } + + return SDValue(); +} + // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and // UMULFIXSAT here. SDValue DAGCombiner::visitMULFIX(SDNode *N) { @@ -3606,19 +3715,30 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { getShiftAmountTy(N0.getValueType())))); } - // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub. + // Try to transform: + // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub. 
// mul x, (2^N + 1) --> add (shl x, N), x // mul x, (2^N - 1) --> sub (shl x, N), x // Examples: x * 33 --> (x << 5) + x // x * 15 --> (x << 4) - x // x * -33 --> -((x << 5) + x) // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4) + // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub. + // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M)) + // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M)) + // Examples: x * 0x8800 --> (x << 15) + (x << 11) + // x * 0xf800 --> (x << 16) - (x << 11) + // x * -0x8800 --> -((x << 15) + (x << 11)) + // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16) if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) { // TODO: We could handle more general decomposition of any constant by // having the target set a limit on number of ops and making a // callback to determine that sequence (similar to sqrt expansion). unsigned MathOp = ISD::DELETED_NODE; APInt MulC = ConstValue1.abs(); + // The constant `2` should be treated as (2^0 + 1). + unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros(); + MulC.lshrInPlace(TZeros); if ((MulC - 1).isPowerOf2()) MathOp = ISD::ADD; else if ((MulC + 1).isPowerOf2()) @@ -3627,12 +3747,17 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if (MathOp != ISD::DELETED_NODE) { unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2(); + ShAmt += TZeros; assert(ShAmt < VT.getScalarSizeInBits() && "multiply-by-constant generated out of bounds shift"); SDLoc DL(N); SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT)); - SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0); + SDValue R = + TZeros ? 
DAG.getNode(MathOp, DL, VT, Shl, + DAG.getNode(ISD::SHL, DL, VT, N0, + DAG.getConstant(TZeros, DL, VT))) + : DAG.getNode(MathOp, DL, VT, Shl, N0); if (ConstValue1.isNegative()) R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R); return R; @@ -3684,11 +3809,42 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)). if (N0.getOpcode() == ISD::VSCALE) if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) { - APInt C0 = N0.getConstantOperandAPInt(0); - APInt C1 = NC1->getAPIntValue(); + const APInt &C0 = N0.getConstantOperandAPInt(0); + const APInt &C1 = NC1->getAPIntValue(); return DAG.getVScale(SDLoc(N), VT, C0 * C1); } + // Fold ((mul x, 0/undef) -> 0, + // (mul x, 1) -> x) -> x) + // -> and(x, mask) + // We can replace vectors with '0' and '1' factors with a clearing mask. + if (VT.isFixedLengthVector()) { + unsigned NumElts = VT.getVectorNumElements(); + SmallBitVector ClearMask; + ClearMask.reserve(NumElts); + auto IsClearMask = [&ClearMask](ConstantSDNode *V) { + if (!V || V->isNullValue()) { + ClearMask.push_back(true); + return true; + } + ClearMask.push_back(false); + return V->isOne(); + }; + if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) && + ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) { + assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector"); + SDLoc DL(N); + EVT LegalSVT = N1.getOperand(0).getValueType(); + SDValue Zero = DAG.getConstant(0, DL, LegalSVT); + SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT); + SmallVector<SDValue, 16> Mask(NumElts, AllOnes); + for (unsigned I = 0; I != NumElts; ++I) + if (ClearMask[I]) + Mask[I] = Zero; + return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask)); + } + } + // reassociate mul if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags())) return RMUL; @@ -4108,9 +4264,9 @@ SDValue DAGCombiner::visitREM(SDNode *N) { if (DAG.SignBitIsZero(N1) && 
DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::UREM, DL, VT, N0, N1); } else { - SDValue NegOne = DAG.getAllOnesConstant(DL, VT); if (DAG.isKnownToBeAPowerOfTwo(N1)) { // fold (urem x, pow2) -> (and x, pow2-1) + SDValue NegOne = DAG.getAllOnesConstant(DL, VT); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne); AddToWorklist(Add.getNode()); return DAG.getNode(ISD::AND, DL, VT, N0, Add); @@ -4118,6 +4274,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) { if (N1.getOpcode() == ISD::SHL && DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) { // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) + SDValue NegOne = DAG.getAllOnesConstant(DL, VT); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne); AddToWorklist(Add.getNode()); return DAG.getNode(ISD::AND, DL, VT, N0, Add); @@ -4186,7 +4343,8 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { // If the type twice as wide is legal, transform the mulhs to a wider multiply // plus a shift. - if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) { + if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() && + !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); @@ -4242,7 +4400,8 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { // If the type twice as wide is legal, transform the mulhu to a wider multiply // plus a shift. - if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) { + if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() && + !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); @@ -4448,6 +4607,10 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1); } + // Simplify the operands using demanded-bits information. 
+ if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } @@ -4916,8 +5079,15 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST, if (!LDST->isSimple()) return false; + EVT LdStMemVT = LDST->getMemoryVT(); + + // Bail out when changing the scalable property, since we can't be sure that + // we're actually narrowing here. + if (LdStMemVT.isScalableVector() != MemVT.isScalableVector()) + return false; + // Verify that we are actually reducing a load width here. - if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits()) + if (LdStMemVT.bitsLT(MemVT)) return false; // Ensure that this isn't going to produce an unsupported memory access. @@ -5272,6 +5442,31 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return N1; if (ISD::isBuildVectorAllOnes(N1.getNode())) return N0; + + // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load + auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0); + auto *BVec = dyn_cast<BuildVectorSDNode>(N1); + if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD && + N0.hasOneUse() && N1.hasOneUse()) { + EVT LoadVT = MLoad->getMemoryVT(); + EVT ExtVT = VT; + if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) { + // For this AND to be a zero extension of the masked load the elements + // of the BuildVec must mask the bottom bits of the extended element + // type + if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) { + uint64_t ElementSize = + LoadVT.getVectorElementType().getScalarSizeInBits(); + if (Splat->getAPIntValue().isMask(ElementSize)) { + return DAG.getMaskedLoad( + ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(), + MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(), + LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(), + ISD::ZEXTLOAD, MLoad->isExpandingLoad()); + } + } + } + } } // fold (and c1, c2) -> c1&c2 @@ -5440,6 +5635,28 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } + // fold (and (masked_gather x)) -> (zext_masked_gather x) + if (auto 
*GN0 = dyn_cast<MaskedGatherSDNode>(N0)) { + EVT MemVT = GN0->getMemoryVT(); + EVT ScalarVT = MemVT.getScalarType(); + + if (SDValue(GN0, 0).hasOneUse() && + isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) && + TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) { + SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(), + GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()}; + + SDValue ZExtLoad = DAG.getMaskedGather( + DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops, + GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD); + + CombineTo(N, ZExtLoad); + AddToWorklist(ZExtLoad.getNode()); + // Avoid recheck of N. + return SDValue(N, 0); + } + } + // fold (and (load x), 255) -> (zextload x, i8) // fold (and (extload x, i16), 255) -> (zextload x, i8) // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) @@ -5534,6 +5751,31 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue V = combineShiftAnd1ToBitTest(N, DAG)) return V; + // Recognize the following pattern: + // + // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask) + // + // where bitmask is a mask that clears the upper bits of AndVT. The + // number of bits in bitmask must be a power of two. + auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) { + if (LHS->getOpcode() != ISD::SIGN_EXTEND) + return false; + + auto *C = dyn_cast<ConstantSDNode>(RHS); + if (!C) + return false; + + if (!C->getAPIntValue().isMask( + LHS.getOperand(0).getValueType().getFixedSizeInBits())) + return false; + + return true; + }; + + // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...). 
+ if (IsAndZeroExtMask(N0, N1)) + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0)); + return SDValue(); } @@ -6782,11 +7024,11 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, return None; } -static unsigned LittleEndianByteAt(unsigned BW, unsigned i) { +static unsigned littleEndianByteAt(unsigned BW, unsigned i) { return i; } -static unsigned BigEndianByteAt(unsigned BW, unsigned i) { +static unsigned bigEndianByteAt(unsigned BW, unsigned i) { return BW - i - 1; } @@ -6803,8 +7045,8 @@ static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets, bool BigEndian = true, LittleEndian = true; for (unsigned i = 0; i < Width; i++) { int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset; - LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i); - BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i); + LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i); + BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i); if (!BigEndian && !LittleEndian) return None; } @@ -6847,80 +7089,90 @@ static SDValue stripTruncAndExt(SDValue Value) { /// p[3] = (val >> 0) & 0xFF; /// => /// *((i32)p) = BSWAP(val); -SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { +SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { + // The matching looks for "store (trunc x)" patterns that appear early but are + // likely to be replaced by truncating store nodes during combining. + // TODO: If there is evidence that running this later would help, this + // limitation could be removed. Legality checks may need to be added + // for the created store and optional bswap/rotate. + if (LegalOperations) + return SDValue(); + // Collect all the stores in the chain. 
SDValue Chain; SmallVector<StoreSDNode *, 8> Stores; for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) { // TODO: Allow unordered atomics when wider type is legal (see D66309) - if (Store->getMemoryVT() != MVT::i8 || + EVT MemVT = Store->getMemoryVT(); + if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) || !Store->isSimple() || Store->isIndexed()) return SDValue(); Stores.push_back(Store); Chain = Store->getChain(); } - // Handle the simple type only. - unsigned Width = Stores.size(); - EVT VT = EVT::getIntegerVT( - *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits()); - if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64) + // There is no reason to continue if we do not have at least a pair of stores. + if (Stores.size() < 2) return SDValue(); - if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT)) + // Handle simple types only. + LLVMContext &Context = *DAG.getContext(); + unsigned NumStores = Stores.size(); + unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits(); + unsigned WideNumBits = NumStores * NarrowNumBits; + EVT WideVT = EVT::getIntegerVT(Context, WideNumBits); + if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64) return SDValue(); - // Check if all the bytes of the combined value we are looking at are stored - // to the same base address. Collect bytes offsets from Base address into - // ByteOffsets. - SDValue CombinedValue; - SmallVector<int64_t, 8> ByteOffsets(Width, INT64_MAX); + // Check if all bytes of the source value that we are looking at are stored + // to the same base address. Collect offsets from Base address into OffsetMap. + SDValue SourceValue; + SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX); int64_t FirstOffset = INT64_MAX; StoreSDNode *FirstStore = nullptr; Optional<BaseIndexOffset> Base; for (auto Store : Stores) { - // All the stores store different byte of the CombinedValue. A truncate is - // required to get that byte value. 
+ // All the stores store different parts of the CombinedValue. A truncate is + // required to get the partial value. SDValue Trunc = Store->getValue(); if (Trunc.getOpcode() != ISD::TRUNCATE) return SDValue(); - // A shift operation is required to get the right byte offset, except the - // first byte. + // Other than the first/last part, a shift operation is required to get the + // offset. int64_t Offset = 0; - SDValue Value = Trunc.getOperand(0); - if (Value.getOpcode() == ISD::SRL || - Value.getOpcode() == ISD::SRA) { - auto *ShiftOffset = dyn_cast<ConstantSDNode>(Value.getOperand(1)); - // Trying to match the following pattern. The shift offset must be - // a constant and a multiple of 8. It is the byte offset in "y". + SDValue WideVal = Trunc.getOperand(0); + if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) && + isa<ConstantSDNode>(WideVal.getOperand(1))) { + // The shift amount must be a constant multiple of the narrow type. + // It is translated to the offset address in the wide source value "y". // - // x = srl y, offset + // x = srl y, ShiftAmtC // i8 z = trunc x // store z, ... - if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8)) + uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1); + if (ShiftAmtC % NarrowNumBits != 0) return SDValue(); - Offset = ShiftOffset->getSExtValue()/8; - Value = Value.getOperand(0); + Offset = ShiftAmtC / NarrowNumBits; + WideVal = WideVal.getOperand(0); } - // Stores must share the same combined value with different offsets. - if (!CombinedValue) - CombinedValue = Value; - else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value)) + // Stores must share the same source value with different offsets. + // Truncate and extends should be stripped to get the single source value. 
+ if (!SourceValue) + SourceValue = WideVal; + else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal)) return SDValue(); - - // The trunc and all the extend operation should be stripped to get the - // real value we are stored. - else if (CombinedValue.getValueType() != VT) { - if (Value.getValueType() == VT || - Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits()) - CombinedValue = Value; - // Give up if the combined value type is smaller than the store size. - if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits()) + else if (SourceValue.getValueType() != WideVT) { + if (WideVal.getValueType() == WideVT || + WideVal.getScalarValueSizeInBits() > + SourceValue.getScalarValueSizeInBits()) + SourceValue = WideVal; + // Give up if the source value type is smaller than the store size. + if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits()) return SDValue(); } - // Stores must share the same base address + // Stores must share the same base address. BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG); int64_t ByteOffsetFromBase = 0; if (!Base) @@ -6928,60 +7180,78 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase)) return SDValue(); - // Remember the first byte store + // Remember the first store. if (ByteOffsetFromBase < FirstOffset) { FirstStore = Store; FirstOffset = ByteOffsetFromBase; } // Map the offset in the store and the offset in the combined value, and // early return if it has been set before. 
- if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX) + if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX) return SDValue(); - ByteOffsets[Offset] = ByteOffsetFromBase; + OffsetMap[Offset] = ByteOffsetFromBase; } assert(FirstOffset != INT64_MAX && "First byte offset must be set"); assert(FirstStore && "First store must be set"); - // Check if the bytes of the combined value we are looking at match with - // either big or little endian value store. - Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset); - if (!IsBigEndian.hasValue()) - return SDValue(); - - // The node we are looking at matches with the pattern, check if we can - // replace it with a single bswap if needed and store. - - // If the store needs byte swap check if the target supports it - bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian; - - // Before legalize we can introduce illegal bswaps which will be later - // converted to an explicit bswap sequence. This way we end up with a single - // store and byte shuffling instead of several stores and byte shuffling. - if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT)) - return SDValue(); - // Check that a store of the wide type is both allowed and fast on the target + const DataLayout &Layout = DAG.getDataLayout(); bool Fast = false; - bool Allowed = - TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, - *FirstStore->getMemOperand(), &Fast); + bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT, + *FirstStore->getMemOperand(), &Fast); if (!Allowed || !Fast) return SDValue(); - if (VT != CombinedValue.getValueType()) { - assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() && - "Get unexpected store value to combine"); - CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, - CombinedValue); + // Check if the pieces of the value are going to the expected places in memory + // to merge the stores. 
+ auto checkOffsets = [&](bool MatchLittleEndian) { + if (MatchLittleEndian) { + for (unsigned i = 0; i != NumStores; ++i) + if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset) + return false; + } else { // MatchBigEndian by reversing loop counter. + for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j) + if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset) + return false; + } + return true; + }; + + // Check if the offsets line up for the native data layout of this target. + bool NeedBswap = false; + bool NeedRotate = false; + if (!checkOffsets(Layout.isLittleEndian())) { + // Special-case: check if byte offsets line up for the opposite endian. + if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian())) + NeedBswap = true; + else if (NumStores == 2 && checkOffsets(Layout.isBigEndian())) + NeedRotate = true; + else + return SDValue(); + } + + SDLoc DL(N); + if (WideVT != SourceValue.getValueType()) { + assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits && + "Unexpected store value to merge"); + SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue); } - if (NeedsBswap) - CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue); + // Before legalize we can introduce illegal bswaps/rotates which will be later + // converted to an explicit bswap sequence. This way we end up with a single + // store and byte shuffling instead of several stores and byte shuffling. 
+ if (NeedBswap) { + SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue); + } else if (NeedRotate) { + assert(WideNumBits % 2 == 0 && "Unexpected type for rotate"); + SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT); + SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt); + } SDValue NewStore = - DAG.getStore(Chain, SDLoc(N), CombinedValue, FirstStore->getBasePtr(), - FirstStore->getPointerInfo(), FirstStore->getAlignment()); + DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(), + FirstStore->getPointerInfo(), FirstStore->getAlign()); // Rely on other DAG combine rules to remove the other individual stores. DAG.ReplaceAllUsesWith(N, NewStore.getNode()); @@ -7036,8 +7306,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { "can only analyze providers for individual bytes not bit"); unsigned LoadByteWidth = LoadBitWidth / 8; return IsBigEndianTarget - ? BigEndianByteAt(LoadByteWidth, P.ByteOffset) - : LittleEndianByteAt(LoadByteWidth, P.ByteOffset); + ? bigEndianByteAt(LoadByteWidth, P.ByteOffset) + : littleEndianByteAt(LoadByteWidth, P.ByteOffset); }; Optional<BaseIndexOffset> Base; @@ -7164,10 +7434,10 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { if (!Allowed || !Fast) return SDValue(); - SDValue NewLoad = DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, - SDLoc(N), VT, Chain, FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), MemVT, - FirstLoad->getAlignment()); + SDValue NewLoad = + DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT, + Chain, FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign()); // Transfer chain users from old loads to the new load. for (LoadSDNode *L : Loads) @@ -7337,9 +7607,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (N0.hasOneUse()) { // FIXME Can we handle multiple uses? Could we token factor the chain // results from the new/old setcc? 
- SDValue SetCC = DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC, - N0.getOperand(0), - N0Opcode == ISD::STRICT_FSETCCS); + SDValue SetCC = + DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC, + N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS); CombineTo(N, SetCC); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1)); recursivelyDeleteUnusedNodes(N0.getNode()); @@ -7440,12 +7710,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) { SDValue A0 = A.getOperand(0), A1 = A.getOperand(1); SDValue S0 = S.getOperand(0); - if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) { - unsigned OpSizeInBits = VT.getScalarSizeInBits(); + if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1))) - if (C->getAPIntValue() == (OpSizeInBits - 1)) + if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1)) return DAG.getNode(ISD::ABS, DL, VT, S0); - } } } @@ -7980,10 +8248,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)). if (N0.getOpcode() == ISD::VSCALE) if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) { - auto DL = SDLoc(N); - APInt C0 = N0.getConstantOperandAPInt(0); - APInt C1 = NC1->getAPIntValue(); - return DAG.getVScale(DL, VT, C0 << C1); + const APInt &C0 = N0.getConstantOperandAPInt(0); + const APInt &C1 = NC1->getAPIntValue(); + return DAG.getVScale(SDLoc(N), VT, C0 << C1); } return SDValue(); @@ -8032,12 +8299,6 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG, if (NarrowVT != RightOp.getOperand(0).getValueType()) return SDValue(); - // Only transform into mulh if mulh for the narrow type is cheaper than - // a multiply followed by a shift. This should also check if mulh is - // legal for NarrowVT on the target. - if (!TLI.isMulhCheaperThanMulShift(NarrowVT)) - return SDValue(); - // Proceed with the transformation if the wide type is twice as large // as the narrow type. 
unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits(); @@ -8055,6 +8316,10 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG, // we use mulhs. Othewise, zero extends (zext) use mulhu. unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU; + // Combine to mulh if mulh is legal/custom for the narrow type on the target. + if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT)) + return SDValue(); + SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), RightOp.getOperand(0)); return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1) @@ -8556,8 +8821,8 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) { RHS->getAddressSpace(), NewAlign, RHS->getMemOperand()->getFlags(), &Fast) && Fast) { - SDValue NewPtr = - DAG.getMemBasePlusOffset(RHS->getBasePtr(), PtrOff, DL); + SDValue NewPtr = DAG.getMemBasePlusOffset( + RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL); AddToWorklist(NewPtr.getNode()); SDValue Load = DAG.getLoad( VT, DL, RHS->getChain(), NewPtr, @@ -9154,16 +9419,75 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); } +bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) { + if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD) + return false; + + // For now we check only the LHS of the add. + SDValue LHS = Index.getOperand(0); + SDValue SplatVal = DAG.getSplatValue(LHS); + if (!SplatVal) + return false; + + BasePtr = SplatVal; + Index = Index.getOperand(1); + return true; +} + +// Fold sext/zext of index into index type. +bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index, + bool Scaled, SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + if (Index.getOpcode() == ISD::ZERO_EXTEND) { + SDValue Op = Index.getOperand(0); + MGS->setIndexType(Scaled ? 
ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED); + if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) { + Index = Op; + return true; + } + } + + if (Index.getOpcode() == ISD::SIGN_EXTEND) { + SDValue Op = Index.getOperand(0); + MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED); + if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) { + Index = Op; + return true; + } + } + + return false; +} + SDValue DAGCombiner::visitMSCATTER(SDNode *N) { MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); SDValue Mask = MSC->getMask(); SDValue Chain = MSC->getChain(); + SDValue Index = MSC->getIndex(); + SDValue Scale = MSC->getScale(); + SDValue StoreVal = MSC->getValue(); + SDValue BasePtr = MSC->getBasePtr(); SDLoc DL(N); // Zap scatters with a zero mask. if (ISD::isBuildVectorAllZeros(Mask.getNode())) return Chain; + if (refineUniformBase(BasePtr, Index, DAG)) { + SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale}; + return DAG.getMaskedScatter( + DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops, + MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore()); + } + + if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) { + SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale}; + return DAG.getMaskedScatter( + DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops, + MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore()); + } + return SDValue(); } @@ -9177,6 +9501,14 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return Chain; + // If this is a masked load with an all ones mask, we can use a unmasked load. + // FIXME: Can we do this for indexed, compressing, or truncating stores? 
+ if (ISD::isBuildVectorAllOnes(Mask.getNode()) && + MST->isUnindexed() && !MST->isCompressingStore() && + !MST->isTruncatingStore()) + return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(), + MST->getBasePtr(), MST->getMemOperand()); + // Try transforming N to an indexed store. if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) return SDValue(N, 0); @@ -9187,11 +9519,32 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { SDValue DAGCombiner::visitMGATHER(SDNode *N) { MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N); SDValue Mask = MGT->getMask(); + SDValue Chain = MGT->getChain(); + SDValue Index = MGT->getIndex(); + SDValue Scale = MGT->getScale(); + SDValue PassThru = MGT->getPassThru(); + SDValue BasePtr = MGT->getBasePtr(); SDLoc DL(N); // Zap gathers with a zero mask. if (ISD::isBuildVectorAllZeros(Mask.getNode())) - return CombineTo(N, MGT->getPassThru(), MGT->getChain()); + return CombineTo(N, PassThru, MGT->getChain()); + + if (refineUniformBase(BasePtr, Index, DAG)) { + SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale}; + return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other), + PassThru.getValueType(), DL, Ops, + MGT->getMemOperand(), MGT->getIndexType(), + MGT->getExtensionType()); + } + + if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) { + SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale}; + return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other), + PassThru.getValueType(), DL, Ops, + MGT->getMemOperand(), MGT->getIndexType(), + MGT->getExtensionType()); + } return SDValue(); } @@ -9205,6 +9558,16 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return CombineTo(N, MLD->getPassThru(), MLD->getChain()); + // If this is a masked load with an all ones mask, we can use a unmasked load. + // FIXME: Can we do this for indexed, expanding, or extending loads? 
+ if (ISD::isBuildVectorAllOnes(Mask.getNode()) && + MLD->isUnindexed() && !MLD->isExpandingLoad() && + MLD->getExtensionType() == ISD::NON_EXTLOAD) { + SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(), + MLD->getBasePtr(), MLD->getMemOperand()); + return CombineTo(N, NewLd, NewLd.getValue(1)); + } + // Try transforming N to an indexed load. if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) return SDValue(N, 0); @@ -9364,6 +9727,113 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2); } } + + // Match VSELECTs into add with unsigned saturation. + if (hasOperation(ISD::UADDSAT, VT)) { + // Check if one of the arms of the VSELECT is vector with all bits set. + // If it's on the left side invert the predicate to simplify logic below. + SDValue Other; + ISD::CondCode SatCC = CC; + if (ISD::isBuildVectorAllOnes(N1.getNode())) { + Other = N2; + SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType()); + } else if (ISD::isBuildVectorAllOnes(N2.getNode())) { + Other = N1; + } + + if (Other && Other.getOpcode() == ISD::ADD) { + SDValue CondLHS = LHS, CondRHS = RHS; + SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1); + + // Canonicalize condition operands. + if (SatCC == ISD::SETUGE) { + std::swap(CondLHS, CondRHS); + SatCC = ISD::SETULE; + } + + // We can test against either of the addition operands. + // x <= x+y ? x+y : ~0 --> uaddsat x, y + // x+y >= x ? x+y : ~0 --> uaddsat x, y + if (SatCC == ISD::SETULE && Other == CondRHS && + (OpLHS == CondLHS || OpRHS == CondLHS)) + return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS); + + if (isa<BuildVectorSDNode>(OpRHS) && isa<BuildVectorSDNode>(CondRHS) && + CondLHS == OpLHS) { + // If the RHS is a constant we have to reverse the const + // canonicalization. + // x >= ~C ? 
x+C : ~0 --> uaddsat x, C + auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) { + return Cond->getAPIntValue() == ~Op->getAPIntValue(); + }; + if (SatCC == ISD::SETULE && + ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT)) + return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS); + } + } + } + + // Match VSELECTs into sub with unsigned saturation. + if (hasOperation(ISD::USUBSAT, VT)) { + // Check if one of the arms of the VSELECT is a zero vector. If it's on + // the left side invert the predicate to simplify logic below. + SDValue Other; + ISD::CondCode SatCC = CC; + if (ISD::isBuildVectorAllZeros(N1.getNode())) { + Other = N2; + SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType()); + } else if (ISD::isBuildVectorAllZeros(N2.getNode())) { + Other = N1; + } + + if (Other && Other.getNumOperands() == 2 && Other.getOperand(0) == LHS) { + SDValue CondRHS = RHS; + SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1); + + // Look for a general sub with unsigned saturation first. + // x >= y ? x-y : 0 --> usubsat x, y + // x > y ? x-y : 0 --> usubsat x, y + if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) && + Other.getOpcode() == ISD::SUB && OpRHS == CondRHS) + return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS); + + if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) { + if (isa<BuildVectorSDNode>(CondRHS)) { + // If the RHS is a constant we have to reverse the const + // canonicalization. + // x > C-1 ? 
x+-C : 0 --> usubsat x, C + auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) { + return (!Op && !Cond) || + (Op && Cond && + Cond->getAPIntValue() == (-Op->getAPIntValue() - 1)); + }; + if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD && + ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT, + /*AllowUndefs*/ true)) { + OpRHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), + OpRHS); + return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS); + } + + // Another special case: If C was a sign bit, the sub has been + // canonicalized into a xor. + // FIXME: Would it be better to use computeKnownBits to determine + // whether it's safe to decanonicalize the xor? + // x s< 0 ? x^C : 0 --> usubsat x, C + if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) { + if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR && + ISD::isBuildVectorAllZeros(CondRHS.getNode()) && + OpRHSConst->getAPIntValue().isSignMask()) { + // Note that we have to rebuild the RHS constant here to ensure + // we don't rely on particular values of undef lanes. 
+ OpRHS = DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT); + return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS); + } + } + } + } + } + } } if (SimplifySelectOps(N, N1, N2)) @@ -9722,14 +10192,14 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { SDValue BasePtr = LN0->getBasePtr(); for (unsigned Idx = 0; Idx < NumSplits; Idx++) { const unsigned Offset = Idx * Stride; - const unsigned Align = MinAlign(LN0->getAlignment(), Offset); + const Align Align = commonAlignment(LN0->getAlign(), Offset); SDValue SplitLoad = DAG.getExtLoad( ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr, LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); - BasePtr = DAG.getMemBasePlusOffset(BasePtr, Stride, DL); + BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL); Loads.push_back(SplitLoad.getValue(0)); Chains.push_back(SplitLoad.getValue(1)); @@ -10146,7 +10616,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - EVT N00VT = N0.getOperand(0).getValueType(); + EVT N00VT = N00.getValueType(); // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. @@ -10240,6 +10710,29 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT)); } + // fold sext (not i1 X) -> add (zext i1 X), -1 + // TODO: This could be extended to handle bool vectors. + if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() && + (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) && + TLI.isOperationLegal(ISD::ADD, VT)))) { + // If we can eliminate the 'not', the sext form should be better + if (SDValue NewXor = visitXOR(N0.getNode())) { + // Returning N0 is a form of in-visit replacement that may have + // invalidated N0. 
+ if (NewXor.getNode() == N0.getNode()) { + // Return SDValue here as the xor should have already been replaced in + // this sext. + return SDValue(); + } else { + // Return a new sext with the new xor. + return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor); + } + } + + SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); + return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT)); + } + return SDValue(); } @@ -10507,13 +11000,16 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { N0.getValueType()); } - // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc) SDLoc DL(N); + EVT N0VT = N0.getValueType(); + EVT N00VT = N0.getOperand(0).getValueType(); if (SDValue SCC = SimplifySelectCC( - DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT), - DAG.getConstant(0, DL, VT), + DL, N0.getOperand(0), N0.getOperand(1), + DAG.getBoolConstant(true, DL, N0VT, N00VT), + DAG.getBoolConstant(false, DL, N0VT, N00VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true)) - return SCC; + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC); } // (zext (shl (zext x), cst)) -> (shl (zext x), cst) @@ -10602,22 +11098,26 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // fold (aext (load x)) -> (aext (truncate (extload x))) // None of the supported targets knows how to perform load and any_ext - // on vectors in one instruction. We only perform this transformation on - // scalars. - if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && - ISD::isUNINDEXEDLoad(N0.getNode()) && - TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { + // on vectors in one instruction, so attempt to fold to zext instead. + if (VT.isVector()) { + // Try to simplify (zext (load x)). 
+ if (SDValue foldedExt = + tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0, + ISD::ZEXTLOAD, ISD::ZERO_EXTEND)) + return foldedExt; + } else if (ISD::isNON_EXTLoad(N0.getNode()) && + ISD::isUNINDEXEDLoad(N0.getNode()) && + TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { bool DoXform = true; - SmallVector<SDNode*, 4> SetCCs; + SmallVector<SDNode *, 4> SetCCs; if (!N0.hasOneUse()) - DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, - TLI); + DoXform = + ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, - LN0->getChain(), - LN0->getBasePtr(), N0.getValueType(), - LN0->getMemOperand()); + LN0->getChain(), LN0->getBasePtr(), + N0.getValueType(), LN0->getMemOperand()); ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND); // If the load value is used only by N, replace it via CombineTo N. bool NoReplaceTrunc = N0.hasOneUse(); @@ -10626,8 +11126,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); recursivelyDeleteUnusedNodes(LN0); } else { - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), - N0.getValueType(), ExtLoad); + SDValue Trunc = + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); CombineTo(LN0, Trunc, ExtLoad.getValue(1)); } return SDValue(N, 0); // Return N so it doesn't get rechecked! 
@@ -10832,12 +11332,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { return SDValue(); uint64_t ShiftAmt = N01->getZExtValue(); - uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits(); + uint64_t MemoryWidth = LN0->getMemoryVT().getScalarSizeInBits(); if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt) ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt); else ExtVT = EVT::getIntegerVT(*DAG.getContext(), - VT.getSizeInBits() - ShiftAmt); + VT.getScalarSizeInBits() - ShiftAmt); } else if (Opc == ISD::AND) { // An AND with a constant mask is the same as a truncate + zero-extend. auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1)); @@ -10864,12 +11364,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue SRL = N0; if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) { ShAmt = ConstShift->getZExtValue(); - unsigned EVTBits = ExtVT.getSizeInBits(); + unsigned EVTBits = ExtVT.getScalarSizeInBits(); // Is the shift amount a multiple of size of VT? if ((ShAmt & (EVTBits-1)) == 0) { N0 = N0.getOperand(0); // Is the load width a multiple of size of VT? - if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0) + if ((N0.getScalarValueSizeInBits() & (EVTBits - 1)) != 0) return SDValue(); } @@ -10899,7 +11399,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countTrailingOnes()); // If the mask is smaller, recompute the type. 
- if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) && + if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) && TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT)) ExtVT = MaskedVT; } @@ -10930,8 +11430,9 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { return SDValue(); auto AdjustBigEndianShift = [&](unsigned ShAmt) { - unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); - unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); + unsigned LVTStoreBits = + LN0->getMemoryVT().getStoreSizeInBits().getFixedSize(); + unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize(); return LVTStoreBits - EVTStoreBits - ShAmt; }; @@ -10941,13 +11442,13 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { ShAmt = AdjustBigEndianShift(ShAmt); uint64_t PtrOff = ShAmt / 8; - unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); + Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff); SDLoc DL(LN0); // The original load itself didn't wrap, so an offset within it doesn't. SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); - SDValue NewPtr = - DAG.getMemBasePlusOffset(LN0->getBasePtr(), PtrOff, DL, Flags); + SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(), + TypeSize::Fixed(PtrOff), DL, Flags); AddToWorklist(NewPtr.getNode()); SDValue Load; @@ -10969,13 +11470,13 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue Result = Load; if (ShLeftAmt != 0) { EVT ShImmTy = getShiftAmountTy(Result.getValueType()); - if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt)) + if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt)) ShImmTy = VT; // If the shift amount is as large as the result size (but, presumably, // no larger than the source) then the useful bits of the result are // zero; we can't simply return the shortened shift, because the result // of that operation is undefined. 
- if (ShLeftAmt >= VT.getSizeInBits()) + if (ShLeftAmt >= VT.getScalarSizeInBits()) Result = DAG.getConstant(0, DL, VT); else Result = DAG.getNode(ISD::SHL, DL, VT, @@ -11125,6 +11626,41 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } + // fold (sext_inreg (masked_load x)) -> (sext_masked_load x) + // ignore it if the masked load is already sign extended + if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) { + if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() && + Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD && + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) { + SDValue ExtMaskedLoad = DAG.getMaskedLoad( + VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), + Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(), + Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad()); + CombineTo(N, ExtMaskedLoad); + CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x) + if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) { + if (SDValue(GN0, 0).hasOneUse() && + ExtVT == GN0->getMemoryVT() && + TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) { + SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(), + GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()}; + + SDValue ExtLoad = DAG.getMaskedGather( + DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops, + GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD); + + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + AddToWorklist(ExtLoad.getNode()); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } + // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16)) if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) { if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), @@ -11225,10 +11761,11 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { EVT ExTy = N0.getValueType(); EVT TrTy = N->getValueType(0); - unsigned NumElem = VecTy.getVectorNumElements(); + auto EltCnt = VecTy.getVectorElementCount(); unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits(); + auto NewEltCnt = EltCnt * SizeRatio; - EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem); + EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt); assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size"); SDValue EltNo = N0->getOperand(1); @@ -11342,8 +11879,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // after truncation. if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - if (LN0->isSimple() && - LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) { + if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) { SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getMemoryVT(), @@ -11372,9 +11908,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // Stop if more than one members are non-undef. if (NumDefs > 1) break; + VTs.push_back(EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), - X.getValueType().getVectorNumElements())); + X.getValueType().getVectorElementCount())); } if (NumDefs == 0) @@ -11415,8 +11952,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } // Simplify the operands using demanded-bits information. 
- if (!VT.isVector() && - SimplifyDemandedBits(SDValue(N, 0))) + if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry) @@ -11643,7 +12179,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { *LN0->getMemOperand())) { SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), LN0->getAlignment(), + LN0->getPointerInfo(), LN0->getAlign(), LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); return Load; @@ -11990,7 +12526,6 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (!HasFMAD && !HasFMA) return SDValue(); - SDNodeFlags Flags = N->getFlags(); bool CanFuse = Options.UnsafeFPMath || isContractable(N); bool CanReassociate = Options.UnsafeFPMath || N->getFlags().hasAllowReassociation(); @@ -12023,15 +12558,15 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // fold (fadd (fmul x, y), z) -> (fma x, y, z) if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, - N0.getOperand(0), N0.getOperand(1), N1, Flags); + return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), + N0.getOperand(1), N1); } // fold (fadd x, (fmul y, z)) -> (fma y, z, x) // Note: Commutes FADD operands. 
if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, - N1.getOperand(0), N1.getOperand(1), N0, Flags); + return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0), + N1.getOperand(1), N0); } // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E) @@ -12054,8 +12589,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { SDValue B = FMA.getOperand(1); SDValue C = FMA.getOperand(2).getOperand(0); SDValue D = FMA.getOperand(2).getOperand(1); - SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E, Flags); - return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE, Flags); + SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E); + return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE); } // Look through FP_EXTEND nodes to do more combining. @@ -12067,10 +12602,9 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N00.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(1)), N1, Flags); + DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), + N1); } } @@ -12082,10 +12616,9 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N10.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N10.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N10.getOperand(1)), N0, Flags); + DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), + N0); } } @@ -12093,14 +12626,13 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (Aggressive) { // fold (fadd (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y, (fma (fpext u), (fpext v), z)) - auto 
FoldFAddFMAFPExtFMul = [&] ( - SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z, - SDNodeFlags Flags) { + auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V, + SDValue Z) { return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y, DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, U), DAG.getNode(ISD::FP_EXTEND, SL, VT, V), - Z, Flags), Flags); + Z)); }; if (N0.getOpcode() == PreferredFusedOpcode) { SDValue N02 = N0.getOperand(2); @@ -12111,7 +12643,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { N020.getValueType())) { return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1), N020.getOperand(0), N020.getOperand(1), - N1, Flags); + N1); } } } @@ -12121,16 +12653,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // FIXME: This turns two single-precision and one double-precision // operation into two double-precision operations, which might not be // interesting for all targets, especially GPUs. - auto FoldFAddFPExtFMAFMul = [&] ( - SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z, - SDNodeFlags Flags) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, X), - DAG.getNode(ISD::FP_EXTEND, SL, VT, Y), - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, U), - DAG.getNode(ISD::FP_EXTEND, SL, VT, V), - Z, Flags), Flags); + auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V, + SDValue Z) { + return DAG.getNode( + PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X), + DAG.getNode(ISD::FP_EXTEND, SL, VT, Y), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, U), + DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z)); }; if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); @@ -12141,7 +12671,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { N00.getValueType())) { return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1), 
N002.getOperand(0), N002.getOperand(1), - N1, Flags); + N1); } } } @@ -12157,7 +12687,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { N120.getValueType())) { return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1), N120.getOperand(0), N120.getOperand(1), - N0, Flags); + N0); } } } @@ -12176,7 +12706,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { N10.getValueType())) { return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1), N102.getOperand(0), N102.getOperand(1), - N0, Flags); + N0); } } } @@ -12234,8 +12764,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) { if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0), - XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z), - Flags); + XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z)); } return SDValue(); }; @@ -12246,7 +12775,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)), - YZ.getOperand(1), X, Flags); + YZ.getOperand(1), X); } return SDValue(); }; @@ -12277,7 +12806,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { SDValue N01 = N0.getOperand(0).getOperand(1); return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, N00), N01, - DAG.getNode(ISD::FNEG, SL, VT, N1), Flags); + DAG.getNode(ISD::FNEG, SL, VT, N1)); } // Look through FP_EXTEND nodes to do more combining. 
@@ -12290,11 +12819,9 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N00.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(1)), - DAG.getNode(ISD::FNEG, SL, VT, N1), Flags); + DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, N1)); } } @@ -12306,13 +12833,11 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (isContractableFMUL(N10) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N10.getValueType())) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N10.getOperand(0))), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N10.getOperand(1)), - N0, Flags); + return DAG.getNode( + PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))), + DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0); } } @@ -12329,13 +12854,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (isContractableFMUL(N000) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N00.getValueType())) { - return DAG.getNode(ISD::FNEG, SL, VT, - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N000.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N000.getOperand(1)), - N1, Flags)); + return DAG.getNode( + ISD::FNEG, SL, VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)), + N1)); } } } @@ -12353,13 +12877,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (isContractableFMUL(N000) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N000.getValueType())) { - return 
DAG.getNode(ISD::FNEG, SL, VT, - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N000.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N000.getOperand(1)), - N1, Flags)); + return DAG.getNode( + ISD::FNEG, SL, VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)), + N1)); } } } @@ -12371,13 +12894,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (CanFuse && N0.getOpcode() == PreferredFusedOpcode && isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, - N0.getOperand(0), N0.getOperand(1), + return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), + N0.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(2).getOperand(0), N0.getOperand(2).getOperand(1), - DAG.getNode(ISD::FNEG, SL, VT, - N1), Flags), Flags); + DAG.getNode(ISD::FNEG, SL, VT, N1))); } // fold (fsub x, (fma y, z, (fmul u, v))) @@ -12387,13 +12909,11 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N1->hasOneUse() && NoSignedZero) { SDValue N20 = N1.getOperand(2).getOperand(0); SDValue N21 = N1.getOperand(2).getOperand(1); - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, - N1.getOperand(0)), - N1.getOperand(1), - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, N20), - N21, N0, Flags), Flags); + return DAG.getNode( + PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0)); } @@ -12407,15 +12927,13 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (isContractableFMUL(N020) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N020.getValueType())) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, - 
N0.getOperand(0), N0.getOperand(1), - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N020.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N020.getOperand(1)), - DAG.getNode(ISD::FNEG, SL, VT, - N1), Flags), Flags); + return DAG.getNode( + PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), + DAG.getNode( + PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, N1))); } } } @@ -12433,18 +12951,15 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (isContractableFMUL(N002) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N00.getValueType())) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(1)), - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N002.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N002.getOperand(1)), - DAG.getNode(ISD::FNEG, SL, VT, - N1), Flags), Flags); + return DAG.getNode( + PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), + DAG.getNode( + PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, N1))); } } } @@ -12460,16 +12975,13 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N120.getValueType())) { SDValue N1200 = N120.getOperand(0); SDValue N1201 = N120.getOperand(1); - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), - N1.getOperand(1), - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, - VT, N1200)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N1201), - 
N0, Flags), Flags); + return DAG.getNode( + PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0)); } } @@ -12490,18 +13002,15 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { CvtSrc.getValueType())) { SDValue N1020 = N102.getOperand(0); SDValue N1021 = N102.getOperand(1); - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N100)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, N101), - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, - VT, N1020)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N1021), - N0, Flags), Flags); + return DAG.getNode( + PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, N101), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0)); } } } @@ -12517,7 +13026,6 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); SDLoc SL(N); - const SDNodeFlags Flags = N->getFlags(); assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation"); @@ -12549,56 +13057,56 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y) // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y)) - auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) { + auto FuseFADD = [&](SDValue X, SDValue Y) { if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) { if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) { if (C->isExactlyValue(+1.0)) 
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, - Y, Flags); + Y); if (C->isExactlyValue(-1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, - DAG.getNode(ISD::FNEG, SL, VT, Y), Flags); + DAG.getNode(ISD::FNEG, SL, VT, Y)); } } return SDValue(); }; - if (SDValue FMA = FuseFADD(N0, N1, Flags)) + if (SDValue FMA = FuseFADD(N0, N1)) return FMA; - if (SDValue FMA = FuseFADD(N1, N0, Flags)) + if (SDValue FMA = FuseFADD(N1, N0)) return FMA; // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y) // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y)) // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y)) // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y) - auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) { + auto FuseFSUB = [&](SDValue X, SDValue Y) { if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) { if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) { if (C0->isExactlyValue(+1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, - Y, Flags); + Y); if (C0->isExactlyValue(-1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, - DAG.getNode(ISD::FNEG, SL, VT, Y), Flags); + DAG.getNode(ISD::FNEG, SL, VT, Y)); } if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) { if (C1->isExactlyValue(+1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, - DAG.getNode(ISD::FNEG, SL, VT, Y), Flags); + DAG.getNode(ISD::FNEG, SL, VT, Y)); if (C1->isExactlyValue(-1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, - Y, Flags); + Y); } } return SDValue(); }; - if (SDValue FMA = FuseFSUB(N0, N1, Flags)) + if (SDValue FMA = FuseFSUB(N0, N1)) return FMA; - if (SDValue FMA = FuseFSUB(N1, N0, Flags)) + if (SDValue FMA = FuseFSUB(N1, N0)) return FMA; return SDValue(); @@ -12607,12 +13115,13 @@ SDValue 
DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0); - bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1); + bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0); + bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; - const SDNodeFlags Flags = N->getFlags(); + SDNodeFlags Flags = N->getFlags(); + SelectionDAG::FlagInserter FlagsInserter(DAG, N); if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) return R; @@ -12624,11 +13133,11 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags); + return DAG.getNode(ISD::FADD, DL, VT, N0, N1); // canonicalize constant to RHS if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags); + return DAG.getNode(ISD::FADD, DL, VT, N1, N0); // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math) ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true); @@ -12643,13 +13152,13 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) if (SDValue NegN1 = TLI.getCheaperNegatedExpression( N1, DAG, LegalOperations, ForCodeSize)) - return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1, Flags); + return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1); // fold (fadd (fneg A), B) -> (fsub B, A) if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) if (SDValue NegN0 = TLI.getCheaperNegatedExpression( N0, DAG, LegalOperations, ForCodeSize)) - return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0, Flags); + return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0); auto isFMulNegTwo = [](SDValue FMul) { if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL) @@ -12661,14 +13170,14 @@ SDValue 
DAGCombiner::visitFADD(SDNode *N) { // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B) if (isFMulNegTwo(N0)) { SDValue B = N0.getOperand(0); - SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags); - return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags); + SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B); + return DAG.getNode(ISD::FSUB, DL, VT, N1, Add); } // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B) if (isFMulNegTwo(N1)) { SDValue B = N1.getOperand(0); - SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags); - return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags); + SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B); + return DAG.getNode(ISD::FSUB, DL, VT, N0, Add); } // No FP constant should be created after legalization as Instruction @@ -12694,9 +13203,9 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { AllowNewConst) { // fadd (fadd x, c1), c2 -> fadd x, c1 + c2 if (N1CFP && N0.getOpcode() == ISD::FADD && - isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { - SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags); - return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags); + DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { + SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1); + return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC); } // We can fold chains of FADD's of the same value into multiplications. @@ -12704,14 +13213,14 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // of rounding steps. 
if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) { if (N0.getOpcode() == ISD::FMUL) { - bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); - bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); + bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); + bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), - DAG.getConstantFP(1.0, DL, VT), Flags); - return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags); + DAG.getConstantFP(1.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP); } // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) @@ -12719,20 +13228,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), - DAG.getConstantFP(2.0, DL, VT), Flags); - return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags); + DAG.getConstantFP(2.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP); } } if (N1.getOpcode() == ISD::FMUL) { - bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); - bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); + bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); + bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), - DAG.getConstantFP(1.0, DL, VT), Flags); - return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags); + DAG.getConstantFP(1.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP); } // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) @@ -12740,28 +13249,28 @@ SDValue 
DAGCombiner::visitFADD(SDNode *N) { N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N0.getOperand(0)) { SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), - DAG.getConstantFP(2.0, DL, VT), Flags); - return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags); + DAG.getConstantFP(2.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP); } } if (N0.getOpcode() == ISD::FADD) { - bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); + bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); // (fadd (fadd x, x), x) -> (fmul x, 3.0) if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) && (N0.getOperand(0) == N1)) { - return DAG.getNode(ISD::FMUL, DL, VT, - N1, DAG.getConstantFP(3.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N1, + DAG.getConstantFP(3.0, DL, VT)); } } if (N1.getOpcode() == ISD::FADD) { - bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); + bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && N1.getOperand(0) == N0) { - return DAG.getNode(ISD::FMUL, DL, VT, - N0, DAG.getConstantFP(3.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0, + DAG.getConstantFP(3.0, DL, VT)); } } @@ -12771,7 +13280,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), - DAG.getConstantFP(4.0, DL, VT), Flags); + DAG.getConstantFP(4.0, DL, VT)); } } } // enable-unsafe-fp-math @@ -12784,6 +13293,33 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) { + SDValue Chain = N->getOperand(0); + SDValue N0 = N->getOperand(1); + SDValue N1 = N->getOperand(2); + EVT VT = N->getValueType(0); + EVT ChainVT = N->getValueType(1); + SDLoc DL(N); + 
SelectionDAG::FlagInserter FlagsInserter(DAG, N); + + // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B) + if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT)) + if (SDValue NegN1 = TLI.getCheaperNegatedExpression( + N1, DAG, LegalOperations, ForCodeSize)) { + return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT), + {Chain, N0, NegN1}); + } + + // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A) + if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT)) + if (SDValue NegN0 = TLI.getCheaperNegatedExpression( + N0, DAG, LegalOperations, ForCodeSize)) { + return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT), + {Chain, N1, NegN0}); + } + return SDValue(); +} + SDValue DAGCombiner::visitFSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -12793,6 +13329,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; const SDNodeFlags Flags = N->getFlags(); + SelectionDAG::FlagInserter FlagsInserter(DAG, N); if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) return R; @@ -12804,7 +13341,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags); + return DAG.getNode(ISD::FSUB, DL, VT, N0, N1); if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -12824,18 +13361,21 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // (fsub -0.0, N1) -> -N1 - // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the - // FSUB does not specify the sign bit of a NaN. Also note that for - // the same reason, the inverse transform is not safe, unless fast math - // flags are in play. 
if (N0CFP && N0CFP->isZero()) { if (N0CFP->isNegative() || (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { - if (SDValue NegN1 = - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize)) - return NegN1; - if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); + // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are + // flushed to zero, unless all users treat denorms as zero (DAZ). + // FIXME: This transform will change the sign of a NaN and the behavior + // of a signaling NaN. It is only valid when a NoNaN flag is present. + DenormalMode DenormMode = DAG.getDenormalMode(VT); + if (DenormMode == DenormalMode::getIEEE()) { + if (SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize)) + return NegN1; + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, DL, VT, N1); + } } } @@ -12844,16 +13384,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { N1.getOpcode() == ISD::FADD) { // X - (X + Y) -> -Y if (N0 == N1->getOperand(0)) - return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags); + return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1)); // X - (Y + X) -> -Y if (N0 == N1->getOperand(1)) - return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags); + return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0)); } // fold (fsub A, (fneg B)) -> (fadd A, B) if (SDValue NegN1 = TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize)) - return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1, Flags); + return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1); // FSUB -> FMA combines: if (SDValue Fused = visitFSUBForFMACombine(N)) { @@ -12873,6 +13413,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; const SDNodeFlags Flags = N->getFlags(); + SelectionDAG::FlagInserter FlagsInserter(DAG, N); if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), 
N0, N1, Flags)) return R; @@ -12886,35 +13427,28 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold (fmul c1, c2) -> c1*c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0, N1); // canonicalize constant to RHS - if (isConstantFPBuildVectorOrConstantFP(N0) && - !isConstantFPBuildVectorOrConstantFP(N1)) - return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags); + if (DAG.isConstantFPBuildVectorOrConstantFP(N0) && + !DAG.isConstantFPBuildVectorOrConstantFP(N1)) + return DAG.getNode(ISD::FMUL, DL, VT, N1, N0); if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; - if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) || - (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) { - // fold (fmul A, 0) -> 0 - if (N1CFP && N1CFP->isZero()) - return N1; - } - if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) { // fmul (fmul X, C1), C2 -> fmul X, C1 * C2 - if (isConstantFPBuildVectorOrConstantFP(N1) && + if (DAG.isConstantFPBuildVectorOrConstantFP(N1) && N0.getOpcode() == ISD::FMUL) { SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); // Avoid an infinite loop by making sure that N00 is not a constant // (the inner multiply has not been constant folded yet). 
- if (isConstantFPBuildVectorOrConstantFP(N01) && - !isConstantFPBuildVectorOrConstantFP(N00)) { - SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags); - return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags); + if (DAG.isConstantFPBuildVectorOrConstantFP(N01) && + !DAG.isConstantFPBuildVectorOrConstantFP(N00)) { + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1); + return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts); } } @@ -12923,14 +13457,14 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() && N0.getOperand(0) == N0.getOperand(1)) { const SDValue Two = DAG.getConstantFP(2.0, DL, VT); - SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags); - return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags); + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts); } } // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) - return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags); + return DAG.getNode(ISD::FADD, DL, VT, N0, N0); // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) @@ -12949,7 +13483,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (NegN0 && NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper || CostN1 == TargetLowering::NegatibleCost::Cheaper)) - return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags); + return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1); // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X)) // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X) @@ -13015,10 +13549,11 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; - // FMA nodes have flags that propagate to the created nodes. 
- const SDNodeFlags Flags = N->getFlags(); - bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N); + SelectionDAG::FlagInserter FlagsInserter(DAG, N); + + bool UnsafeFPMath = + Options.UnsafeFPMath || N->getFlags().hasAllowReassociation(); // Constant fold FMA. if (isa<ConstantFPSDNode>(N0) && @@ -13039,7 +13574,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (NegN0 && NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper || CostN1 == TargetLowering::NegatibleCost::Cheaper)) - return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags); + return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2); if (UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) @@ -13047,51 +13582,45 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N1CFP && N1CFP->isZero()) return N2; } - // TODO: The FMA node should have flags that propagate to these nodes. + if (N0CFP && N0CFP->isExactlyValue(1.0)) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2); if (N1CFP && N1CFP->isExactlyValue(1.0)) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2); // Canonicalize (fma c, x, y) -> (fma x, c, y) - if (isConstantFPBuildVectorOrConstantFP(N0) && - !isConstantFPBuildVectorOrConstantFP(N1)) + if (DAG.isConstantFPBuildVectorOrConstantFP(N0) && + !DAG.isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); if (UnsafeFPMath) { // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) && - isConstantFPBuildVectorOrConstantFP(N1) && - isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) { + DAG.isConstantFPBuildVectorOrConstantFP(N1) && + DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) { return DAG.getNode(ISD::FMUL, DL, VT, N0, - DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1), - Flags), Flags); + DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1))); } // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) if (N0.getOpcode() == ISD::FMUL && - isConstantFPBuildVectorOrConstantFP(N1) && - 
isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { - return DAG.getNode(ISD::FMA, DL, VT, - N0.getOperand(0), - DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1), - Flags), + DAG.isConstantFPBuildVectorOrConstantFP(N1) && + DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { + return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0), + DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2); } } - // (fma x, 1, y) -> (fadd x, y) // (fma x, -1, y) -> (fadd (fneg x), y) if (N1CFP) { if (N1CFP->isExactlyValue(1.0)) - // TODO: The FMA node should have flags that propagate to this node. return DAG.getNode(ISD::FADD, DL, VT, N0, N2); if (N1CFP->isExactlyValue(-1.0) && (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0); AddToWorklist(RHSNeg.getNode()); - // TODO: The FMA node should have flags that propagate to this node. return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg); } @@ -13101,25 +13630,23 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) { return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0), - DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2); + DAG.getNode(ISD::FNEG, DL, VT, N1), N2); } } if (UnsafeFPMath) { // (fma x, c, x) -> (fmul x, (c+1)) if (N1CFP && N0 == N2) { - return DAG.getNode(ISD::FMUL, DL, VT, N0, - DAG.getNode(ISD::FADD, DL, VT, N1, - DAG.getConstantFP(1.0, DL, VT), Flags), - Flags); + return DAG.getNode( + ISD::FMUL, DL, VT, N0, + DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT))); } // (fma x, c, (fneg x)) -> (fmul x, (c-1)) if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { - return DAG.getNode(ISD::FMUL, DL, VT, N0, - DAG.getNode(ISD::FADD, DL, VT, N1, - DAG.getConstantFP(-1.0, DL, VT), Flags), - Flags); + return DAG.getNode( + ISD::FMUL, DL, VT, N0, + DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT))); } } @@ -13128,7 +13655,7 @@ SDValue 
DAGCombiner::visitFMA(SDNode *N) { if (!TLI.isFNegFree(VT)) if (SDValue Neg = TLI.getCheaperNegatedExpression( SDValue(N, 0), DAG, LegalOperations, ForCodeSize)) - return DAG.getNode(ISD::FNEG, DL, VT, Neg, Flags); + return DAG.getNode(ISD::FNEG, DL, VT, Neg); return SDValue(); } @@ -13149,14 +13676,13 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { return SDValue(); // Skip if current node is a reciprocal/fneg-reciprocal. - SDValue N0 = N->getOperand(0); + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true); if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0))) return SDValue(); // Exit early if the target does not want this transform or if there can't // possibly be enough uses of the divisor to make the transform worthwhile. - SDValue N1 = N->getOperand(1); unsigned MinUses = TLI.combineRepeatedFPDivisors(); // For splat vectors, scale the number of uses by the splat factor. If we can @@ -13174,6 +13700,13 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { SetVector<SDNode *> Users; for (auto *U : N1->uses()) { if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) { + // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet. + if (U->getOperand(1).getOpcode() == ISD::FSQRT && + U->getOperand(0) == U->getOperand(1).getOperand(0) && + U->getFlags().hasAllowReassociation() && + U->getFlags().hasNoSignedZeros()) + continue; + // This division is eligible for optimization only if global unsafe math // is enabled or if this division allows reciprocal formation. 
if (UnsafeMath || U->getFlags().hasAllowReciprocal()) @@ -13215,6 +13748,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; SDNodeFlags Flags = N->getFlags(); + SelectionDAG::FlagInserter FlagsInserter(DAG, N); if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) return R; @@ -13226,7 +13760,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // fold (fdiv c1, c2) -> c1/c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags); + return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1); if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -13251,29 +13785,29 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { TLI.isOperationLegal(ISD::ConstantFP, VT) || TLI.isFPImmLegal(Recip, VT, ForCodeSize))) return DAG.getNode(ISD::FMUL, DL, VT, N0, - DAG.getConstantFP(Recip, DL, VT), Flags); + DAG.getConstantFP(Recip, DL, VT)); } // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction. 
if (N1.getOpcode() == ISD::FSQRT) { if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), - Flags)) { + if (SDValue RV = + buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); } } else if (N1.getOpcode() == ISD::FP_ROUND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), - Flags)) { + if (SDValue RV = + buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); } } else if (N1.getOpcode() == ISD::FMUL) { // Look through an FMUL. Even though this won't remove the FDIV directly, @@ -13288,29 +13822,34 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } if (Sqrt.getNode()) { // If the other multiply operand is known positive, pull it into the - // sqrt. That will eliminate the division if we convert to an estimate: - // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z) - // TODO: Also fold the case where A == Z (fabs is missing). + // sqrt. That will eliminate the division if we convert to an estimate. 
if (Flags.hasAllowReassociation() && N1.hasOneUse() && - N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse() && - Y.getOpcode() == ISD::FABS && Y.hasOneUse()) { - SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, Y.getOperand(0), - Y.getOperand(0), Flags); - SDValue AAZ = - DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0), Flags); - if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags)) - return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt, Flags); - - // Estimate creation failed. Clean up speculatively created nodes. - recursivelyDeleteUnusedNodes(AAZ.getNode()); + N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) { + SDValue A; + if (Y.getOpcode() == ISD::FABS && Y.hasOneUse()) + A = Y.getOperand(0); + else if (Y == Sqrt.getOperand(0)) + A = Y; + if (A) { + // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z) + // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A) + SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A); + SDValue AAZ = + DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0)); + if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags)) + return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt); + + // Estimate creation failed. Clean up speculatively created nodes. 
+ recursivelyDeleteUnusedNodes(AAZ.getNode()); + } } // We found a FSQRT, so try to make this fold: // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y) if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) { - SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y, Flags); + SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y); AddToWorklist(Div.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, Div, Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0, Div); } } } @@ -13321,6 +13860,12 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { return RV; } + // Fold X/Sqrt(X) -> Sqrt(X) + if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) && + (Options.UnsafeFPMath || Flags.hasAllowReassociation())) + if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0)) + return N1; + // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) TargetLowering::NegatibleCost CostN0 = TargetLowering::NegatibleCost::Expensive; @@ -13333,7 +13878,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (NegN0 && NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper || CostN1 == TargetLowering::NegatibleCost::Cheaper)) - return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1, Flags); + return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1); return SDValue(); } @@ -13345,13 +13890,14 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); SDNodeFlags Flags = N->getFlags(); + SelectionDAG::FlagInserter FlagsInserter(DAG, N); if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) return R; // fold (frem c1, c2) -> fmod(c1,c2) if (N0CFP && N1CFP) - return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags()); + return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1); if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -13365,7 +13911,7 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as: // sqrt(+Inf) 
== rsqrt(+Inf) * +Inf = 0 * +Inf = NaN - if ((!Options.UnsafeFPMath && !Flags.hasApproximateFuncs()) || + if (!Flags.hasApproximateFuncs() || (!Options.NoInfsFPMath && !Flags.hasNoInfs())) return SDValue(); @@ -13374,6 +13920,10 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { return SDValue(); // FSQRT nodes have flags that propagate to the created nodes. + // TODO: If this is N0/sqrt(N0), and we reach this node before trying to + // transform the fdiv, we may produce a sub-optimal estimate sequence + // because the reciprocal calculation may not have to filter out a + // 0.0 input. return buildSqrtEstimate(N0, Flags); } @@ -13397,8 +13947,8 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0); - bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1); + bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0); + bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1); EVT VT = N->getValueType(0); if (N0CFP && N1CFP) // Constant fold @@ -13445,6 +13995,7 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) { ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1)); if (!ExponentC) return SDValue(); + SelectionDAG::FlagInserter FlagsInserter(DAG, N); // Try to convert x ** (1/3) into cube root. // TODO: Handle the various flavors of long double. @@ -13471,7 +14022,7 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) { DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT))) return SDValue(); - return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags); + return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0)); } // Try to convert x ** (1/4) and x ** (3/4) into square roots. 
@@ -13506,12 +14057,12 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) { // pow(X, 0.25) --> sqrt(sqrt(X)) SDLoc DL(N); - SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags); - SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags); + SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0)); + SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt); if (ExponentIs025) return SqrtSqrt; // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X)) - return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags); + return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt); } return SDValue(); @@ -13694,7 +14245,7 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { return DAG.getUNDEF(VT); // fold (fp_to_sint c1fp) -> c1 - if (isConstantFPBuildVectorOrConstantFP(N0)) + if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0); return FoldIntToFPToInt(N, DAG); @@ -13709,7 +14260,7 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { return DAG.getUNDEF(VT); // fold (fp_to_uint c1fp) -> c1 - if (isConstantFPBuildVectorOrConstantFP(N0)) + if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0); return FoldIntToFPToInt(N, DAG); @@ -13781,7 +14332,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { return SDValue(); // fold (fp_extend c1fp) -> c1fp - if (isConstantFPBuildVectorOrConstantFP(N0)) + if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0); // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op) @@ -13829,7 +14380,7 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) { EVT VT = N->getValueType(0); // fold (fceil c1) -> fceil(c1) - if (isConstantFPBuildVectorOrConstantFP(N0)) + if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); return SDValue(); @@ -13840,7 +14391,7 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) { EVT VT = N->getValueType(0); // fold (ftrunc c1) -> 
ftrunc(c1) - if (isConstantFPBuildVectorOrConstantFP(N0)) + if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); // fold ftrunc (known rounded int x) -> x @@ -13864,19 +14415,19 @@ SDValue DAGCombiner::visitFFLOOR(SDNode *N) { EVT VT = N->getValueType(0); // fold (ffloor c1) -> ffloor(c1) - if (isConstantFPBuildVectorOrConstantFP(N0)) + if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); return SDValue(); } -// FIXME: FNEG and FABS have a lot in common; refactor. SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + SelectionDAG::FlagInserter FlagsInserter(DAG, N); // Constant fold FNEG. - if (isConstantFPBuildVectorOrConstantFP(N0)) + if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); if (SDValue NegN0 = @@ -13891,51 +14442,12 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { (DAG.getTarget().Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) { return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1), - N0.getOperand(0), N->getFlags()); - } - - // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading - // constant pool values. - if (!TLI.isFNegFree(VT) && - N0.getOpcode() == ISD::BITCAST && - N0.getNode()->hasOneUse()) { - SDValue Int = N0.getOperand(0); - EVT IntVT = Int.getValueType(); - if (IntVT.isInteger() && !IntVT.isVector()) { - APInt SignMask; - if (N0.getValueType().isVector()) { - // For a vector, get a mask such as 0x80... per scalar element - // and splat it. - SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits()); - SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); - } else { - // For a scalar, just generate 0x80... 
- SignMask = APInt::getSignMask(IntVT.getSizeInBits()); - } - SDLoc DL0(N0); - Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int, - DAG.getConstant(SignMask, DL0, IntVT)); - AddToWorklist(Int.getNode()); - return DAG.getBitcast(VT, Int); - } - } - - // (fneg (fmul c, x)) -> (fmul -c, x) - if (N0.getOpcode() == ISD::FMUL && - (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) { - ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); - if (CFP1) { - APFloat CVal = CFP1->getValueAPF(); - CVal.changeSign(); - if (LegalDAG && (TLI.isFPImmLegal(CVal, VT, ForCodeSize) || - TLI.isOperationLegal(ISD::ConstantFP, VT))) - return DAG.getNode( - ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), - DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)), - N0->getFlags()); - } + N0.getOperand(0)); } + if (SDValue Cast = foldSignChangeInBitcast(N)) + return Cast; + return SDValue(); } @@ -13946,6 +14458,11 @@ static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N, EVT VT = N->getValueType(0); const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); + const SDNodeFlags Flags = N->getFlags(); + unsigned Opc = N->getOpcode(); + bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM; + bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM; + SelectionDAG::FlagInserter FlagsInserter(DAG, N); if (N0CFP && N1CFP) { const APFloat &C0 = N0CFP->getValueAPF(); @@ -13954,10 +14471,39 @@ static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N, } // Canonicalize to constant on RHS. 
- if (isConstantFPBuildVectorOrConstantFP(N0) && - !isConstantFPBuildVectorOrConstantFP(N1)) + if (DAG.isConstantFPBuildVectorOrConstantFP(N0) && + !DAG.isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); + if (N1CFP) { + const APFloat &AF = N1CFP->getValueAPF(); + + // minnum(X, nan) -> X + // maxnum(X, nan) -> X + // minimum(X, nan) -> nan + // maximum(X, nan) -> nan + if (AF.isNaN()) + return PropagatesNaN ? N->getOperand(1) : N->getOperand(0); + + // In the following folds, inf can be replaced with the largest finite + // float, if the ninf flag is set. + if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) { + // minnum(X, -inf) -> -inf + // maxnum(X, +inf) -> +inf + // minimum(X, -inf) -> -inf if nnan + // maximum(X, +inf) -> +inf if nnan + if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs())) + return N->getOperand(1); + + // minnum(X, +inf) -> X if nnan + // maxnum(X, -inf) -> X if nnan + // minimum(X, +inf) -> X + // maximum(X, -inf) -> X + if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs())) + return N->getOperand(0); + } + } + return SDValue(); } @@ -13982,7 +14528,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { EVT VT = N->getValueType(0); // fold (fabs c1) -> fabs(c1) - if (isConstantFPBuildVectorOrConstantFP(N0)) + if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); // fold (fabs (fabs x)) -> (fabs x) @@ -13994,28 +14540,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0)); - // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads. 
- if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) { - SDValue Int = N0.getOperand(0); - EVT IntVT = Int.getValueType(); - if (IntVT.isInteger() && !IntVT.isVector()) { - APInt SignMask; - if (N0.getValueType().isVector()) { - // For a vector, get a mask such as 0x7f... per scalar element - // and splat it. - SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits()); - SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); - } else { - // For a scalar, just generate 0x7f... - SignMask = ~APInt::getSignMask(IntVT.getSizeInBits()); - } - SDLoc DL(N0); - Int = DAG.getNode(ISD::AND, DL, IntVT, Int, - DAG.getConstant(SignMask, DL, IntVT)); - AddToWorklist(Int.getNode()); - return DAG.getBitcast(N->getValueType(0), Int); - } - } + if (SDValue Cast = foldSignChangeInBitcast(N)) + return Cast; return SDValue(); } @@ -14025,6 +14551,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); + // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are + // nondeterministic jumps). + if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) { + return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, + N1->getOperand(0), N2); + } + // If N is a constant we could fold this into a fallthrough or unconditional // branch. However that doesn't happen very often in normal code, because // Instcombine/SimplifyCFG should have handled the available opportunities. @@ -14178,63 +14711,6 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { return SDValue(); } -/// Return true if 'Use' is a load or a store that uses N as its base pointer -/// and that N may be folded in the load / store addressing mode. 
-static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, - SelectionDAG &DAG, - const TargetLowering &TLI) { - EVT VT; - unsigned AS; - - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { - if (LD->isIndexed() || LD->getBasePtr().getNode() != N) - return false; - VT = LD->getMemoryVT(); - AS = LD->getAddressSpace(); - } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { - if (ST->isIndexed() || ST->getBasePtr().getNode() != N) - return false; - VT = ST->getMemoryVT(); - AS = ST->getAddressSpace(); - } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) { - if (LD->isIndexed() || LD->getBasePtr().getNode() != N) - return false; - VT = LD->getMemoryVT(); - AS = LD->getAddressSpace(); - } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) { - if (ST->isIndexed() || ST->getBasePtr().getNode() != N) - return false; - VT = ST->getMemoryVT(); - AS = ST->getAddressSpace(); - } else - return false; - - TargetLowering::AddrMode AM; - if (N->getOpcode() == ISD::ADD) { - AM.HasBaseReg = true; - ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); - if (Offset) - // [reg +/- imm] - AM.BaseOffs = Offset->getSExtValue(); - else - // [reg +/- reg] - AM.Scale = 1; - } else if (N->getOpcode() == ISD::SUB) { - AM.HasBaseReg = true; - ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); - if (Offset) - // [reg +/- imm] - AM.BaseOffs = -Offset->getSExtValue(); - else - // [reg +/- reg] - AM.Scale = 1; - } else - return false; - - return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, - VT.getTypeForEVT(*DAG.getContext()), AS); -} - static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI) { @@ -14463,16 +14939,13 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // Therefore, we have: // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1 - ConstantSDNode *CN = - 
cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx)); - int X0, X1, Y0, Y1; + auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx)); const APInt &Offset0 = CN->getAPIntValue(); - APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue(); - - X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1; - Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1; - X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1; - Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1; + const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue(); + int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1; + int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1; + int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1; + int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1; unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD; @@ -14664,8 +15137,8 @@ SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc); } -static inline int numVectorEltsOrZero(EVT T) { - return T.isVector() ? T.getVectorNumElements() : 0; +static inline ElementCount numVectorEltsOrZero(EVT T) { + return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0); } bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) { @@ -14733,6 +15206,24 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { EVT STMemType = ST->getMemoryVT(); EVT STType = ST->getValue().getValueType(); + // There are two cases to consider here: + // 1. The store is fixed width and the load is scalable. In this case we + // don't know at compile time if the store completely envelops the load + // so we abandon the optimisation. + // 2. The store is scalable and the load is fixed width. We could + // potentially support a limited number of cases here, but there has been + // no cost-benefit analysis to prove it's worth it. 
+ bool LdStScalable = LDMemType.isScalableVector(); + if (LdStScalable != STMemType.isScalableVector()) + return SDValue(); + + // If we are dealing with scalable vectors on a big endian platform the + // calculation of offsets below becomes trickier, since we do not know at + // compile time the absolute size of the vector. Until we've done more + // analysis on big-endian platforms it seems better to bail out for now. + if (LdStScalable && DAG.getDataLayout().isBigEndian()) + return SDValue(); + BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG); BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG); int64_t Offset; @@ -14744,13 +15235,21 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { // the stored value). With Offset=n (for n > 0) the loaded value starts at the // n:th least significant byte of the stored value. if (DAG.getDataLayout().isBigEndian()) - Offset = ((int64_t)STMemType.getStoreSizeInBits() - - (int64_t)LDMemType.getStoreSizeInBits()) / 8 - Offset; + Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() - + (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) / + 8 - + Offset; // Check that the stored value cover all bits that are loaded. - bool STCoversLD = - (Offset >= 0) && - (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits()); + bool STCoversLD; + + TypeSize LdMemSize = LDMemType.getSizeInBits(); + TypeSize StMemSize = STMemType.getSizeInBits(); + if (LdStScalable) + STCoversLD = (Offset == 0) && LdMemSize == StMemSize; + else + STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <= + StMemSize.getFixedSize()); auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue { if (LD->isIndexed()) { @@ -14771,15 +15270,15 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { // Memory as copy space (potentially masked). 
if (Offset == 0 && LDType == STType && STMemType == LDMemType) { // Simple case: Direct non-truncating forwarding - if (LDType.getSizeInBits() == LDMemType.getSizeInBits()) + if (LDType.getSizeInBits() == LdMemSize) return ReplaceLd(LD, ST->getValue(), Chain); // Can we model the truncate and extension with an and mask? if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() && !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) { // Mask to size of LDMemType auto Mask = - DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(), - STMemType.getSizeInBits()), + DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(), + StMemSize.getFixedSize()), SDLoc(ST), STType); auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask); return ReplaceLd(LD, Val, Chain); @@ -15602,8 +16101,6 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, // Figure out the offset for the store and the alignment of the access. unsigned StOffset; - unsigned NewAlign = St->getAlignment(); - if (DAG.getDataLayout().isLittleEndian()) StOffset = ByteShift; else @@ -15612,8 +16109,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, SDValue Ptr = St->getBasePtr(); if (StOffset) { SDLoc DL(IVal); - Ptr = DAG.getMemBasePlusOffset(Ptr, StOffset, DL); - NewAlign = MinAlign(NewAlign, StOffset); + Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL); } // Truncate down to the new size. 
@@ -15622,7 +16118,8 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, ++OpsNarrowed; return DAG .getStore(St->getChain(), SDLoc(St), IVal, Ptr, - St->getPointerInfo().getWithOffset(StOffset), NewAlign); + St->getPointerInfo().getWithOffset(StOffset), + St->getOriginalAlign()); } /// Look for sequence of load / op / store where op is one of 'or', 'xor', and @@ -15726,7 +16223,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy)) return SDValue(); - SDValue NewPtr = DAG.getMemBasePlusOffset(Ptr, PtrOff, SDLoc(LD)); + SDValue NewPtr = + DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD)); SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr, LD->getPointerInfo().getWithOffset(PtrOff), NewAlign, @@ -16034,9 +16532,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( // make sure we use trunc store if it's necessary to be legal. SDValue NewStore; if (!UseTrunc) { - NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), - FirstInChain->getAlignment()); + NewStore = + DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), FirstInChain->getAlign()); } else { // Must be realized as a trunc store EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); @@ -16048,8 +16546,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( NewStore = DAG.getTruncStore( NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/, - FirstInChain->getAlignment(), - FirstInChain->getMemOperand()->getFlags()); + FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags()); } // Replace all merged stores with the new store. 
@@ -16064,23 +16561,19 @@ void DAGCombiner::getStoreMergeCandidates( StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes, SDNode *&RootNode) { // This holds the base pointer, index, and the offset in bytes from the base - // pointer. + // pointer. We must have a base and an offset. Do not handle stores to undef + // base pointers. BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); - EVT MemVT = St->getMemoryVT(); - - SDValue Val = peekThroughBitcasts(St->getValue()); - // We must have a base and an offset. - if (!BasePtr.getBase().getNode()) - return; - - // Do not handle stores to undef base pointers. - if (BasePtr.getBase().isUndef()) + if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef()) return; + SDValue Val = peekThroughBitcasts(St->getValue()); StoreSource StoreSrc = getStoreSource(Val); assert(StoreSrc != StoreSource::Unknown && "Expected known source for store"); - BaseIndexOffset LBasePtr; + // Match on loadbaseptr if relevant. + EVT MemVT = St->getMemoryVT(); + BaseIndexOffset LBasePtr; EVT LoadVT; if (StoreSrc == StoreSource::Load) { auto *Ld = cast<LoadSDNode>(Val); @@ -16101,7 +16594,7 @@ void DAGCombiner::getStoreMergeCandidates( int64_t &Offset) -> bool { // The memory operands must not be volatile/indexed/atomic. // TODO: May be able to relax for unordered atomics (see D66309) - if (!Other->isSimple() || Other->isIndexed()) + if (!Other->isSimple() || Other->isIndexed()) return false; // Don't mix temporal stores with non-temporal stores. if (St->isNonTemporal() != Other->isNonTemporal()) @@ -16110,37 +16603,38 @@ void DAGCombiner::getStoreMergeCandidates( // Allow merging constants of different types as integers. bool NoTypeMatch = (MemVT.isInteger()) ? 
!MemVT.bitsEq(Other->getMemoryVT()) : Other->getMemoryVT() != MemVT; - if (StoreSrc == StoreSource::Load) { + switch (StoreSrc) { + case StoreSource::Load: { if (NoTypeMatch) return false; - // The Load's Base Ptr must also match - if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) { - BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG); - if (LoadVT != OtherLd->getMemoryVT()) - return false; - // Loads must only have one use. - if (!OtherLd->hasNUsesOfValue(1, 0)) - return false; - // The memory operands must not be volatile/indexed/atomic. - // TODO: May be able to relax for unordered atomics (see D66309) - if (!OtherLd->isSimple() || - OtherLd->isIndexed()) - return false; - // Don't mix temporal loads with non-temporal loads. - if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal()) - return false; - if (!(LBasePtr.equalBaseIndex(LPtr, DAG))) - return false; - } else + // The Load's Base Ptr must also match. + auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC); + if (!OtherLd) + return false; + BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG); + if (LoadVT != OtherLd->getMemoryVT()) + return false; + // Loads must only have one use. + if (!OtherLd->hasNUsesOfValue(1, 0)) + return false; + // The memory operands must not be volatile/indexed/atomic. + // TODO: May be able to relax for unordered atomics (see D66309) + if (!OtherLd->isSimple() || OtherLd->isIndexed()) return false; + // Don't mix temporal loads with non-temporal loads. + if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal()) + return false; + if (!(LBasePtr.equalBaseIndex(LPtr, DAG))) + return false; + break; } - if (StoreSrc == StoreSource::Constant) { + case StoreSource::Constant: if (NoTypeMatch) return false; if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC))) return false; - } - if (StoreSrc == StoreSource::Extract) { + break; + case StoreSource::Extract: // Do not merge truncated stores here. 
if (Other->isTruncatingStore()) return false; @@ -16149,6 +16643,9 @@ void DAGCombiner::getStoreMergeCandidates( if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT && OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR) return false; + break; + default: + llvm_unreachable("Unhandled store source for merging"); } Ptr = BaseIndexOffset::match(Other, DAG); return (BasePtr.equalBaseIndex(Ptr, DAG, Offset)); @@ -16159,11 +16656,22 @@ void DAGCombiner::getStoreMergeCandidates( auto OverLimitInDependenceCheck = [&](SDNode *StoreNode, SDNode *RootNode) -> bool { auto RootCount = StoreRootCountMap.find(StoreNode); - if (RootCount != StoreRootCountMap.end() && - RootCount->second.first == RootNode && - RootCount->second.second > StoreMergeDependenceLimit) - return true; - return false; + return RootCount != StoreRootCountMap.end() && + RootCount->second.first == RootNode && + RootCount->second.second > StoreMergeDependenceLimit; + }; + + auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) { + // This must be a chain use. 
+ if (UseIter.getOperandNo() != 0) + return; + if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) { + BaseIndexOffset Ptr; + int64_t PtrDiff; + if (CandidateMatch(OtherStore, Ptr, PtrDiff) && + !OverLimitInDependenceCheck(OtherStore, RootNode)) + StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff)); + } }; // We looking for a root node which is an ancestor to all mergable @@ -16185,31 +16693,21 @@ void DAGCombiner::getStoreMergeCandidates( RootNode = St->getChain().getNode(); unsigned NumNodesExplored = 0; - if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) { + const unsigned MaxSearchNodes = 1024; + if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) { RootNode = Ldn->getChain().getNode(); for (auto I = RootNode->use_begin(), E = RootNode->use_end(); - I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored) - if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain + I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) { + if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2) - if (I2.getOperandNo() == 0) - if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) { - BaseIndexOffset Ptr; - int64_t PtrDiff; - if (CandidateMatch(OtherST, Ptr, PtrDiff) && - !OverLimitInDependenceCheck(OtherST, RootNode)) - StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); - } - } else + TryToAddCandidate(I2); + } + } + } else { for (auto I = RootNode->use_begin(), E = RootNode->use_end(); - I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored) - if (I.getOperandNo() == 0) - if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) { - BaseIndexOffset Ptr; - int64_t PtrDiff; - if (CandidateMatch(OtherST, Ptr, PtrDiff) && - !OverLimitInDependenceCheck(OtherST, RootNode)) - StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); - } + I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) + TryToAddCandidate(I); + } } // We need to check that merging 
these stores does not cause a loop in @@ -16579,7 +17077,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, } LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); + Align FirstStoreAlign = FirstInChain->getAlign(); LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); // Scan the memory operations on the chain and find the first @@ -16674,7 +17172,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, // the NumElem refers to array/index size. unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); NumElem = std::min(LastLegalType, NumElem); - unsigned FirstLoadAlign = FirstLoad->getAlignment(); + Align FirstLoadAlign = FirstLoad->getAlign(); if (NumElem < 2) { // We know that candidate stores are in order and of correct @@ -16686,8 +17184,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, // can here. 
unsigned NumSkip = 1; while ((NumSkip < LoadNodes.size()) && - (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) && + (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign)) NumSkip++; StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip); @@ -16760,11 +17258,10 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, FirstLoad->getChain(), FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), JointMemOpVT, FirstLoadAlign, LdMMOFlags); - NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), JointMemOpVT, - FirstInChain->getAlignment(), - FirstInChain->getMemOperand()->getFlags()); + NewStore = DAG.getTruncStore( + NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), JointMemOpVT, + FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags()); } // Transfer chain users from old loads to the new load. 
@@ -16966,17 +17463,15 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); - unsigned Alignment = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), - ST->getAlignment(), MMOFlags, AAInfo); - Ptr = DAG.getMemBasePlusOffset(Ptr, 4, DL); - Alignment = MinAlign(Alignment, 4U); + ST->getOriginalAlign(), MMOFlags, AAInfo); + Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL); SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), - Alignment, MMOFlags, AAInfo); + ST->getOriginalAlign(), MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, St0, St1); } @@ -17037,7 +17532,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return NewST; // Try transforming several stores into STORE (BSWAP). - if (SDValue Store = MatchStoreCombine(ST)) + if (SDValue Store = mergeTruncStores(ST)) return Store; if (ST->isUnindexed()) { @@ -17110,11 +17605,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { !ST1->getBasePtr().isUndef() && // BaseIndexOffset and the code below requires knowing the size // of a vector, so bail out if MemoryVT is scalable. + !ST->getMemoryVT().isScalableVector() && !ST1->getMemoryVT().isScalableVector()) { const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG); const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG); - unsigned STBitSize = ST->getMemoryVT().getSizeInBits(); - unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits(); + unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits(); + unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits(); // If this is a store who's preceding store to a subset of the current // location and no one other node is chained to that store we can // effectively drop the store. 
Do not remove stores to undef as they may @@ -17185,8 +17681,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { // We walk up the chains to find stores. SmallVector<SDValue, 8> Chains = {N->getOperand(0)}; while (!Chains.empty()) { - SDValue Chain = Chains.back(); - Chains.pop_back(); + SDValue Chain = Chains.pop_back_val(); if (!Chain.hasOneUse()) continue; switch (Chain.getOpcode()) { @@ -17206,11 +17701,16 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { // TODO: Can relax for unordered atomics (see D66309) if (!ST->isSimple() || ST->isIndexed()) continue; + const TypeSize StoreSize = ST->getMemoryVT().getStoreSize(); + // The bounds of a scalable store are not known until runtime, so this + // store cannot be elided. + if (StoreSize.isScalable()) + continue; const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG); // If we store purely within object bounds just before its lifetime ends, // we can remove the store. if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase, - ST->getMemoryVT().getStoreSizeInBits())) { + StoreSize.getFixedSize() * 8)) { LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump(); dbgs() << "\nwithin LIFETIME_END of : "; LifetimeEndBase.dump(); dbgs() << "\n"); @@ -17309,7 +17809,6 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { return SDValue(); // Start to split store. - unsigned Alignment = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); @@ -17322,13 +17821,12 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { SDValue Ptr = ST->getBasePtr(); // Lower value store. SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), - ST->getAlignment(), MMOFlags, AAInfo); - Ptr = DAG.getMemBasePlusOffset(Ptr, HalfValBitSize / 8, DL); + ST->getOriginalAlign(), MMOFlags, AAInfo); + Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL); // Higher value store. 
- SDValue St1 = - DAG.getStore(St0, DL, Hi, Ptr, - ST->getPointerInfo().getWithOffset(HalfValBitSize / 8), - Alignment / 2, MMOFlags, AAInfo); + SDValue St1 = DAG.getStore( + St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8), + ST->getOriginalAlign(), MMOFlags, AAInfo); return St1; } @@ -17566,6 +18064,13 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, EVT ResultVT = EVE->getValueType(0); EVT VecEltVT = InVecVT.getVectorElementType(); + + // If the vector element type is not a multiple of a byte then we are unable + // to correctly compute an address to load only the extracted element as a + // scalar. + if (!VecEltVT.isByteSized()) + return SDValue(); + Align Alignment = OriginalLoad->getAlign(); Align NewAlign = DAG.getDataLayout().getABITypeAlign( VecEltVT.getTypeForEVT(*DAG.getContext())); @@ -18201,20 +18706,24 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, // operands will all be based off of VecIn1, even those in VecIn2. unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements(); + uint64_t VTSize = VT.getFixedSizeInBits(); + uint64_t InVT1Size = InVT1.getFixedSizeInBits(); + uint64_t InVT2Size = InVT2.getFixedSizeInBits(); + // We can't generate a shuffle node with mismatched input and output types. // Try to make the types match the type of the output. if (InVT1 != VT || InVT2 != VT) { - if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) { + if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) { // If the output vector length is a multiple of both input lengths, // we can concatenate them and pad the rest with undefs. - unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits(); + unsigned NumConcats = VTSize / InVT1Size; assert(NumConcats >= 2 && "Concat needs at least two inputs!"); SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1)); ConcatOps[0] = VecIn1; ConcatOps[1] = VecIn2 ? 
VecIn2 : DAG.getUNDEF(InVT1); VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); VecIn2 = SDValue(); - } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) { + } else if (InVT1Size == VTSize * 2) { if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems)) return SDValue(); @@ -18227,7 +18736,7 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, // Since we now have shorter input vectors, adjust the offset of the // second vector's start. Vec2Offset = NumElems; - } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) { + } else if (InVT2Size <= InVT1Size) { // VecIn1 is wider than the output, and we have another, possibly // smaller input. Pad the smaller input with undefs, shuffle at the // input vector width, and extract the output. @@ -18252,8 +18761,7 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, // when we start sorting the vectors by type. return SDValue(); } - } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() && - InVT1.getSizeInBits() == VT.getSizeInBits()) { + } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) { SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2)); ConcatOps[0] = VecIn2; VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); @@ -18444,8 +18952,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { // Have we seen this input vector before? // The vectors are expected to be tiny (usually 1 or 2 elements), so using // a map back from SDValues to numbers isn't worth it. 
- unsigned Idx = std::distance( - VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec)); + unsigned Idx = std::distance(VecIn.begin(), find(VecIn, ExtractedFromVec)); if (Idx == VecIn.size()) VecIn.push_back(ExtractedFromVec); @@ -18795,6 +19302,11 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); EVT OpVT = N->getOperand(0).getValueType(); + + // We currently can't generate an appropriate shuffle for a scalable vector. + if (VT.isScalableVector()) + return SDValue(); + int NumElts = VT.getVectorNumElements(); int NumOpElts = OpVT.getVectorNumElements(); @@ -18898,7 +19410,7 @@ static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) { // check the other type in the cast to make sure this is really legal. EVT VT = N->getValueType(0); EVT SrcEltVT = SrcVT.getVectorElementType(); - unsigned NumElts = SrcVT.getVectorElementCount().Min * N->getNumOperands(); + ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands(); EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); switch (CastOpcode) { @@ -18935,9 +19447,8 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return DAG.getUNDEF(VT); // Optimize concat_vectors where all but the first of the vectors are undef. - if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) { - return Op.isUndef(); - })) { + if (all_of(drop_begin(N->ops()), + [](const SDValue &Op) { return Op.isUndef(); })) { SDValue In = N->getOperand(0); assert(In.getValueType().isVector() && "Must concat vectors"); @@ -19055,11 +19566,14 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return V; // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR - // nodes often generate nop CONCAT_VECTOR nodes. 
- // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that - // place the incoming vectors at the exact same location. + // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR + // operands and look for a CONCAT operations that place the incoming vectors + // at the exact same location. + // + // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled. SDValue SingleSource = SDValue(); - unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements(); + unsigned PartNumElem = + N->getOperand(0).getValueType().getVectorMinNumElements(); for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDValue Op = N->getOperand(i); @@ -19107,15 +19621,16 @@ static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) { auto *IndexC = dyn_cast<ConstantSDNode>(Index); if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS && V.getOperand(0).getValueType() == SubVT && - (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) { - uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements(); + (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) { + uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements(); return V.getOperand(SubIdx); } return SDValue(); } static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, - SelectionDAG &DAG) { + SelectionDAG &DAG, + bool LegalOperations) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue BinOp = Extract->getOperand(0); unsigned BinOpcode = BinOp.getOpcode(); @@ -19129,7 +19644,7 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SDValue Index = Extract->getOperand(1); EVT SubVT = Extract->getValueType(0); - if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT)) + if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations)) return SDValue(); SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT); @@ -19150,11 +19665,12 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, /// 
If we are extracting a subvector produced by a wide binary operator try /// to use a narrow binary operator and/or avoid concatenation and extraction. -static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { +static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG, + bool LegalOperations) { // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share // some of these bailouts with other transforms. - if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG)) + if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations)) return V; // The extract index must be a constant, so we can map it to a concat operand. @@ -19181,7 +19697,10 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { // The binop must be a vector type, so we can extract some fraction of it. EVT WideBVT = BinOp.getValueType(); - if (!WideBVT.isVector()) + // The optimisations below currently assume we are dealing with fixed length + // vectors. It is possible to add support for scalable vectors, but at the + // moment we've done no analysis to prove whether they are profitable or not. + if (!WideBVT.isFixedLengthVector()) return SDValue(); EVT VT = Extract->getValueType(0); @@ -19296,19 +19815,15 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { return SDValue(); unsigned Index = ExtIdx->getZExtValue(); - unsigned NumElts = VT.getVectorNumElements(); + unsigned NumElts = VT.getVectorMinNumElements(); - // If the index is a multiple of the extract element count, we can offset the - // address by the store size multiplied by the subvector index. Otherwise if - // the scalar type is byte sized, we can just use the index multiplied by - // the element size in bytes as the offset. 
- unsigned Offset; - if (Index % NumElts == 0) - Offset = (Index / NumElts) * VT.getStoreSize(); - else if (VT.getScalarType().isByteSized()) - Offset = Index * VT.getScalarType().getStoreSize(); - else - return SDValue(); + // The definition of EXTRACT_SUBVECTOR states that the index must be a + // multiple of the minimum number of elements in the result type. + assert(Index % NumElts == 0 && "The extract subvector index is not a " + "multiple of the result's element count"); + + // It's fine to use TypeSize here as we know the offset will not be negative. + TypeSize Offset = VT.getStoreSize() * (Index / NumElts); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT)) @@ -19317,13 +19832,21 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { // The narrow load will be offset from the base address of the old load if // we are extracting from something besides index 0 (little-endian). SDLoc DL(Extract); - SDValue BaseAddr = Ld->getBasePtr(); // TODO: Use "BaseIndexOffset" to make this more effective. 
- SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL); + SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL); + + uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize()); MachineFunction &MF = DAG.getMachineFunction(); - MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset, - VT.getStoreSize()); + MachineMemOperand *MMO; + if (Offset.isScalable()) { + MachinePointerInfo MPI = + MachinePointerInfo(Ld->getPointerInfo().getAddrSpace()); + MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize); + } else + MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(), + StoreSize); + SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO); DAG.makeEquivalentMemoryOrdering(Ld, NewLd); return NewLd; @@ -19376,8 +19899,9 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { } if ((DestNumElts % SrcNumElts) == 0) { unsigned DestSrcRatio = DestNumElts / SrcNumElts; - if ((NVT.getVectorMinNumElements() % DestSrcRatio) == 0) { - ElementCount NewExtEC = NVT.getVectorElementCount() / DestSrcRatio; + if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) { + ElementCount NewExtEC = + NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio); EVT ScalarVT = SrcVT.getScalarType(); if ((ExtIdx % DestSrcRatio) == 0) { SDLoc DL(N); @@ -19391,7 +19915,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { V.getOperand(0), NewIndex); return DAG.getBitcast(NVT, NewExtract); } - if (NewExtEC == 1 && + if (NewExtEC.isScalar() && TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) { SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL); SDValue NewExtract = @@ -19496,7 +20020,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { N->getOperand(1)); } - if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG)) + if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations)) return NarrowBOp; if 
(SimplifyDemandedVectorElts(SDValue(N, 0))) @@ -20274,52 +20798,52 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } } - // Canonicalize shuffles according to rules: - // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A) - // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B) - // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B) - if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && - N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && - TLI.isTypeLegal(VT)) { - // The incoming shuffle must be of the same type as the result of the - // current shuffle. - assert(N1->getOperand(0).getValueType() == VT && - "Shuffle types don't match"); - - SDValue SV0 = N1->getOperand(0); - SDValue SV1 = N1->getOperand(1); - bool HasSameOp0 = N0 == SV0; - bool IsSV1Undef = SV1.isUndef(); - if (HasSameOp0 || IsSV1Undef || N0 == SV1) - // Commute the operands of this shuffle so that next rule - // will trigger. + if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { + // Canonicalize shuffles according to rules: + // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A) + // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B) + // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B) + if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && + N0.getOpcode() != ISD::VECTOR_SHUFFLE) { + // The incoming shuffle must be of the same type as the result of the + // current shuffle. + assert(N1->getOperand(0).getValueType() == VT && + "Shuffle types don't match"); + + SDValue SV0 = N1->getOperand(0); + SDValue SV1 = N1->getOperand(1); + bool HasSameOp0 = N0 == SV0; + bool IsSV1Undef = SV1.isUndef(); + if (HasSameOp0 || IsSV1Undef || N0 == SV1) + // Commute the operands of this shuffle so merging below will trigger. + return DAG.getCommutedVectorShuffle(*SVN); + } + + // Canonicalize splat shuffles to the RHS to improve merging below. 
+ // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u)) + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && + N1.getOpcode() == ISD::VECTOR_SHUFFLE && + cast<ShuffleVectorSDNode>(N0)->isSplat() && + !cast<ShuffleVectorSDNode>(N1)->isSplat()) { return DAG.getCommutedVectorShuffle(*SVN); + } } - // Try to fold according to rules: - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) - // Don't try to fold shuffles with illegal type. - // Only fold if this shuffle is the only user of the other shuffle. - if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) && - Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { - ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); - + // Compute the combined shuffle mask for a shuffle with SV0 as the first + // operand, and SV1 as the second operand. + // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask). + auto MergeInnerShuffle = [NumElts](ShuffleVectorSDNode *SVN, + ShuffleVectorSDNode *OtherSVN, SDValue N1, + SDValue &SV0, SDValue &SV1, + SmallVectorImpl<int> &Mask) -> bool { // Don't try to fold splats; they're likely to simplify somehow, or they // might be free. - if (OtherSV->isSplat()) - return SDValue(); + if (OtherSVN->isSplat()) + return false; - // The incoming shuffle must be of the same type as the result of the - // current shuffle. - assert(OtherSV->getOperand(0).getValueType() == VT && - "Shuffle types don't match"); + SV0 = SV1 = SDValue(); + Mask.clear(); - SDValue SV0, SV1; - SmallVector<int, 4> Mask; - // Compute the combined shuffle mask for a shuffle with SV0 as the first - // operand, and SV1 as the second operand. 
for (unsigned i = 0; i != NumElts; ++i) { int Idx = SVN->getMaskElt(i); if (Idx < 0) { @@ -20332,15 +20856,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (Idx < (int)NumElts) { // This shuffle index refers to the inner shuffle N0. Lookup the inner // shuffle mask to identify which vector is actually referenced. - Idx = OtherSV->getMaskElt(Idx); + Idx = OtherSVN->getMaskElt(Idx); if (Idx < 0) { // Propagate Undef. Mask.push_back(Idx); continue; } - - CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0) - : OtherSV->getOperand(1); + CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0) + : OtherSVN->getOperand(1); } else { // This shuffle index references an element within N1. CurrentVec = N1; @@ -20362,38 +20885,82 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { Mask.push_back(Idx); continue; } + if (!SV1.getNode() || SV1 == CurrentVec) { + // Ok. CurrentVec is the right hand side. + // Update the mask accordingly. + SV1 = CurrentVec; + Mask.push_back(Idx + NumElts); + continue; + } - // Bail out if we cannot convert the shuffle pair into a single shuffle. - if (SV1.getNode() && SV1 != CurrentVec) - return SDValue(); + // Last chance - see if the vector is another shuffle and if it + // uses one of the existing candidate shuffle ops. + if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) { + int InnerIdx = CurrentSVN->getMaskElt(Idx); + if (InnerIdx < 0) { + Mask.push_back(-1); + continue; + } + SDValue InnerVec = (InnerIdx < (int)NumElts) + ? CurrentSVN->getOperand(0) + : CurrentSVN->getOperand(1); + if (InnerVec.isUndef()) { + Mask.push_back(-1); + continue; + } + InnerIdx %= NumElts; + if (InnerVec == SV0) { + Mask.push_back(InnerIdx); + continue; + } + if (InnerVec == SV1) { + Mask.push_back(InnerIdx + NumElts); + continue; + } + } - // Ok. CurrentVec is the right hand side. - // Update the mask accordingly. 
- SV1 = CurrentVec; - Mask.push_back(Idx + NumElts); + // Bail out if we cannot convert the shuffle pair into a single shuffle. + return false; } + return true; + }; - // Check if all indices in Mask are Undef. In case, propagate Undef. - bool isUndefMask = true; - for (unsigned i = 0; i != NumElts && isUndefMask; ++i) - isUndefMask &= Mask[i] < 0; + // Try to fold according to rules: + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) + // Don't try to fold shuffles with illegal type. + // Only fold if this shuffle is the only user of the other shuffle. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) && + Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { + ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); - if (isUndefMask) - return DAG.getUNDEF(VT); + // The incoming shuffle must be of the same type as the result of the + // current shuffle. + assert(OtherSV->getOperand(0).getValueType() == VT && + "Shuffle types don't match"); + + SDValue SV0, SV1; + SmallVector<int, 4> Mask; + if (MergeInnerShuffle(SVN, OtherSV, N1, SV0, SV1, Mask)) { + // Check if all indices in Mask are Undef. In case, propagate Undef. + if (llvm::all_of(Mask, [](int M) { return M < 0; })) + return DAG.getUNDEF(VT); - if (!SV0.getNode()) - SV0 = DAG.getUNDEF(VT); - if (!SV1.getNode()) - SV1 = DAG.getUNDEF(VT); + if (!SV0.getNode()) + SV0 = DAG.getUNDEF(VT); + if (!SV1.getNode()) + SV1 = DAG.getUNDEF(VT); - // Avoid introducing shuffles with illegal mask. 
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) - return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG); + // Avoid introducing shuffles with illegal mask. + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) + return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG); + } } if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG)) @@ -20478,8 +21045,8 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST && N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0).getOperand(1) == N2 && - N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() == - VT.getVectorNumElements() && + N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() == + VT.getVectorElementCount() && N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() == VT.getSizeInBits()) { return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0)); @@ -20496,7 +21063,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { EVT CN1VT = CN1.getValueType(); if (CN0VT.isVector() && CN1VT.isVector() && CN0VT.getVectorElementType() == CN1VT.getVectorElementType() && - CN0VT.getVectorNumElements() == VT.getVectorNumElements()) { + CN0VT.getVectorElementCount() == VT.getVectorElementCount()) { SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), CN0.getValueType(), 
CN0, CN1, N2); return DAG.getBitcast(VT, NewINSERT); @@ -20535,7 +21102,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { SDLoc DL(N); SDValue NewIdx; LLVMContext &Ctx = *DAG.getContext(); - unsigned NumElts = VT.getVectorNumElements(); + ElementCount NumElts = VT.getVectorElementCount(); unsigned EltSizeInBits = VT.getScalarSizeInBits(); if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) { unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits(); @@ -20543,8 +21110,9 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL); } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) { unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits; - if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) { - NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale); + if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) { + NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, + NumElts.divideCoefficientBy(Scale)); NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL); } } @@ -20576,8 +21144,10 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { // If the input vector is a concatenation, and the insert replaces // one of the pieces, we can optimize into a single concat_vectors. if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() && - N0.getOperand(0).getValueType() == N1.getValueType()) { - unsigned Factor = N1.getValueType().getVectorNumElements(); + N0.getOperand(0).getValueType() == N1.getValueType() && + N0.getOperand(0).getValueType().isScalableVector() == + N1.getValueType().isScalableVector()) { + unsigned Factor = N1.getValueType().getVectorMinNumElements(); SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end()); Ops[InsIdx / Factor] = N1; return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); @@ -20621,7 +21191,7 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) { unsigned Opcode = N->getOpcode(); // VECREDUCE over 1-element vector is just an extract. 
- if (VT.getVectorNumElements() == 1) { + if (VT.getVectorElementCount().isScalar()) { SDLoc dl(N); SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0, @@ -20860,7 +21430,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { SDValue Z = LHS.getOperand(2); EVT NarrowVT = X.getValueType(); if (NarrowVT == Y.getValueType() && - TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) { + TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT, + LegalOperations)) { // (binop undef, undef) may not return undef, so compute that result. SDLoc DL(N); SDValue VecC = @@ -20873,11 +21444,10 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { // Make sure all but the first op are undef or constant. auto ConcatWithConstantOrUndef = [](SDValue Concat) { return Concat.getOpcode() == ISD::CONCAT_VECTORS && - std::all_of(std::next(Concat->op_begin()), Concat->op_end(), - [](const SDValue &Op) { - return Op.isUndef() || - ISD::isBuildVectorOfConstantSDNodes(Op.getNode()); - }); + all_of(drop_begin(Concat->ops()), [](const SDValue &Op) { + return Op.isUndef() || + ISD::isBuildVectorOfConstantSDNodes(Op.getNode()); + }); }; // The following pattern is likely to emerge with vector reduction ops. Moving @@ -21099,7 +21669,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // It is safe to replace the two loads if they have different alignments, // but the new load must be the minimum (most restrictive) alignment of the // inputs. - unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment()); + Align Alignment = std::min(LLD->getAlign(), RLD->getAlign()); MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags(); if (!RLD->isInvariant()) MMOFlags &= ~MachineMemOperand::MOInvariant; @@ -21205,6 +21775,46 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, return DAG.getNode(ISD::AND, DL, AType, Shift, N2); } +// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values. 
+SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + bool IsFabs = N->getOpcode() == ISD::FABS; + bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT); + + if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse()) + return SDValue(); + + SDValue Int = N0.getOperand(0); + EVT IntVT = Int.getValueType(); + + // The operand to cast should be integer. + if (!IntVT.isInteger() || IntVT.isVector()) + return SDValue(); + + // (fneg (bitconvert x)) -> (bitconvert (xor x sign)) + // (fabs (bitconvert x)) -> (bitconvert (and x ~sign)) + APInt SignMask; + if (N0.getValueType().isVector()) { + // For vector, create a sign mask (0x80...) or its inverse (for fabs, + // 0x7f...) per element and splat it. + SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits()); + if (IsFabs) + SignMask = ~SignMask; + SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); + } else { + // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...) + SignMask = APInt::getSignMask(IntVT.getSizeInBits()); + if (IsFabs) + SignMask = ~SignMask; + } + SDLoc DL(N0); + Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int, + DAG.getConstant(SignMask, DL, IntVT)); + AddToWorklist(Int.getNode()); + return DAG.getBitcast(VT, Int); +} + /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 /// in it. This may be a win when the constant is not otherwise available @@ -21486,9 +22096,8 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V). 
SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { EVT VT = V.getValueType(); - unsigned EltBits = VT.getScalarSizeInBits(); SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V); - SDValue Base = DAG.getConstant(EltBits - 1, DL, VT); + SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz); return LogBase2; } @@ -21666,37 +22275,21 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, Reciprocal)) { AddToWorklist(Est.getNode()); - if (Iterations) { + if (Iterations) Est = UseOneConstNR ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal) : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); - - if (!Reciprocal) { - // The estimate is now completely wrong if the input was exactly 0.0 or - // possibly a denormal. Force the answer to 0.0 for those cases. - SDLoc DL(Op); - EVT CCVT = getSetCCResultType(VT); - ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT; - DenormalMode DenormMode = DAG.getDenormalMode(VT); - if (DenormMode.Input == DenormalMode::IEEE) { - // This is specifically a check for the handling of denormal inputs, - // not the result. - - // fabs(X) < SmallestNormal ? 0.0 : Est - const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); - APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem); - SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT); - SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); - SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op); - SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT); - Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est); - } else { - // X == 0.0 ? 0.0 : Est - SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); - SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); - Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est); - } - } + if (!Reciprocal) { + SDLoc DL(Op); + // Try the target specific test first. 
+ SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT)); + + // The estimate is now completely wrong if the input was exactly 0.0 or + // possibly a denormal. Force the answer to 0.0 or value provided by + // target for those cases. + Est = DAG.getNode( + Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, + Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est); } return Est; } diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index fc6c3a145f13..62f7f3d98ba6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -113,11 +113,6 @@ using namespace PatternMatch; #define DEBUG_TYPE "isel" -// FIXME: Remove this after the feature has proven reliable. -static cl::opt<bool> SinkLocalValues("fast-isel-sink-local-values", - cl::init(true), cl::Hidden, - cl::desc("Sink local values in FastISel")); - STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by " "target-independent selector"); STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by " @@ -139,7 +134,6 @@ void FastISel::startNewBlock() { LastLocalValue = EmitStartPt; } -/// Flush the local CSE map and sink anything we can. void FastISel::finishBasicBlock() { flushLocalValueMap(); } bool FastISel::lowerArguments() { @@ -164,48 +158,77 @@ bool FastISel::lowerArguments() { /// Return the defined register if this instruction defines exactly one /// virtual register and uses no other virtual registers. Otherwise return 0. -static Register findSinkableLocalRegDef(MachineInstr &MI) { +static Register findLocalRegDef(MachineInstr &MI) { Register RegDef; for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; if (MO.isDef()) { if (RegDef) - return 0; + return Register(); RegDef = MO.getReg(); } else if (MO.getReg().isVirtual()) { - // This is another use of a vreg. Don't try to sink it. + // This is another use of a vreg. Don't delete it. 
return Register(); } } return RegDef; } +static bool isRegUsedByPhiNodes(Register DefReg, + FunctionLoweringInfo &FuncInfo) { + for (auto &P : FuncInfo.PHINodesToUpdate) + if (P.second == DefReg) + return true; + return false; +} + void FastISel::flushLocalValueMap() { - // Try to sink local values down to their first use so that we can give them a - // better debug location. This has the side effect of shrinking local value - // live ranges, which helps out fast regalloc. - if (SinkLocalValues && LastLocalValue != EmitStartPt) { - // Sink local value materialization instructions between EmitStartPt and - // LastLocalValue. Visit them bottom-up, starting from LastLocalValue, to - // avoid inserting into the range that we're iterating over. + // If FastISel bails out, it could leave local value instructions behind + // that aren't used for anything. Detect and erase those. + if (LastLocalValue != EmitStartPt) { + // Save the first instruction after local values, for later. + MachineBasicBlock::iterator FirstNonValue(LastLocalValue); + ++FirstNonValue; + MachineBasicBlock::reverse_iterator RE = EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt) : FuncInfo.MBB->rend(); MachineBasicBlock::reverse_iterator RI(LastLocalValue); - - InstOrderMap OrderMap; for (; RI != RE;) { MachineInstr &LocalMI = *RI; + // Increment before erasing what it points to. 
++RI; - bool Store = true; - if (!LocalMI.isSafeToMove(nullptr, Store)) + Register DefReg = findLocalRegDef(LocalMI); + if (!DefReg) continue; - Register DefReg = findSinkableLocalRegDef(LocalMI); - if (DefReg == 0) + if (FuncInfo.RegsWithFixups.count(DefReg)) continue; + bool UsedByPHI = isRegUsedByPhiNodes(DefReg, FuncInfo); + if (!UsedByPHI && MRI.use_nodbg_empty(DefReg)) { + if (EmitStartPt == &LocalMI) + EmitStartPt = EmitStartPt->getPrevNode(); + LLVM_DEBUG(dbgs() << "removing dead local value materialization" + << LocalMI); + LocalMI.eraseFromParent(); + } + } - sinkLocalValueMaterialization(LocalMI, DefReg, OrderMap); + if (FirstNonValue != FuncInfo.MBB->end()) { + // See if there are any local value instructions left. If so, we want to + // make sure the first one has a debug location; if it doesn't, use the + // first non-value instruction's debug location. + + // If EmitStartPt is non-null, this block had copies at the top before + // FastISel started doing anything; it points to the last one, so the + // first local value instruction is the one after EmitStartPt. + // If EmitStartPt is null, the first local value instruction is at the + // top of the block. + MachineBasicBlock::iterator FirstLocalValue = + EmitStartPt ? ++MachineBasicBlock::iterator(EmitStartPt) + : FuncInfo.MBB->begin(); + if (FirstLocalValue != FirstNonValue && !FirstLocalValue->getDebugLoc()) + FirstLocalValue->setDebugLoc(FirstNonValue->getDebugLoc()); } } @@ -213,132 +236,6 @@ void FastISel::flushLocalValueMap() { LastLocalValue = EmitStartPt; recomputeInsertPt(); SavedInsertPt = FuncInfo.InsertPt; - LastFlushPoint = FuncInfo.InsertPt; -} - -static bool isRegUsedByPhiNodes(Register DefReg, - FunctionLoweringInfo &FuncInfo) { - for (auto &P : FuncInfo.PHINodesToUpdate) - if (P.second == DefReg) - return true; - return false; -} - -static bool isTerminatingEHLabel(MachineBasicBlock *MBB, MachineInstr &MI) { - // Ignore non-EH labels. 
- if (!MI.isEHLabel()) - return false; - - // Any EH label outside a landing pad must be for an invoke. Consider it a - // terminator. - if (!MBB->isEHPad()) - return true; - - // If this is a landingpad, the first non-phi instruction will be an EH_LABEL. - // Don't consider that label to be a terminator. - return MI.getIterator() != MBB->getFirstNonPHI(); -} - -/// Build a map of instruction orders. Return the first terminator and its -/// order. Consider EH_LABEL instructions to be terminators as well, since local -/// values for phis after invokes must be materialized before the call. -void FastISel::InstOrderMap::initialize( - MachineBasicBlock *MBB, MachineBasicBlock::iterator LastFlushPoint) { - unsigned Order = 0; - for (MachineInstr &I : *MBB) { - if (!FirstTerminator && - (I.isTerminator() || isTerminatingEHLabel(MBB, I))) { - FirstTerminator = &I; - FirstTerminatorOrder = Order; - } - Orders[&I] = Order++; - - // We don't need to order instructions past the last flush point. - if (I.getIterator() == LastFlushPoint) - break; - } -} - -void FastISel::sinkLocalValueMaterialization(MachineInstr &LocalMI, - Register DefReg, - InstOrderMap &OrderMap) { - // If this register is used by a register fixup, MRI will not contain all - // the uses until after register fixups, so don't attempt to sink or DCE - // this instruction. Register fixups typically come from no-op cast - // instructions, which replace the cast instruction vreg with the local - // value vreg. - if (FuncInfo.RegsWithFixups.count(DefReg)) - return; - - // We can DCE this instruction if there are no uses and it wasn't a - // materialized for a successor PHI node. 
- bool UsedByPHI = isRegUsedByPhiNodes(DefReg, FuncInfo); - if (!UsedByPHI && MRI.use_nodbg_empty(DefReg)) { - if (EmitStartPt == &LocalMI) - EmitStartPt = EmitStartPt->getPrevNode(); - LLVM_DEBUG(dbgs() << "removing dead local value materialization " - << LocalMI); - OrderMap.Orders.erase(&LocalMI); - LocalMI.eraseFromParent(); - return; - } - - // Number the instructions if we haven't yet so we can efficiently find the - // earliest use. - if (OrderMap.Orders.empty()) - OrderMap.initialize(FuncInfo.MBB, LastFlushPoint); - - // Find the first user in the BB. - MachineInstr *FirstUser = nullptr; - unsigned FirstOrder = std::numeric_limits<unsigned>::max(); - for (MachineInstr &UseInst : MRI.use_nodbg_instructions(DefReg)) { - auto I = OrderMap.Orders.find(&UseInst); - assert(I != OrderMap.Orders.end() && - "local value used by instruction outside local region"); - unsigned UseOrder = I->second; - if (UseOrder < FirstOrder) { - FirstOrder = UseOrder; - FirstUser = &UseInst; - } - } - - // The insertion point will be the first terminator or the first user, - // whichever came first. If there was no terminator, this must be a - // fallthrough block and the insertion point is the end of the block. - MachineBasicBlock::instr_iterator SinkPos; - if (UsedByPHI && OrderMap.FirstTerminatorOrder < FirstOrder) { - FirstOrder = OrderMap.FirstTerminatorOrder; - SinkPos = OrderMap.FirstTerminator->getIterator(); - } else if (FirstUser) { - SinkPos = FirstUser->getIterator(); - } else { - assert(UsedByPHI && "must be users if not used by a phi"); - SinkPos = FuncInfo.MBB->instr_end(); - } - - // Collect all DBG_VALUEs before the new insertion position so that we can - // sink them. 
- SmallVector<MachineInstr *, 1> DbgValues; - for (MachineInstr &DbgVal : MRI.use_instructions(DefReg)) { - if (!DbgVal.isDebugValue()) - continue; - unsigned UseOrder = OrderMap.Orders[&DbgVal]; - if (UseOrder < FirstOrder) - DbgValues.push_back(&DbgVal); - } - - // Sink LocalMI before SinkPos and assign it the same DebugLoc. - LLVM_DEBUG(dbgs() << "sinking local value to first use " << LocalMI); - FuncInfo.MBB->remove(&LocalMI); - FuncInfo.MBB->insert(SinkPos, &LocalMI); - if (SinkPos != FuncInfo.MBB->end()) - LocalMI.setDebugLoc(SinkPos->getDebugLoc()); - - // Sink any debug values that we've collected. - for (MachineInstr *DI : DbgValues) { - FuncInfo.MBB->remove(DI); - FuncInfo.MBB->insert(SinkPos, DI); - } } bool FastISel::hasTrivialKill(const Value *V) { @@ -446,7 +343,7 @@ Register FastISel::materializeConstant(const Value *V, MVT VT) { getRegForValue(ConstantInt::get(V->getContext(), SIntVal)); if (IntegerReg) Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg, - /*Kill=*/false); + /*Op0IsKill=*/false); } } } else if (const auto *Op = dyn_cast<Operator>(V)) { @@ -560,8 +457,6 @@ void FastISel::removeDeadCode(MachineBasicBlock::iterator I, assert(I.isValid() && E.isValid() && std::distance(I, E) > 0 && "Invalid iterator!"); while (I != E) { - if (LastFlushPoint == I) - LastFlushPoint = E; if (SavedInsertPt == I) SavedInsertPt = E; if (EmitStartPt == I) @@ -578,12 +473,9 @@ void FastISel::removeDeadCode(MachineBasicBlock::iterator I, } FastISel::SavePoint FastISel::enterLocalValueArea() { - MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt; - DebugLoc OldDL = DbgLoc; + SavePoint OldInsertPt = FuncInfo.InsertPt; recomputeInsertPt(); - DbgLoc = DebugLoc(); - SavePoint SP = {OldInsertPt, OldDL}; - return SP; + return OldInsertPt; } void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) { @@ -591,8 +483,7 @@ void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) { LastLocalValue = &*std::prev(FuncInfo.InsertPt); // Restore the 
previous insert position. - FuncInfo.InsertPt = OldInsertPt.InsertPt; - DbgLoc = OldInsertPt.DL; + FuncInfo.InsertPt = OldInsertPt; } bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { @@ -690,6 +581,12 @@ bool FastISel::selectGetElementPtr(const User *I) { Register N = getRegForValue(I->getOperand(0)); if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; + + // FIXME: The code below does not handle vector GEPs. Halt "fast" selection + // and bail. + if (isa<VectorType>(I->getType())) + return false; + bool NIsKill = hasTrivialKill(I->getOperand(0)); // Keep a running tab of the total offset to coalesce multiple N = N + Offset @@ -1310,11 +1207,6 @@ bool FastISel::selectCall(const User *I) { // Handle simple inline asms. if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledOperand())) { - // If the inline asm has side effects, then make sure that no local value - // lives across by flushing the local value map. - if (IA->hasSideEffects()) - flushLocalValueMap(); - // Don't attempt to handle constraints. if (!IA->getConstraintString().empty()) return false; @@ -1344,15 +1236,6 @@ bool FastISel::selectCall(const User *I) { if (const auto *II = dyn_cast<IntrinsicInst>(Call)) return selectIntrinsicCall(II); - // Usually, it does not make sense to initialize a value, - // make an unrelated function call and use the value, because - // it tends to be spilled on the stack. So, we move the pointer - // to the last local value to the beginning of the block, so that - // all the values which have already been materialized, - // appear after the call. It also makes sense to skip intrinsics - // since they tend to be inlined. - flushLocalValueMap(); - return lowerCall(Call); } @@ -1369,6 +1252,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { case Intrinsic::sideeffect: // Neither does the assume intrinsic; it's also OK not to codegen its operand. 
case Intrinsic::assume: + // Neither does the llvm.experimental.noalias.scope.decl intrinsic + case Intrinsic::experimental_noalias_scope_decl: return true; case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(II); @@ -1637,6 +1522,11 @@ void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue) } bool FastISel::selectInstruction(const Instruction *I) { + // Flush the local value map before starting each instruction. + // This improves locality and debugging, and can reduce spills. + // Reuse of values across IR instructions is relatively uncommon. + flushLocalValueMap(); + MachineInstr *SavedLastLocalValue = getLastLocalValue(); // Just before the terminator instruction, insert instructions to // feed PHI nodes in successor blocks. @@ -1783,13 +1673,13 @@ bool FastISel::selectFNeg(const User *I, const Value *In) { return false; Register IntResultReg = fastEmit_ri_( - IntVT.getSimpleVT(), ISD::XOR, IntReg, /*IsKill=*/true, + IntVT.getSimpleVT(), ISD::XOR, IntReg, /*Op0IsKill=*/true, UINT64_C(1) << (VT.getSizeInBits() - 1), IntVT.getSimpleVT()); if (!IntResultReg) return false; ResultReg = fastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), ISD::BITCAST, - IntResultReg, /*IsKill=*/true); + IntResultReg, /*Op0IsKill=*/true); if (!ResultReg) return false; @@ -1845,13 +1735,8 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) { return selectBinaryOp(I, ISD::FADD); case Instruction::Sub: return selectBinaryOp(I, ISD::SUB); - case Instruction::FSub: { - // FNeg is currently represented in LLVM IR as a special case of FSub. 
- Value *X; - if (match(I, m_FNeg(m_Value(X)))) - return selectFNeg(I, X); + case Instruction::FSub: return selectBinaryOp(I, ISD::FSUB); - } case Instruction::Mul: return selectBinaryOp(I, ISD::MUL); case Instruction::FMul: @@ -2347,9 +2232,9 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB); - // Set the DebugLoc for the copy. Prefer the location of the operand - // if there is one; use the location of the PHI otherwise. - DbgLoc = PN.getDebugLoc(); + // Set the DebugLoc for the copy. Use the location of the operand if + // there is one; otherwise no location, flushLocalValueMap will fix it. + DbgLoc = DebugLoc(); if (const auto *Inst = dyn_cast<Instruction>(PHIOp)) DbgLoc = Inst->getDebugLoc(); diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 5cf83cff3a90..32a4f60df097 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -197,7 +197,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Look for inline asm that clobbers the SP register. 
if (auto *Call = dyn_cast<CallBase>(&I)) { if (Call->isInlineAsm()) { - unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + Register SP = TLI->getStackPointerRegisterToSaveRestore(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); std::vector<TargetLowering::AsmOperandInfo> Ops = TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI, @@ -360,7 +360,7 @@ void FunctionLoweringInfo::clear() { RegFixups.clear(); RegsWithFixups.clear(); StatepointStackSlots.clear(); - StatepointSpillMaps.clear(); + StatepointRelocationMaps.clear(); PreferredExtendType.clear(); } @@ -458,8 +458,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { APInt Val = CI->getValue().zextOrTrunc(BitWidth); DestLOI.NumSignBits = Val.getNumSignBits(); - DestLOI.Known.Zero = ~Val; - DestLOI.Known.One = Val; + DestLOI.Known = KnownBits::makeConstant(Val); } else { assert(ValueMap.count(V) && "V should have been placed in ValueMap when its" "CopyToReg node was created."); @@ -509,8 +508,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { return; } DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits); - DestLOI.Known.Zero &= SrcLOI->Known.Zero; - DestLOI.Known.One &= SrcLOI->Known.One; + DestLOI.Known = KnownBits::commonBits(DestLOI.Known, SrcLOI->Known); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 0e4e99214aa2..a5978711b871 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/IR/PseudoProbe.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -200,6 +201,8 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, 
bool HasVRegVariadicDefs = !MF->getTarget().usesPhysRegsForValues() && II.isVariadic() && II.variadicOpsAreDefs(); unsigned NumVRegs = HasVRegVariadicDefs ? NumResults : II.getNumDefs(); + if (Node->getMachineOpcode() == TargetOpcode::STATEPOINT) + NumVRegs = NumResults; for (unsigned i = 0; i < NumVRegs; ++i) { // If the specific node value is only used by a CopyToReg and the dest reg // is a vreg in the same register class, use the CopyToReg'd destination @@ -693,6 +696,11 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, return &*MIB; } + // Attempt to produce a DBG_INSTR_REF if we've been asked to. + if (EmitDebugInstrRefs) + if (auto *InstrRef = EmitDbgInstrRef(SD, VRBaseMap)) + return InstrRef; + if (SD->getKind() == SDDbgValue::FRAMEIX) { // Stack address; this needs to be lowered in target-dependent fashion. // EmitTargetCodeForFrameDebugValue is responsible for allocation. @@ -760,6 +768,63 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, } MachineInstr * +InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD, + DenseMap<SDValue, Register> &VRBaseMap) { + // Instruction referencing is still in a prototype state: for now we're only + // going to support SDNodes within a block. Copies are not supported, they + // don't actually define a value. + if (SD->getKind() != SDDbgValue::SDNODE) + return nullptr; + + SDNode *Node = SD->getSDNode(); + SDValue Op = SDValue(Node, SD->getResNo()); + DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op); + if (I==VRBaseMap.end()) + return nullptr; // undef value: let EmitDbgValue produce a DBG_VALUE $noreg. + + MDNode *Var = SD->getVariable(); + MDNode *Expr = SD->getExpression(); + DebugLoc DL = SD->getDebugLoc(); + + // Try to pick out a defining instruction at this point. + unsigned VReg = getVR(Op, VRBaseMap); + MachineInstr *ResultInstr = nullptr; + + // No definition corresponds to scenarios where a vreg is live-in to a block, + // and doesn't have a defining instruction (yet). 
This can be patched up + // later; at this early stage of implementation, fall back to using DBG_VALUE. + if (!MRI->hasOneDef(VReg)) + return nullptr; + + MachineInstr &DefMI = *MRI->def_instr_begin(VReg); + // Some target specific opcodes can become copies. As stated above, we're + // ignoring those for now. + if (DefMI.isCopy() || DefMI.getOpcode() == TargetOpcode::SUBREG_TO_REG) + return nullptr; + + const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF); + auto MIB = BuildMI(*MF, DL, RefII); + + // Find the operand which defines the specified VReg. + unsigned OperandIdx = 0; + for (const auto &MO : DefMI.operands()) { + if (MO.isReg() && MO.isDef() && MO.getReg() == VReg) + break; + ++OperandIdx; + } + assert(OperandIdx < DefMI.getNumOperands()); + + // Make the DBG_INSTR_REF refer to that instruction, and that operand. + unsigned InstrNum = DefMI.getDebugInstrNum(); + MIB.addImm(InstrNum); + MIB.addImm(OperandIdx); + MIB.addMetadata(Var); + MIB.addMetadata(Expr); + ResultInstr = &*MIB; + return ResultInstr; +} + +MachineInstr * InstrEmitter::EmitDbgLabel(SDDbgLabel *SD) { MDNode *Label = SD->getLabel(); DebugLoc DL = SD->getDebugLoc(); @@ -821,6 +886,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, NumDefs = NumResults; } ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC); + } else if (Opc == TargetOpcode::STATEPOINT) { + NumDefs = NumResults; } unsigned NumImpUses = 0; @@ -970,6 +1037,22 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, if (!UsedRegs.empty() || II.getImplicitDefs() || II.hasOptionalDef()) MIB->setPhysRegsDeadExcept(UsedRegs, *TRI); + // STATEPOINT is too 'dynamic' to have meaningful machine description. + // We have to manually tie operands. 
+ if (Opc == TargetOpcode::STATEPOINT && NumDefs > 0) { + assert(!HasPhysRegOuts && "STATEPOINT mishandled"); + MachineInstr *MI = MIB; + unsigned Def = 0; + int First = StatepointOpers(MI).getFirstGCPtrIdx(); + assert(First > 0 && "Statepoint has Defs but no GC ptr list"); + unsigned Use = (unsigned)First; + while (Def < NumDefs) { + if (MI->getOperand(Use).isReg()) + MI->tieOperands(Def++, Use); + Use = StackMaps::getNextMetaArgIdx(MI, Use); + } + } + // Run post-isel target hook to adjust this instruction if needed. if (II.hasPostISelHook()) TLI->AdjustInstrPostInstrSelection(*MIB, Node); @@ -1042,6 +1125,20 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, break; } + case ISD::PSEUDO_PROBE: { + unsigned TarOp = TargetOpcode::PSEUDO_PROBE; + auto Guid = cast<PseudoProbeSDNode>(Node)->getGuid(); + auto Index = cast<PseudoProbeSDNode>(Node)->getIndex(); + auto Attr = cast<PseudoProbeSDNode>(Node)->getAttributes(); + + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp)) + .addImm(Guid) + .addImm(Index) + .addImm((uint8_t)PseudoProbeType::Block) + .addImm(Attr); + break; + } + case ISD::INLINEASM: case ISD::INLINEASM_BR: { unsigned NumOps = Node->getNumOperands(); @@ -1157,10 +1254,12 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, /// InstrEmitter - Construct an InstrEmitter and set it to start inserting /// at the given position in the given block. 
-InstrEmitter::InstrEmitter(MachineBasicBlock *mbb, +InstrEmitter::InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos) : MF(mbb->getParent()), MRI(&MF->getRegInfo()), TII(MF->getSubtarget().getInstrInfo()), TRI(MF->getSubtarget().getRegisterInfo()), TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb), - InsertPos(insertpos) {} + InsertPos(insertpos) { + EmitDebugInstrRefs = TM.Options.ValueTrackingVariableLocations; +} diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h index c3567eae9161..09658b8143fe 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -26,6 +26,7 @@ class MCInstrDesc; class SDDbgLabel; class SDDbgValue; class TargetLowering; +class TargetMachine; class LLVM_LIBRARY_VISIBILITY InstrEmitter { MachineFunction *MF; @@ -37,6 +38,9 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { MachineBasicBlock *MBB; MachineBasicBlock::iterator InsertPos; + /// Should we try to produce DBG_INSTR_REF instructions? + bool EmitDebugInstrRefs; + /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an /// implicit physical register output. void EmitCopyFromReg(SDNode *Node, unsigned ResNo, @@ -109,6 +113,11 @@ public: MachineInstr *EmitDbgValue(SDDbgValue *SD, DenseMap<SDValue, Register> &VRBaseMap); + /// Attempt to emit a dbg_value as a DBG_INSTR_REF. May fail and return + /// nullptr, in which case we fall back to plain EmitDbgValue. + MachineInstr *EmitDbgInstrRef(SDDbgValue *SD, + DenseMap<SDValue, Register> &VRBaseMap); + /// Generate machine instruction for a dbg_label node. MachineInstr *EmitDbgLabel(SDDbgLabel *SD); @@ -130,7 +139,8 @@ public: /// InstrEmitter - Construct an InstrEmitter and set it to start inserting /// at the given position in the given block. 
- InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos); + InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb, + MachineBasicBlock::iterator insertpos); private: void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 6a6004c158bb..62d7191036ca 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -173,14 +173,17 @@ private: SDValue NewIntValue) const; SDValue ExpandFCOPYSIGN(SDNode *Node) const; SDValue ExpandFABS(SDNode *Node) const; + SDValue ExpandFNEG(SDNode *Node) const; SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain); void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl, SmallVectorImpl<SDValue> &Results); void PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl, SmallVectorImpl<SDValue> &Results); + SDValue PromoteLegalFP_TO_INT_SAT(SDNode *Node, const SDLoc &dl); SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl); SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl); + SDValue ExpandPARITY(SDValue Op, const SDLoc &dl); SDValue ExpandExtractFromVectorThroughStack(SDValue Op); SDValue ExpandInsertToVectorThroughStack(SDValue Op); @@ -428,7 +431,6 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { LLVM_DEBUG(dbgs() << "Optimizing float store operations\n"); // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' - // FIXME: We shouldn't do this for TargetConstantFP's. // FIXME: move this to the DAG Combiner! Note that we can't regress due // to phase ordering between legalized code and the dag combiner. This // probably means that we need to integrate dag combiner and legalizer @@ -436,10 +438,16 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { // We generally can't do this one for long doubles. 
SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); + SDValue Value = ST->getValue(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); SDLoc dl(ST); - if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { + + // Don't optimise TargetConstantFP + if (Value.getOpcode() == ISD::TargetConstantFP) + return SDValue(); + + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) { if (CFP->getValueType(0) == MVT::f32 && TLI.isTypeLegal(MVT::i32)) { SDValue Con = DAG.getConstant(CFP->getValueAPF(). @@ -470,7 +478,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), ST->getOriginalAlign(), MMOFlags, AAInfo); - Ptr = DAG.getMemBasePlusOffset(Ptr, 4, dl); + Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), dl); Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), ST->getOriginalAlign(), MMOFlags, AAInfo); @@ -479,7 +487,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { } } } - return SDValue(nullptr, 0); + return SDValue(); } void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { @@ -540,28 +548,29 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { LLVM_DEBUG(dbgs() << "Legalizing truncating store operations\n"); SDValue Value = ST->getValue(); EVT StVT = ST->getMemoryVT(); - unsigned StWidth = StVT.getSizeInBits(); + TypeSize StWidth = StVT.getSizeInBits(); + TypeSize StSize = StVT.getStoreSizeInBits(); auto &DL = DAG.getDataLayout(); - if (StWidth != StVT.getStoreSizeInBits()) { + if (StWidth != StSize) { // Promote to a byte-sized store with upper bits zero if not // storing an integral number of bytes. 
For example, promote // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), - StVT.getStoreSizeInBits()); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StSize.getFixedSize()); Value = DAG.getZeroExtendInReg(Value, dl, StVT); SDValue Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT, ST->getOriginalAlign(), MMOFlags, AAInfo); ReplaceNode(SDValue(Node, 0), Result); - } else if (StWidth & (StWidth - 1)) { + } else if (!StVT.isVector() && !isPowerOf2_64(StWidth.getFixedSize())) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); - unsigned LogStWidth = Log2_32(StWidth); + unsigned StWidthBits = StWidth.getFixedSize(); + unsigned LogStWidth = Log2_32(StWidthBits); assert(LogStWidth < 32); unsigned RoundWidth = 1 << LogStWidth; - assert(RoundWidth < StWidth); - unsigned ExtraWidth = StWidth - RoundWidth; + assert(RoundWidth < StWidthBits); + unsigned ExtraWidth = StWidthBits - RoundWidth; assert(ExtraWidth < RoundWidth); assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && "Store size not an integral number of bytes!"); @@ -578,7 +587,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the remaining ExtraWidth bits. 
IncrementSize = RoundWidth / 8; - Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); + Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl); Hi = DAG.getNode( ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(RoundWidth, dl, @@ -718,7 +727,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { LLVM_DEBUG(dbgs() << "Legalizing extending load operation\n"); EVT SrcVT = LD->getMemoryVT(); - unsigned SrcWidth = SrcVT.getSizeInBits(); + TypeSize SrcWidth = SrcVT.getSizeInBits(); MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); @@ -764,14 +773,15 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Value = Result; Chain = Ch; - } else if (SrcWidth & (SrcWidth - 1)) { + } else if (!isPowerOf2_64(SrcWidth.getKnownMinSize())) { // If not loading a power-of-2 number of bits, expand as two loads. assert(!SrcVT.isVector() && "Unsupported extload!"); - unsigned LogSrcWidth = Log2_32(SrcWidth); + unsigned SrcWidthBits = SrcWidth.getFixedSize(); + unsigned LogSrcWidth = Log2_32(SrcWidthBits); assert(LogSrcWidth < 32); unsigned RoundWidth = 1 << LogSrcWidth; - assert(RoundWidth < SrcWidth); - unsigned ExtraWidth = SrcWidth - RoundWidth; + assert(RoundWidth < SrcWidthBits); + unsigned ExtraWidth = SrcWidthBits - RoundWidth; assert(ExtraWidth < RoundWidth); assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && "Load size not an integral number of bytes!"); @@ -790,7 +800,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Load the remaining ExtraWidth bits. 
IncrementSize = RoundWidth / 8; - Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); + Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo); @@ -818,7 +828,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; - Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); + Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo); @@ -1103,6 +1113,18 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { // They'll be converted to Copy(To/From)Reg. Action = TargetLowering::Legal; break; + case ISD::UBSANTRAP: + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + if (Action == TargetLowering::Expand) { + // replace ISD::UBSANTRAP with ISD::TRAP + SDValue NewVal; + NewVal = DAG.getNode(ISD::TRAP, SDLoc(Node), Node->getVTList(), + Node->getOperand(0)); + ReplaceNode(Node, NewVal.getNode()); + LegalizeOp(NewVal.getNode()); + return; + } + break; case ISD::DEBUGTRAP: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); if (Action == TargetLowering::Expand) { @@ -1118,10 +1140,13 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: - case ISD::USUBSAT: { + case ISD::USUBSAT: + case ISD::SSHLSAT: + case ISD::USHLSAT: + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); break; - } case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: @@ -1159,6 +1184,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = 
TLI.getOperationAction( Node->getOpcode(), Node->getOperand(0).getValueType()); break; + case ISD::VECREDUCE_SEQ_FADD: + Action = TLI.getOperationAction( + Node->getOpcode(), Node->getOperand(1).getValueType()); + break; default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { Action = TargetLowering::Legal; @@ -1411,6 +1440,12 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { SmallVector<SDValue, 8> Stores; unsigned TypeByteSize = MemVT.getSizeInBits() / 8; assert(TypeByteSize > 0 && "Vector element type too small for stack store!"); + + // If the destination vector element type of a BUILD_VECTOR is narrower than + // the source element type, only store the bits necessary. + bool Truncate = isa<BuildVectorSDNode>(Node) && + MemVT.bitsLT(Node->getOperand(0).getValueType()); + // Store (in the right endianness) the elements to memory. for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { // Ignore undef elements. @@ -1418,11 +1453,9 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { unsigned Offset = TypeByteSize*i; - SDValue Idx = DAG.getMemBasePlusOffset(FIPtr, Offset, dl); + SDValue Idx = DAG.getMemBasePlusOffset(FIPtr, TypeSize::Fixed(Offset), dl); - // If the destination vector element type is narrower than the source - // element type, only store the bits necessary. - if (MemVT.bitsLT(Node->getOperand(i).getValueType())) + if (Truncate) Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(i), Idx, PtrInfo.getWithOffset(Offset), MemVT)); @@ -1448,7 +1481,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, const SDLoc &DL, SDValue Value) const { EVT FloatVT = Value.getValueType(); - unsigned NumBits = FloatVT.getSizeInBits(); + unsigned NumBits = FloatVT.getScalarSizeInBits(); State.FloatVT = FloatVT; EVT IVT = EVT::getIntegerVT(*DAG.getContext(), NumBits); // Convert to an integer of the same size. 
@@ -1480,8 +1513,9 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, State.IntPointerInfo = State.FloatPointerInfo; } else { // Advance the pointer so that the loaded byte will contain the sign bit. - unsigned ByteOffset = (FloatVT.getSizeInBits() / 8) - 1; - IntPtr = DAG.getMemBasePlusOffset(StackPtr, ByteOffset, DL); + unsigned ByteOffset = (NumBits / 8) - 1; + IntPtr = + DAG.getMemBasePlusOffset(StackPtr, TypeSize::Fixed(ByteOffset), DL); State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI, ByteOffset); } @@ -1489,7 +1523,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, State.IntPtr = IntPtr; State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain, IntPtr, State.IntPointerInfo, MVT::i8); - State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7); + State.SignMask = APInt::getOneBitSet(LoadTy.getScalarSizeInBits(), 7); State.SignBit = 7; } @@ -1544,7 +1578,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const { // Get the signbit at the right position for MagAsInt. 
int ShiftAmount = SignAsInt.SignBit - MagAsInt.SignBit; EVT ShiftVT = IntVT; - if (SignBit.getValueSizeInBits() < ClearedSign.getValueSizeInBits()) { + if (SignBit.getScalarValueSizeInBits() < + ClearedSign.getScalarValueSizeInBits()) { SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit); ShiftVT = MagVT; } @@ -1555,7 +1590,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const { SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, ShiftVT); SignBit = DAG.getNode(ISD::SHL, DL, ShiftVT, SignBit, ShiftCnst); } - if (SignBit.getValueSizeInBits() > ClearedSign.getValueSizeInBits()) { + if (SignBit.getScalarValueSizeInBits() > + ClearedSign.getScalarValueSizeInBits()) { SignBit = DAG.getNode(ISD::TRUNCATE, DL, MagVT, SignBit); } @@ -1564,6 +1600,22 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const { return modifySignAsInt(MagAsInt, DL, CopiedSign); } +SDValue SelectionDAGLegalize::ExpandFNEG(SDNode *Node) const { + // Get the sign bit as an integer. + SDLoc DL(Node); + FloatSignAsInt SignAsInt; + getSignAsIntValue(SignAsInt, DL, Node->getOperand(0)); + EVT IntVT = SignAsInt.IntValue.getValueType(); + + // Flip the sign. + SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT); + SDValue SignFlip = + DAG.getNode(ISD::XOR, DL, IntVT, SignAsInt.IntValue, SignMask); + + // Convert back to float. 
+ return modifySignAsInt(SignAsInt, DL, SignFlip); +} + SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const { SDLoc DL(Node); SDValue Value = Node->getOperand(0); @@ -1587,7 +1639,7 @@ SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const { void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, SmallVectorImpl<SDValue> &Results) { - unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); + Register SPReg = TLI.getStackPointerRegisterToSaveRestore(); assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" " not tell us which reg is the stack pointer!"); SDLoc dl(Node); @@ -1681,21 +1733,41 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode( unsigned Opc = 0; switch (CCCode) { default: llvm_unreachable("Don't know how to expand this condition!"); + case ISD::SETUO: + if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) { + CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; + break; + } + assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) && + "If SETUE is expanded, SETOEQ or SETUNE must be legal!"); + NeedInvert = true; + LLVM_FALLTHROUGH; case ISD::SETO: assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) && "If SETO is expanded, SETOEQ must be legal!"); CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break; - case ISD::SETUO: - assert(TLI.isCondCodeLegal(ISD::SETUNE, OpVT) - && "If SETUO is expanded, SETUNE must be legal!"); - CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break; + case ISD::SETONE: + case ISD::SETUEQ: + // If the SETUO or SETO CC isn't legal, we might be able to use + // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one + // of SETOGT/SETOLT to be legal, the other can be emulated by swapping + // the operands. + CC2 = ((unsigned)CCCode & 0x8U) ? 
ISD::SETUO : ISD::SETO; + if (!TLI.isCondCodeLegal(CC2, OpVT) && + (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) || + TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) { + CC1 = ISD::SETOGT; + CC2 = ISD::SETOLT; + Opc = ISD::OR; + NeedInvert = ((unsigned)CCCode & 0x8U); + break; + } + LLVM_FALLTHROUGH; case ISD::SETOEQ: case ISD::SETOGT: case ISD::SETOGE: case ISD::SETOLT: case ISD::SETOLE: - case ISD::SETONE: - case ISD::SETUEQ: case ISD::SETUNE: case ISD::SETUGT: case ISD::SETUGE: @@ -1727,12 +1799,16 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode( if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { // If we aren't the ordered or unorder operation, // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS). - SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling); - SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling); + SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, + IsSignaling); + SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, + IsSignaling); } else { // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS) - SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling); - SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling); + SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, + IsSignaling); + SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, + IsSignaling); } if (Chain) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1), @@ -1758,27 +1834,34 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, const SDLoc &dl, SDValue Chain) { + unsigned SrcSize = SrcOp.getValueSizeInBits(); + unsigned SlotSize = SlotVT.getSizeInBits(); + unsigned DestSize = DestVT.getSizeInBits(); + Type *DestType = DestVT.getTypeForEVT(*DAG.getContext()); + Align DestAlign = DAG.getDataLayout().getPrefTypeAlign(DestType); + + // Don't convert with stack if the load/store is expensive. 
+ if ((SrcSize > SlotSize && + !TLI.isTruncStoreLegalOrCustom(SrcOp.getValueType(), SlotVT)) || + (SlotSize < DestSize && + !TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, DestVT, SlotVT))) + return SDValue(); + // Create the stack frame object. - unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment( + Align SrcAlign = DAG.getDataLayout().getPrefTypeAlign( SrcOp.getValueType().getTypeForEVT(*DAG.getContext())); - SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign); + SDValue FIPtr = DAG.CreateStackTemporary(SlotVT.getStoreSize(), SrcAlign); FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr); int SPFI = StackPtrFI->getIndex(); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); - unsigned SrcSize = SrcOp.getValueSizeInBits(); - unsigned SlotSize = SlotVT.getSizeInBits(); - unsigned DestSize = DestVT.getSizeInBits(); - Type *DestType = DestVT.getTypeForEVT(*DAG.getContext()); - unsigned DestAlign = DAG.getDataLayout().getPrefTypeAlignment(DestType); - // Emit a store to the stack slot. Use a truncstore if the input value is // later than DestVT. SDValue Store; - if (SrcSize > SlotSize) + if (SrcSize > SlotSize) Store = DAG.getTruncStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SlotVT, SrcAlign); else { @@ -1790,7 +1873,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, // Result is a load from the stack slot. 
if (SlotSize == DestSize) return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign); - + assert(SlotSize < DestSize && "Unknown extension!"); return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT, DestAlign); @@ -2111,7 +2194,7 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, if (Node->isStrictFPOpcode()) { EVT RetVT = Node->getValueType(0); - SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end()); + SmallVector<SDValue, 4> Ops(drop_begin(Node->ops())); TargetLowering::MakeLibCallOptions CallOptions; // FIXME: This doesn't support tail calls. std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT, @@ -2361,7 +2444,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node, // TODO: Should any fast-math-flags be set for the created nodes? LLVM_DEBUG(dbgs() << "Legalizing INT_TO_FP\n"); - if (SrcVT == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { + if (SrcVT == MVT::i32 && TLI.isTypeLegal(MVT::f64) && + (DestVT.bitsLE(MVT::f64) || + TLI.isOperationLegal(Node->isStrictFPOpcode() ? ISD::STRICT_FP_EXTEND + : ISD::FP_EXTEND, + DestVT))) { LLVM_DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double " "expansion\n"); @@ -2388,7 +2475,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node, SDValue Store1 = DAG.getStore(MemChain, dl, Lo, StackSlot, MachinePointerInfo()); // Store the hi of the constructed double. - SDValue HiPtr = DAG.getMemBasePlusOffset(StackSlot, 4, dl); + SDValue HiPtr = DAG.getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), dl); SDValue Store2 = DAG.getStore(MemChain, dl, Hi, HiPtr, MachinePointerInfo()); MemChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); @@ -2423,16 +2510,24 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node, } return Result; } - // Code below here assumes !isSigned without checking again. 
- assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); + + if (isSigned) + return SDValue(); // TODO: Generalize this for use with other types. - if ((SrcVT == MVT::i32 || SrcVT == MVT::i64) && DestVT == MVT::f32) { - LLVM_DEBUG(dbgs() << "Converting unsigned i32/i64 to f32\n"); + if (((SrcVT == MVT::i32 || SrcVT == MVT::i64) && DestVT == MVT::f32) || + (SrcVT == MVT::i64 && DestVT == MVT::f64)) { + LLVM_DEBUG(dbgs() << "Converting unsigned i32/i64 to f32/f64\n"); // For unsigned conversions, convert them to signed conversions using the // algorithm from the x86_64 __floatundisf in compiler_rt. That method // should be valid for i32->f32 as well. + // More generally this transform should be valid if there are 3 more bits + // in the integer type than the significand. Rounding uses the first bit + // after the width of the significand and the OR of all bits after that. So + // we need to be able to OR the shifted out bit into one of the bits that + // participate in the OR. + // TODO: This really should be implemented using a branch rather than a // select. We happen to get lucky and machinesink does the right // thing most of the time. This would be a good candidate for a @@ -2476,6 +2571,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node, return DAG.getSelect(dl, DestVT, SignBitTest, Slow, Fast); } + // Don't expand it if there isn't cheap fadd. + if (!TLI.isOperationLegalOrCustom( + Node->isStrictFPOpcode() ? ISD::STRICT_FADD : ISD::FADD, DestVT)) + return SDValue(); + // The following optimization is valid only if every value in SrcVT (when // treated as signed) is representable in DestVT. Check that the mantissa // size of DestVT is >= than the number of bits in SrcVT -1. @@ -2502,7 +2602,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node, // offset depending on the data type. 
uint64_t FF; switch (SrcVT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unsupported integer type!"); + default: + return SDValue(); case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) @@ -2657,6 +2758,30 @@ void SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl, Results.push_back(Operation.getValue(1)); } +/// Promote FP_TO_*INT_SAT operation to a larger result type. At this point +/// the result and operand types are legal and there must be a legal +/// FP_TO_*INT_SAT operation for a larger result type. +SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT_SAT(SDNode *Node, + const SDLoc &dl) { + unsigned Opcode = Node->getOpcode(); + + // Scan for the appropriate larger type to use. + EVT NewOutTy = Node->getValueType(0); + while (true) { + NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy + 1); + assert(NewOutTy.isInteger() && "Ran out of possibilities!"); + + if (TLI.isOperationLegalOrCustom(Opcode, NewOutTy)) + break; + } + + // Saturation width is determined by second operand, so we don't have to + // perform any fixup and can directly truncate the result. + SDValue Result = DAG.getNode(Opcode, dl, NewOutTy, Node->getOperand(0), + Node->getOperand(1)); + return DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Result); +} + /// Legalize a BITREVERSE scalar/vector operation as a series of mask + shifts. SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) { EVT VT = Op.getValueType(); @@ -2773,6 +2898,28 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) { } } +/// Open code the operations for PARITY of the specified operation. 
+SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) { + EVT VT = Op.getValueType(); + EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + unsigned Sz = VT.getScalarSizeInBits(); + + // If CTPOP is legal, use it. Otherwise use shifts and xor. + SDValue Result; + if (TLI.isOperationLegal(ISD::CTPOP, VT)) { + Result = DAG.getNode(ISD::CTPOP, dl, VT, Op); + } else { + Result = Op; + for (unsigned i = Log2_32_Ceil(Sz); i != 0;) { + SDValue Shift = DAG.getNode(ISD::SRL, dl, VT, Result, + DAG.getConstant(1ULL << (--i), dl, ShVT)); + Result = DAG.getNode(ISD::XOR, dl, VT, Result, Shift); + } + } + + return DAG.getNode(ISD::AND, dl, VT, Result, DAG.getConstant(1, dl, VT)); +} + bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { LLVM_DEBUG(dbgs() << "Trying to expand node\n"); SmallVector<SDValue, 8> Results; @@ -2804,6 +2951,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::BSWAP: Results.push_back(ExpandBSWAP(Node->getOperand(0), dl)); break; + case ISD::PARITY: + Results.push_back(ExpandPARITY(Node->getOperand(0), dl)); + break; case ISD::FRAMEADDR: case ISD::RETURNADDR: case ISD::FRAME_TO_ARGS_OFFSET: @@ -2948,18 +3098,19 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; // We fall back to use stack operation when the FP_ROUND operation // isn't available. 
- Tmp1 = EmitStackConvert(Node->getOperand(1), - Node->getValueType(0), - Node->getValueType(0), dl, Node->getOperand(0)); - ReplaceNode(Node, Tmp1.getNode()); - LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_ROUND node\n"); - return true; + if ((Tmp1 = EmitStackConvert(Node->getOperand(1), Node->getValueType(0), + Node->getValueType(0), dl, + Node->getOperand(0)))) { + ReplaceNode(Node, Tmp1.getNode()); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_ROUND node\n"); + return true; + } + break; case ISD::FP_ROUND: case ISD::BITCAST: - Tmp1 = EmitStackConvert(Node->getOperand(0), - Node->getValueType(0), - Node->getValueType(0), dl); - Results.push_back(Tmp1); + if ((Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), + Node->getValueType(0), dl))) + Results.push_back(Tmp1); break; case ISD::STRICT_FP_EXTEND: // When strict mode is enforced we can't do expansion because it @@ -2974,17 +3125,19 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; // We fall back to use stack operation when the FP_EXTEND operation // isn't available. 
- Tmp1 = EmitStackConvert(Node->getOperand(1), - Node->getOperand(1).getValueType(), - Node->getValueType(0), dl, Node->getOperand(0)); - ReplaceNode(Node, Tmp1.getNode()); - LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_EXTEND node\n"); - return true; + if ((Tmp1 = EmitStackConvert( + Node->getOperand(1), Node->getOperand(1).getValueType(), + Node->getValueType(0), dl, Node->getOperand(0)))) { + ReplaceNode(Node, Tmp1.getNode()); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_EXTEND node\n"); + return true; + } + break; case ISD::FP_EXTEND: - Tmp1 = EmitStackConvert(Node->getOperand(0), - Node->getOperand(0).getValueType(), - Node->getValueType(0), dl); - Results.push_back(Tmp1); + if ((Tmp1 = EmitStackConvert(Node->getOperand(0), + Node->getOperand(0).getValueType(), + Node->getValueType(0), dl))) + Results.push_back(Tmp1); break; case ISD::SIGN_EXTEND_INREG: { EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); @@ -3029,10 +3182,11 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { LLVM_FALLTHROUGH; case ISD::SINT_TO_FP: case ISD::STRICT_SINT_TO_FP: - Tmp1 = ExpandLegalINT_TO_FP(Node, Tmp2); - Results.push_back(Tmp1); - if (Node->isStrictFPOpcode()) - Results.push_back(Tmp2); + if ((Tmp1 = ExpandLegalINT_TO_FP(Node, Tmp2))) { + Results.push_back(Tmp1); + if (Node->isStrictFPOpcode()) + Results.push_back(Tmp2); + } break; case ISD::FP_TO_SINT: if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) @@ -3059,6 +3213,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { return true; } break; + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + Results.push_back(TLI.expandFP_TO_INT_SAT(Node, DAG)); + break; case ISD::VAARG: Results.push_back(DAG.expandVAArg(Node)); Results.push_back(Results[0].getValue(1)); @@ -3187,7 +3345,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::STACKSAVE: // Expand to CopyFromReg if the target set // StackPointerRegisterToSaveRestore. 
- if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { + if (Register SP = TLI.getStackPointerRegisterToSaveRestore()) { Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP, Node->getValueType(0))); Results.push_back(Results[0].getValue(1)); @@ -3199,7 +3357,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::STACKRESTORE: // Expand to CopyToReg if the target set // StackPointerRegisterToSaveRestore. - if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { + if (Register SP = TLI.getStackPointerRegisterToSaveRestore()) { Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP, Node->getOperand(1))); } else { @@ -3214,12 +3372,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(ExpandFCOPYSIGN(Node)); break; case ISD::FNEG: - // Expand Y = FNEG(X) -> Y = SUB -0.0, X - Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0)); - // TODO: If FNEG has fast-math-flags, propagate them to the FSUB. - Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1, - Node->getOperand(0)); - Results.push_back(Tmp1); + Results.push_back(ExpandFNEG(Node)); break; case ISD::FABS: Results.push_back(ExpandFABS(Node)); @@ -3315,7 +3468,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Check to see if this FP immediate is already legal. // If this is a legal constant, turn it into a TargetConstantFP node. 
if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0), - DAG.getMachineFunction().getFunction().hasOptSize())) + DAG.shouldOptForSize())) Results.push_back(ExpandConstantFP(CFP, true)); break; } @@ -3394,7 +3547,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SmallVector<SDValue, 4> Halves; EVT HalfType = EVT(VT).getHalfSizedIntegerVT(*DAG.getContext()); assert(TLI.isTypeLegal(HalfType)); - if (TLI.expandMUL_LOHI(Node->getOpcode(), VT, Node, LHS, RHS, Halves, + if (TLI.expandMUL_LOHI(Node->getOpcode(), VT, dl, LHS, RHS, Halves, HalfType, DAG, TargetLowering::MulExpansionKind::Always)) { for (unsigned i = 0; i < 2; ++i) { @@ -3463,7 +3616,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; case ISD::ROTL: case ISD::ROTR: - if (TLI.expandROT(Node, Tmp1, DAG)) + if (TLI.expandROT(Node, true /*AllowVectorOps*/, Tmp1, DAG)) Results.push_back(Tmp1); break; case ISD::SADDSAT: @@ -3472,6 +3625,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::USUBSAT: Results.push_back(TLI.expandAddSubSat(Node, DAG)); break; + case ISD::SSHLSAT: + case ISD::USHLSAT: + Results.push_back(TLI.expandShlSat(Node, DAG)); + break; case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: @@ -3809,16 +3966,16 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { (void)Legalized; assert(Legalized && "Can't legalize BR_CC with legal condition!"); - assert(!NeedInvert && "Don't know how to invert BR_CC!"); - // If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC // node. if (Tmp4.getNode()) { + assert(!NeedInvert && "Don't know how to invert BR_CC!"); + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, Tmp3, Node->getOperand(4)); } else { Tmp3 = DAG.getConstant(0, dl, Tmp2.getValueType()); - Tmp4 = DAG.getCondCode(ISD::SETNE); + Tmp4 = DAG.getCondCode(NeedInvert ? 
ISD::SETEQ : ISD::SETNE); Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, Tmp3, Node->getOperand(4)); } @@ -3899,6 +4056,27 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { == TargetLowering::Legal) return true; break; + case ISD::STRICT_FSUB: { + if (TLI.getStrictFPOperationAction( + ISD::STRICT_FSUB, Node->getValueType(0)) == TargetLowering::Legal) + return true; + if (TLI.getStrictFPOperationAction( + ISD::STRICT_FADD, Node->getValueType(0)) != TargetLowering::Legal) + break; + + EVT VT = Node->getValueType(0); + const SDNodeFlags Flags = Node->getFlags(); + SDValue Neg = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(2), Flags); + SDValue Fadd = DAG.getNode(ISD::STRICT_FADD, dl, Node->getVTList(), + {Node->getOperand(0), Node->getOperand(1), Neg}, + Flags); + + Results.push_back(Fadd); + Results.push_back(Fadd.getValue(1)); + break; + } + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: case ISD::STRICT_LRINT: case ISD::STRICT_LLRINT: case ISD::STRICT_LROUND: @@ -3967,12 +4145,23 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { case ISD::ATOMIC_LOAD_UMAX: case ISD::ATOMIC_CMP_SWAP: { MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); - RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); - + AtomicOrdering Order = cast<AtomicSDNode>(Node)->getOrdering(); + RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, Order, VT); EVT RetVT = Node->getValueType(0); - SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end()); TargetLowering::MakeLibCallOptions CallOptions; + SmallVector<SDValue, 4> Ops; + if (TLI.getLibcallName(LC)) { + // If outline atomic available, prepare its arguments and expand. 
+ Ops.append(Node->op_begin() + 2, Node->op_end()); + Ops.push_back(Node->getOperand(1)); + + } else { + LC = RTLIB::getSYNC(Opc, VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && + "Unexpected atomic op or value type!"); + // Arguments for expansion to sync libcall + Ops.append(Node->op_begin() + 1, Node->op_end()); + } std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, SDLoc(Node), @@ -4220,11 +4409,131 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { Results.push_back(ExpandLibCall(LC, Node, false)); break; } + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: { + // TODO - Common the code with DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP + bool IsStrict = Node->isStrictFPOpcode(); + bool Signed = Node->getOpcode() == ISD::SINT_TO_FP || + Node->getOpcode() == ISD::STRICT_SINT_TO_FP; + EVT SVT = Node->getOperand(IsStrict ? 1 : 0).getValueType(); + EVT RVT = Node->getValueType(0); + EVT NVT = EVT(); + SDLoc dl(Node); + + // Even if the input is legal, no libcall may exactly match, eg. we don't + // have i1 -> fp conversions. So, it needs to be promoted to a larger type, + // eg: i13 -> fp. Then, look for an appropriate libcall. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE; + t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; + ++t) { + NVT = (MVT::SimpleValueType)t; + // The source needs to big enough to hold the operand. + if (NVT.bitsGE(SVT)) + LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT) + : RTLIB::getUINTTOFP(NVT, RVT); + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to legalize as libcall"); + + SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue(); + // Sign/zero extend the argument if the libcall takes a larger type. + SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, + NVT, Node->getOperand(IsStrict ? 
1 : 0)); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(Signed); + std::pair<SDValue, SDValue> Tmp = + TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, dl, Chain); + Results.push_back(Tmp.first); + if (IsStrict) + Results.push_back(Tmp.second); + break; + } + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: { + // TODO - Common the code with DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT. + bool IsStrict = Node->isStrictFPOpcode(); + bool Signed = Node->getOpcode() == ISD::FP_TO_SINT || + Node->getOpcode() == ISD::STRICT_FP_TO_SINT; + + SDValue Op = Node->getOperand(IsStrict ? 1 : 0); + EVT SVT = Op.getValueType(); + EVT RVT = Node->getValueType(0); + EVT NVT = EVT(); + SDLoc dl(Node); + + // Even if the result is legal, no libcall may exactly match, eg. we don't + // have fp -> i1 conversions. So, it needs to be promoted to a larger type, + // eg: fp -> i32. Then, look for an appropriate libcall. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE; + IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; + ++IntVT) { + NVT = (MVT::SimpleValueType)IntVT; + // The type needs to big enough to hold the result. + if (NVT.bitsGE(RVT)) + LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT) + : RTLIB::getFPTOUINT(SVT, NVT); + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to legalize as libcall"); + + SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue(); + TargetLowering::MakeLibCallOptions CallOptions; + std::pair<SDValue, SDValue> Tmp = + TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl, Chain); + + // Truncate the result if the libcall returns a larger type. 
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, RVT, Tmp.first)); + if (IsStrict) + Results.push_back(Tmp.second); + break; + } + + case ISD::FP_ROUND: + case ISD::STRICT_FP_ROUND: { + // X = FP_ROUND(Y, TRUNC) + // TRUNC is a flag, which is always an integer that is zero or one. + // If TRUNC is 0, this is a normal rounding, if it is 1, this FP_ROUND + // is known to not change the value of Y. + // We can only expand it into libcall if the TRUNC is 0. + bool IsStrict = Node->isStrictFPOpcode(); + SDValue Op = Node->getOperand(IsStrict ? 1 : 0); + SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue(); + EVT VT = Node->getValueType(0); + assert(cast<ConstantSDNode>(Node->getOperand(IsStrict ? 2 : 1)) + ->isNullValue() && + "Unable to expand as libcall if it is not normal rounding"); + + RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to legalize as libcall"); + + TargetLowering::MakeLibCallOptions CallOptions; + std::pair<SDValue, SDValue> Tmp = + TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, SDLoc(Node), Chain); + Results.push_back(Tmp.first); + if (IsStrict) + Results.push_back(Tmp.second); + break; + } + case ISD::FP_EXTEND: { + Results.push_back( + ExpandLibCall(RTLIB::getFPEXT(Node->getOperand(0).getValueType(), + Node->getValueType(0)), + Node, false)); + break; + } + case ISD::STRICT_FP_EXTEND: case ISD::STRICT_FP_TO_FP16: { RTLIB::Libcall LC = - RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16); - assert(LC != RTLIB::UNKNOWN_LIBCALL && - "Unable to expand strict_fp_to_fp16"); + Node->getOpcode() == ISD::STRICT_FP_TO_FP16 + ? 
RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16) + : RTLIB::getFPEXT(Node->getOperand(1).getValueType(), + Node->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to legalize as libcall"); + TargetLowering::MakeLibCallOptions CallOptions; std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, Node->getValueType(0), Node->getOperand(1), @@ -4321,7 +4630,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { OVT = Node->getOperand(0).getSimpleValueType(); } if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP || - Node->getOpcode() == ISD::STRICT_SINT_TO_FP) + Node->getOpcode() == ISD::STRICT_SINT_TO_FP || + Node->getOpcode() == ISD::STRICT_FSETCC || + Node->getOpcode() == ISD::STRICT_FSETCCS) OVT = Node->getOperand(1).getSimpleValueType(); if (Node->getOpcode() == ISD::BR_CC) OVT = Node->getOperand(2).getSimpleValueType(); @@ -4381,6 +4692,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::STRICT_FP_TO_SINT: PromoteLegalFP_TO_INT(Node, dl, Results); break; + case ISD::FP_TO_UINT_SAT: + case ISD::FP_TO_SINT_SAT: + Results.push_back(PromoteLegalFP_TO_INT_SAT(Node, dl)); + break; case ISD::UINT_TO_FP: case ISD::STRICT_UINT_TO_FP: case ISD::SINT_TO_FP: @@ -4515,13 +4830,29 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(Tmp1); break; } - case ISD::SETCC: { + case ISD::SETCC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: { unsigned ExtOp = ISD::FP_EXTEND; if (NVT.isInteger()) { - ISD::CondCode CCCode = - cast<CondCodeSDNode>(Node->getOperand(2))->get(); + ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); ExtOp = isSignedIntSetCC(CCCode) ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; } + if (Node->isStrictFPOpcode()) { + SDValue InChain = Node->getOperand(0); + std::tie(Tmp1, std::ignore) = + DAG.getStrictFPExtendOrRound(Node->getOperand(1), InChain, dl, NVT); + std::tie(Tmp2, std::ignore) = + DAG.getStrictFPExtendOrRound(Node->getOperand(2), InChain, dl, NVT); + SmallVector<SDValue, 2> TmpChains = {Tmp1.getValue(1), Tmp2.getValue(1)}; + SDValue OutChain = DAG.getTokenFactor(dl, TmpChains); + SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other); + Results.push_back(DAG.getNode(Node->getOpcode(), dl, VTs, + {OutChain, Tmp1, Tmp2, Node->getOperand(3)}, + Node->getFlags())); + Results.push_back(Results.back().getValue(1)); + break; + } Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), Tmp1, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 7e8ad28f9b14..966645e3256d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -134,6 +134,16 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break; + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_FMIN: + case ISD::VECREDUCE_FMAX: + R = SoftenFloatRes_VECREDUCE(N); + break; + case ISD::VECREDUCE_SEQ_FADD: + case ISD::VECREDUCE_SEQ_FMUL: + R = SoftenFloatRes_VECREDUCE_SEQ(N); + break; } // If R is null, the sub-method took care of registering the result. @@ -772,6 +782,16 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { return Tmp.first; } +SDValue DAGTypeLegalizer::SoftenFloatRes_VECREDUCE(SDNode *N) { + // Expand and soften recursively. 
+ ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduce(N, DAG)); + return SDValue(); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_VECREDUCE_SEQ(SDNode *N) { + ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduceSeq(N, DAG)); + return SDValue(); +} //===----------------------------------------------------------------------===// // Convert Float Operand to Integer @@ -799,6 +819,9 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break; + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + Res = SoftenFloatOp_FP_TO_XINT_SAT(N); break; case ISD::STRICT_LROUND: case ISD::LROUND: Res = SoftenFloatOp_LROUND(N); break; case ISD::STRICT_LLROUND: @@ -890,6 +913,24 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { 0); } +// Even if the result type is legal, no libcall may exactly match. (e.g. We +// don't have FP-i8 conversions) This helper method looks for an appropriate +// promoted libcall. +static RTLIB::Libcall findFPToIntLibcall(EVT SrcVT, EVT RetVT, EVT &Promoted, + bool Signed) { + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE; + IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; + ++IntVT) { + Promoted = (MVT::SimpleValueType)IntVT; + // The type needs to big enough to hold the result. + if (Promoted.bitsGE(RetVT)) + LC = Signed ? RTLIB::getFPTOSINT(SrcVT, Promoted) + : RTLIB::getFPTOUINT(SrcVT, Promoted); + } + return LC; +} + SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { bool IsStrict = N->isStrictFPOpcode(); bool Signed = N->getOpcode() == ISD::FP_TO_SINT || @@ -905,16 +946,9 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { // a larger type, eg: fp -> i32. Even if it is legal, no libcall may exactly // match, eg. we don't have fp -> i8 conversions. // Look for an appropriate libcall. 
- RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE; - IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; - ++IntVT) { - NVT = (MVT::SimpleValueType)IntVT; - // The type needs to big enough to hold the result. - if (NVT.bitsGE(RVT)) - LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT) : RTLIB::getFPTOUINT(SVT, NVT); - } - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!"); + RTLIB::Libcall LC = findFPToIntLibcall(SVT, RVT, NVT, Signed); + assert(LC != RTLIB::UNKNOWN_LIBCALL && NVT.isSimple() && + "Unsupported FP_TO_XINT!"); Op = GetSoftenedFloat(Op); SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); @@ -934,6 +968,11 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { return SDValue(); } +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT_SAT(SDNode *N) { + SDValue Res = TLI.expandFP_TO_INT_SAT(N, DAG); + return Res; +} + SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); @@ -1200,6 +1239,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::STRICT_FTRUNC: case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break; case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break; + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break; case ISD::STRICT_FREM: @@ -1272,7 +1313,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_FMINNUM(SDNode *N, SDValue &Lo, SDValue &Hi) { - ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), RTLIB::FMIN_F32, RTLIB::FMIN_F64, RTLIB::FMIN_F80, RTLIB::FMIN_F128, RTLIB::FMIN_PPCF128), Lo, Hi); @@ -1598,21 +1639,31 @@ void 
DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!"); EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - SDValue Src = N->getOperand(0); + bool Strict = N->isStrictFPOpcode(); + SDValue Src = N->getOperand(Strict ? 1 : 0); EVT SrcVT = Src.getValueType(); - bool isSigned = N->getOpcode() == ISD::SINT_TO_FP; + bool isSigned = N->getOpcode() == ISD::SINT_TO_FP || + N->getOpcode() == ISD::STRICT_SINT_TO_FP; SDLoc dl(N); + SDValue Chain = Strict ? N->getOperand(0) : DAG.getEntryNode(); + + // TODO: Any other flags to propagate? + SDNodeFlags Flags; + Flags.setNoFPExcept(N->getFlags().hasNoFPExcept()); // First do an SINT_TO_FP, whether the original was signed or unsigned. // When promoting partial word types to i32 we must honor the signedness, // though. if (SrcVT.bitsLE(MVT::i32)) { // The integer can be represented exactly in an f64. - Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, - MVT::i32, Src); Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), APInt(NVT.getSizeInBits(), 0)), dl, NVT); - Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src); + if (Strict) { + Hi = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(NVT, MVT::Other), + {Chain, Src}, Flags); + Chain = Hi.getValue(1); + } else + Hi = DAG.getNode(N->getOpcode(), dl, NVT, Src); } else { RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (SrcVT.bitsLE(MVT::i64)) { @@ -1627,14 +1678,25 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); - Hi = TLI.makeLibCall(DAG, LC, VT, Src, CallOptions, dl).first; - GetPairElements(Hi, Lo, Hi); + std::pair<SDValue, SDValue> Tmp = + TLI.makeLibCall(DAG, LC, VT, Src, CallOptions, dl, Chain); + if (Strict) + Chain = Tmp.second; + GetPairElements(Tmp.first, Lo, Hi); } - if (isSigned) + // No need to complement for unsigned 32-bit 
integers + if (isSigned || SrcVT.bitsLE(MVT::i32)) { + if (Strict) + ReplaceValueWith(SDValue(N, 1), Chain); + return; + } // Unsigned - fix up the SINT_TO_FP value just calculated. + // FIXME: For unsigned i128 to ppc_fp128 conversion, we need to carefully + // keep semantics correctness if the integer is not exactly representable + // here. See ExpandLegalINT_TO_FP. Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi); SrcVT = Src.getValueType(); @@ -1658,11 +1720,16 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, break; } - // TODO: Are there fast-math-flags to propagate to this FADD? - Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, - DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble(), - APInt(128, Parts)), - dl, MVT::ppcf128)); + // TODO: Are there other fast-math-flags to propagate to this FADD? + SDValue NewLo = DAG.getConstantFP( + APFloat(APFloat::PPCDoubleDouble(), APInt(128, Parts)), dl, MVT::ppcf128); + if (Strict) { + Lo = DAG.getNode(ISD::STRICT_FADD, dl, DAG.getVTList(VT, MVT::Other), + {Chain, Hi, NewLo}, Flags); + Chain = Lo.getValue(1); + ReplaceValueWith(SDValue(N, 1), Chain); + } else + Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, NewLo); Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, dl, SrcVT), Lo, Hi, ISD::SETLT); GetPairElements(Lo, Lo, Hi); @@ -1702,14 +1769,16 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; case ISD::STRICT_FP_TO_SINT: - case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; case ISD::STRICT_FP_TO_UINT: - case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_XINT(N); break; case ISD::LROUND: Res = ExpandFloatOp_LROUND(N); break; case ISD::LLROUND: Res = ExpandFloatOp_LLROUND(N); break; case ISD::LRINT: Res = ExpandFloatOp_LRINT(N); break; case ISD::LLRINT: Res = ExpandFloatOp_LLRINT(N); break; case 
ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break; + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break; case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N), OpNo); break; @@ -1735,7 +1804,8 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - const SDLoc &dl) { + const SDLoc &dl, SDValue &Chain, + bool IsSignaling) { SDValue LHSLo, LHSHi, RHSLo, RHSHi; GetExpandedFloat(NewLHS, LHSLo, LHSHi); GetExpandedFloat(NewRHS, RHSLo, RHSHi); @@ -1747,25 +1817,32 @@ void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, // BNE crN, L: // FCMPU crN, lo1, lo2 // The following can be improved, but not that much. - SDValue Tmp1, Tmp2, Tmp3; - Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), - LHSHi, RHSHi, ISD::SETOEQ); - Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), - LHSLo, RHSLo, CCCode); + SDValue Tmp1, Tmp2, Tmp3, OutputChain; + Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, + RHSHi, ISD::SETOEQ, Chain, IsSignaling); + OutputChain = Tmp1->getNumValues() > 1 ? Tmp1.getValue(1) : SDValue(); + Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo, + RHSLo, CCCode, OutputChain, IsSignaling); + OutputChain = Tmp2->getNumValues() > 1 ? Tmp2.getValue(1) : SDValue(); Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); - Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), - LHSHi, RHSHi, ISD::SETUNE); - Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), - LHSHi, RHSHi, CCCode); + Tmp1 = + DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, + ISD::SETUNE, OutputChain, IsSignaling); + OutputChain = Tmp1->getNumValues() > 1 ? 
Tmp1.getValue(1) : SDValue(); + Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, + RHSHi, CCCode, OutputChain, IsSignaling); + OutputChain = Tmp2->getNumValues() > 1 ? Tmp2.getValue(1) : SDValue(); Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3); NewRHS = SDValue(); // LHS is the result, not a compare. + Chain = OutputChain; } SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get(); - FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); + SDValue Chain; + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N), Chain); // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -1820,38 +1897,23 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { return SDValue(); } -SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_XINT(SDNode *N) { EVT RVT = N->getValueType(0); SDLoc dl(N); bool IsStrict = N->isStrictFPOpcode(); + bool Signed = N->getOpcode() == ISD::FP_TO_SINT || + N->getOpcode() == ISD::STRICT_FP_TO_SINT; SDValue Op = N->getOperand(IsStrict ? 1 : 0); SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); - RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), RVT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - TargetLowering::MakeLibCallOptions CallOptions; - std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op, - CallOptions, dl, Chain); - if (!IsStrict) - return Tmp.first; - - ReplaceValueWith(SDValue(N, 1), Tmp.second); - ReplaceValueWith(SDValue(N, 0), Tmp.first); - return SDValue(); -} -SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { - EVT RVT = N->getValueType(0); - SDLoc dl(N); - - bool IsStrict = N->isStrictFPOpcode(); - SDValue Op = N->getOperand(IsStrict ? 1 : 0); - SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); - RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), RVT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); + EVT NVT; + RTLIB::Libcall LC = findFPToIntLibcall(Op.getValueType(), RVT, NVT, Signed); + assert(LC != RTLIB::UNKNOWN_LIBCALL && NVT.isSimple() && + "Unsupported FP_TO_XINT!"); TargetLowering::MakeLibCallOptions CallOptions; - std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op, - CallOptions, dl, Chain); + std::pair<SDValue, SDValue> Tmp = + TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl, Chain); if (!IsStrict) return Tmp.first; @@ -1863,7 +1925,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); - FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); + SDValue Chain; + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N), Chain); // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. 
@@ -1879,20 +1942,25 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) { - SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); - ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); - FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); + bool IsStrict = N->isStrictFPOpcode(); + SDValue NewLHS = N->getOperand(IsStrict ? 1 : 0); + SDValue NewRHS = N->getOperand(IsStrict ? 2 : 1); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + ISD::CondCode CCCode = + cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get(); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N), Chain, + N->getOpcode() == ISD::STRICT_FSETCCS); - // If ExpandSetCCOperands returned a scalar, use it. - if (!NewRHS.getNode()) { - assert(NewLHS.getValueType() == N->getValueType(0) && - "Unexpected setcc expansion!"); - return NewLHS; + // FloatExpandSetCCOperands always returned a scalar. + assert(!NewRHS.getNode() && "Expect to return scalar"); + assert(NewLHS.getValueType() == N->getValueType(0) && + "Unexpected setcc expansion!"); + if (Chain) { + ReplaceValueWith(SDValue(N, 0), NewLHS); + ReplaceValueWith(SDValue(N, 1), Chain); + return SDValue(); } - - // Otherwise, update N to have the operands specified. 
- return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, - DAG.getCondCode(CCCode)), 0); + return NewLHS; } SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { @@ -2013,6 +2081,9 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { case ISD::FCOPYSIGN: R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: R = PromoteFloatOp_FP_TO_XINT(N, OpNo); break; + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + R = PromoteFloatOp_FP_TO_XINT_SAT(N, OpNo); break; case ISD::FP_EXTEND: R = PromoteFloatOp_FP_EXTEND(N, OpNo); break; case ISD::SELECT_CC: R = PromoteFloatOp_SELECT_CC(N, OpNo); break; case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break; @@ -2056,6 +2127,13 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo) { return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op); } +SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N, + unsigned OpNo) { + SDValue Op = GetPromotedFloat(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op, + N->getOperand(1)); +} + SDValue DAGTypeLegalizer::PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo) { SDValue Op = GetPromotedFloat(N->getOperand(0)); EVT VT = N->getValueType(0); @@ -2191,6 +2269,16 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break; case ISD::UNDEF: R = PromoteFloatRes_UNDEF(N); break; case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_FMIN: + case ISD::VECREDUCE_FMAX: + R = PromoteFloatRes_VECREDUCE(N); + break; + case ISD::VECREDUCE_SEQ_FADD: + case ISD::VECREDUCE_SEQ_FMUL: + R = PromoteFloatRes_VECREDUCE_SEQ(N); + break; } if (R.getNode()) @@ -2422,6 +2510,20 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) { N->getValueType(0))); } +SDValue 
DAGTypeLegalizer::PromoteFloatRes_VECREDUCE(SDNode *N) { + // Expand and promote recursively. + // TODO: This is non-optimal, but dealing with the concurrently happening + // vector-legalization is non-trivial. We could do something similar to + // PromoteFloatRes_EXTRACT_VECTOR_ELT here. + ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduce(N, DAG)); + return SDValue(); +} + +SDValue DAGTypeLegalizer::PromoteFloatRes_VECREDUCE_SEQ(SDNode *N) { + ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduceSeq(N, DAG)); + return SDValue(); +} + SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) { EVT VT = N->getValueType(0); @@ -2530,6 +2632,16 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::UINT_TO_FP: R = SoftPromoteHalfRes_XINT_TO_FP(N); break; case ISD::UNDEF: R = SoftPromoteHalfRes_UNDEF(N); break; case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_FMIN: + case ISD::VECREDUCE_FMAX: + R = SoftPromoteHalfRes_VECREDUCE(N); + break; + case ISD::VECREDUCE_SEQ_FADD: + case ISD::VECREDUCE_SEQ_FMUL: + R = SoftPromoteHalfRes_VECREDUCE_SEQ(N); + break; } if (R.getNode()) @@ -2722,6 +2834,18 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BinOp(SDNode *N) { return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res); } +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_VECREDUCE(SDNode *N) { + // Expand and soften recursively. + ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduce(N, DAG)); + return SDValue(); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_VECREDUCE_SEQ(SDNode *N) { + // Expand and soften. 
+ ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduceSeq(N, DAG)); + return SDValue(); +} + //===----------------------------------------------------------------------===// // Half Operand Soft Promotion //===----------------------------------------------------------------------===// @@ -2753,6 +2877,9 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) { case ISD::FCOPYSIGN: Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break; + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + Res = SoftPromoteHalfOp_FP_TO_XINT_SAT(N); break; case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: Res = SoftPromoteHalfOp_FP_EXTEND(N); break; case ISD::SELECT_CC: Res = SoftPromoteHalfOp_SELECT_CC(N, OpNo); break; @@ -2822,6 +2949,20 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) { return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Res); } +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N) { + SDValue Op = N->getOperand(0); + SDLoc dl(N); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()); + + Op = GetSoftPromotedHalf(Op); + + SDValue Res = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op); + + return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Res, + N->getOperand(1)); +} + SDValue DAGTypeLegalizer::SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo) { assert(OpNo == 0 && "Can only soften the comparison values"); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 74071f763dbf..4a686bc227de 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -62,7 +62,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::Constant: Res = PromoteIntRes_Constant(N); break; case ISD::CTLZ_ZERO_UNDEF: case ISD::CTLZ: Res = 
PromoteIntRes_CTLZ(N); break; - case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break; + case ISD::PARITY: + case ISD::CTPOP: Res = PromoteIntRes_CTPOP_PARITY(N); break; case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break; case ISD::EXTRACT_VECTOR_ELT: @@ -81,7 +82,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SMIN: case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break; case ISD::UMIN: - case ISD::UMAX: Res = PromoteIntRes_ZExtIntBinOp(N); break; + case ISD::UMAX: Res = PromoteIntRes_UMINUMAX(N); break; case ISD::SHL: Res = PromoteIntRes_SHL(N); break; case ISD::SIGN_EXTEND_INREG: @@ -122,6 +123,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break; + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + Res = PromoteIntRes_FP_TO_XINT_SAT(N); break; + case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break; case ISD::FLT_ROUNDS_: Res = PromoteIntRes_FLT_ROUNDS(N); break; @@ -151,10 +156,15 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ADDCARRY: case ISD::SUBCARRY: Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break; + case ISD::SADDO_CARRY: + case ISD::SSUBO_CARRY: Res = PromoteIntRes_SADDSUBO_CARRY(N, ResNo); break; + case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: - case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break; + case ISD::USUBSAT: + case ISD::SSHLSAT: + case ISD::USHLSAT: Res = PromoteIntRes_ADDSUBSHLSAT(N); break; case ISD::SMULFIX: case ISD::SMULFIXSAT: @@ -205,6 +215,16 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::FREEZE: Res = PromoteIntRes_FREEZE(N); break; + + case ISD::ROTL: + case ISD::ROTR: + Res = PromoteIntRes_Rotate(N); + break; + + case ISD::FSHL: + case ISD::FSHR: + Res = PromoteIntRes_FunnelShift(N); + break; } // If the result is null then the sub-method took 
care of registering it. @@ -491,10 +511,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { NVT)); } -SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) { - // Zero extend to the promoted type and do the count there. +SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) { + // Zero extend to the promoted type and do the count or parity there. SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::CTPOP, SDLoc(N), Op.getValueType(), Op); + return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op); } SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { @@ -559,8 +579,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { SDValue Res; if (N->isStrictFPOpcode()) { - Res = DAG.getNode(NewOpc, dl, { NVT, MVT::Other }, - { N->getOperand(0), N->getOperand(1) }); + Res = DAG.getNode(NewOpc, dl, {NVT, MVT::Other}, + {N->getOperand(0), N->getOperand(1)}); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -580,6 +600,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { DAG.getValueType(N->getValueType(0).getScalarType())); } +SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT_SAT(SDNode *N) { + // Promote the result type, while keeping the original width in Op1. 
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0), + N->getOperand(1)); +} + SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); @@ -663,12 +691,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { assert(NVT == ExtPassThru.getValueType() && "Gather result type and the passThru argument type should be the same"); + ISD::LoadExtType ExtType = N->getExtensionType(); + if (ExtType == ISD::NON_EXTLOAD) + ExtType = ISD::EXTLOAD; + SDLoc dl(N); SDValue Ops[] = {N->getChain(), ExtPassThru, N->getMask(), N->getBasePtr(), N->getIndex(), N->getScale() }; SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other), N->getMemoryVT(), dl, Ops, - N->getMemOperand(), N->getIndexType()); + N->getMemOperand(), N->getIndexType(), + ExtType); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -700,11 +733,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { return DAG.getBoolExtOrTrunc(Res.getValue(1), dl, NVT, VT); } -SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) { // If the promoted type is legal, we can convert this to: // 1. ANY_EXTEND iN to iM // 2. SHL by M-N - // 3. [US][ADD|SUB]SAT + // 3. [US][ADD|SUB|SHL]SAT // 4. L/ASHR by M-N // Else it is more efficient to convert this to a min and a max // operation in the higher precision arithmetic. 
@@ -714,9 +747,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) { unsigned OldBits = Op1.getScalarValueSizeInBits(); unsigned Opcode = N->getOpcode(); + bool IsShift = Opcode == ISD::USHLSAT || Opcode == ISD::SSHLSAT; SDValue Op1Promoted, Op2Promoted; - if (Opcode == ISD::UADDSAT || Opcode == ISD::USUBSAT) { + if (IsShift) { + Op1Promoted = GetPromotedInteger(Op1); + Op2Promoted = ZExtPromotedInteger(Op2); + } else if (Opcode == ISD::UADDSAT || Opcode == ISD::USUBSAT) { Op1Promoted = ZExtPromotedInteger(Op1); Op2Promoted = ZExtPromotedInteger(Op2); } else { @@ -726,20 +763,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) { EVT PromotedType = Op1Promoted.getValueType(); unsigned NewBits = PromotedType.getScalarSizeInBits(); - if (TLI.isOperationLegalOrCustom(Opcode, PromotedType)) { + // Shift cannot use a min/max expansion, we can't detect overflow if all of + // the bits have been shifted out. + if (IsShift || TLI.isOperationLegalOrCustom(Opcode, PromotedType)) { unsigned ShiftOp; switch (Opcode) { case ISD::SADDSAT: case ISD::SSUBSAT: + case ISD::SSHLSAT: ShiftOp = ISD::SRA; break; case ISD::UADDSAT: case ISD::USUBSAT: + case ISD::USHLSAT: ShiftOp = ISD::SRL; break; default: llvm_unreachable("Expected opcode to be signed or unsigned saturation " - "addition or subtraction"); + "addition, subtraction or left shift"); } unsigned SHLAmount = NewBits - OldBits; @@ -747,8 +788,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) { SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT); Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount); - Op2Promoted = - DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount); + if (!IsShift) + Op2Promoted = + DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount); SDValue Result = DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted); @@ -1076,6 +1118,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) { 
LHS.getValueType(), LHS, RHS); } +SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) { + // It doesn't matter if we sign extend or zero extend in the inputs. So do + // whatever is best for the target. + SDValue LHS = SExtOrZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtOrZExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + LHS.getValueType(), LHS, RHS); +} + SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { // The input value must be properly sign extended. SDValue LHS = SExtPromotedInteger(N->getOperand(0)); @@ -1094,6 +1145,60 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS); } +SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) { + // Lower the rotate to shifts and ORs which can be promoted. + SDValue Res; + TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG); + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) { + SDValue Hi = GetPromotedInteger(N->getOperand(0)); + SDValue Lo = GetPromotedInteger(N->getOperand(1)); + SDValue Amount = GetPromotedInteger(N->getOperand(2)); + + SDLoc DL(N); + EVT OldVT = N->getOperand(0).getValueType(); + EVT VT = Lo.getValueType(); + unsigned Opcode = N->getOpcode(); + bool IsFSHR = Opcode == ISD::FSHR; + unsigned OldBits = OldVT.getScalarSizeInBits(); + unsigned NewBits = VT.getScalarSizeInBits(); + + // Amount has to be interpreted modulo the old bit width. + Amount = + DAG.getNode(ISD::UREM, DL, VT, Amount, DAG.getConstant(OldBits, DL, VT)); + + // If the promoted type is twice the size (or more), then we use the + // traditional funnel 'double' shift codegen. This isn't necessary if the + // shift amount is constant. + // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z % bw)) >> bw. + // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z % bw)). 
+ if (NewBits >= (2 * OldBits) && !isa<ConstantSDNode>(Amount) && + !TLI.isOperationLegalOrCustom(Opcode, VT)) { + SDValue HiShift = DAG.getConstant(OldBits, DL, VT); + Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, HiShift); + Lo = DAG.getZeroExtendInReg(Lo, DL, OldVT); + SDValue Res = DAG.getNode(ISD::OR, DL, VT, Hi, Lo); + Res = DAG.getNode(IsFSHR ? ISD::SRL : ISD::SHL, DL, VT, Res, Amount); + if (!IsFSHR) + Res = DAG.getNode(ISD::SRL, DL, VT, Res, HiShift); + return Res; + } + + // Shift Lo up to occupy the upper bits of the promoted type. + SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, VT); + Lo = DAG.getNode(ISD::SHL, DL, VT, Lo, ShiftOffset); + + // Increase Amount to shift the result into the lower bits of the promoted + // type. + if (IsFSHR) + Amount = DAG.getNode(ISD::ADD, DL, VT, Amount, ShiftOffset); + + return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amount); +} + SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Res; @@ -1181,7 +1286,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { } // Handle promotion for the ADDE/SUBE/ADDCARRY/SUBCARRY nodes. Notice that -// the third operand of ADDE/SUBE nodes is carry flag, which differs from +// the third operand of ADDE/SUBE nodes is carry flag, which differs from // the ADDCARRY/SUBCARRY nodes in that the third operand is carry Boolean. 
SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) { if (ResNo == 1) @@ -1212,6 +1317,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) { return SDValue(Res.getNode(), 0); } +SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO_CARRY(SDNode *N, + unsigned ResNo) { + assert(ResNo == 1 && "Don't know how to promote other results yet."); + return PromoteIntRes_Overflow(N); +} + SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) { SDValue Op0 = SExtPromotedInteger(N->getOperand(0)); return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0); @@ -1394,6 +1505,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::ROTL: case ISD::ROTR: Res = PromoteIntOp_Shift(N); break; + case ISD::SADDO_CARRY: + case ISD::SSUBO_CARRY: case ISD::ADDCARRY: case ISD::SUBCARRY: Res = PromoteIntOp_ADDSUBCARRY(N, OpNo); break; @@ -1620,8 +1733,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { EVT OpTy = N->getOperand(1).getValueType(); if (N->getOpcode() == ISD::VSELECT) - if (SDValue Res = WidenVSELECTAndMask(N)) - return Res; + if (SDValue Res = WidenVSELECTMask(N)) + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), + Res, N->getOperand(1), N->getOperand(2)); // Promote all the way up to the canonical SetCC type. EVT OpVT = N->getOpcode() == ISD::SELECT ? 
OpTy.getScalarType() : OpTy; @@ -1763,6 +1877,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo) { + bool TruncateStore = N->isTruncatingStore(); SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end()); if (OpNo == 2) { // The Mask @@ -1775,9 +1890,17 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); else NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo)); - } else + + N->setIndexType(TLI.getCanonicalIndexType(N->getIndexType(), + N->getMemoryVT(), NewOps[OpNo])); + } else { NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); - return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); + TruncateStore = true; + } + + return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), N->getMemoryVT(), + SDLoc(N), NewOps, N->getMemOperand(), + N->getIndexType(), TruncateStore); } SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { @@ -1921,6 +2044,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break; case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break; case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break; + case ISD::PARITY: ExpandIntRes_PARITY(N, Lo, Hi); break; case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break; case ISD::ABS: ExpandIntRes_ABS(N, Lo, Hi); break; case ISD::CTLZ_ZERO_UNDEF: @@ -1933,6 +2057,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: ExpandIntRes_FP_TO_XINT_SAT(N, Lo, Hi); break; case ISD::STRICT_LLROUND: case ISD::STRICT_LLRINT: case ISD::LLROUND: @@ -2009,6 +2135,9 @@ void 
DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ADDCARRY: case ISD::SUBCARRY: ExpandIntRes_ADDSUBCARRY(N, Lo, Hi); break; + case ISD::SADDO_CARRY: + case ISD::SSUBO_CARRY: ExpandIntRes_SADDSUBO_CARRY(N, Lo, Hi); break; + case ISD::SHL: case ISD::SRA: case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break; @@ -2025,6 +2154,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SSUBSAT: case ISD::USUBSAT: ExpandIntRes_ADDSUBSAT(N, Lo, Hi); break; + case ISD::SSHLSAT: + case ISD::USHLSAT: ExpandIntRes_SHLSAT(N, Lo, Hi); break; + case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: @@ -2044,6 +2176,16 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::VECREDUCE_SMIN: case ISD::VECREDUCE_UMAX: case ISD::VECREDUCE_UMIN: ExpandIntRes_VECREDUCE(N, Lo, Hi); break; + + case ISD::ROTL: + case ISD::ROTR: + ExpandIntRes_Rotate(N, Lo, Hi); + break; + + case ISD::FSHL: + case ISD::FSHR: + ExpandIntRes_FunnelShift(N, Lo, Hi); + break; } // If Lo/Hi is null, the sub-method took care of registering results etc. 
@@ -2055,12 +2197,22 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { unsigned Opc = Node->getOpcode(); MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); - RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); - + AtomicOrdering order = cast<AtomicSDNode>(Node)->getOrdering(); + // Lower to outline atomic libcall if outline atomics enabled, + // or to sync libcall otherwise + RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, order, VT); EVT RetVT = Node->getValueType(0); - SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end()); TargetLowering::MakeLibCallOptions CallOptions; + SmallVector<SDValue, 4> Ops; + if (TLI.getLibcallName(LC)) { + Ops.append(Node->op_begin() + 2, Node->op_end()); + Ops.push_back(Node->getOperand(1)); + } else { + LC = RTLIB::getSYNC(Opc, VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && + "Unexpected atomic op or value type!"); + Ops.append(Node->op_begin() + 1, Node->op_end()); + } return TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, SDLoc(Node), Node->getOperand(0)); } @@ -2619,6 +2771,26 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBCARRY(SDNode *N, ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); } +void DAGTypeLegalizer::ExpandIntRes_SADDSUBO_CARRY(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + SDLoc dl(N); + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1)); + + // We need to use an unsigned carry op for the lo part. + unsigned CarryOp = N->getOpcode() == ISD::SADDO_CARRY ? 
ISD::ADDCARRY + : ISD::SUBCARRY; + Lo = DAG.getNode(CarryOp, dl, VTList, { LHSL, RHSL, N->getOperand(2) }); + Hi = DAG.getNode(N->getOpcode(), dl, VTList, { LHSH, RHSH, Lo.getValue(1) }); + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); @@ -2700,6 +2872,17 @@ void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N, Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi); } +void DAGTypeLegalizer::ExpandIntRes_PARITY(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + // parity(HiLo) -> parity(Lo^Hi) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + Lo = + DAG.getNode(ISD::PARITY, dl, NVT, DAG.getNode(ISD::XOR, dl, NVT, Lo, Hi)); + Hi = DAG.getConstant(0, dl, NVT); +} + void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); @@ -2717,16 +2900,38 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); + SDValue N0 = N->getOperand(0); + GetExpandedInteger(N0, Lo, Hi); + EVT NVT = Lo.getValueType(); + + // If we have ADDCARRY, use the expanded form of the sra+add+xor sequence we + // use in LegalizeDAG. The ADD part of the expansion is based on + // ExpandIntRes_ADDSUB which also uses ADDCARRY/UADDO after checking that + // ADDCARRY is LegalOrCustom. Each of the pieces here can be further expanded + // if needed. Shift expansion has a special case for filling with sign bits + // so that we will only end up with one SRA. 
+ bool HasAddCarry = TLI.isOperationLegalOrCustom( + ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + if (HasAddCarry) { + EVT ShiftAmtTy = getShiftAmountTyForConstant(NVT, TLI, DAG); + SDValue Sign = + DAG.getNode(ISD::SRA, dl, NVT, Hi, + DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy)); + SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT)); + Lo = DAG.getNode(ISD::UADDO, dl, VTList, Lo, Sign); + Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Hi, Sign, Lo.getValue(1)); + Lo = DAG.getNode(ISD::XOR, dl, NVT, Lo, Sign); + Hi = DAG.getNode(ISD::XOR, dl, NVT, Hi, Sign); + return; + } + // abs(HiLo) -> (Hi < 0 ? -HiLo : HiLo) EVT VT = N->getValueType(0); - SDValue N0 = N->getOperand(0); SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), N0); SDValue NegLo, NegHi; SplitInteger(Neg, NegLo, NegHi); - GetExpandedInteger(N0, Lo, Hi); - EVT NVT = Lo.getValueType(); SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT), DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT); Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo); @@ -2859,6 +3064,12 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, ReplaceValueWith(SDValue(N, 1), Tmp.second); } +void DAGTypeLegalizer::ExpandIntRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Res = TLI.expandFP_TO_INT_SAT(N, DAG); + SplitInteger(Res, Lo, Hi); +} + void DAGTypeLegalizer::ExpandIntRes_LLROUND_LLRINT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Op = N->getOperand(N->isStrictFPOpcode() ? 1 : 0); @@ -2929,7 +3140,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, ReplaceValueWith(SDValue(N, 1), Swap.getValue(2)); return; } - + if (ISD::isNormalLoad(N)) { ExpandRes_NormalLoad(N, Lo, Hi); return; @@ -2983,7 +3194,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Increment the pointer to the other half. 
unsigned IncrementSize = NVT.getSizeInBits()/8; - Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); + Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, N->getOriginalAlign(), MMOFlags, AAInfo); @@ -3007,7 +3218,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, N->getOriginalAlign(), MMOFlags, AAInfo); // Increment the pointer to the other half. - Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); + Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl); // Load the rest of the low bits. Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), @@ -3147,6 +3358,12 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBSAT(SDNode *N, SDValue &Lo, SplitInteger(Result, Lo, Hi); } +void DAGTypeLegalizer::ExpandIntRes_SHLSAT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Result = TLI.expandShlSat(N, DAG); + SplitInteger(Result, Lo, Hi); +} + /// This performs an expansion of the integer result for a fixed point /// multiplication. The default expansion performs rounding down towards /// negative infinity, though targets that do care about rounding should specify @@ -3385,40 +3602,66 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, SDValue RHS = Node->getOperand(1); SDLoc dl(Node); - // Expand the result by simply replacing it with the equivalent - // non-overflow-checking operation. - SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? - ISD::ADD : ISD::SUB, dl, LHS.getValueType(), - LHS, RHS); - SplitInteger(Sum, Lo, Hi); + SDValue Ovf; - // Compute the overflow. 
- // - // LHSSign -> LHS >= 0 - // RHSSign -> RHS >= 0 - // SumSign -> Sum >= 0 - // - // Add: - // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) - // Sub: - // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) - // - EVT OType = Node->getValueType(1); - SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); + unsigned CarryOp; + switch(Node->getOpcode()) { + default: llvm_unreachable("Node has unexpected Opcode"); + case ISD::SADDO: CarryOp = ISD::SADDO_CARRY; break; + case ISD::SSUBO: CarryOp = ISD::SSUBO_CARRY; break; + } + + bool HasCarryOp = TLI.isOperationLegalOrCustom( + CarryOp, TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType())); - SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); - SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); - SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, - Node->getOpcode() == ISD::SADDO ? - ISD::SETEQ : ISD::SETNE); + if (HasCarryOp) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + GetExpandedInteger(LHS, LHSL, LHSH); + GetExpandedInteger(RHS, RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), Node->getValueType(1)); + + Lo = DAG.getNode(Node->getOpcode() == ISD::SADDO ? + ISD::UADDO : ISD::USUBO, dl, VTList, { LHSL, RHSL }); + Hi = DAG.getNode(CarryOp, dl, VTList, { LHSH, RHSH, Lo.getValue(1) }); + + Ovf = Hi.getValue(1); + } else { + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + SplitInteger(Sum, Lo, Hi); + + // Compute the overflow. 
+ // + // LHSSign -> LHS >= 0 + // RHSSign -> RHS >= 0 + // SumSign -> Sum >= 0 + // + // Add: + // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) + // Sub: + // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) + // + EVT OType = Node->getValueType(1); + SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); + + SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); + SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); + SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, + Node->getOpcode() == ISD::SADDO ? + ISD::SETEQ : ISD::SETNE); - SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); - SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); + SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); + SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); - SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); + Ovf = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); + } // Use the calculated overflow everywhere. - ReplaceValueWith(SDValue(Node, 1), Cmp); + ReplaceValueWith(SDValue(Node, 1), Ovf); } void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, @@ -3874,6 +4117,22 @@ void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N, SplitInteger(Res, Lo, Hi); } +void DAGTypeLegalizer::ExpandIntRes_Rotate(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Lower the rotate to shifts and ORs which can be expanded. + SDValue Res; + TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG); + SplitInteger(Res, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Lower the funnel shift to shifts and ORs which can be expanded. 
+ SDValue Res; + TLI.expandFunnelShift(N, Res, DAG); + SplitInteger(Res, Lo, Hi); +} + //===----------------------------------------------------------------------===// // Integer Operand Expansion //===----------------------------------------------------------------------===// @@ -4246,7 +4505,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; - Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize)); Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, N->getOriginalAlign(), MMOFlags, AAInfo); @@ -4281,7 +4540,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { N->getOriginalAlign(), MMOFlags, AAInfo); // Increment the pointer to the other half. - Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize)); // Store the lowest ExcessBits bits in the second half. 
Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), @@ -4586,8 +4845,23 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { SDLoc dl(N); + + EVT ResVT = N->getValueType(0); unsigned NumElems = N->getNumOperands(); + if (ResVT.isScalableVector()) { + SDValue ResVec = DAG.getUNDEF(ResVT); + + for (unsigned OpIdx = 0; OpIdx < NumElems; ++OpIdx) { + SDValue Op = N->getOperand(OpIdx); + unsigned OpNumElts = Op.getValueType().getVectorMinNumElements(); + ResVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ResVec, Op, + DAG.getIntPtrConstant(OpIdx * OpNumElts, dl)); + } + + return ResVec; + } + EVT RetSclrTy = N->getValueType(0).getVectorElementType(); SmallVector<SDValue, 8> NewOps; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index ae087d3bbd8c..a59f03854775 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -663,8 +663,7 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { // Process the list of nodes that need to be reanalyzed. while (!NodesToAnalyze.empty()) { - SDNode *N = NodesToAnalyze.back(); - NodesToAnalyze.pop_back(); + SDNode *N = NodesToAnalyze.pop_back_val(); if (N->getNodeId() != DAGTypeLegalizer::NewNode) // The node was analyzed while reanalyzing an earlier node - it is safe // to skip. Note that this is not a morphing node - otherwise it would @@ -753,7 +752,10 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { // Note that in some cases vector operation operands may be greater than // the vector element type. For example BUILD_VECTOR of type <1 x i1> with // a constant i8 operand. - assert(Result.getValueSizeInBits() >= Op.getScalarValueSizeInBits() && + + // We don't currently support the scalarization of scalable vector types. 
+ assert(Result.getValueSizeInBits().getFixedSize() >= + Op.getScalarValueSizeInBits() && "Invalid type for scalarized vector"); AnalyzeNewValue(Result); @@ -955,11 +957,12 @@ bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) { assert(Results.size() == N->getNumValues() && "Custom lowering returned the wrong number of results!"); for (unsigned i = 0, e = Results.size(); i != e; ++i) { - // If this is a chain output just replace it. - if (Results[i].getValueType() == MVT::Other) - ReplaceValueWith(SDValue(N, i), Results[i]); - else + // If this is a chain output or already widened just replace it. + bool WasWidened = SDValue(N, i).getValueType() != Results[i].getValueType(); + if (WasWidened) SetWidenedVector(SDValue(N, i), Results[i]); + else + ReplaceValueWith(SDValue(N, i), Results[i]); } return true; } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 0fa6d653a836..630a0a9adaf7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -311,10 +311,11 @@ private: SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); SDValue PromoteIntRes_Constant(SDNode *N); SDValue PromoteIntRes_CTLZ(SDNode *N); - SDValue PromoteIntRes_CTPOP(SDNode *N); + SDValue PromoteIntRes_CTPOP_PARITY(SDNode *N); SDValue PromoteIntRes_CTTZ(SDNode *N); SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); + SDValue PromoteIntRes_FP_TO_XINT_SAT(SDNode *N); SDValue PromoteIntRes_FP_TO_FP16(SDNode *N); SDValue PromoteIntRes_FREEZE(SDNode *N); SDValue PromoteIntRes_INT_EXTEND(SDNode *N); @@ -331,22 +332,26 @@ private: SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N); SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N); SDValue PromoteIntRes_SExtIntBinOp(SDNode *N); + SDValue PromoteIntRes_UMINUMAX(SDNode *N); SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N); SDValue PromoteIntRes_SRA(SDNode *N); SDValue PromoteIntRes_SRL(SDNode *N); 
SDValue PromoteIntRes_TRUNCATE(SDNode *N); SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_SADDSUBO_CARRY(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_UNDEF(SDNode *N); SDValue PromoteIntRes_VAARG(SDNode *N); SDValue PromoteIntRes_VSCALE(SDNode *N); SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); - SDValue PromoteIntRes_ADDSUBSAT(SDNode *N); + SDValue PromoteIntRes_ADDSUBSHLSAT(SDNode *N); SDValue PromoteIntRes_MULFIX(SDNode *N); SDValue PromoteIntRes_DIVFIX(SDNode *N); SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N); SDValue PromoteIntRes_VECREDUCE(SDNode *N); SDValue PromoteIntRes_ABS(SDNode *N); + SDValue PromoteIntRes_Rotate(SDNode *N); + SDValue PromoteIntRes_FunnelShift(SDNode *N); // Integer Operand Promotion. bool PromoteIntegerOperand(SDNode *N, unsigned OpNo); @@ -420,6 +425,7 @@ private: void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FP_TO_XINT_SAT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_LLROUND_LLRINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -427,8 +433,10 @@ private: void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBCARRY (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SADDSUBO_CARRY (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_PARITY (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SREM (SDNode *N, SDValue 
&Lo, SDValue &Hi); @@ -442,12 +450,16 @@ private: void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SHLSAT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_MULFIX (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_DIVFIX (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_VECREDUCE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_Rotate (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FunnelShift (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandShiftByConstant(SDNode *N, const APInt &Amt, SDValue &Lo, SDValue &Hi); bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -541,6 +553,8 @@ private: SDValue SoftenFloatRes_UNDEF(SDNode *N); SDValue SoftenFloatRes_VAARG(SDNode *N); SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); + SDValue SoftenFloatRes_VECREDUCE(SDNode *N); + SDValue SoftenFloatRes_VECREDUCE_SEQ(SDNode *N); // Convert Float Operand to Integer. 
bool SoftenFloatOperand(SDNode *N, unsigned OpNo); @@ -549,6 +563,7 @@ private: SDValue SoftenFloatOp_BR_CC(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N); + SDValue SoftenFloatOp_FP_TO_XINT_SAT(SDNode *N); SDValue SoftenFloatOp_LROUND(SDNode *N); SDValue SoftenFloatOp_LLROUND(SDNode *N); SDValue SoftenFloatOp_LRINT(SDNode *N); @@ -617,8 +632,7 @@ private: SDValue ExpandFloatOp_BR_CC(SDNode *N); SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N); SDValue ExpandFloatOp_FP_ROUND(SDNode *N); - SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N); - SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N); + SDValue ExpandFloatOp_FP_TO_XINT(SDNode *N); SDValue ExpandFloatOp_LROUND(SDNode *N); SDValue ExpandFloatOp_LLROUND(SDNode *N); SDValue ExpandFloatOp_LRINT(SDNode *N); @@ -628,7 +642,8 @@ private: SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo); void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, - ISD::CondCode &CCCode, const SDLoc &dl); + ISD::CondCode &CCCode, const SDLoc &dl, + SDValue &Chain, bool IsSignaling = false); //===--------------------------------------------------------------------===// // Float promotion support: LegalizeFloatTypes.cpp @@ -658,12 +673,15 @@ private: SDValue PromoteFloatRes_UNDEF(SDNode *N); SDValue BitcastToInt_ATOMIC_SWAP(SDNode *N); SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N); + SDValue PromoteFloatRes_VECREDUCE(SDNode *N); + SDValue PromoteFloatRes_VECREDUCE_SEQ(SDNode *N); bool PromoteFloatOperand(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned 
OpNo); @@ -695,12 +713,15 @@ private: SDValue SoftPromoteHalfRes_UnaryOp(SDNode *N); SDValue SoftPromoteHalfRes_XINT_TO_FP(SDNode *N); SDValue SoftPromoteHalfRes_UNDEF(SDNode *N); + SDValue SoftPromoteHalfRes_VECREDUCE(SDNode *N); + SDValue SoftPromoteHalfRes_VECREDUCE_SEQ(SDNode *N); bool SoftPromoteHalfOperand(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_BITCAST(SDNode *N); SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N); SDValue SoftPromoteHalfOp_FP_TO_XINT(SDNode *N); + SDValue SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N); SDValue SoftPromoteHalfOp_SETCC(SDNode *N); SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo); @@ -745,6 +766,7 @@ private: SDValue ScalarizeVecRes_SETCC(SDNode *N); SDValue ScalarizeVecRes_UNDEF(SDNode *N); SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); + SDValue ScalarizeVecRes_FP_TO_XINT_SAT(SDNode *N); SDValue ScalarizeVecRes_FIX(SDNode *N); @@ -760,7 +782,10 @@ private: SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecOp_FP_EXTEND(SDNode *N); + SDValue ScalarizeVecOp_STRICT_FP_EXTEND(SDNode *N); SDValue ScalarizeVecOp_VECREDUCE(SDNode *N); + SDValue ScalarizeVecOp_VECREDUCE_SEQ(SDNode *N); //===--------------------------------------------------------------------===// // Vector Splitting Support: LegalizeVectorTypes.cpp @@ -778,8 +803,8 @@ private: // Helper function for incrementing the pointer when splitting // memory operations - void IncrementPointer(MemSDNode *N, EVT MemVT, - MachinePointerInfo &MPI, SDValue &Ptr); + void IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI, + SDValue &Ptr, uint64_t *ScaledOffset = nullptr); // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>. 
void SplitVectorResult(SDNode *N, unsigned ResNo); @@ -806,20 +831,23 @@ private: void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi); void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi); - void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, SDValue &Hi); // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>. bool SplitVectorOperand(SDNode *N, unsigned OpNo); SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo); SDValue SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo); + SDValue SplitVecOp_VECREDUCE_SEQ(SDNode *N); SDValue SplitVecOp_UnaryOp(SDNode *N); SDValue SplitVecOp_TruncateHelper(SDNode *N); SDValue SplitVecOp_BITCAST(SDNode *N); + SDValue SplitVecOp_INSERT_SUBVECTOR(SDNode *N, unsigned OpNo); SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_ExtVecInRegOp(SDNode *N); @@ -831,6 +859,7 @@ private: SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); SDValue SplitVecOp_FCOPYSIGN(SDNode *N); + SDValue SplitVecOp_FP_TO_XINT_SAT(SDNode *N); //===--------------------------------------------------------------------===// // Vector Widening Support: LegalizeVectorTypes.cpp @@ -862,9 +891,9 @@ private: SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N); SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N); - SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N); + SDValue WidenVecRes_ScalarOp(SDNode* N); SDValue WidenVecRes_SELECT(SDNode* N); - SDValue WidenVSELECTAndMask(SDNode *N); + SDValue 
WidenVSELECTMask(SDNode *N); SDValue WidenVecRes_SELECT_CC(SDNode* N); SDValue WidenVecRes_SETCC(SDNode* N); SDValue WidenVecRes_STRICT_FSETCC(SDNode* N); @@ -879,9 +908,9 @@ private: SDValue WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo); SDValue WidenVecRes_Convert(SDNode *N); SDValue WidenVecRes_Convert_StrictFP(SDNode *N); + SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N); SDValue WidenVecRes_FCOPYSIGN(SDNode *N); SDValue WidenVecRes_POWI(SDNode *N); - SDValue WidenVecRes_Shift(SDNode *N); SDValue WidenVecRes_Unary(SDNode *N); SDValue WidenVecRes_InregOp(SDNode *N); @@ -901,8 +930,10 @@ private: SDValue WidenVecOp_VSELECT(SDNode *N); SDValue WidenVecOp_Convert(SDNode *N); + SDValue WidenVecOp_FP_TO_XINT_SAT(SDNode *N); SDValue WidenVecOp_FCOPYSIGN(SDNode *N); SDValue WidenVecOp_VECREDUCE(SDNode *N); + SDValue WidenVecOp_VECREDUCE_SEQ(SDNode *N); /// Helper function to generate a set of operations to perform /// a vector operation for a wider type. @@ -934,13 +965,6 @@ private: /// ST: store of a widen value void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST); - /// Helper function to generate a set of stores to store a truncate widen - /// vector into non-widen memory. - /// StChain: list of chains for the stores we have generated - /// ST: store of a widen value - void GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, - StoreSDNode *ST); - /// Modifies a vector input (widen or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. /// When FillWithZeroes is "on" the vector will be widened with zeroes. 
@@ -980,8 +1004,6 @@ private: void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVSETCC(const SDNode *N); - //===--------------------------------------------------------------------===// // Generic Expansion: LegalizeTypesGeneric.cpp //===--------------------------------------------------------------------===// diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 9cd3b8f76d6c..81cc2bf10d25 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -175,7 +175,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; - StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl); + StackPtr = + DAG.getMemBasePlusOffset(StackPtr, TypeSize::Fixed(IncrementSize), dl); // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, @@ -266,7 +267,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, // Increment the pointer to the other half. 
unsigned IncrementSize = NVT.getSizeInBits() / 8; - Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); + Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl); Hi = DAG.getLoad( NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), AAInfo); @@ -481,7 +482,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { St->getOriginalAlign(), St->getMemOperand()->getFlags(), AAInfo); - Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize)); Hi = DAG.getStore( Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize), St->getOriginalAlign(), St->getMemOperand()->getFlags(), AAInfo); @@ -514,8 +515,8 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Cond = N->getOperand(0); CL = CH = Cond; if (Cond.getValueType().isVector()) { - if (SDValue Res = WidenVSELECTAndMask(N)) - std::tie(CL, CH) = DAG.SplitVector(Res->getOperand(0), dl); + if (SDValue Res = WidenVSELECTMask(N)) + std::tie(CL, CH) = DAG.SplitVector(Res, dl); // Check if there are already splitted versions of the vector available and // use those instead of splitting the mask operand again. 
else if (getTypeAction(Cond.getValueType()) == diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 6409f924920d..4015a5a0ce70 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -143,7 +143,6 @@ class VectorLegalizer { void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results); void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results); - SDValue ExpandStrictFPOp(SDNode *Node); void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results); void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results); @@ -454,6 +453,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::UADDSAT: case ISD::SSUBSAT: case ISD::USUBSAT: + case ISD::SSHLSAT: + case ISD::USHLSAT: + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); break; case ISD::SMULFIX: @@ -487,6 +490,11 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(0).getValueType()); break; + case ISD::VECREDUCE_SEQ_FADD: + case ISD::VECREDUCE_SEQ_FMUL: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(1).getValueType()); + break; } LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG)); @@ -794,7 +802,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { break; case ISD::ROTL: case ISD::ROTR: - if (TLI.expandROT(Node, Tmp, DAG)) { + if (TLI.expandROT(Node, false /*AllowVectorOps*/, Tmp, DAG)) { Results.push_back(Tmp); return; } @@ -806,6 +814,15 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { return; } break; + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: + if (SDValue Expanded = 
TLI.expandIntMINMAX(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::UADDO: case ISD::USUBO: ExpandUADDSUBO(Node, Results); @@ -868,6 +885,10 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { case ISD::VECREDUCE_FMIN: Results.push_back(TLI.expandVecReduce(Node, DAG)); return; + case ISD::VECREDUCE_SEQ_FADD: + case ISD::VECREDUCE_SEQ_FMUL: + Results.push_back(TLI.expandVecReduceSeq(Node, DAG)); + return; case ISD::SREM: case ISD::UREM: ExpandREM(Node, Results); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 414ba25ffd5f..57cb364f1939 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -129,6 +129,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::UADDSAT: case ISD::SSUBSAT: case ISD::USUBSAT: + case ISD::SSHLSAT: + case ISD::USHLSAT: case ISD::FPOW: case ISD::FREM: @@ -144,9 +146,13 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SHL: case ISD::SRA: case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: R = ScalarizeVecRes_BinOp(N); break; case ISD::FMA: + case ISD::FSHL: + case ISD::FSHR: R = ScalarizeVecRes_TernaryOp(N); break; @@ -156,6 +162,11 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_StrictFPOp(N); break; + case ISD::FP_TO_UINT_SAT: + case ISD::FP_TO_SINT_SAT: + R = ScalarizeVecRes_FP_TO_XINT_SAT(N); + break; + case ISD::UADDO: case ISD::SADDO: case ISD::USUBO: @@ -510,6 +521,23 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) { return GetScalarizedVector(N->getOperand(Op)); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_TO_XINT_SAT(SDNode *N) { + SDValue Src = N->getOperand(0); + EVT SrcVT = Src.getValueType(); + SDLoc dl(N); + + // Handle case where result is scalarized but operand is not + if 
(getTypeAction(SrcVT) == TargetLowering::TypeScalarizeVector) + Src = GetScalarizedVector(Src); + else + Src = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, SrcVT.getVectorElementType(), Src, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + EVT DstVT = N->getValueType(0).getVectorElementType(); + return DAG.getNode(N->getOpcode(), dl, DstVT, Src, N->getOperand(1)); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && @@ -552,72 +580,80 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { dbgs() << "\n"); SDValue Res = SDValue(); - if (!Res.getNode()) { - switch (N->getOpcode()) { - default: + switch (N->getOpcode()) { + default: #ifndef NDEBUG - dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": "; - N->dump(&DAG); - dbgs() << "\n"; + dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; #endif - report_fatal_error("Do not know how to scalarize this operator's " - "operand!\n"); - case ISD::BITCAST: - Res = ScalarizeVecOp_BITCAST(N); - break; - case ISD::ANY_EXTEND: - case ISD::ZERO_EXTEND: - case ISD::SIGN_EXTEND: - case ISD::TRUNCATE: - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: - case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: - Res = ScalarizeVecOp_UnaryOp(N); - break; - case ISD::STRICT_SINT_TO_FP: - case ISD::STRICT_UINT_TO_FP: - case ISD::STRICT_FP_TO_SINT: - case ISD::STRICT_FP_TO_UINT: - Res = ScalarizeVecOp_UnaryOp_StrictFP(N); - break; - case ISD::CONCAT_VECTORS: - Res = ScalarizeVecOp_CONCAT_VECTORS(N); - break; - case ISD::EXTRACT_VECTOR_ELT: - Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); - break; - case ISD::VSELECT: - Res = ScalarizeVecOp_VSELECT(N); - break; - case ISD::SETCC: - Res = ScalarizeVecOp_VSETCC(N); - break; - case ISD::STORE: - Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); - break; - case ISD::STRICT_FP_ROUND: - Res = 
ScalarizeVecOp_STRICT_FP_ROUND(N, OpNo); - break; - case ISD::FP_ROUND: - Res = ScalarizeVecOp_FP_ROUND(N, OpNo); - break; - case ISD::VECREDUCE_FADD: - case ISD::VECREDUCE_FMUL: - case ISD::VECREDUCE_ADD: - case ISD::VECREDUCE_MUL: - case ISD::VECREDUCE_AND: - case ISD::VECREDUCE_OR: - case ISD::VECREDUCE_XOR: - case ISD::VECREDUCE_SMAX: - case ISD::VECREDUCE_SMIN: - case ISD::VECREDUCE_UMAX: - case ISD::VECREDUCE_UMIN: - case ISD::VECREDUCE_FMAX: - case ISD::VECREDUCE_FMIN: - Res = ScalarizeVecOp_VECREDUCE(N); - break; - } + report_fatal_error("Do not know how to scalarize this operator's " + "operand!\n"); + case ISD::BITCAST: + Res = ScalarizeVecOp_BITCAST(N); + break; + case ISD::ANY_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::TRUNCATE: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + Res = ScalarizeVecOp_UnaryOp(N); + break; + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + Res = ScalarizeVecOp_UnaryOp_StrictFP(N); + break; + case ISD::CONCAT_VECTORS: + Res = ScalarizeVecOp_CONCAT_VECTORS(N); + break; + case ISD::EXTRACT_VECTOR_ELT: + Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); + break; + case ISD::VSELECT: + Res = ScalarizeVecOp_VSELECT(N); + break; + case ISD::SETCC: + Res = ScalarizeVecOp_VSETCC(N); + break; + case ISD::STORE: + Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); + break; + case ISD::STRICT_FP_ROUND: + Res = ScalarizeVecOp_STRICT_FP_ROUND(N, OpNo); + break; + case ISD::FP_ROUND: + Res = ScalarizeVecOp_FP_ROUND(N, OpNo); + break; + case ISD::STRICT_FP_EXTEND: + Res = ScalarizeVecOp_STRICT_FP_EXTEND(N); + break; + case ISD::FP_EXTEND: + Res = ScalarizeVecOp_FP_EXTEND(N); + break; + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case 
ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_FMAX: + case ISD::VECREDUCE_FMIN: + Res = ScalarizeVecOp_VECREDUCE(N); + break; + case ISD::VECREDUCE_SEQ_FADD: + case ISD::VECREDUCE_SEQ_FMUL: + Res = ScalarizeVecOp_VECREDUCE_SEQ(N); + break; } // If the result is null, the sub-method took care of registering results etc. @@ -762,6 +798,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ /// If the value to round is a vector that needs to be scalarized, it must be /// <1 x ty>. Convert the element instead. SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) { + assert(OpNo == 0 && "Wrong operand for scalarization!"); SDValue Elt = GetScalarizedVector(N->getOperand(0)); SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N), N->getValueType(0).getVectorElementType(), Elt, @@ -787,7 +824,36 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, // handled all replacements since caller can only handle a single result. ReplaceValueWith(SDValue(N, 0), Res); return SDValue(); -} +} + +/// If the value to extend is a vector that needs to be scalarized, it must be +/// <1 x ty>. Convert the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_EXTEND(SDNode *N) { + SDValue Elt = GetScalarizedVector(N->getOperand(0)); + SDValue Res = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), + N->getValueType(0).getVectorElementType(), Elt); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); +} + +/// If the value to extend is a vector that needs to be scalarized, it must be +/// <1 x ty>. Convert the element instead. 
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_EXTEND(SDNode *N) { + SDValue Elt = GetScalarizedVector(N->getOperand(1)); + SDValue Res = + DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N), + {N->getValueType(0).getVectorElementType(), MVT::Other}, + {N->getOperand(0), Elt}); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + + Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); + + // Do our own replacement and return SDValue() to tell the caller that we + // handled all replacements since caller can only handle a single result. + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); +} SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) { SDValue Res = GetScalarizedVector(N->getOperand(0)); @@ -797,6 +863,17 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) { return Res; } +SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE_SEQ(SDNode *N) { + SDValue AccOp = N->getOperand(0); + SDValue VecOp = N->getOperand(1); + + unsigned BaseOpc = ISD::getVecReduceBaseOpcode(N->getOpcode()); + + SDValue Op = GetScalarizedVector(VecOp); + return DAG.getNode(BaseOpc, SDLoc(N), N->getValueType(0), + AccOp, Op, N->getFlags()); +} + //===----------------------------------------------------------------------===// // Result Vector Splitting //===----------------------------------------------------------------------===// @@ -836,7 +913,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; - case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break; + case ISD::SPLAT_VECTOR: + case ISD::SCALAR_TO_VECTOR: + SplitVecRes_ScalarOp(N, Lo, Hi); + break; case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, 
Hi); break; case ISD::LOAD: SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); @@ -939,9 +1019,15 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::UADDSAT: case ISD::SSUBSAT: case ISD::USUBSAT: + case ISD::SSHLSAT: + case ISD::USHLSAT: + case ISD::ROTL: + case ISD::ROTR: SplitVecRes_BinOp(N, Lo, Hi); break; case ISD::FMA: + case ISD::FSHL: + case ISD::FSHR: SplitVecRes_TernaryOp(N, Lo, Hi); break; @@ -951,6 +1037,11 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_StrictFPOp(N, Lo, Hi); break; + case ISD::FP_TO_UINT_SAT: + case ISD::FP_TO_SINT_SAT: + SplitVecRes_FP_TO_XINT_SAT(N, Lo, Hi); + break; + case ISD::UADDO: case ISD::SADDO: case ISD::USUBO: @@ -977,21 +1068,26 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { } void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, - MachinePointerInfo &MPI, - SDValue &Ptr) { + MachinePointerInfo &MPI, SDValue &Ptr, + uint64_t *ScaledOffset) { SDLoc DL(N); unsigned IncrementSize = MemVT.getSizeInBits().getKnownMinSize() / 8; if (MemVT.isScalableVector()) { + SDNodeFlags Flags; SDValue BytesIncrement = DAG.getVScale( DL, Ptr.getValueType(), APInt(Ptr.getValueSizeInBits().getFixedSize(), IncrementSize)); MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace()); - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, BytesIncrement); + Flags.setNoUnsignedWrap(true); + if (ScaledOffset) + *ScaledOffset += IncrementSize; + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, BytesIncrement, + Flags); } else { MPI = N->getPointerInfo().getWithOffset(IncrementSize); // Increment the pointer to the other half. - Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize); + Ptr = DAG.getObjectPtrOffset(DL, Ptr, TypeSize::Fixed(IncrementSize)); } } @@ -1200,7 +1296,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, // Increment the pointer to the other part. 
unsigned IncrementSize = Lo.getValueSizeInBits() / 8; - StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl); + StackPtr = + DAG.getMemBasePlusOffset(StackPtr, TypeSize::Fixed(IncrementSize), dl); // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, @@ -1448,14 +1545,16 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) { unsigned IdxVal = CIdx->getZExtValue(); - unsigned LoNumElts = Lo.getValueType().getVectorNumElements(); - if (IdxVal < LoNumElts) + unsigned LoNumElts = Lo.getValueType().getVectorMinNumElements(); + if (IdxVal < LoNumElts) { Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Lo.getValueType(), Lo, Elt, Idx); - else + return; + } else if (!Vec.getValueType().isScalableVector()) { Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, DAG.getVectorIdxConstant(IdxVal - LoNumElts, dl)); - return; + return; + } } // See if the target wants to custom expand this node. @@ -1468,7 +1567,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, if (VecVT.getScalarSizeInBits() < 8) { EltVT = MVT::i8; VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, - VecVT.getVectorNumElements()); + VecVT.getVectorElementCount()); Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec); // Extend the element type to match if needed. 
if (EltVT.bitsGT(Elt.getValueType())) @@ -1493,7 +1592,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); Store = DAG.getTruncStore( Store, dl, Elt, EltPtr, MachinePointerInfo::getUnknownStack(MF), EltVT, - commonAlignment(SmallestAlign, EltVT.getSizeInBits() / 8)); + commonAlignment(SmallestAlign, + EltVT.getFixedSizeInBits() / 8)); EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT); @@ -1502,12 +1602,11 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo, SmallestAlign); // Increment the pointer to the other part. - unsigned IncrementSize = LoVT.getSizeInBits() / 8; - StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl); + auto Load = cast<LoadSDNode>(Lo); + MachinePointerInfo MPI = Load->getPointerInfo(); + IncrementPointer(Load, LoVT, MPI, StackPtr); - // Load the Hi part from the stack slot. - Hi = DAG.getLoad(HiVT, dl, Store, StackPtr, - PtrInfo.getWithOffset(IncrementSize), SmallestAlign); + Hi = DAG.getLoad(HiVT, dl, Store, StackPtr, MPI, SmallestAlign); // If we adjusted the original type, we need to truncate the results. 
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); @@ -1517,13 +1616,18 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); } -void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, - SDValue &Hi) { +void DAGTypeLegalizer::SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { EVT LoVT, HiVT; SDLoc dl(N); std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); - Hi = DAG.getUNDEF(HiVT); + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, N->getOperand(0)); + if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) { + Hi = DAG.getUNDEF(HiVT); + } else { + assert(N->getOpcode() == ISD::SPLAT_VECTOR && "Unexpected opcode"); + Hi = Lo; + } } void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, @@ -1611,9 +1715,10 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, else std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl); + unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize()); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), - Alignment, MLD->getAAInfo(), MLD->getRanges()); + MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoSize, Alignment, + MLD->getAAInfo(), MLD->getRanges()); Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT, MMO, MLD->getAddressingMode(), ExtType, @@ -1627,12 +1732,18 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, // Generate hi masked load. 
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, MLD->isExpandingLoad()); - unsigned HiOffset = LoMemVT.getStoreSize(); + unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize()); + + MachinePointerInfo MPI; + if (LoMemVT.isScalableVector()) + MPI = MachinePointerInfo(MLD->getPointerInfo().getAddrSpace()); + else + MPI = MLD->getPointerInfo().getWithOffset( + LoMemVT.getStoreSize().getFixedSize()); MMO = DAG.getMachineFunction().getMachineMemOperand( - MLD->getPointerInfo().getWithOffset(HiOffset), - MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment, - MLD->getAAInfo(), MLD->getRanges()); + MPI, MachineMemOperand::MOLoad, HiSize, Alignment, MLD->getAAInfo(), + MLD->getRanges()); Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi, HiMemVT, MMO, MLD->getAddressingMode(), ExtType, @@ -1662,7 +1773,9 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue PassThru = MGT->getPassThru(); SDValue Index = MGT->getIndex(); SDValue Scale = MGT->getScale(); + EVT MemoryVT = MGT->getMemoryVT(); Align Alignment = MGT->getOriginalAlign(); + ISD::LoadExtType ExtType = MGT->getExtensionType(); // Split Mask operand SDValue MaskLo, MaskHi; @@ -1675,6 +1788,10 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); } + EVT LoMemVT, HiMemVT; + // Split MemoryVT + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + SDValue PassThruLo, PassThruHi; if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector) GetSplitVector(PassThru, PassThruLo, PassThruHi); @@ -1693,12 +1810,12 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, MGT->getRanges()); SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; - Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, - MMO, MGT->getIndexType()); + Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl, 
OpsLo, + MMO, MGT->getIndexType(), ExtType); SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale}; - Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, - MMO, MGT->getIndexType()); + Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl, OpsHi, + MMO, MGT->getIndexType(), ExtType); // Build a factor node to remember that this load is independent of the // other one. @@ -1786,8 +1903,8 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, // more effectively move in the right direction and prevent falling down // to scalarization in many cases due to the input vector being split too // far. - if ((SrcVT.getVectorMinNumElements() & 1) == 0 && - SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) { + if (SrcVT.getVectorElementCount().isKnownEven() && + SrcVT.getScalarSizeInBits() * 2 < DestVT.getScalarSizeInBits()) { LLVMContext &Ctx = *DAG.getContext(); EVT NewSrcVT = SrcVT.widenIntegerVectorElementType(Ctx); EVT SplitSrcVT = SrcVT.getHalfNumVectorElementsVT(Ctx); @@ -1942,6 +2059,22 @@ void DAGTypeLegalizer::SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { ReplaceValueWith(SDValue(N, 1), Chain); } +void DAGTypeLegalizer::SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT DstVTLo, DstVTHi; + std::tie(DstVTLo, DstVTHi) = DAG.GetSplitDestVTs(N->getValueType(0)); + SDLoc dl(N); + + SDValue SrcLo, SrcHi; + EVT SrcVT = N->getOperand(0).getValueType(); + if (getTypeAction(SrcVT) == TargetLowering::TypeSplitVector) + GetSplitVector(N->getOperand(0), SrcLo, SrcHi); + else + std::tie(SrcLo, SrcHi) = DAG.SplitVectorOperand(N, 0); + + Lo = DAG.getNode(N->getOpcode(), dl, DstVTLo, SrcLo, N->getOperand(1)); + Hi = DAG.getNode(N->getOpcode(), dl, DstVTHi, SrcHi, N->getOperand(1)); +} //===----------------------------------------------------------------------===// // Operand Vector Splitting @@ -1959,92 +2092,95 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { 
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) return false; - if (!Res.getNode()) { - switch (N->getOpcode()) { - default: + switch (N->getOpcode()) { + default: #ifndef NDEBUG - dbgs() << "SplitVectorOperand Op #" << OpNo << ": "; - N->dump(&DAG); - dbgs() << "\n"; + dbgs() << "SplitVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; #endif - report_fatal_error("Do not know how to split this operator's " - "operand!\n"); - - case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break; - case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break; - case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; - case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; - case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; - case ISD::TRUNCATE: + report_fatal_error("Do not know how to split this operator's " + "operand!\n"); + + case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break; + case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; + case ISD::INSERT_SUBVECTOR: Res = SplitVecOp_INSERT_SUBVECTOR(N, OpNo); break; + case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; + case ISD::TRUNCATE: + Res = SplitVecOp_TruncateHelper(N); + break; + case ISD::STRICT_FP_ROUND: + case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; + case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break; + case ISD::STORE: + Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); + break; + case ISD::MSTORE: + Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo); + break; + case ISD::MSCATTER: + Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo); + break; + case ISD::MGATHER: + Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo); + break; + case ISD::VSELECT: + Res = SplitVecOp_VSELECT(N, OpNo); + break; + case 
ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + if (N->getValueType(0).bitsLT( + N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType())) Res = SplitVecOp_TruncateHelper(N); - break; - case ISD::STRICT_FP_ROUND: - case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; - case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break; - case ISD::STORE: - Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); - break; - case ISD::MSTORE: - Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo); - break; - case ISD::MSCATTER: - Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo); - break; - case ISD::MGATHER: - Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo); - break; - case ISD::VSELECT: - Res = SplitVecOp_VSELECT(N, OpNo); - break; - case ISD::STRICT_SINT_TO_FP: - case ISD::STRICT_UINT_TO_FP: - case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: - if (N->getValueType(0).bitsLT( - N->getOperand(N->isStrictFPOpcode() ? 
1 : 0).getValueType())) - Res = SplitVecOp_TruncateHelper(N); - else - Res = SplitVecOp_UnaryOp(N); - break; - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: - case ISD::STRICT_FP_TO_SINT: - case ISD::STRICT_FP_TO_UINT: - case ISD::CTTZ: - case ISD::CTLZ: - case ISD::CTPOP: - case ISD::STRICT_FP_EXTEND: - case ISD::FP_EXTEND: - case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: - case ISD::FTRUNC: - case ISD::FCANONICALIZE: + else Res = SplitVecOp_UnaryOp(N); - break; + break; + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + Res = SplitVecOp_FP_TO_XINT_SAT(N); + break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_FP_EXTEND: + case ISD::FP_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + case ISD::FTRUNC: + Res = SplitVecOp_UnaryOp(N); + break; - case ISD::ANY_EXTEND_VECTOR_INREG: - case ISD::SIGN_EXTEND_VECTOR_INREG: - case ISD::ZERO_EXTEND_VECTOR_INREG: - Res = SplitVecOp_ExtVecInRegOp(N); - break; + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: + Res = SplitVecOp_ExtVecInRegOp(N); + break; - case ISD::VECREDUCE_FADD: - case ISD::VECREDUCE_FMUL: - case ISD::VECREDUCE_ADD: - case ISD::VECREDUCE_MUL: - case ISD::VECREDUCE_AND: - case ISD::VECREDUCE_OR: - case ISD::VECREDUCE_XOR: - case ISD::VECREDUCE_SMAX: - case ISD::VECREDUCE_SMIN: - case ISD::VECREDUCE_UMAX: - case ISD::VECREDUCE_UMIN: - case ISD::VECREDUCE_FMAX: - case ISD::VECREDUCE_FMIN: - Res = SplitVecOp_VECREDUCE(N, OpNo); - break; - } + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_FMAX: + case ISD::VECREDUCE_FMIN: + Res = 
SplitVecOp_VECREDUCE(N, OpNo); + break; + case ISD::VECREDUCE_SEQ_FADD: + case ISD::VECREDUCE_SEQ_FMUL: + Res = SplitVecOp_VECREDUCE_SEQ(N); + break; } // If the result is null, the sub-method took care of registering results etc. @@ -2112,36 +2248,35 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) { EVT LoOpVT, HiOpVT; std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT); - bool NoNaN = N->getFlags().hasNoNaNs(); - unsigned CombineOpc = 0; - switch (N->getOpcode()) { - case ISD::VECREDUCE_FADD: CombineOpc = ISD::FADD; break; - case ISD::VECREDUCE_FMUL: CombineOpc = ISD::FMUL; break; - case ISD::VECREDUCE_ADD: CombineOpc = ISD::ADD; break; - case ISD::VECREDUCE_MUL: CombineOpc = ISD::MUL; break; - case ISD::VECREDUCE_AND: CombineOpc = ISD::AND; break; - case ISD::VECREDUCE_OR: CombineOpc = ISD::OR; break; - case ISD::VECREDUCE_XOR: CombineOpc = ISD::XOR; break; - case ISD::VECREDUCE_SMAX: CombineOpc = ISD::SMAX; break; - case ISD::VECREDUCE_SMIN: CombineOpc = ISD::SMIN; break; - case ISD::VECREDUCE_UMAX: CombineOpc = ISD::UMAX; break; - case ISD::VECREDUCE_UMIN: CombineOpc = ISD::UMIN; break; - case ISD::VECREDUCE_FMAX: - CombineOpc = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM; - break; - case ISD::VECREDUCE_FMIN: - CombineOpc = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM; - break; - default: - llvm_unreachable("Unexpected reduce ISD node"); - } - // Use the appropriate scalar instruction on the split subvectors before // reducing the now partially reduced smaller vector. 
+ unsigned CombineOpc = ISD::getVecReduceBaseOpcode(N->getOpcode()); SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi, N->getFlags()); return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, N->getFlags()); } +SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE_SEQ(SDNode *N) { + EVT ResVT = N->getValueType(0); + SDValue Lo, Hi; + SDLoc dl(N); + + SDValue AccOp = N->getOperand(0); + SDValue VecOp = N->getOperand(1); + SDNodeFlags Flags = N->getFlags(); + + EVT VecVT = VecOp.getValueType(); + assert(VecVT.isVector() && "Can only split reduce vector operand"); + GetSplitVector(VecOp, Lo, Hi); + EVT LoOpVT, HiOpVT; + std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT); + + // Reduce low half. + SDValue Partial = DAG.getNode(N->getOpcode(), dl, ResVT, AccOp, Lo, Flags); + + // Reduce high half, using low half result as initial value. + return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, Hi, Flags); +} + SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { // The result has a legal vector type, but the input needs splitting. EVT ResVT = N->getValueType(0); @@ -2151,7 +2286,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { EVT InVT = Lo.getValueType(); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), - InVT.getVectorNumElements()); + InVT.getVectorElementCount()); if (N->isStrictFPOpcode()) { Lo = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other }, @@ -2191,19 +2326,52 @@ SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) { JoinIntegers(Lo, Hi)); } +SDValue DAGTypeLegalizer::SplitVecOp_INSERT_SUBVECTOR(SDNode *N, + unsigned OpNo) { + assert(OpNo == 1 && "Invalid OpNo; can only split SubVec."); + // We know that the result type is legal. 
+ EVT ResVT = N->getValueType(0); + + SDValue Vec = N->getOperand(0); + SDValue SubVec = N->getOperand(1); + SDValue Idx = N->getOperand(2); + SDLoc dl(N); + + SDValue Lo, Hi; + GetSplitVector(SubVec, Lo, Hi); + + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + uint64_t LoElts = Lo.getValueType().getVectorMinNumElements(); + + SDValue FirstInsertion = + DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Lo, Idx); + SDValue SecondInsertion = + DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, FirstInsertion, Hi, + DAG.getVectorIdxConstant(IdxVal + LoElts, dl)); + + return SecondInsertion; +} + SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { // We know that the extracted result type is legal. EVT SubVT = N->getValueType(0); + SDValue Idx = N->getOperand(1); SDLoc dl(N); SDValue Lo, Hi; + + if (SubVT.isScalableVector() != + N->getOperand(0).getValueType().isScalableVector()) + report_fatal_error("Extracting a fixed-length vector from an illegal " + "scalable vector is not yet supported"); + GetSplitVector(N->getOperand(0), Lo, Hi); - uint64_t LoElts = Lo.getValueType().getVectorNumElements(); + uint64_t LoElts = Lo.getValueType().getVectorMinNumElements(); uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); if (IdxVal < LoElts) { - assert(IdxVal + SubVT.getVectorNumElements() <= LoElts && + assert(IdxVal + SubVT.getVectorMinNumElements() <= LoElts && "Extracted subvector crosses vector split!"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); } else { @@ -2223,13 +2391,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue Lo, Hi; GetSplitVector(Vec, Lo, Hi); - uint64_t LoElts = Lo.getValueType().getVectorNumElements(); + uint64_t LoElts = Lo.getValueType().getVectorMinNumElements(); if (IdxVal < LoElts) return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0); - return SDValue(DAG.UpdateNodeOperands(N, Hi, - DAG.getConstant(IdxVal - LoElts, SDLoc(N), - Idx.getValueType())), 0); + else if 
(!Vec.getValueType().isScalableVector()) + return SDValue(DAG.UpdateNodeOperands(N, Hi, + DAG.getConstant(IdxVal - LoElts, SDLoc(N), + Idx.getValueType())), 0); } // See if the target wants to custom expand this node. @@ -2242,7 +2411,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { if (VecVT.getScalarSizeInBits() < 8) { EltVT = MVT::i8; VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, - VecVT.getVectorNumElements()); + VecVT.getVectorElementCount()); Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec); } @@ -2272,7 +2441,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { return DAG.getExtLoad( ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT, - commonAlignment(SmallestAlign, EltVT.getSizeInBits() / 8)); + commonAlignment(SmallestAlign, EltVT.getFixedSizeInBits() / 8)); } SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) { @@ -2298,6 +2467,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue Mask = MGT->getMask(); SDValue PassThru = MGT->getPassThru(); Align Alignment = MGT->getOriginalAlign(); + ISD::LoadExtType ExtType = MGT->getExtensionType(); SDValue MaskLo, MaskHi; if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) @@ -2328,12 +2498,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, MGT->getRanges()); SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; - SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, - OpsLo, MMO, MGT->getIndexType()); + SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl, + OpsLo, MMO, MGT->getIndexType(), ExtType); SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale}; - SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, - OpsHi, MMO, MGT->getIndexType()); + SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), 
HiMemVT, dl, + OpsHi, MMO, MGT->getIndexType(), ExtType); // Build a factor node to remember that this load is independent of the // other one. @@ -2387,9 +2557,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty); SDValue Lo, Hi, Res; + unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize()); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - N->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), - Alignment, N->getAAInfo(), N->getRanges()); + N->getPointerInfo(), MachineMemOperand::MOStore, LoSize, Alignment, + N->getAAInfo(), N->getRanges()); Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(), @@ -2403,11 +2574,20 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, N->isCompressingStore()); - unsigned HiOffset = LoMemVT.getStoreSize(); + MachinePointerInfo MPI; + if (LoMemVT.isScalableVector()) { + Alignment = commonAlignment( + Alignment, LoMemVT.getSizeInBits().getKnownMinSize() / 8); + MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace()); + } else + MPI = N->getPointerInfo().getWithOffset( + LoMemVT.getStoreSize().getFixedSize()); + + unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize()); MMO = DAG.getMachineFunction().getMachineMemOperand( - N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore, - HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); + MPI, MachineMemOperand::MOStore, HiSize, Alignment, N->getAAInfo(), + N->getRanges()); Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(), @@ -2429,11 +2609,15 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, SDValue Index = N->getIndex(); SDValue Scale = 
N->getScale(); SDValue Data = N->getValue(); + EVT MemoryVT = N->getMemoryVT(); Align Alignment = N->getOriginalAlign(); SDLoc DL(N); // Split all operands + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + SDValue DataLo, DataHi; if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) // Split Data operand @@ -2464,15 +2648,17 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges()); SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale}; - Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), - DL, OpsLo, MMO, N->getIndexType()); + Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), LoMemVT, + DL, OpsLo, MMO, N->getIndexType(), + N->isTruncatingStore()); // The order of the Scatter operation after split is well defined. The "Hi" // part comes after the "Lo". So these two operations should be chained one // after another. SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale}; - return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), - DL, OpsHi, MMO, N->getIndexType()); + return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), HiMemVT, + DL, OpsHi, MMO, N->getIndexType(), + N->isTruncatingStore()); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -2559,13 +2745,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { SDValue InVec = N->getOperand(OpNo); EVT InVT = InVec->getValueType(0); EVT OutVT = N->getValueType(0); - unsigned NumElements = OutVT.getVectorNumElements(); + ElementCount NumElements = OutVT.getVectorElementCount(); bool IsFloat = OutVT.isFloatingPoint(); - // Widening should have already made sure this is a power-two vector - // if we're trying to split it at all. assert() that's true, just in case. 
- assert(!(NumElements & 1) && "Splitting vector, but not in half!"); - unsigned InElementSize = InVT.getScalarSizeInBits(); unsigned OutElementSize = OutVT.getScalarSizeInBits(); @@ -2595,11 +2777,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { GetSplitVector(InVec, InLoVec, InHiVec); // Truncate them to 1/2 the element size. + // + // This assumes the number of elements is a power of two; any vector that + // isn't should be widened, not split. EVT HalfElementVT = IsFloat ? EVT::getFloatingPointVT(InElementSize/2) : EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, - NumElements/2); + NumElements.divideCoefficientBy(2)); SDValue HalfLo; SDValue HalfHi; @@ -2678,7 +2863,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { EVT InVT = Lo.getValueType(); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), - InVT.getVectorNumElements()); + InVT.getVectorElementCount()); if (N->isStrictFPOpcode()) { Lo = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other }, @@ -2704,6 +2889,22 @@ SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) { return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements()); } +SDValue DAGTypeLegalizer::SplitVecOp_FP_TO_XINT_SAT(SDNode *N) { + EVT ResVT = N->getValueType(0); + SDValue Lo, Hi; + SDLoc dl(N); + GetSplitVector(N->getOperand(0), Lo, Hi); + EVT InVT = Lo.getValueType(); + + EVT NewResVT = + EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), + InVT.getVectorElementCount()); + + Lo = DAG.getNode(N->getOpcode(), dl, NewResVT, Lo, N->getOperand(1)); + Hi = DAG.getNode(N->getOpcode(), dl, NewResVT, Hi, N->getOperand(1)); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); +} //===----------------------------------------------------------------------===// // Result Vector Widening @@ -2734,7 +2935,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { 
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; - case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; + case ISD::SPLAT_VECTOR: + case ISD::SCALAR_TO_VECTOR: + Res = WidenVecRes_ScalarOp(N); + break; case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break; case ISD::VSELECT: case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; @@ -2759,6 +2963,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::OR: case ISD::SUB: case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: case ISD::FMINNUM: case ISD::FMAXNUM: case ISD::FMINIMUM: @@ -2771,6 +2978,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::SADDSAT: case ISD::USUBSAT: case ISD::SSUBSAT: + case ISD::SSHLSAT: + case ISD::USHLSAT: + case ISD::ROTL: + case ISD::ROTR: Res = WidenVecRes_Binary(N); break; @@ -2819,12 +3030,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_POWI(N); break; - case ISD::SHL: - case ISD::SRA: - case ISD::SRL: - Res = WidenVecRes_Shift(N); - break; - case ISD::ANY_EXTEND_VECTOR_INREG: case ISD::SIGN_EXTEND_VECTOR_INREG: case ISD::ZERO_EXTEND_VECTOR_INREG: @@ -2844,6 +3049,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Convert(N); break; + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + Res = WidenVecRes_FP_TO_XINT_SAT(N); + break; + case ISD::FABS: case ISD::FCEIL: case ISD::FCOS: @@ -2891,6 +3101,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Unary(N); break; case ISD::FMA: + case ISD::FSHL: + case ISD::FSHR: Res = WidenVecRes_Ternary(N); break; } @@ -3256,19 +3468,34 @@ SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) { } SDValue 
DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { + LLVMContext &Ctx = *DAG.getContext(); SDValue InOp = N->getOperand(0); SDLoc DL(N); - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); EVT InVT = InOp.getValueType(); - EVT InEltVT = InVT.getVectorElementType(); - EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts); unsigned Opcode = N->getOpcode(); - unsigned InVTNumElts = InVT.getVectorNumElements(); const SDNodeFlags Flags = N->getFlags(); + + // Handle the case of ZERO_EXTEND where the promoted InVT element size does + // not equal that of WidenVT. + if (N->getOpcode() == ISD::ZERO_EXTEND && + getTypeAction(InVT) == TargetLowering::TypePromoteInteger && + TLI.getTypeToTransformTo(Ctx, InVT).getScalarSizeInBits() != + WidenVT.getScalarSizeInBits()) { + InOp = ZExtPromotedInteger(InOp); + InVT = InOp.getValueType(); + if (WidenVT.getScalarSizeInBits() < InVT.getScalarSizeInBits()) + Opcode = ISD::TRUNCATE; + } + + EVT InEltVT = InVT.getVectorElementType(); + EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenNumElts); + unsigned InVTNumElts = InVT.getVectorNumElements(); + if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(N->getOperand(0)); InVT = InOp.getValueType(); @@ -3336,6 +3563,27 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { return DAG.getBuildVector(WidenVT, DL, Ops); } +SDValue DAGTypeLegalizer::WidenVecRes_FP_TO_XINT_SAT(SDNode *N) { + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + ElementCount WidenNumElts = WidenVT.getVectorElementCount(); + + SDValue Src = N->getOperand(0); + EVT SrcVT = Src.getValueType(); + + // Also widen the input. 
+ if (getTypeAction(SrcVT) == TargetLowering::TypeWidenVector) { + Src = GetWidenedVector(Src); + SrcVT = Src.getValueType(); + } + + // Input and output not widened to the same size, give up. + if (WidenNumElts != SrcVT.getVectorElementCount()) + return DAG.UnrollVectorOp(N, WidenNumElts.getKnownMinValue()); + + return DAG.getNode(N->getOpcode(), dl, WidenVT, Src, N->getOperand(1)); +} + SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) { SDValue InOp = N->getOperand(1); SDLoc DL(N); @@ -3442,25 +3690,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp); } -SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue InOp = GetWidenedVector(N->getOperand(0)); - SDValue ShOp = N->getOperand(1); - - EVT ShVT = ShOp.getValueType(); - if (getTypeAction(ShVT) == TargetLowering::TypeWidenVector) { - ShOp = GetWidenedVector(ShOp); - ShVT = ShOp.getValueType(); - } - EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(), - ShVT.getVectorElementType(), - WidenVT.getVectorNumElements()); - if (ShVT != ShWidenVT) - ShOp = ModifyToType(ShOp, ShWidenVT); - - return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp); -} - SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) { // Unary op widening. EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); @@ -3605,16 +3834,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { EVT InVT = N->getOperand(0).getValueType(); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); - unsigned WidenNumElts = WidenVT.getVectorNumElements(); - unsigned NumInElts = InVT.getVectorNumElements(); unsigned NumOperands = N->getNumOperands(); bool InputWidened = false; // Indicates we need to widen the input. 
if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) { - if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) { + unsigned WidenNumElts = WidenVT.getVectorMinNumElements(); + unsigned NumInElts = InVT.getVectorMinNumElements(); + if (WidenNumElts % NumInElts == 0) { // Add undef vectors to widen to correct length. - unsigned NumConcat = WidenVT.getVectorNumElements() / - InVT.getVectorNumElements(); + unsigned NumConcat = WidenNumElts / NumInElts; SDValue UndefVal = DAG.getUNDEF(InVT); SmallVector<SDValue, 16> Ops(NumConcat); for (unsigned i=0; i < NumOperands; ++i) @@ -3638,6 +3866,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { return GetWidenedVector(N->getOperand(0)); if (NumOperands == 2) { + assert(!WidenVT.isScalableVector() && + "Cannot use vector shuffles to widen CONCAT_VECTOR result"); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + unsigned NumInElts = InVT.getVectorNumElements(); + // Replace concat of two operands with a shuffle. SmallVector<int, 16> MaskOps(WidenNumElts, -1); for (unsigned i = 0; i < NumInElts; ++i) { @@ -3652,6 +3885,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { } } + assert(!WidenVT.isScalableVector() && + "Cannot use build vectors to widen CONCAT_VECTOR result"); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + unsigned NumInElts = InVT.getVectorNumElements(); + // Fall back to use extracts and build vector. 
EVT EltVT = WidenVT.getVectorElementType(); SmallVector<SDValue, 16> Ops(WidenNumElts); @@ -3806,9 +4044,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { Index = ModifyToType(Index, WideIndexVT); SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index, Scale }; + + // Widen the MemoryType + EVT WideMemVT = EVT::getVectorVT(*DAG.getContext(), + N->getMemoryVT().getScalarType(), NumElts); SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), - N->getMemoryVT(), dl, Ops, - N->getMemOperand(), N->getIndexType()); + WideMemVT, dl, Ops, N->getMemOperand(), + N->getIndexType(), N->getExtensionType()); // Legalize the chain result - switch anything that used the old chain to // use the new one. @@ -3816,10 +4058,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { return Res; } -SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) { +SDValue DAGTypeLegalizer::WidenVecRes_ScalarOp(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), - WidenVT, N->getOperand(0)); + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0)); } // Return true is this is a SETCC node or a strict version of it. @@ -3939,11 +4180,11 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT, return Mask; } -// This method tries to handle VSELECT and its mask by legalizing operands -// (which may require widening) and if needed adjusting the mask vector type -// to match that of the VSELECT. Without it, many cases end up with -// scalarization of the SETCC, with many unnecessary instructions. -SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) { +// This method tries to handle some special cases for the vselect mask +// and if needed adjusting the mask vector type to match that of the VSELECT. 
+// Without it, many cases end up with scalarization of the SETCC, with many +// unnecessary instructions. +SDValue DAGTypeLegalizer::WidenVSELECTMask(SDNode *N) { LLVMContext &Ctx = *DAG.getContext(); SDValue Cond = N->getOperand(0); @@ -3990,14 +4231,9 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) { return SDValue(); } - // Get the VT and operands for VSELECT, and widen if needed. - SDValue VSelOp1 = N->getOperand(1); - SDValue VSelOp2 = N->getOperand(2); - if (getTypeAction(VSelVT) == TargetLowering::TypeWidenVector) { + // Widen the vselect result type if needed. + if (getTypeAction(VSelVT) == TargetLowering::TypeWidenVector) VSelVT = TLI.getTypeToTransformTo(Ctx, VSelVT); - VSelOp1 = GetWidenedVector(VSelOp1); - VSelOp2 = GetWidenedVector(VSelOp2); - } // The mask of the VSELECT should have integer elements. EVT ToMaskVT = VSelVT; @@ -4046,7 +4282,7 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) { } else return SDValue(); - return DAG.getNode(ISD::VSELECT, SDLoc(N), VSelVT, Mask, VSelOp1, VSelOp2); + return Mask; } SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { @@ -4056,8 +4292,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { SDValue Cond1 = N->getOperand(0); EVT CondVT = Cond1.getValueType(); if (CondVT.isVector()) { - if (SDValue Res = WidenVSELECTAndMask(N)) - return Res; + if (SDValue WideCond = WidenVSELECTMask(N)) { + SDValue InOp1 = GetWidenedVector(N->getOperand(1)); + SDValue InOp2 = GetWidenedVector(N->getOperand(2)); + assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT); + return DAG.getNode(N->getOpcode(), SDLoc(N), + WidenVT, WideCond, InOp1, InOp2); + } EVT CondEltVT = CondVT.getVectorElementType(); EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), @@ -4264,6 +4505,11 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { Res = WidenVecOp_Convert(N); break; + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + Res = WidenVecOp_FP_TO_XINT_SAT(N); 
+ break; + case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: case ISD::VECREDUCE_ADD: @@ -4279,6 +4525,10 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::VECREDUCE_FMIN: Res = WidenVecOp_VECREDUCE(N); break; + case ISD::VECREDUCE_SEQ_FADD: + case ISD::VECREDUCE_SEQ_FMUL: + Res = WidenVecOp_VECREDUCE_SEQ(N); + break; } // If Res is null, the sub-method took care of registering the result. @@ -4433,6 +4683,28 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { return DAG.getBuildVector(VT, dl, Ops); } +SDValue DAGTypeLegalizer::WidenVecOp_FP_TO_XINT_SAT(SDNode *N) { + EVT DstVT = N->getValueType(0); + SDValue Src = GetWidenedVector(N->getOperand(0)); + EVT SrcVT = Src.getValueType(); + ElementCount WideNumElts = SrcVT.getVectorElementCount(); + SDLoc dl(N); + + // See if a widened result type would be legal, if so widen the node. + EVT WideDstVT = EVT::getVectorVT(*DAG.getContext(), + DstVT.getVectorElementType(), WideNumElts); + if (TLI.isTypeLegal(WideDstVT)) { + SDValue Res = + DAG.getNode(N->getOpcode(), dl, WideDstVT, Src, N->getOperand(1)); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, DstVT, Res, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + + // Give up and unroll. 
+ return DAG.UnrollVectorOp(N); +} + SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { EVT VT = N->getValueType(0); SDValue InOp = GetWidenedVector(N->getOperand(0)); @@ -4533,11 +4805,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { if (!ST->getMemoryVT().getScalarType().isByteSized()) return TLI.scalarizeVectorStore(ST, DAG); - SmallVector<SDValue, 16> StChain; if (ST->isTruncatingStore()) - GenWidenVectorTruncStores(StChain, ST); - else - GenWidenVectorStores(StChain, ST); + return TLI.scalarizeVectorStore(ST, DAG); + + SmallVector<SDValue, 16> StChain; + GenWidenVectorStores(StChain, ST); if (StChain.size() == 1) return StChain[0]; @@ -4599,7 +4871,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_MGATHER(SDNode *N, unsigned OpNo) { SDValue Ops[] = {MG->getChain(), DataOp, Mask, MG->getBasePtr(), Index, Scale}; SDValue Res = DAG.getMaskedGather(MG->getVTList(), MG->getMemoryVT(), dl, Ops, - MG->getMemOperand(), MG->getIndexType()); + MG->getMemOperand(), MG->getIndexType(), + MG->getExtensionType()); ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); ReplaceValueWith(SDValue(N, 0), Res.getValue(0)); return SDValue(); @@ -4611,6 +4884,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { SDValue Mask = MSC->getMask(); SDValue Index = MSC->getIndex(); SDValue Scale = MSC->getScale(); + EVT WideMemVT = MSC->getMemoryVT(); if (OpNo == 1) { DataOp = GetWidenedVector(DataOp); @@ -4627,6 +4901,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MaskVT.getVectorElementType(), NumElts); Mask = ModifyToType(Mask, WideMaskVT, true); + + // Widen the MemoryType + WideMemVT = EVT::getVectorVT(*DAG.getContext(), + MSC->getMemoryVT().getScalarType(), NumElts); } else if (OpNo == 4) { // Just widen the index. It's allowed to have extra elements. 
Index = GetWidenedVector(Index); @@ -4635,9 +4913,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { SDValue Ops[] = {MSC->getChain(), DataOp, Mask, MSC->getBasePtr(), Index, Scale}; - return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), - MSC->getMemoryVT(), SDLoc(N), Ops, - MSC->getMemOperand(), MSC->getIndexType()); + return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), WideMemVT, SDLoc(N), + Ops, MSC->getMemOperand(), MSC->getIndexType(), + MSC->isTruncatingStore()); } SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { @@ -4716,45 +4994,37 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) { EVT OrigVT = N->getOperand(0).getValueType(); EVT WideVT = Op.getValueType(); EVT ElemVT = OrigVT.getVectorElementType(); + SDNodeFlags Flags = N->getFlags(); - SDValue NeutralElem; - switch (N->getOpcode()) { - case ISD::VECREDUCE_ADD: - case ISD::VECREDUCE_OR: - case ISD::VECREDUCE_XOR: - case ISD::VECREDUCE_UMAX: - NeutralElem = DAG.getConstant(0, dl, ElemVT); - break; - case ISD::VECREDUCE_MUL: - NeutralElem = DAG.getConstant(1, dl, ElemVT); - break; - case ISD::VECREDUCE_AND: - case ISD::VECREDUCE_UMIN: - NeutralElem = DAG.getAllOnesConstant(dl, ElemVT); - break; - case ISD::VECREDUCE_SMAX: - NeutralElem = DAG.getConstant( - APInt::getSignedMinValue(ElemVT.getSizeInBits()), dl, ElemVT); - break; - case ISD::VECREDUCE_SMIN: - NeutralElem = DAG.getConstant( - APInt::getSignedMaxValue(ElemVT.getSizeInBits()), dl, ElemVT); - break; - case ISD::VECREDUCE_FADD: - NeutralElem = DAG.getConstantFP(0.0, dl, ElemVT); - break; - case ISD::VECREDUCE_FMUL: - NeutralElem = DAG.getConstantFP(1.0, dl, ElemVT); - break; - case ISD::VECREDUCE_FMAX: - NeutralElem = DAG.getConstantFP( - -std::numeric_limits<double>::infinity(), dl, ElemVT); - break; - case ISD::VECREDUCE_FMIN: - NeutralElem = DAG.getConstantFP( - std::numeric_limits<double>::infinity(), dl, ElemVT); - break; - } + unsigned Opc = N->getOpcode(); + unsigned BaseOpc = 
ISD::getVecReduceBaseOpcode(Opc); + SDValue NeutralElem = DAG.getNeutralElement(BaseOpc, dl, ElemVT, Flags); + assert(NeutralElem && "Neutral element must exist"); + + // Pad the vector with the neutral element. + unsigned OrigElts = OrigVT.getVectorNumElements(); + unsigned WideElts = WideVT.getVectorNumElements(); + for (unsigned Idx = OrigElts; Idx < WideElts; Idx++) + Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem, + DAG.getVectorIdxConstant(Idx, dl)); + + return DAG.getNode(Opc, dl, N->getValueType(0), Op, Flags); +} + +SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) { + SDLoc dl(N); + SDValue AccOp = N->getOperand(0); + SDValue VecOp = N->getOperand(1); + SDValue Op = GetWidenedVector(VecOp); + + EVT OrigVT = VecOp.getValueType(); + EVT WideVT = Op.getValueType(); + EVT ElemVT = OrigVT.getVectorElementType(); + SDNodeFlags Flags = N->getFlags(); + + unsigned Opc = N->getOpcode(); + unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Opc); + SDValue NeutralElem = DAG.getNeutralElement(BaseOpc, dl, ElemVT, Flags); // Pad the vector with the neutral element. unsigned OrigElts = OrigVT.getVectorNumElements(); @@ -4763,7 +5033,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) { Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem, DAG.getVectorIdxConstant(Idx, dl)); - return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Op, N->getFlags()); + return DAG.getNode(Opc, dl, N->getValueType(0), AccOp, Op, Flags); } SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) { @@ -4806,7 +5076,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, // If we have one element to load/store, return it. EVT RetVT = WidenEltVT; - if (Width == WidenEltWidth) + if (!Scalable && Width == WidenEltWidth) return RetVT; // See if there is larger legal integer than the element type to load/store. 
@@ -4852,11 +5122,14 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, isPowerOf2_32(WidenWidth / MemVTWidth) && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { - if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT) + if (RetVT.getFixedSizeInBits() < MemVTWidth || MemVT == WidenVT) return MemVT; } } + if (Scalable) + report_fatal_error("Using element-wise loads and stores for widening " + "operations is not supported for scalable vectors"); return RetVT; } @@ -4899,10 +5172,10 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, // element type or scalar loads and then recombines it to the widen vector // type. EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); - unsigned WidenWidth = WidenVT.getSizeInBits(); EVT LdVT = LD->getMemoryVT(); SDLoc dl(LD); assert(LdVT.isVector() && WidenVT.isVector()); + assert(LdVT.isScalableVector() == WidenVT.isScalableVector()); assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); // Load information @@ -4911,22 +5184,25 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); - int LdWidth = LdVT.getSizeInBits(); - int WidthDiff = WidenWidth - LdWidth; - // Allow wider loads. + TypeSize LdWidth = LdVT.getSizeInBits(); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + TypeSize WidthDiff = WidenWidth - LdWidth; + // Allow wider loads if they are sufficiently aligned to avoid memory faults + // and if the original load is simple. unsigned LdAlign = (!LD->isSimple()) ? 0 : LD->getAlignment(); // Find the vector type that can load from. 
- EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); - int NewVTWidth = NewVT.getSizeInBits(); + EVT NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign, + WidthDiff.getKnownMinSize()); + TypeSize NewVTWidth = NewVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), LD->getOriginalAlign(), MMOFlags, AAInfo); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction. - if (LdWidth <= NewVTWidth) { + if (TypeSize::isKnownLE(LdWidth, NewVTWidth)) { if (!NewVT.isVector()) { - unsigned NumElts = WidenWidth / NewVTWidth; + unsigned NumElts = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize(); EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); @@ -4934,8 +5210,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, if (NewVT == WidenVT) return LdOp; - assert(WidenWidth % NewVTWidth == 0); - unsigned NumConcat = WidenWidth / NewVTWidth; + // TODO: We don't currently have any tests that exercise this code path. 
+ assert(WidenWidth.getFixedSize() % NewVTWidth.getFixedSize() == 0); + unsigned NumConcat = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize(); SmallVector<SDValue, 16> ConcatOps(NumConcat); SDValue UndefVal = DAG.getUNDEF(NewVT); ConcatOps[0] = LdOp; @@ -4948,48 +5225,30 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, SmallVector<SDValue, 16> LdOps; LdOps.push_back(LdOp); - LdWidth -= NewVTWidth; - unsigned Offset = 0; - - while (LdWidth > 0) { - unsigned Increment = NewVTWidth / 8; - Offset += Increment; - BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment); + uint64_t ScaledOffset = 0; + MachinePointerInfo MPI = LD->getPointerInfo(); + do { + LdWidth -= NewVTWidth; + IncrementPointer(cast<LoadSDNode>(LdOp), NewVT, MPI, BasePtr, + &ScaledOffset); - SDValue L; - if (LdWidth < NewVTWidth) { + if (TypeSize::isKnownLT(LdWidth, NewVTWidth)) { // The current type we are using is too large. Find a better size. - NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); + NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign, + WidthDiff.getKnownMinSize()); NewVTWidth = NewVT.getSizeInBits(); - L = DAG.getLoad(NewVT, dl, Chain, BasePtr, - LD->getPointerInfo().getWithOffset(Offset), - LD->getOriginalAlign(), MMOFlags, AAInfo); - LdChain.push_back(L.getValue(1)); - if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) { - // Later code assumes the vector loads produced will be mergeable, so we - // must pad the final entry up to the previous width. Scalars are - // combined separately. 
- SmallVector<SDValue, 16> Loads; - Loads.push_back(L); - unsigned size = L->getValueSizeInBits(0); - while (size < LdOp->getValueSizeInBits(0)) { - Loads.push_back(DAG.getUNDEF(L->getValueType(0))); - size += L->getValueSizeInBits(0); - } - L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), Loads); - } - } else { - L = DAG.getLoad(NewVT, dl, Chain, BasePtr, - LD->getPointerInfo().getWithOffset(Offset), - LD->getOriginalAlign(), MMOFlags, AAInfo); - LdChain.push_back(L.getValue(1)); } + Align NewAlign = ScaledOffset == 0 + ? LD->getOriginalAlign() + : commonAlignment(LD->getAlign(), ScaledOffset); + SDValue L = + DAG.getLoad(NewVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo); + LdChain.push_back(L.getValue(1)); + LdOps.push_back(L); LdOp = L; - - LdWidth -= NewVTWidth; - } + } while (TypeSize::isKnownGT(LdWidth, NewVTWidth)); // Build the vector from the load operations. unsigned End = LdOps.size(); @@ -5013,13 +5272,27 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, } ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i + 1, End); } + ConcatOps[--Idx] = LdOps[i]; for (--i; i >= 0; --i) { EVT NewLdTy = LdOps[i].getValueType(); if (NewLdTy != LdTy) { // Create a larger vector. 
+ TypeSize LdTySize = LdTy.getSizeInBits(); + TypeSize NewLdTySize = NewLdTy.getSizeInBits(); + assert(NewLdTySize.isScalable() == LdTySize.isScalable() && + NewLdTySize.isKnownMultipleOf(LdTySize.getKnownMinSize())); + unsigned NumOps = + NewLdTySize.getKnownMinSize() / LdTySize.getKnownMinSize(); + SmallVector<SDValue, 16> WidenOps(NumOps); + unsigned j = 0; + for (; j != End-Idx; ++j) + WidenOps[j] = ConcatOps[Idx+j]; + for (; j != NumOps; ++j) + WidenOps[j] = DAG.getUNDEF(LdTy); + ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy, - makeArrayRef(&ConcatOps[Idx], End - Idx)); + WidenOps); Idx = End - 1; LdTy = NewLdTy; } @@ -5031,7 +5304,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, makeArrayRef(&ConcatOps[Idx], End - Idx)); // We need to fill the rest with undefs to build the vector. - unsigned NumOps = WidenWidth / LdTy.getSizeInBits(); + unsigned NumOps = + WidenWidth.getKnownMinSize() / LdTy.getSizeInBits().getKnownMinSize(); SmallVector<SDValue, 16> WidenOps(NumOps); SDValue UndefVal = DAG.getUNDEF(LdTy); { @@ -5054,6 +5328,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, EVT LdVT = LD->getMemoryVT(); SDLoc dl(LD); assert(LdVT.isVector() && WidenVT.isVector()); + assert(LdVT.isScalableVector() == WidenVT.isScalableVector()); // Load information SDValue Chain = LD->getChain(); @@ -5061,6 +5336,10 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); + if (LdVT.isScalableVector()) + report_fatal_error("Generating widen scalable extending vector loads is " + "not yet supported"); + EVT EltVT = WidenVT.getVectorElementType(); EVT LdEltVT = LdVT.getVectorElementType(); unsigned NumElts = LdVT.getVectorNumElements(); @@ -5075,7 +5354,8 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, LdChain.push_back(Ops[0].getValue(1)); 
unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { - SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset); + SDValue NewBasePtr = + DAG.getObjectPtrOffset(dl, BasePtr, TypeSize::Fixed(Offset)); Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, LD->getPointerInfo().getWithOffset(Offset), LdEltVT, LD->getOriginalAlign(), MMOFlags, AAInfo); @@ -5103,99 +5383,66 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, SDLoc dl(ST); EVT StVT = ST->getMemoryVT(); - unsigned StWidth = StVT.getSizeInBits(); + TypeSize StWidth = StVT.getSizeInBits(); EVT ValVT = ValOp.getValueType(); - unsigned ValWidth = ValVT.getSizeInBits(); + TypeSize ValWidth = ValVT.getSizeInBits(); EVT ValEltVT = ValVT.getVectorElementType(); - unsigned ValEltWidth = ValEltVT.getSizeInBits(); + unsigned ValEltWidth = ValEltVT.getFixedSizeInBits(); assert(StVT.getVectorElementType() == ValEltVT); + assert(StVT.isScalableVector() == ValVT.isScalableVector() && + "Mismatch between store and value types"); int Idx = 0; // current index to store - unsigned Offset = 0; // offset from base to store - while (StWidth != 0) { + + MachinePointerInfo MPI = ST->getPointerInfo(); + uint64_t ScaledOffset = 0; + while (StWidth.isNonZero()) { // Find the largest vector type we can store with. - EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT); - unsigned NewVTWidth = NewVT.getSizeInBits(); - unsigned Increment = NewVTWidth / 8; + EVT NewVT = FindMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT); + TypeSize NewVTWidth = NewVT.getSizeInBits(); + if (NewVT.isVector()) { - unsigned NumVTElts = NewVT.getVectorNumElements(); + unsigned NumVTElts = NewVT.getVectorMinNumElements(); do { + Align NewAlign = ScaledOffset == 0 + ? 
ST->getOriginalAlign() + : commonAlignment(ST->getAlign(), ScaledOffset); SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, DAG.getVectorIdxConstant(Idx, dl)); - StChain.push_back(DAG.getStore( - Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), - ST->getOriginalAlign(), MMOFlags, AAInfo)); + SDValue PartStore = DAG.getStore(Chain, dl, EOp, BasePtr, MPI, NewAlign, + MMOFlags, AAInfo); + StChain.push_back(PartStore); + StWidth -= NewVTWidth; - Offset += Increment; Idx += NumVTElts; - BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment); - } while (StWidth != 0 && StWidth >= NewVTWidth); + IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr, + &ScaledOffset); + } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth)); } else { // Cast the vector to the scalar type we can store. - unsigned NumElts = ValWidth / NewVTWidth; + unsigned NumElts = ValWidth.getFixedSize() / NewVTWidth.getFixedSize(); EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp); // Readjust index position based on new vector type. 
- Idx = Idx * ValEltWidth / NewVTWidth; + Idx = Idx * ValEltWidth / NewVTWidth.getFixedSize(); do { SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, DAG.getVectorIdxConstant(Idx++, dl)); - StChain.push_back(DAG.getStore( - Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), - ST->getOriginalAlign(), MMOFlags, AAInfo)); + SDValue PartStore = + DAG.getStore(Chain, dl, EOp, BasePtr, MPI, ST->getOriginalAlign(), + MMOFlags, AAInfo); + StChain.push_back(PartStore); + StWidth -= NewVTWidth; - Offset += Increment; - BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment); - } while (StWidth != 0 && StWidth >= NewVTWidth); + IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr); + } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth)); // Restore index back to be relative to the original widen element type. - Idx = Idx * NewVTWidth / ValEltWidth; + Idx = Idx * NewVTWidth.getFixedSize() / ValEltWidth; } } } -void -DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, - StoreSDNode *ST) { - // For extension loads, it may not be more efficient to truncate the vector - // and then store it. Instead, we extract each element and then store it. - SDValue Chain = ST->getChain(); - SDValue BasePtr = ST->getBasePtr(); - MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); - AAMDNodes AAInfo = ST->getAAInfo(); - SDValue ValOp = GetWidenedVector(ST->getValue()); - SDLoc dl(ST); - - EVT StVT = ST->getMemoryVT(); - EVT ValVT = ValOp.getValueType(); - - // It must be true that the wide vector type is bigger than where we need to - // store. - assert(StVT.isVector() && ValOp.getValueType().isVector()); - assert(StVT.bitsLT(ValOp.getValueType())); - - // For truncating stores, we can not play the tricks of chopping legal vector - // types and bitcast it to the right type. Instead, we unroll the store. 
- EVT StEltVT = StVT.getVectorElementType(); - EVT ValEltVT = ValVT.getVectorElementType(); - unsigned Increment = ValEltVT.getSizeInBits() / 8; - unsigned NumElts = StVT.getVectorNumElements(); - SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getVectorIdxConstant(0, dl)); - StChain.push_back( - DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), StEltVT, - ST->getOriginalAlign(), MMOFlags, AAInfo)); - unsigned Offset = Increment; - for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { - SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset); - SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getVectorIdxConstant(0, dl)); - StChain.push_back(DAG.getTruncStore( - Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset), - StEltVT, ST->getOriginalAlign(), MMOFlags, AAInfo)); - } -} - /// Modifies a vector input (widen or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. /// FillWithZeroes specifies that the vector should be widened with zeroes. 
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 2902c96c7658..0022e5ec31f0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -760,7 +760,7 @@ void ScheduleDAGLinearize::Schedule() { MachineBasicBlock* ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { - InstrEmitter Emitter(BB, InsertPos); + InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos); DenseMap<SDValue, Register> VRBaseMap; LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; }); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 72e68a5045c6..7a5e8ac6075e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1838,13 +1838,16 @@ protected: template<class SF> static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) { - std::vector<SUnit *>::iterator Best = Q.begin(); - for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I) - if (Picker(*Best, *I)) - Best = I; - SUnit *V = *Best; - if (Best != std::prev(Q.end())) - std::swap(*Best, Q.back()); + unsigned BestIdx = 0; + // Only compute the cost for the first 1000 items in the queue, to avoid + // excessive compile-times for very large queues. 
+ for (unsigned I = 1, E = std::min(Q.size(), (decltype(Q.size()))1000); I != E; + I++) + if (Picker(Q[BestIdx], Q[I])) + BestIdx = I; + SUnit *V = Q[BestIdx]; + if (BestIdx + 1 != Q.size()) + std::swap(Q[BestIdx], Q.back()); Q.pop_back(); return V; } diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index ce20d506586f..debfdda90e1e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -125,8 +125,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, PhysReg = Reg; } else if (Def->isMachineOpcode()) { const MCInstrDesc &II = TII->get(Def->getMachineOpcode()); - if (ResNo >= II.getNumDefs() && - II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) + if (ResNo >= II.getNumDefs() && II.hasImplicitDefOfPhysReg(Reg)) PhysReg = Reg; } @@ -173,7 +172,7 @@ static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { // Don't add glue to something that already has a glue value. if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false; - SmallVector<EVT, 4> VTs(N->value_begin(), N->value_end()); + SmallVector<EVT, 4> VTs(N->values()); if (AddGlue) VTs.push_back(MVT::Glue); @@ -830,7 +829,7 @@ EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap, /// not necessarily refer to returned BB. The emitter may split blocks. 
MachineBasicBlock *ScheduleDAGSDNodes:: EmitSchedule(MachineBasicBlock::iterator &InsertPos) { - InstrEmitter Emitter(BB, InsertPos); + InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos); DenseMap<SDValue, Register> VRBaseMap; DenseMap<SUnit*, Register> CopyVRBaseMap; SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders; @@ -1034,7 +1033,29 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { } InsertPos = Emitter.getInsertPos(); - return Emitter.getBlock(); + // In some cases, DBG_VALUEs might be inserted after the first terminator, + // which results in an invalid MBB. If that happens, move the DBG_VALUEs + // before the first terminator. + MachineBasicBlock *InsertBB = Emitter.getBlock(); + auto FirstTerm = InsertBB->getFirstTerminator(); + if (FirstTerm != InsertBB->end()) { + assert(!FirstTerm->isDebugValue() && + "first terminator cannot be a debug value"); + for (MachineInstr &MI : make_early_inc_range( + make_range(std::next(FirstTerm), InsertBB->end()))) { + if (!MI.isDebugValue()) + continue; + + if (&MI == InsertPos) + InsertPos = std::prev(InsertPos->getIterator()); + + // The DBG_VALUE was referencing a value produced by a terminator. By + // moving the DBG_VALUE, the referenced value also needs invalidating. + MI.getOperand(0).ChangeToRegister(0, false); + MI.moveBefore(&*FirstTerm); + } + } + return InsertBB; } /// Return the basic block label. 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 592c09c10fb0..2090762e2ff4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -138,6 +139,15 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT, //===----------------------------------------------------------------------===// bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) { + if (N->getOpcode() == ISD::SPLAT_VECTOR) { + unsigned EltSize = + N->getValueType(0).getVectorElementType().getSizeInBits(); + if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0))) { + SplatVal = Op0->getAPIntValue().truncOrSelf(EltSize); + return true; + } + } + auto *BV = dyn_cast<BuildVectorSDNode>(N); if (!BV) return false; @@ -154,11 +164,16 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) { // FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be // specializations of the more general isConstantSplatVector()? -bool ISD::isBuildVectorAllOnes(const SDNode *N) { +bool ISD::isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly) { // Look through a bit convert. 
while (N->getOpcode() == ISD::BITCAST) N = N->getOperand(0).getNode(); + if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) { + APInt SplatVal; + return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnesValue(); + } + if (N->getOpcode() != ISD::BUILD_VECTOR) return false; unsigned i = 0, e = N->getNumOperands(); @@ -198,11 +213,16 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) { return true; } -bool ISD::isBuildVectorAllZeros(const SDNode *N) { +bool ISD::isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly) { // Look through a bit convert. while (N->getOpcode() == ISD::BITCAST) N = N->getOperand(0).getNode(); + if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) { + APInt SplatVal; + return isConstantSplatVector(N, SplatVal) && SplatVal.isNullValue(); + } + if (N->getOpcode() != ISD::BUILD_VECTOR) return false; bool IsAllUndef = true; @@ -235,6 +255,14 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { return true; } +bool ISD::isBuildVectorAllOnes(const SDNode *N) { + return isConstantSplatVectorAllOnes(N, /*BuildVectorOnly*/ true); +} + +bool ISD::isBuildVectorAllZeros(const SDNode *N) { + return isConstantSplatVectorAllZeros(N, /*BuildVectorOnly*/ true); +} + bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) { if (N->getOpcode() != ISD::BUILD_VECTOR) return false; @@ -278,7 +306,8 @@ bool ISD::matchUnaryPredicate(SDValue Op, return Match(Cst); // FIXME: Add support for vector UNDEF cases? 
- if (ISD::BUILD_VECTOR != Op.getOpcode()) + if (ISD::BUILD_VECTOR != Op.getOpcode() && + ISD::SPLAT_VECTOR != Op.getOpcode()) return false; EVT SVT = Op.getValueType().getScalarType(); @@ -332,6 +361,76 @@ bool ISD::matchBinaryPredicate( return true; } +ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) { + switch (VecReduceOpcode) { + default: + llvm_unreachable("Expected VECREDUCE opcode"); + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_SEQ_FADD: + return ISD::FADD; + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_SEQ_FMUL: + return ISD::FMUL; + case ISD::VECREDUCE_ADD: + return ISD::ADD; + case ISD::VECREDUCE_MUL: + return ISD::MUL; + case ISD::VECREDUCE_AND: + return ISD::AND; + case ISD::VECREDUCE_OR: + return ISD::OR; + case ISD::VECREDUCE_XOR: + return ISD::XOR; + case ISD::VECREDUCE_SMAX: + return ISD::SMAX; + case ISD::VECREDUCE_SMIN: + return ISD::SMIN; + case ISD::VECREDUCE_UMAX: + return ISD::UMAX; + case ISD::VECREDUCE_UMIN: + return ISD::UMIN; + case ISD::VECREDUCE_FMAX: + return ISD::FMAXNUM; + case ISD::VECREDUCE_FMIN: + return ISD::FMINNUM; + } +} + +bool ISD::isVPOpcode(unsigned Opcode) { + switch (Opcode) { + default: + return false; +#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) \ + case ISD::SDOPC: \ + return true; +#include "llvm/IR/VPIntrinsics.def" + } +} + +/// The operand position of the vector mask. +Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) { + switch (Opcode) { + default: + return None; +#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, ...) \ + case ISD::SDOPC: \ + return MASKPOS; +#include "llvm/IR/VPIntrinsics.def" + } +} + +/// The operand position of the explicit vector length parameter. 
+Optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) { + switch (Opcode) { + default: + return None; +#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, EVLPOS) \ + case ISD::SDOPC: \ + return EVLPOS; +#include "llvm/IR/VPIntrinsics.def" + } +} + ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) { switch (ExtType) { case ISD::EXTLOAD: @@ -536,6 +635,11 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(cast<LifetimeSDNode>(N)->getOffset()); } break; + case ISD::PSEUDO_PROBE: + ID.AddInteger(cast<PseudoProbeSDNode>(N)->getGuid()); + ID.AddInteger(cast<PseudoProbeSDNode>(N)->getIndex()); + ID.AddInteger(cast<PseudoProbeSDNode>(N)->getAttributes()); + break; case ISD::JumpTable: case ISD::TargetJumpTable: ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex()); @@ -1229,7 +1333,7 @@ SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isT, bool isO) { EVT EltVT = VT.getScalarType(); assert((EltVT.getSizeInBits() >= 64 || - (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && + (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && "getConstant with a uint64_t value that doesn't fit in the type!"); return getConstant(APInt(EltVT.getSizeInBits(), Val), DL, VT, isT, isO); } @@ -1251,10 +1355,10 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, // inserted value (the type does not need to match the vector element type). // Any extra bits introduced will be truncated away. 
if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) == - TargetLowering::TypePromoteInteger) { - EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); - APInt NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits()); - Elt = ConstantInt::get(*getContext(), NewVal); + TargetLowering::TypePromoteInteger) { + EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); + APInt NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits()); + Elt = ConstantInt::get(*getContext(), NewVal); } // In other cases the element type is illegal and needs to be expanded, for // example v2i64 on MIPS32. In this case, find the nearest legal type, split @@ -1264,7 +1368,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, // only legalize if the DAG tells us we must produce legal types. else if (NewNodesMustHaveLegalTypes && VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) == - TargetLowering::TypeExpandInteger) { + TargetLowering::TypeExpandInteger) { const APInt &NewVal = Elt->getValue(); EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits(); @@ -1278,9 +1382,9 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, SmallVector<SDValue, 2> EltParts; for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) { - EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits) - .zextOrTrunc(ViaEltSizeInBits), DL, - ViaEltVT, isT, isO)); + EltParts.push_back(getConstant( + NewVal.lshr(i * ViaEltSizeInBits).zextOrTrunc(ViaEltSizeInBits), DL, + ViaEltVT, isT, isO)); } // EltParts is currently in little endian order. 
If we actually want @@ -1297,9 +1401,10 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, SmallVector<SDValue, 8> Ops; for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) - Ops.insert(Ops.end(), EltParts.begin(), EltParts.end()); + llvm::append_range(Ops, EltParts); - SDValue V = getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops)); + SDValue V = + getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops)); return V; } @@ -1380,7 +1485,9 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL, } SDValue Result(N, 0); - if (VT.isVector()) + if (VT.isScalableVector()) + Result = getSplatVector(VT, DL, Result); + else if (VT.isVector()) Result = getSplatBuildVector(VT, DL, Result); NewSDValueDbgMsg(Result, "Creating fp constant: ", this); return Result; @@ -2023,7 +2130,14 @@ Align SelectionDAG::getReducedAlign(EVT VT, bool UseABI) { SDValue SelectionDAG::CreateStackTemporary(TypeSize Bytes, Align Alignment) { MachineFrameInfo &MFI = MF->getFrameInfo(); - int FrameIdx = MFI.CreateStackObject(Bytes, Alignment, false); + const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + int StackID = 0; + if (Bytes.isScalable()) + StackID = TFI->getStackIDForScalableVectors(); + // The stack id gives an indication of whether the object is scalable or + // not, so it's safe to pass in the minimum size here. 
+ int FrameIdx = MFI.CreateStackObject(Bytes.getKnownMinSize(), Alignment, + false, nullptr, StackID); return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); } @@ -2035,7 +2149,14 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { } SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { - TypeSize Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize()); + TypeSize VT1Size = VT1.getStoreSize(); + TypeSize VT2Size = VT2.getStoreSize(); + assert(VT1Size.isScalable() == VT2Size.isScalable() && + "Don't know how to choose the maximum size when creating a stack " + "temporary"); + TypeSize Bytes = + VT1Size.getKnownMinSize() > VT2Size.getKnownMinSize() ? VT1Size : VT2Size; + Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); const DataLayout &DL = getDataLayout(); @@ -2204,6 +2325,10 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, /// SimplifyMultipleUseDemandedBits and not generate any new nodes. SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) { EVT VT = V.getValueType(); + + if (VT.isScalableVector()) + return SDValue(); + APInt DemandedElts = VT.isVector() ? APInt::getAllOnesValue(VT.getVectorNumElements()) : APInt(1, 1); @@ -2221,7 +2346,6 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, default: return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts, *this, 0); - break; case ISD::Constant: { const APInt &CVal = cast<ConstantSDNode>(V)->getAPIntValue(); APInt NewVal = CVal & DemandedBits; @@ -2247,18 +2371,6 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, V.getOperand(1)); } break; - case ISD::AND: { - // X & -1 -> X (ignoring bits which aren't demanded). - // Also handle the case where masked out bits in X are known to be zero. 
- if (ConstantSDNode *RHSC = isConstOrConstSplat(V.getOperand(1))) { - const APInt &AndVal = RHSC->getAPIntValue(); - if (DemandedBits.isSubsetOf(AndVal) || - DemandedBits.isSubsetOf(computeKnownBits(V.getOperand(0)).Zero | - AndVal)) - return V.getOperand(0); - } - break; - } } return SDValue(); } @@ -2298,17 +2410,23 @@ bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask, /// sense to specify which elements are demanded or undefined, therefore /// they are simply ignored. bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, - APInt &UndefElts) { + APInt &UndefElts, unsigned Depth) { EVT VT = V.getValueType(); assert(VT.isVector() && "Vector type expected"); if (!VT.isScalableVector() && !DemandedElts) return false; // No demanded elts, better to assume we don't know anything. + if (Depth >= MaxRecursionDepth) + return false; // Limit search depth. + // Deal with some common cases here that work for both fixed and scalable // vector types. switch (V.getOpcode()) { case ISD::SPLAT_VECTOR: + UndefElts = V.getOperand(0).isUndef() + ? 
APInt::getAllOnesValue(DemandedElts.getBitWidth()) + : APInt(DemandedElts.getBitWidth(), 0); return true; case ISD::ADD: case ISD::SUB: @@ -2316,13 +2434,17 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, APInt UndefLHS, UndefRHS; SDValue LHS = V.getOperand(0); SDValue RHS = V.getOperand(1); - if (isSplatValue(LHS, DemandedElts, UndefLHS) && - isSplatValue(RHS, DemandedElts, UndefRHS)) { + if (isSplatValue(LHS, DemandedElts, UndefLHS, Depth + 1) && + isSplatValue(RHS, DemandedElts, UndefRHS, Depth + 1)) { UndefElts = UndefLHS | UndefRHS; return true; } break; } + case ISD::TRUNCATE: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + return isSplatValue(V.getOperand(0), DemandedElts, UndefElts, Depth + 1); } // We don't support other cases than those above for scalable vectors at @@ -2377,7 +2499,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); APInt UndefSrcElts; APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts)) { + if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts, Depth + 1)) { UndefElts = UndefSrcElts.extractBits(NumElts, Idx); return true; } @@ -2574,15 +2696,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, if (auto *C = dyn_cast<ConstantSDNode>(Op)) { // We know all of the bits for a constant! - Known.One = C->getAPIntValue(); - Known.Zero = ~Known.One; - return Known; + return KnownBits::makeConstant(C->getAPIntValue()); } if (auto *C = dyn_cast<ConstantFPSDNode>(Op)) { // We know all of the bits for a constant fp! 
- Known.One = C->getValueAPF().bitcastToAPInt(); - Known.Zero = ~Known.One; - return Known; + return KnownBits::makeConstant(C->getValueAPF().bitcastToAPInt()); } if (Depth >= MaxRecursionDepth) @@ -2617,8 +2735,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } // Known bits are the values that are shared by every demanded element. - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; + Known = KnownBits::commonBits(Known, Known2); // If we don't know any bits, early out. if (Known.isUnknown()) @@ -2655,8 +2772,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, if (!!DemandedLHS) { SDValue LHS = Op.getOperand(0); Known2 = computeKnownBits(LHS, DemandedLHS, Depth + 1); - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; + Known = KnownBits::commonBits(Known, Known2); } // If we don't know any bits, early out. if (Known.isUnknown()) @@ -2664,8 +2780,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, if (!!DemandedRHS) { SDValue RHS = Op.getOperand(1); Known2 = computeKnownBits(RHS, DemandedRHS, Depth + 1); - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; + Known = KnownBits::commonBits(Known, Known2); } break; } @@ -2681,8 +2796,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, if (!!DemandedSub) { SDValue Sub = Op.getOperand(i); Known2 = computeKnownBits(Sub, DemandedSub, Depth + 1); - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; + Known = KnownBits::commonBits(Known, Known2); } // If we don't know any bits, early out. 
if (Known.isUnknown()) @@ -2710,8 +2824,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } if (!!DemandedSrcElts) { Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1); - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; + Known = KnownBits::commonBits(Known, Known2); } break; } @@ -2830,35 +2943,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, case ISD::MUL: { Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - - // If low bits are zero in either operand, output low known-0 bits. - // Also compute a conservative estimate for high known-0 bits. - // More trickiness is possible, but this is sufficient for the - // interesting case of alignment computation. - unsigned TrailZ = Known.countMinTrailingZeros() + - Known2.countMinTrailingZeros(); - unsigned LeadZ = std::max(Known.countMinLeadingZeros() + - Known2.countMinLeadingZeros(), - BitWidth) - BitWidth; - - Known.resetAll(); - Known.Zero.setLowBits(std::min(TrailZ, BitWidth)); - Known.Zero.setHighBits(std::min(LeadZ, BitWidth)); + Known = KnownBits::computeForMul(Known, Known2); break; } case ISD::UDIV: { - // For the purposes of computing leading zeros we can conservatively - // treat a udiv as a logical right shift by the power of 2 known to - // be less than the denominator. 
- Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - unsigned LeadZ = Known2.countMinLeadingZeros(); - + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros(); - if (RHSMaxLeadingZeros != BitWidth) - LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1); - - Known.Zero.setHighBits(LeadZ); + Known = KnownBits::udiv(Known, Known2); break; } case ISD::SELECT: @@ -2870,8 +2961,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth+1); // Only known if known in both the LHS and RHS. - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; + Known = KnownBits::commonBits(Known, Known2); break; case ISD::SELECT_CC: Known = computeKnownBits(Op.getOperand(3), DemandedElts, Depth+1); @@ -2881,8 +2971,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1); // Only known if known in both the LHS and RHS. - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; + Known = KnownBits::commonBits(Known, Known2); break; case ISD::SMULO: case ISD::UMULO: @@ -2911,19 +3000,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } case ISD::SHL: Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - - if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) { - unsigned Shift = ShAmt->getZExtValue(); - Known.Zero <<= Shift; - Known.One <<= Shift; - // Low bits are known zero. - Known.Zero.setLowBits(Shift); - break; - } - - // No matter the shift amount, the trailing zeros will stay zero. 
- Known.Zero = APInt::getLowBitsSet(BitWidth, Known.countMinTrailingZeros()); - Known.One.clearAllBits(); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = KnownBits::shl(Known, Known2); // Minimum shift low bits are known zero. if (const APInt *ShMinAmt = @@ -2932,19 +3010,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; case ISD::SRL: Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - - if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) { - unsigned Shift = ShAmt->getZExtValue(); - Known.Zero.lshrInPlace(Shift); - Known.One.lshrInPlace(Shift); - // High bits are known zero. - Known.Zero.setHighBits(Shift); - break; - } - - // No matter the shift amount, the leading zeros will stay zero. - Known.Zero = APInt::getHighBitsSet(BitWidth, Known.countMinLeadingZeros()); - Known.One.clearAllBits(); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = KnownBits::lshr(Known, Known2); // Minimum shift high bits are known zero. if (const APInt *ShMinAmt = @@ -2952,13 +3019,10 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.Zero.setHighBits(ShMinAmt->getZExtValue()); break; case ISD::SRA: - if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) { - Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - unsigned Shift = ShAmt->getZExtValue(); - // Sign extend known zero/one bit (else is unknown). - Known.Zero.ashrInPlace(Shift); - Known.One.ashrInPlace(Shift); - } + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = KnownBits::ashr(Known, Known2); + // TODO: Add minimum shift high known sign bits. 
break; case ISD::FSHL: case ISD::FSHR: @@ -2993,38 +3057,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } break; case ISD::SIGN_EXTEND_INREG: { - EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); - unsigned EBits = EVT.getScalarSizeInBits(); - - // Sign extension. Compute the demanded bits in the result that are not - // present in the input. - APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits); - - APInt InSignMask = APInt::getSignMask(EBits); - APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits); - - // If the sign extended bits are demanded, we know that the sign - // bit is demanded. - InSignMask = InSignMask.zext(BitWidth); - if (NewBits.getBoolValue()) - InputDemandedBits |= InSignMask; - Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - Known.One &= InputDemandedBits; - Known.Zero &= InputDemandedBits; - - // If the sign bit of the input is known set or clear, then we know the - // top bits of the result. - if (Known.Zero.intersects(InSignMask)) { // Input sign bit known clear - Known.Zero |= NewBits; - Known.One &= ~NewBits; - } else if (Known.One.intersects(InSignMask)) { // Input sign bit known set - Known.One |= NewBits; - Known.Zero &= ~NewBits; - } else { // Input sign bit unknown - Known.Zero &= ~NewBits; - Known.One &= ~NewBits; - } + EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + Known = Known.sextInReg(EVT.getScalarSizeInBits()); break; } case ISD::CTTZ: @@ -3052,6 +3087,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1); break; } + case ISD::PARITY: { + // Parity returns 0 everywhere but the LSB. 
+ Known.Zero.setBitsFrom(1); + break; + } case ISD::LOAD: { LoadSDNode *LD = cast<LoadSDNode>(Op); const Constant *Cst = TLI->getTargetConstantFromLoad(LD); @@ -3095,13 +3135,10 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } } else if (BitWidth == CstTy->getPrimitiveSizeInBits()) { if (auto *CInt = dyn_cast<ConstantInt>(Cst)) { - const APInt &Value = CInt->getValue(); - Known.One = Value; - Known.Zero = ~Value; + Known = KnownBits::makeConstant(CInt->getValue()); } else if (auto *CFP = dyn_cast<ConstantFP>(Cst)) { - APInt Value = CFP->getValueAPF().bitcastToAPInt(); - Known.One = Value; - Known.Zero = ~Value; + Known = + KnownBits::makeConstant(CFP->getValueAPF().bitcastToAPInt()); } } } @@ -3241,53 +3278,16 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = KnownBits::computeForAddCarry(Known, Known2, Carry); break; } - case ISD::SREM: - if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) { - const APInt &RA = Rem->getAPIntValue().abs(); - if (RA.isPowerOf2()) { - APInt LowBits = RA - 1; - Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - - // The low bits of the first operand are unchanged by the srem. - Known.Zero = Known2.Zero & LowBits; - Known.One = Known2.One & LowBits; - - // If the first operand is non-negative or has all low bits zero, then - // the upper bits are all zero. - if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero)) - Known.Zero |= ~LowBits; - - // If the first operand is negative and not all low bits are zero, then - // the upper bits are all one. 
- if (Known2.isNegative() && LowBits.intersects(Known2.One)) - Known.One |= ~LowBits; - assert((Known.Zero & Known.One) == 0&&"Bits known to be one AND zero?"); - } - } + case ISD::SREM: { + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = KnownBits::srem(Known, Known2); break; + } case ISD::UREM: { - if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) { - const APInt &RA = Rem->getAPIntValue(); - if (RA.isPowerOf2()) { - APInt LowBits = (RA - 1); - Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - - // The upper bits are all zero, the lower ones are unchanged. - Known.Zero = Known2.Zero | ~LowBits; - Known.One = Known2.One & LowBits; - break; - } - } - - // Since the result is less than or equal to either operand, any leading - // zero bits in either operand must also exist in the result. Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - - uint32_t Leaders = - std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros()); - Known.resetAll(); - Known.Zero.setHighBits(Leaders); + Known = KnownBits::urem(Known, Known2); break; } case ISD::EXTRACT_ELEMENT: { @@ -3307,6 +3307,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, SDValue InVec = Op.getOperand(0); SDValue EltNo = Op.getOperand(1); EVT VecVT = InVec.getValueType(); + // computeKnownBits not yet implemented for scalable vectors. 
+ if (VecVT.isScalableVector()) + break; const unsigned EltBitWidth = VecVT.getScalarSizeInBits(); const unsigned NumSrcElts = VecVT.getVectorNumElements(); @@ -3347,73 +3350,39 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.Zero.setAllBits(); if (DemandedVal) { Known2 = computeKnownBits(InVal, Depth + 1); - Known.One &= Known2.One.zextOrTrunc(BitWidth); - Known.Zero &= Known2.Zero.zextOrTrunc(BitWidth); + Known = KnownBits::commonBits(Known, Known2.zextOrTrunc(BitWidth)); } if (!!DemandedVecElts) { Known2 = computeKnownBits(InVec, DemandedVecElts, Depth + 1); - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; + Known = KnownBits::commonBits(Known, Known2); } break; } case ISD::BITREVERSE: { Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - Known.Zero = Known2.Zero.reverseBits(); - Known.One = Known2.One.reverseBits(); + Known = Known2.reverseBits(); break; } case ISD::BSWAP: { Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - Known.Zero = Known2.Zero.byteSwap(); - Known.One = Known2.One.byteSwap(); + Known = Known2.byteSwap(); break; } case ISD::ABS: { Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - - // If the source's MSB is zero then we know the rest of the bits already. - if (Known2.isNonNegative()) { - Known.Zero = Known2.Zero; - Known.One = Known2.One; - break; - } - - // We only know that the absolute values's MSB will be zero iff there is - // a set bit that isn't the sign bit (otherwise it could be INT_MIN). - Known2.One.clearSignBit(); - if (Known2.One.getBoolValue()) { - Known.Zero = APInt::getSignMask(BitWidth); - break; - } + Known = Known2.abs(); break; } case ISD::UMIN: { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - - // UMIN - we know that the result will have the maximum of the - // known zero leading bits of the inputs. 
- unsigned LeadZero = Known.countMinLeadingZeros(); - LeadZero = std::max(LeadZero, Known2.countMinLeadingZeros()); - - Known.Zero &= Known2.Zero; - Known.One &= Known2.One; - Known.Zero.setHighBits(LeadZero); + Known = KnownBits::umin(Known, Known2); break; } case ISD::UMAX: { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - - // UMAX - we know that the result will have the maximum of the - // known one leading bits of the inputs. - unsigned LeadOne = Known.countMinLeadingOnes(); - LeadOne = std::max(LeadOne, Known2.countMinLeadingOnes()); - - Known.Zero &= Known2.Zero; - Known.One &= Known2.One; - Known.One.setHighBits(LeadOne); + Known = KnownBits::umax(Known, Known2); break; } case ISD::SMIN: @@ -3447,12 +3416,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } } - // Fallback - just get the shared known bits of the operands. Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - if (Known.isUnknown()) break; // Early-out Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - Known.Zero &= Known2.Zero; - Known.One &= Known2.One; + if (IsMax) + Known = KnownBits::smax(Known, Known2); + else + Known = KnownBits::smin(Known, Known2); break; } case ISD::FrameIndex: @@ -4395,11 +4364,16 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, for (SDValue Op : Elts) SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); - if (SVT.bitsGT(VT.getScalarType())) - for (SDValue &Op : Elts) - Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT) - ? DAG.getZExtOrTrunc(Op, DL, SVT) - : DAG.getSExtOrTrunc(Op, DL, SVT); + if (SVT.bitsGT(VT.getScalarType())) { + for (SDValue &Op : Elts) { + if (Op.isUndef()) + Op = DAG.getUNDEF(SVT); + else + Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT) + ? 
DAG.getZExtOrTrunc(Op, DL, SVT) + : DAG.getSExtOrTrunc(Op, DL, SVT); + } + } SDValue V = DAG.getBuildVector(VT, DL, Elts); NewSDValueDbgMsg(V, "New node fold concat vectors: ", &DAG); @@ -4425,6 +4399,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, + SDValue Operand) { + SDNodeFlags Flags; + if (Inserter) + Flags = Inserter->getFlags(); + return getNode(Opcode, DL, VT, Operand, Flags); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand, const SDNodeFlags Flags) { // Constant fold unary operations with an integer constant operand. Even // opaque constant will be folded, because the folding of unary operations @@ -4625,8 +4607,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, Operand.getValueType().isFloatingPoint() && "Invalid FP cast!"); if (Operand.getValueType() == VT) return Operand; // noop conversion. assert((!VT.isVector() || - VT.getVectorNumElements() == - Operand.getValueType().getVectorNumElements()) && + VT.getVectorElementCount() == + Operand.getValueType().getVectorElementCount()) && "Vector element count mismatch!"); assert(Operand.getValueType().bitsLT(VT) && "Invalid fpext node, dst < src!"); @@ -4811,6 +4793,25 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::VSCALE: assert(VT == Operand.getValueType() && "Unexpected VT!"); break; + case ISD::CTPOP: + if (Operand.getValueType().getScalarType() == MVT::i1) + return Operand; + break; + case ISD::CTLZ: + case ISD::CTTZ: + if (Operand.getValueType().getScalarType() == MVT::i1) + return getNOT(DL, Operand, Operand.getValueType()); + break; + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + if (Operand.getValueType().getScalarType() == MVT::i1) + return getNode(ISD::VECREDUCE_OR, DL, VT, Operand); + break; + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_UMIN: + if 
(Operand.getValueType().getScalarType() == MVT::i1) + return getNode(ISD::VECREDUCE_AND, DL, VT, Operand); + break; } SDNode *N; @@ -5233,6 +5234,14 @@ SDValue SelectionDAG::getAssertAlign(const SDLoc &DL, SDValue Val, Align A) { } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, + SDValue N1, SDValue N2) { + SDNodeFlags Flags; + if (Inserter) + Flags = Inserter->getFlags(); + return getNode(Opcode, DL, VT, N1, N2, Flags); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, SDValue N2, const SDNodeFlags Flags) { ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); @@ -5312,10 +5321,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::MULHS: case ISD::SDIV: case ISD::SREM: - case ISD::SMIN: - case ISD::SMAX: - case ISD::UMIN: - case ISD::UMAX: case ISD::SADDSAT: case ISD::SSUBSAT: case ISD::UADDSAT: @@ -5324,6 +5329,22 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); break; + case ISD::SMIN: + case ISD::UMAX: + assert(VT.isInteger() && "This operator does not apply to FP types!"); + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + if (VT.isVector() && VT.getVectorElementType() == MVT::i1) + return getNode(ISD::OR, DL, VT, N1, N2); + break; + case ISD::SMAX: + case ISD::UMIN: + assert(VT.isInteger() && "This operator does not apply to FP types!"); + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + if (VT.isVector() && VT.getVectorElementType() == MVT::i1) + return getNode(ISD::AND, DL, VT, N1, N2); + break; case ISD::FADD: case ISD::FSUB: case ISD::FMUL: @@ -5365,8 +5386,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // 
amounts. This catches things like trying to shift an i1024 value by an // i8, which is easy to fall into in generic code that uses // TLI.getShiftAmount(). - assert(N2.getValueType().getScalarSizeInBits().getFixedSize() >= - Log2_32_Ceil(VT.getScalarSizeInBits().getFixedSize()) && + assert(N2.getValueType().getScalarSizeInBits() >= + Log2_32_Ceil(VT.getScalarSizeInBits()) && "Invalid use of small shift amount with oversized value!"); // Always fold shifts of i1 values so the code generator doesn't need to @@ -5562,6 +5583,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, (VT.getVectorMinNumElements() + N2C->getZExtValue()) <= N1VT.getVectorMinNumElements()) && "Extract subvector overflow!"); + assert(N2C->getAPIntValue().getBitWidth() == + TLI->getVectorIdxTy(getDataLayout()) + .getSizeInBits() + .getFixedSize() && + "Constant index for EXTRACT_SUBVECTOR has an invalid size"); // Trivial extraction. if (VT == N1VT) @@ -5573,8 +5599,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of // the concat have the same type as the extract. 
- if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS && - N1.getNumOperands() > 0 && VT == N1.getOperand(0).getValueType()) { + if (N1.getOpcode() == ISD::CONCAT_VECTORS && N1.getNumOperands() > 0 && + VT == N1.getOperand(0).getValueType()) { unsigned Factor = VT.getVectorMinNumElements(); return N1.getOperand(N2C->getZExtValue() / Factor); } @@ -5671,6 +5697,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, + SDValue N1, SDValue N2, SDValue N3) { + SDNodeFlags Flags; + if (Inserter) + Flags = Inserter->getFlags(); + return getNode(Opcode, DL, VT, N1, N2, N3, Flags); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, const SDNodeFlags Flags) { // Perform various simplifications. @@ -5940,11 +5974,20 @@ static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG, return SDValue(nullptr, 0); } -SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, int64_t Offset, +SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags) { EVT VT = Base.getValueType(); - return getMemBasePlusOffset(Base, getConstant(Offset, DL, VT), DL, Flags); + SDValue Index; + + if (Offset.isScalable()) + Index = getVScale(DL, Base.getValueType(), + APInt(Base.getValueSizeInBits().getFixedSize(), + Offset.getKnownMinSize())); + else + Index = getConstant(Offset.getFixedSize(), DL, VT); + + return getMemBasePlusOffset(Base, Index, DL, Flags); } SDValue SelectionDAG::getMemBasePlusOffset(SDValue Ptr, SDValue Offset, @@ -6039,7 +6082,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SrcAlign = Alignment; assert(SrcAlign && "SrcAlign must be set"); ConstantDataArraySlice Slice; - bool CopyFromConstant = isMemSrcFromConstant(Src, Slice); + // If marked as volatile, perform a copy even when marked as constant. 
+ bool CopyFromConstant = !isVol && isMemSrcFromConstant(Src, Slice); bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); const MemOp Op = isZeroConstant @@ -6111,8 +6155,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice); if (Value.getNode()) { Store = DAG.getStore( - Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), Alignment.value(), MMOFlags); + Chain, dl, Value, + DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl), + DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags); OutChains.push_back(Store); } } @@ -6132,16 +6177,17 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (isDereferenceable) SrcMMOFlags |= MachineMemOperand::MODereferenceable; - Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, - DAG.getMemBasePlusOffset(Src, SrcOff, dl), - SrcPtrInfo.getWithOffset(SrcOff), VT, - commonAlignment(*SrcAlign, SrcOff).value(), - SrcMMOFlags); + Value = DAG.getExtLoad( + ISD::EXTLOAD, dl, NVT, Chain, + DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl), + SrcPtrInfo.getWithOffset(SrcOff), VT, + commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags); OutLoadChains.push_back(Value.getValue(1)); Store = DAG.getTruncStore( - Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), VT, Alignment.value(), MMOFlags); + Chain, dl, Value, + DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl), + DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags); OutStoreChains.push_back(Store); } SrcOff += VTSize; @@ -6261,9 +6307,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (isDereferenceable) SrcMMOFlags |= MachineMemOperand::MODereferenceable; - Value = DAG.getLoad( - VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), - 
SrcPtrInfo.getWithOffset(SrcOff), SrcAlign->value(), SrcMMOFlags); + Value = + DAG.getLoad(VT, dl, Chain, + DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl), + SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -6275,9 +6322,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, unsigned VTSize = VT.getSizeInBits() / 8; SDValue Store; - Store = DAG.getStore( - Chain, dl, LoadValues[i], DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), Alignment.value(), MMOFlags); + Store = + DAG.getStore(Chain, dl, LoadValues[i], + DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl), + DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags); OutChains.push_back(Store); DstOff += VTSize; } @@ -6375,8 +6423,9 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, } assert(Value.getValueType() == VT && "Value with wrong type."); SDValue Store = DAG.getStore( - Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), Alignment.value(), + Chain, dl, Value, + DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl), + DstPtrInfo.getWithOffset(DstOff), Alignment, isVol ? 
MachineMemOperand::MOVolatile : MachineMemOperand::MONone); OutChains.push_back(Store); DstOff += VT.getSizeInBits() / 8; @@ -6390,7 +6439,7 @@ static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI, unsigned AS) { // Lowering memcpy / memset / memmove intrinsics to calls is only valid if all // pointer operands can be losslessly bitcasted to pointers of address space 0 - if (AS != 0 && !TLI->isNoopAddrSpaceCast(AS, 0)) { + if (AS != 0 && !TLI->getTargetMachine().isNoopAddrSpaceCast(AS, 0)) { report_fatal_error("cannot lower memory intrinsic in address space " + Twine(AS)); } @@ -6882,6 +6931,30 @@ SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl, return V; } +SDValue SelectionDAG::getPseudoProbeNode(const SDLoc &Dl, SDValue Chain, + uint64_t Guid, uint64_t Index, + uint32_t Attr) { + const unsigned Opcode = ISD::PSEUDO_PROBE; + const auto VTs = getVTList(MVT::Other); + SDValue Ops[] = {Chain}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops); + ID.AddInteger(Guid); + ID.AddInteger(Index); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, Dl, IP)) + return SDValue(E, 0); + + auto *N = newSDNode<PseudoProbeSDNode>( + Opcode, Dl.getIROrder(), Dl.getDebugLoc(), VTs, Guid, Index, Attr); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + /// InferPointerInfo - If the specified ptr/offset is a frame index, infer a /// MachinePointerInfo record from it. 
This is particularly useful because the /// code generator has many cases where it doesn't bother passing in a @@ -6962,7 +7035,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, assert(VT.isVector() == MemVT.isVector() && "Cannot use an ext load to convert to or from a vector!"); assert((!VT.isVector() || - VT.getVectorNumElements() == MemVT.getVectorNumElements()) && + VT.getVectorElementCount() == MemVT.getVectorElementCount()) && "Cannot use an ext load to change the number of vector elements!"); } @@ -7041,8 +7114,7 @@ SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, LD->getChain(), Base, Offset, LD->getPointerInfo(), - LD->getMemoryVT(), LD->getAlignment(), MMOFlags, - LD->getAAInfo()); + LD->getMemoryVT(), LD->getAlign(), MMOFlags, LD->getAAInfo()); } SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, @@ -7112,7 +7184,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, MMOFlags, SVT.getStoreSize(), Alignment, AAInfo); + PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()), + Alignment, AAInfo); return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); } @@ -7133,7 +7206,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, assert(VT.isVector() == SVT.isVector() && "Cannot use trunc store to convert to or from a vector!"); assert((!VT.isVector() || - VT.getVectorNumElements() == SVT.getVectorNumElements()) && + VT.getVectorElementCount() == SVT.getVectorElementCount()) && "Cannot use trunc store to change the number of vector elements!"); SDVTList VTs = getVTList(MVT::Other); @@ -7285,14 +7358,15 @@ SDValue SelectionDAG::getIndexedMaskedStore(SDValue 
OrigStore, const SDLoc &dl, SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO, - ISD::MemIndexType IndexType) { + ISD::MemIndexType IndexType, + ISD::LoadExtType ExtTy) { assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); ID.AddInteger(VT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>( - dl.getIROrder(), VTs, VT, MMO, IndexType)); + dl.getIROrder(), VTs, VT, MMO, IndexType, ExtTy)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -7300,17 +7374,22 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, return SDValue(E, 0); } + IndexType = TLI->getCanonicalIndexType(IndexType, VT, Ops[4]); auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), - VTs, VT, MMO, IndexType); + VTs, VT, MMO, IndexType, ExtTy); createOperands(N, Ops); assert(N->getPassThru().getValueType() == N->getValueType(0) && "Incompatible type of the PassThru value in MaskedGatherSDNode"); - assert(N->getMask().getValueType().getVectorNumElements() == - N->getValueType(0).getVectorNumElements() && + assert(N->getMask().getValueType().getVectorElementCount() == + N->getValueType(0).getVectorElementCount() && "Vector width mismatch between mask and data"); - assert(N->getIndex().getValueType().getVectorNumElements() >= - N->getValueType(0).getVectorNumElements() && + assert(N->getIndex().getValueType().getVectorElementCount().isScalable() == + N->getValueType(0).getVectorElementCount().isScalable() && + "Scalable flags of index and data do not match"); + assert(ElementCount::isKnownGE( + N->getIndex().getValueType().getVectorElementCount(), + N->getValueType(0).getVectorElementCount()) && "Vector width mismatch between index and data"); assert(isa<ConstantSDNode>(N->getScale()) && 
cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && @@ -7326,29 +7405,37 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO, - ISD::MemIndexType IndexType) { + ISD::MemIndexType IndexType, + bool IsTrunc) { assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); ID.AddInteger(VT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>( - dl.getIROrder(), VTs, VT, MMO, IndexType)); + dl.getIROrder(), VTs, VT, MMO, IndexType, IsTrunc)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<MaskedScatterSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } + + IndexType = TLI->getCanonicalIndexType(IndexType, VT, Ops[4]); auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), - VTs, VT, MMO, IndexType); + VTs, VT, MMO, IndexType, IsTrunc); createOperands(N, Ops); - assert(N->getMask().getValueType().getVectorNumElements() == - N->getValue().getValueType().getVectorNumElements() && + assert(N->getMask().getValueType().getVectorElementCount() == + N->getValue().getValueType().getVectorElementCount() && "Vector width mismatch between mask and data"); - assert(N->getIndex().getValueType().getVectorNumElements() >= - N->getValue().getValueType().getVectorNumElements() && + assert( + N->getIndex().getValueType().getVectorElementCount().isScalable() == + N->getValue().getValueType().getVectorElementCount().isScalable() && + "Scalable flags of index and data do not match"); + assert(ElementCount::isKnownGE( + N->getIndex().getValueType().getVectorElementCount(), + N->getValue().getValueType().getVectorElementCount()) && "Vector width mismatch between index and data"); assert(isa<ConstantSDNode>(N->getScale()) && 
cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && @@ -7452,6 +7539,11 @@ SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, if (YC->getValueAPF().isExactlyValue(1.0)) return X; + // X * 0.0 --> 0.0 + if (Opcode == ISD::FMUL && Flags.hasNoNaNs() && Flags.hasNoSignedZeros()) + if (YC->getValueAPF().isZero()) + return getConstantFP(0.0, SDLoc(Y), Y.getValueType()); + return SDValue(); } @@ -7478,6 +7570,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, + ArrayRef<SDValue> Ops) { + SDNodeFlags Flags; + if (Inserter) + Flags = Inserter->getFlags(); + return getNode(Opcode, DL, VT, Ops, Flags); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops, const SDNodeFlags Flags) { unsigned NumOps = Ops.size(); switch (NumOps) { @@ -7549,6 +7649,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, + ArrayRef<SDValue> Ops) { + SDNodeFlags Flags; + if (Inserter) + Flags = Inserter->getFlags(); + return getNode(Opcode, DL, VTList, Ops, Flags); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, ArrayRef<SDValue> Ops, const SDNodeFlags Flags) { if (VTList.NumVTs == 1) return getNode(Opcode, DL, VTList.VTs[0], Ops); @@ -8245,6 +8353,14 @@ SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, /// getNodeIfExists - Get the specified node if it's already available, or /// else return NULL. 
SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, + ArrayRef<SDValue> Ops) { + SDNodeFlags Flags; + if (Inserter) + Flags = Inserter->getFlags(); + return getNodeIfExists(Opcode, VTList, Ops, Flags); +} + +SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef<SDValue> Ops, const SDNodeFlags Flags) { if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { @@ -8259,6 +8375,19 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, return nullptr; } +/// doesNodeExist - Check if a node exists without modifying its flags. +bool SelectionDAG::doesNodeExist(unsigned Opcode, SDVTList VTList, + ArrayRef<SDValue> Ops) { + if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops); + void *IP = nullptr; + if (FindNodeOrInsertPos(ID, SDLoc(), IP)) + return true; + } + return false; +} + /// getDbgValue - Creates a SDDbgValue node. /// /// SDNode @@ -8676,21 +8805,31 @@ namespace { } // end anonymous namespace -void SelectionDAG::updateDivergence(SDNode * N) -{ - if (TLI->isSDNodeAlwaysUniform(N)) - return; - bool IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA); +bool SelectionDAG::calculateDivergence(SDNode *N) { + if (TLI->isSDNodeAlwaysUniform(N)) { + assert(!TLI->isSDNodeSourceOfDivergence(N, FLI, DA) && + "Conflicting divergence information!"); + return false; + } + if (TLI->isSDNodeSourceOfDivergence(N, FLI, DA)) + return true; for (auto &Op : N->ops()) { - if (Op.Val.getValueType() != MVT::Other) - IsDivergent |= Op.getNode()->isDivergent(); + if (Op.Val.getValueType() != MVT::Other && Op.getNode()->isDivergent()) + return true; } - if (N->SDNodeBits.IsDivergent != IsDivergent) { - N->SDNodeBits.IsDivergent = IsDivergent; - for (auto U : N->uses()) { - updateDivergence(U); + return false; +} + +void SelectionDAG::updateDivergence(SDNode *N) { + SmallVector<SDNode *, 16> Worklist(1, N); + do { + N = Worklist.pop_back_val(); + bool IsDivergent = 
calculateDivergence(N); + if (N->SDNodeBits.IsDivergent != IsDivergent) { + N->SDNodeBits.IsDivergent = IsDivergent; + llvm::append_range(Worklist, N->uses()); } - } + } while (!Worklist.empty()); } void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) { @@ -8716,26 +8855,9 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) { void SelectionDAG::VerifyDAGDiverence() { std::vector<SDNode *> TopoOrder; CreateTopologicalOrder(TopoOrder); - const TargetLowering &TLI = getTargetLoweringInfo(); - DenseMap<const SDNode *, bool> DivergenceMap; - for (auto &N : allnodes()) { - DivergenceMap[&N] = false; - } - for (auto N : TopoOrder) { - bool IsDivergent = DivergenceMap[N]; - bool IsSDNodeDivergent = TLI.isSDNodeSourceOfDivergence(N, FLI, DA); - for (auto &Op : N->ops()) { - if (Op.Val.getValueType() != MVT::Other) - IsSDNodeDivergent |= DivergenceMap[Op.getNode()]; - } - if (!IsDivergent && IsSDNodeDivergent && !TLI.isSDNodeAlwaysUniform(N)) { - DivergenceMap[N] = true; - } - } - for (auto &N : allnodes()) { - (void)N; - assert(DivergenceMap[&N] == N.isDivergent() && - "Divergence bit inconsistency detected\n"); + for (auto *N : TopoOrder) { + assert(calculateDivergence(N) == N->isDivergent() && + "Divergence bit inconsistency detected"); } } #endif @@ -8904,25 +9026,32 @@ void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) { DbgInfo->add(DB); } -SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, - SDValue NewMemOp) { - assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node"); +SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain, + SDValue NewMemOpChain) { + assert(isa<MemSDNode>(NewMemOpChain) && "Expected a memop node"); + assert(NewMemOpChain.getValueType() == MVT::Other && "Expected a token VT"); // The new memory operation must have the same position as the old load in // terms of memory dependency. 
Create a TokenFactor for the old load and new // memory operation and update uses of the old load's output chain to use that // TokenFactor. - SDValue OldChain = SDValue(OldLoad, 1); - SDValue NewChain = SDValue(NewMemOp.getNode(), 1); - if (OldChain == NewChain || !OldLoad->hasAnyUseOfValue(1)) - return NewChain; + if (OldChain == NewMemOpChain || OldChain.use_empty()) + return NewMemOpChain; - SDValue TokenFactor = - getNode(ISD::TokenFactor, SDLoc(OldLoad), MVT::Other, OldChain, NewChain); + SDValue TokenFactor = getNode(ISD::TokenFactor, SDLoc(OldChain), MVT::Other, + OldChain, NewMemOpChain); ReplaceAllUsesOfValueWith(OldChain, TokenFactor); - UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain); + UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewMemOpChain); return TokenFactor; } +SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, + SDValue NewMemOp) { + assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node"); + SDValue OldChain = SDValue(OldLoad, 1); + SDValue NewMemOpChain = NewMemOp.getValue(1); + return makeEquivalentMemoryOrdering(OldChain, NewMemOpChain); +} + SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op, Function **OutFunction) { assert(isa<ExternalSymbolSDNode>(Op) && "Node should be an ExternalSymbol"); @@ -9006,6 +9135,18 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs, if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) return CN; + // SplatVectors can truncate their operands. Ignore that case here unless + // AllowTruncation is set. 
+ if (N->getOpcode() == ISD::SPLAT_VECTOR) { + EVT VecEltVT = N->getValueType(0).getVectorElementType(); + if (auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(0))) { + EVT CVT = CN->getValueType(0); + assert(CVT.bitsGE(VecEltVT) && "Illegal splat_vector element extension"); + if (AllowTruncation || CVT == VecEltVT) + return CN; + } + } + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { BitVector UndefElements; ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); @@ -9059,6 +9200,10 @@ ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) { return CN; } + if (N.getOpcode() == ISD::SPLAT_VECTOR) + if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N.getOperand(0))) + return CN; + return nullptr; } @@ -9220,8 +9365,7 @@ bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) { bool Seen = false; for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { SDNode *User = *I; - if (llvm::any_of(Nodes, - [&User](const SDNode *Node) { return User == Node; })) + if (llvm::is_contained(Nodes, User)) Seen = true; else return false; @@ -9232,7 +9376,7 @@ bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) { /// isOperand - Return true if this node is an operand of N. bool SDValue::isOperandOf(const SDNode *N) const { - return any_of(N->op_values(), [this](SDValue Op) { return *this == Op; }); + return is_contained(N->op_values(), *this); } bool SDNode::isOperandOf(const SDNode *N) const { @@ -9616,24 +9760,24 @@ std::pair<EVT, EVT> SelectionDAG::GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT, bool *HiIsEmpty) const { EVT EltTp = VT.getVectorElementType(); - bool IsScalable = VT.isScalableVector(); // Examples: // custom VL=8 with enveloping VL=8/8 yields 8/0 (hi empty) // custom VL=9 with enveloping VL=8/8 yields 8/1 // custom VL=10 with enveloping VL=8/8 yields 8/2 // etc. 
- unsigned VTNumElts = VT.getVectorNumElements(); - unsigned EnvNumElts = EnvVT.getVectorNumElements(); + ElementCount VTNumElts = VT.getVectorElementCount(); + ElementCount EnvNumElts = EnvVT.getVectorElementCount(); + assert(VTNumElts.isScalable() == EnvNumElts.isScalable() && + "Mixing fixed width and scalable vectors when enveloping a type"); EVT LoVT, HiVT; - if (VTNumElts > EnvNumElts) { + if (VTNumElts.getKnownMinValue() > EnvNumElts.getKnownMinValue()) { LoVT = EnvVT; - HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts, - IsScalable); + HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts); *HiIsEmpty = false; } else { // Flag that hi type has zero storage size, but return split envelop type // (this would be easier if vector types with zero elements were allowed). - LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts, IsScalable); + LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts); HiVT = EnvVT; *HiIsEmpty = true; } @@ -9768,16 +9912,16 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef, SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts, BitVector *UndefElements) const { + unsigned NumOps = getNumOperands(); if (UndefElements) { UndefElements->clear(); - UndefElements->resize(getNumOperands()); + UndefElements->resize(NumOps); } - assert(getNumOperands() == DemandedElts.getBitWidth() && - "Unexpected vector size"); + assert(NumOps == DemandedElts.getBitWidth() && "Unexpected vector size"); if (!DemandedElts) return SDValue(); SDValue Splatted; - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + for (unsigned i = 0; i != NumOps; ++i) { if (!DemandedElts[i]) continue; SDValue Op = getOperand(i); @@ -9806,6 +9950,58 @@ SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const { return getSplatValue(DemandedElts, UndefElements); } +bool BuildVectorSDNode::getRepeatedSequence(const APInt &DemandedElts, + SmallVectorImpl<SDValue> &Sequence, + 
BitVector *UndefElements) const { + unsigned NumOps = getNumOperands(); + Sequence.clear(); + if (UndefElements) { + UndefElements->clear(); + UndefElements->resize(NumOps); + } + assert(NumOps == DemandedElts.getBitWidth() && "Unexpected vector size"); + if (!DemandedElts || NumOps < 2 || !isPowerOf2_32(NumOps)) + return false; + + // Set the undefs even if we don't find a sequence (like getSplatValue). + if (UndefElements) + for (unsigned I = 0; I != NumOps; ++I) + if (DemandedElts[I] && getOperand(I).isUndef()) + (*UndefElements)[I] = true; + + // Iteratively widen the sequence length looking for repetitions. + for (unsigned SeqLen = 1; SeqLen < NumOps; SeqLen *= 2) { + Sequence.append(SeqLen, SDValue()); + for (unsigned I = 0; I != NumOps; ++I) { + if (!DemandedElts[I]) + continue; + SDValue &SeqOp = Sequence[I % SeqLen]; + SDValue Op = getOperand(I); + if (Op.isUndef()) { + if (!SeqOp) + SeqOp = Op; + continue; + } + if (SeqOp && !SeqOp.isUndef() && SeqOp != Op) { + Sequence.clear(); + break; + } + SeqOp = Op; + } + if (!Sequence.empty()) + return true; + } + + assert(Sequence.empty() && "Failed to empty non-repeating sequence pattern"); + return false; +} + +bool BuildVectorSDNode::getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence, + BitVector *UndefElements) const { + APInt DemandedElts = APInt::getAllOnesValue(getNumOperands()); + return getRepeatedSequence(DemandedElts, Sequence, UndefElements); +} + ConstantSDNode * BuildVectorSDNode::getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements) const { @@ -9878,7 +10074,7 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { // Returns the SDNode if it is a constant integer BuildVector // or constant integer. 
-SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) { +SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) const { if (isa<ConstantSDNode>(N)) return N.getNode(); if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) @@ -9889,10 +10085,15 @@ SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) { if (GA->getOpcode() == ISD::GlobalAddress && TLI->isOffsetFoldingLegal(GA)) return GA; + if ((N.getOpcode() == ISD::SPLAT_VECTOR) && + isa<ConstantSDNode>(N.getOperand(0))) + return N.getNode(); return nullptr; } -SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) { +// Returns the SDNode if it is a constant float BuildVector +// or constant float. +SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) const { if (isa<ConstantFPSDNode>(N)) return N.getNode(); @@ -9914,13 +10115,14 @@ void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) { Ops[I].setUser(Node); Ops[I].setInitial(Vals[I]); if (Ops[I].Val.getValueType() != MVT::Other) // Skip Chain. It does not carry divergence. 
- IsDivergent = IsDivergent || Ops[I].getNode()->isDivergent(); + IsDivergent |= Ops[I].getNode()->isDivergent(); } Node->NumOperands = Vals.size(); Node->OperandList = Ops; - IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA); - if (!TLI->isSDNodeAlwaysUniform(Node)) + if (!TLI->isSDNodeAlwaysUniform(Node)) { + IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA); Node->SDNodeBits.IsDivergent = IsDivergent; + } checkForCycles(Node); } @@ -9937,6 +10139,44 @@ SDValue SelectionDAG::getTokenFactor(const SDLoc &DL, return getNode(ISD::TokenFactor, DL, MVT::Other, Vals); } +SDValue SelectionDAG::getNeutralElement(unsigned Opcode, const SDLoc &DL, + EVT VT, SDNodeFlags Flags) { + switch (Opcode) { + default: + return SDValue(); + case ISD::ADD: + case ISD::OR: + case ISD::XOR: + case ISD::UMAX: + return getConstant(0, DL, VT); + case ISD::MUL: + return getConstant(1, DL, VT); + case ISD::AND: + case ISD::UMIN: + return getAllOnesConstant(DL, VT); + case ISD::SMAX: + return getConstant(APInt::getSignedMinValue(VT.getSizeInBits()), DL, VT); + case ISD::SMIN: + return getConstant(APInt::getSignedMaxValue(VT.getSizeInBits()), DL, VT); + case ISD::FADD: + return getConstantFP(-0.0, DL, VT); + case ISD::FMUL: + return getConstantFP(1.0, DL, VT); + case ISD::FMINNUM: + case ISD::FMAXNUM: { + // Neutral element for fminnum is NaN, Inf or FLT_MAX, depending on FMF. + const fltSemantics &Semantics = EVTToAPFloatSemantics(VT); + APFloat NeutralAF = !Flags.hasNoNaNs() ? APFloat::getQNaN(Semantics) : + !Flags.hasNoInfs() ? 
APFloat::getInf(Semantics) : + APFloat::getLargest(Semantics); + if (Opcode == ISD::FMAXNUM) + NeutralAF.changeSign(); + + return getConstantFP(NeutralAF, DL, VT); + } + } +} + #ifndef NDEBUG static void checkForCyclesHelper(const SDNode *N, SmallPtrSetImpl<const SDNode*> &Visited, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index 3a53ab9717a4..20c7d771bfb6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -96,18 +97,28 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0, int64_t PtrDiff; if (NumBytes0.hasValue() && NumBytes1.hasValue() && BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) { + // If the size of memory access is unknown, do not use it to analysis. + // One example of unknown size memory access is to load/store scalable + // vector objects on the stack. // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the // following situations arise: - IsAlias = !( - // [----BasePtr0----] - // [---BasePtr1--] - // ========PtrDiff========> - (*NumBytes0 <= PtrDiff) || - // [----BasePtr0----] - // [---BasePtr1--] - // =====(-PtrDiff)====> - (PtrDiff + *NumBytes1 <= 0)); // i.e. *NumBytes1 < -PtrDiff. 
- return true; + if (PtrDiff >= 0 && + *NumBytes0 != static_cast<int64_t>(MemoryLocation::UnknownSize)) { + // [----BasePtr0----] + // [---BasePtr1--] + // ========PtrDiff========> + IsAlias = !(*NumBytes0 <= PtrDiff); + return true; + } + if (PtrDiff < 0 && + *NumBytes1 != static_cast<int64_t>(MemoryLocation::UnknownSize)) { + // [----BasePtr0----] + // [---BasePtr1--] + // =====(-PtrDiff)====> + IsAlias = !((PtrDiff + *NumBytes1) <= 0); + return true; + } + return false; } // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be // able to calculate their relative offset if at least one arises diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 1d596c89c911..6638ff6a6358 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -14,15 +14,12 @@ #include "SDNodeDbgValue.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" @@ -40,7 +37,6 @@ #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" -#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -53,17 +49,14 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/StackMaps.h" #include 
"llvm/CodeGen/SwiftErrorValueTracking.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" @@ -75,13 +68,11 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" @@ -99,31 +90,22 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/AtomicOrdering.h" -#include "llvm/Support/BranchProbability.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Utils/Local.h" -#include <algorithm> -#include <cassert> #include <cstddef> -#include <cstdint> #include <cstring> #include <iterator> #include <limits> #include <numeric> #include <tuple> -#include <utility> -#include <vector> using namespace llvm; using namespace PatternMatch; @@ -169,32 +151,6 @@ static cl::opt<unsigned> SwitchPeelThreshold( // store [4096 x i8] %data, [4096 x i8]* %buffer static const unsigned MaxParallelChains = 64; -// 
Return the calling convention if the Value passed requires ABI mangling as it -// is a parameter to a function or a return value from a function which is not -// an intrinsic. -static Optional<CallingConv::ID> getABIRegCopyCC(const Value *V) { - if (auto *R = dyn_cast<ReturnInst>(V)) - return R->getParent()->getParent()->getCallingConv(); - - if (auto *CI = dyn_cast<CallInst>(V)) { - const bool IsInlineAsm = CI->isInlineAsm(); - const bool IsIndirectFunctionCall = - !IsInlineAsm && !CI->getCalledFunction(); - - // It is possible that the call instruction is an inline asm statement or an - // indirect function call in which case the return value of - // getCalledFunction() would be nullptr. - const bool IsInstrinsicCall = - !IsInlineAsm && !IsIndirectFunctionCall && - CI->getCalledFunction()->getIntrinsicID() != Intrinsic::not_intrinsic; - - if (!IsInlineAsm && !IsInstrinsicCall) - return CI->getCallingConv(); - } - - return None; -} - static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, @@ -409,7 +365,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, - PartVT, IntermediateVT, V); + PartVT, IntermediateVT, V, CallConv); } else if (NumParts > 0) { // If the intermediate type was expanded, build the intermediate // operands from the parts. 
@@ -418,7 +374,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, - PartVT, IntermediateVT, V); + PartVT, IntermediateVT, V, CallConv); } // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the @@ -448,10 +404,10 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the // elements we want. if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { - assert((PartEVT.getVectorElementCount().Min > - ValueVT.getVectorElementCount().Min) && - (PartEVT.getVectorElementCount().Scalable == - ValueVT.getVectorElementCount().Scalable) && + assert((PartEVT.getVectorElementCount().getKnownMinValue() > + ValueVT.getVectorElementCount().getKnownMinValue()) && + (PartEVT.getVectorElementCount().isScalable() == + ValueVT.getVectorElementCount().isScalable()) && "Cannot narrow, it would be a lossy transformation"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, DAG.getVectorIdxConstant(0, DL)); @@ -479,7 +435,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, // are the same size, this is an obvious bitcast. if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) { return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) { + } else if (ValueVT.bitsLT(PartEVT)) { // Bitcast Val back the original type and extract the corresponding // vector we want. 
unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits(); @@ -709,14 +665,14 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, // Promoted vector extract Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } else { - if (ValueVT.getVectorNumElements() == 1) { + if (ValueVT.getVectorElementCount().isScalar()) { Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getVectorIdxConstant(0, DL)); } else { - assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() && + uint64_t ValueSize = ValueVT.getFixedSizeInBits(); + assert(PartVT.getFixedSizeInBits() > ValueSize && "lossy conversion of vector to scalar type"); - EVT IntermediateType = - EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize); Val = DAG.getBitcast(IntermediateType, Val); Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } @@ -749,15 +705,15 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() && "Mixing scalable and fixed vectors when copying in parts"); - ElementCount DestEltCnt; + Optional<ElementCount> DestEltCnt; if (IntermediateVT.isVector()) DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates; else - DestEltCnt = ElementCount(NumIntermediates, false); + DestEltCnt = ElementCount::getFixed(NumIntermediates); EVT BuiltVectorTy = EVT::getVectorVT( - *DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt); + *DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt.getValue()); if (ValueVT != BuiltVectorTy) { if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) Val = Widened; @@ -1001,7 +957,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, // shouldn't try to apply any sort of splitting logic to them. 
assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() && "No 1:1 mapping from clobbers to regs?"); - unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); + Register SP = TLI.getStackPointerRegisterToSaveRestore(); (void)SP; for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) { Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I])); @@ -1024,14 +980,14 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, } } -SmallVector<std::pair<unsigned, unsigned>, 4> +SmallVector<std::pair<unsigned, TypeSize>, 4> RegsForValue::getRegsAndSizes() const { - SmallVector<std::pair<unsigned, unsigned>, 4> OutVec; + SmallVector<std::pair<unsigned, TypeSize>, 4> OutVec; unsigned I = 0; for (auto CountAndVT : zip_first(RegCount, RegVTs)) { unsigned RegCount = std::get<0>(CountAndVT); MVT RegisterVT = std::get<1>(CountAndVT); - unsigned RegisterSize = RegisterVT.getSizeInBits(); + TypeSize RegisterSize = RegisterVT.getSizeInBits(); for (unsigned E = I + RegCount; I != E; ++I) OutVec.push_back(std::make_pair(Regs[I], RegisterSize)); } @@ -1140,25 +1096,6 @@ void SelectionDAGBuilder::visit(const Instruction &I) { visit(I.getOpcode(), I); - if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) { - // ConstrainedFPIntrinsics handle their own FMF. - if (!isa<ConstrainedFPIntrinsic>(&I)) { - // Propagate the fast-math-flags of this IR instruction to the DAG node that - // maps to this instruction. - // TODO: We could handle all flags (nsw, etc) here. - // TODO: If an IR instruction maps to >1 node, only the final node will have - // flags set. 
- if (SDNode *Node = getNodeForIRValue(&I)) { - SDNodeFlags IncomingFlags; - IncomingFlags.copyFMF(*FPMO); - if (!Node->getFlags().isDefined()) - Node->setFlags(IncomingFlags); - else - Node->intersectFlagsWith(IncomingFlags); - } - } - } - if (!I.isTerminator() && !HasTailCall && !isa<GCStatepointInst>(I)) // statepoints handle their exports internally CopyToExportRegsIfNeeded(&I); @@ -1204,7 +1141,7 @@ void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable, if (isMatchingDbgValue(DDI)) salvageUnresolvedDbgValue(DDI); - DDIV.erase(remove_if(DDIV, isMatchingDbgValue), DDIV.end()); + erase_if(DDIV, isMatchingDbgValue); } } @@ -1577,6 +1514,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) return DAG.getBlockAddress(BA, VT); + if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(C)) + return getValue(Equiv->getGlobalValue()); + VectorType *VecTy = cast<VectorType>(V->getType()); // Now that we know the number and type of the elements, get that number of @@ -1624,7 +1564,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, - Inst->getType(), getABIRegCopyCC(V)); + Inst->getType(), None); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } @@ -1697,10 +1637,32 @@ void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) { } } -// For wasm, there's alwyas a single catch pad attached to a catchswitch, and -// the control flow always stops at the single catch pad, as it does for a -// cleanup pad. In case the exception caught is not of the types the catch pad -// catches, it will be rethrown by a rethrow. 
+// In wasm EH, even though a catchpad may not catch an exception if a tag does +// not match, it is OK to add only the first unwind destination catchpad to the +// successors, because there will be at least one invoke instruction within the +// catch scope that points to the next unwind destination, if one exists, so +// CFGSort cannot mess up with BB sorting order. +// (All catchpads with 'catch (type)' clauses have a 'llvm.rethrow' intrinsic +// call within them, and catchpads only consisting of 'catch (...)' have a +// '__cxa_end_catch' call within them, both of which generate invokes in case +// the next unwind destination exists, i.e., the next unwind destination is not +// the caller.) +// +// Having at most one EH pad successor is also simpler and helps later +// transformations. +// +// For example, +// current: +// invoke void @foo to ... unwind label %catch.dispatch +// catch.dispatch: +// %0 = catchswitch within ... [label %catch.start] unwind label %next +// catch.start: +// ... +// ... in this BB or some other child BB dominated by this BB there will be an +// invoke that points to 'next' BB as an unwind destination +// +// next: ; We don't need to add this to 'current' BB's successor +// ... static void findWasmUnwindDestinations( FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB, BranchProbability Prob, @@ -1863,7 +1825,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { for (unsigned i = 0; i != NumValues; ++i) { // An aggregate return value cannot wrap around the address space, so // offsets to its parts don't wrap either. 
- SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]); + SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, + TypeSize::Fixed(Offsets[i])); SDValue Val = RetOp.getValue(RetOp.getResNo() + i); if (MemVTs[i] != ValueVTs[i]) @@ -2144,14 +2107,19 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, } const Instruction *BOp = dyn_cast<Instruction>(Cond); + const Value *BOpOp0, *BOpOp1; // Compute the effective opcode for Cond, taking into account whether it needs // to be inverted, e.g. // and (not (or A, B)), C // gets lowered as // and (and (not A, not B), C) - unsigned BOpc = 0; + Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0; if (BOp) { - BOpc = BOp->getOpcode(); + BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1))) + ? Instruction::And + : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1))) + ? Instruction::Or + : (Instruction::BinaryOps)0); if (InvertCond) { if (BOpc == Instruction::And) BOpc = Instruction::Or; @@ -2161,11 +2129,11 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, } // If this node is not part of the or/and tree, emit it as a branch. - if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || - BOpc != unsigned(Opc) || !BOp->hasOneUse() || - BOp->getParent() != CurBB->getBasicBlock() || - !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || - !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { + // Note that all nodes in the tree should have same opcode. 
+ bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse(); + if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() || + !InBlock(BOpOp0, CurBB->getBasicBlock()) || + !InBlock(BOpOp1, CurBB->getBasicBlock())) { EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb, InvertCond); return; @@ -2201,15 +2169,15 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, auto NewTrueProb = TProb / 2; auto NewFalseProb = TProb / 2 + FProb; // Emit the LHS condition. - FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc, - NewTrueProb, NewFalseProb, InvertCond); + FindMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb, + NewFalseProb, InvertCond); // Normalize A/2 and B to get A/(1+B) and 2B/(1+B). SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb}; BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. - FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, - Probs[0], Probs[1], InvertCond); + FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0], + Probs[1], InvertCond); } else { assert(Opc == Instruction::And && "Unknown merge op!"); // Codegen X & Y as: @@ -2234,15 +2202,15 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, auto NewTrueProb = TProb + FProb / 2; auto NewFalseProb = FProb / 2; // Emit the LHS condition. - FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc, - NewTrueProb, NewFalseProb, InvertCond); + FindMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb, + NewFalseProb, InvertCond); // Normalize A and B/2 to get 2A/(1+A) and B/(1+A). SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2}; BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. 
- FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, - Probs[0], Probs[1], InvertCond); + FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0], + Probs[1], InvertCond); } } @@ -2319,16 +2287,20 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // je foo // cmp D, E // jle foo - if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { - Instruction::BinaryOps Opcode = BOp->getOpcode(); - Value *Vec, *BOp0 = BOp->getOperand(0), *BOp1 = BOp->getOperand(1); - if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && - !I.hasMetadata(LLVMContext::MD_unpredictable) && - (Opcode == Instruction::And || Opcode == Instruction::Or) && - !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) && - match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) { - FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, - Opcode, + const Instruction *BOp = dyn_cast<Instruction>(CondVal); + if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp && + BOp->hasOneUse() && !I.hasMetadata(LLVMContext::MD_unpredictable)) { + Value *Vec; + const Value *BOp0, *BOp1; + Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0; + if (match(BOp, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1)))) + Opcode = Instruction::And; + else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1)))) + Opcode = Instruction::Or; + + if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) && + match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) { + FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode, getEdgeProbability(BrMBB, Succ0MBB), getEdgeProbability(BrMBB, Succ1MBB), /*InvertCond=*/false); @@ -2577,7 +2549,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, SDLoc dl = getCurSDLoc(); SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); const Module &M = *ParentBB->getParent()->getFunction().getParent(); - unsigned Align = 
DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext())); + Align Align = DL->getPrefTypeAlign(Type::getInt8PtrTy(M.getContext())); // Generate code to load the content of the guard slot. SDValue GuardVal = DAG.getLoad( @@ -2835,7 +2807,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { case Intrinsic::experimental_gc_statepoint: LowerStatepoint(cast<GCStatepointInst>(I), EHPadBB); break; - case Intrinsic::wasm_rethrow_in_catch: { + case Intrinsic::wasm_rethrow: { // This is usually done in visitTargetIntrinsic, but this intrinsic is // special because it can be invoked, so we manually lower it to a DAG // node here. @@ -2843,7 +2815,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { Ops.push_back(getRoot()); // inchain const TargetLowering &TLI = DAG.getTargetLoweringInfo(); Ops.push_back( - DAG.getTargetConstant(Intrinsic::wasm_rethrow_in_catch, getCurSDLoc(), + DAG.getTargetConstant(Intrinsic::wasm_rethrow, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); SDVTList VTs = DAG.getVTList(ArrayRef<EVT>({MVT::Other})); // outchain DAG.setRoot(DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops)); @@ -3025,20 +2997,6 @@ void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); } -void SelectionDAGBuilder::visitFSub(const User &I) { - // -0.0 - X --> fneg - Type *Ty = I.getType(); - if (isa<Constant>(I.getOperand(0)) && - I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { - SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(), - Op2.getValueType(), Op2)); - return; - } - - visitBinary(I, ISD::FSUB); -} - void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) { SDNodeFlags Flags; @@ -3054,9 +3012,10 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) { Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap()); 
Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap()); } - if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) { + if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) Flags.setExact(ExactOp->isExact()); - } + if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) + Flags.copyFMF(*FPOp); SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); @@ -3166,10 +3125,14 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); - auto *FPMO = dyn_cast<FPMathOperator>(&I); - if ((FPMO && FPMO->hasNoNaNs()) || TM.Options.NoNaNsFPMath) + auto *FPMO = cast<FPMathOperator>(&I); + if (FPMO->hasNoNaNs() || TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); + SDNodeFlags Flags; + Flags.copyFMF(*FPMO); + SelectionDAG::FlagInserter FlagsInserter(DAG, Flags); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); @@ -3199,6 +3162,11 @@ void SelectionDAGBuilder::visitSelect(const User &I) { Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT; bool IsUnaryAbs = false; + bool Negate = false; + + SDNodeFlags Flags; + if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) + Flags.copyFMF(*FPOp); // Min/max matching is only viable if all output VTs are the same. if (is_splat(ValueVTs)) { @@ -3259,12 +3227,13 @@ void SelectionDAGBuilder::visitSelect(const User &I) { break; } break; + case SPF_NABS: + Negate = true; + LLVM_FALLTHROUGH; case SPF_ABS: IsUnaryAbs = true; Opc = ISD::ABS; break; - case SPF_NABS: - // TODO: we need to produce sub(0, abs(X)). 
default: break; } @@ -3291,10 +3260,13 @@ void SelectionDAGBuilder::visitSelect(const User &I) { if (IsUnaryAbs) { for (unsigned i = 0; i != NumValues; ++i) { + SDLoc dl = getCurSDLoc(); + EVT VT = LHSVal.getNode()->getValueType(LHSVal.getResNo() + i); Values[i] = - DAG.getNode(OpCode, getCurSDLoc(), - LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), - SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); + DAG.getNode(OpCode, dl, VT, LHSVal.getValue(LHSVal.getResNo() + i)); + if (Negate) + Values[i] = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), + Values[i]); } } else { for (unsigned i = 0; i != NumValues; ++i) { @@ -3303,7 +3275,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i)); Values[i] = DAG.getNode( OpCode, getCurSDLoc(), - LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops); + LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops, Flags); } } @@ -3445,7 +3417,7 @@ void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { unsigned SrcAS = SV->getType()->getPointerAddressSpace(); unsigned DestAS = I.getType()->getPointerAddressSpace(); - if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) + if (!TM.isNoopAddrSpaceCast(SrcAS, DestAS)) N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS); setValue(&I, N); @@ -3773,20 +3745,18 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue N = getValue(Op0); SDLoc dl = getCurSDLoc(); auto &TLI = DAG.getTargetLoweringInfo(); - MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS); - MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS); // Normalize Vector GEP - all scalar operands should be converted to the // splat vector. bool IsVectorGEP = I.getType()->isVectorTy(); ElementCount VectorElementCount = IsVectorGEP ? 
cast<VectorType>(I.getType())->getElementCount() - : ElementCount(0, false); + : ElementCount::getFixed(0); if (IsVectorGEP && !N.getValueType().isVector()) { LLVMContext &Context = *DAG.getContext(); EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount); - if (VectorElementCount.Scalable) + if (VectorElementCount.isScalable()) N = DAG.getSplatVector(VT, dl, N); else N = DAG.getSplatBuildVector(VT, dl, N); @@ -3859,7 +3829,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (!IdxN.getValueType().isVector() && IsVectorGEP) { EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorElementCount); - if (VectorElementCount.Scalable) + if (VectorElementCount.isScalable()) IdxN = DAG.getSplatVector(VT, dl, IdxN); else IdxN = DAG.getSplatBuildVector(VT, dl, IdxN); @@ -3900,6 +3870,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { } } + MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS); + MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS); + if (IsVectorGEP) { + PtrTy = MVT::getVectorVT(PtrTy, VectorElementCount); + PtrMemTy = MVT::getVectorVT(PtrMemTy, VectorElementCount); + } + if (PtrMemTy != PtrTy && !cast<GEPOperator>(I).isInBounds()) N = DAG.getPtrExtendInReg(N, dl, PtrMemTy); @@ -4196,7 +4173,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { Root = Chain; ChainI = 0; } - SDValue Add = DAG.getMemBasePlusOffset(Ptr, Offsets[i], dl, Flags); + SDValue Add = + DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(Offsets[i]), dl, Flags); SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i); if (MemVTs[i] != ValueVTs[i]) Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]); @@ -4358,12 +4336,12 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); - IndexType = ISD::SIGNED_SCALED; + IndexType = ISD::SIGNED_UNSCALED; Scale = DAG.getTargetConstant(1, sdl, 
TLI.getPointerTy(DAG.getDataLayout())); } SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale }; SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, - Ops, MMO, IndexType); + Ops, MMO, IndexType, false); DAG.setRoot(Scatter); setValue(&I, Scatter); } @@ -4411,7 +4389,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { // Do not serialize masked loads of constant memory with anything. MemoryLocation ML; if (VT.isScalableVector()) - ML = MemoryLocation(PtrOperand); + ML = MemoryLocation::getAfter(PtrOperand); else ML = MemoryLocation(PtrOperand, LocationSize::precise( DAG.getDataLayout().getTypeStoreSize(I.getType())), @@ -4469,12 +4447,12 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); - IndexType = ISD::SIGNED_SCALED; + IndexType = ISD::SIGNED_UNSCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale }; SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, - Ops, MMO, IndexType); + Ops, MMO, IndexType, ISD::NON_EXTLOAD); PendingLoads.push_back(Gather.getValue(1)); setValue(&I, Gather); @@ -4901,7 +4879,7 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl, /// expandExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { @@ -4917,13 +4895,13 @@ static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, } // No special expansion. 
- return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op, Flags); } /// expandLog - Lower a log intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -5016,13 +4994,13 @@ static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, } // No special expansion. - return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op, Flags); } /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -5113,13 +5091,13 @@ static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, } // No special expansion. - return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op, Flags); } /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -5203,25 +5181,26 @@ static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, } // No special expansion. 
- return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op, Flags); } /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) return getLimitedPrecisionExp2(Op, dl, DAG); // No special expansion. - return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op, Flags); } /// visitPow - Lower a pow intrinsic. Handles the special sequences for /// limited-precision mode with x == 10.0f. static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, - SelectionDAG &DAG, const TargetLowering &TLI) { + SelectionDAG &DAG, const TargetLowering &TLI, + SDNodeFlags Flags) { bool IsExp10 = false; if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { @@ -5244,7 +5223,7 @@ static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, } // No special expansion. - return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS); + return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS, Flags); } /// ExpandPowI - Expand a llvm.powi intrinsic. @@ -5369,7 +5348,7 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL, // getUnderlyingArgRegs - Find underlying registers used for a truncated, // bitcasted, or split argument. 
Returns a list of <Register, size in bits> static void -getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs, +getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, TypeSize>> &Regs, const SDValue &N) { switch (N.getOpcode()) { case ISD::CopyFromReg: { @@ -5480,7 +5459,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (FI != std::numeric_limits<int>::max()) Op = MachineOperand::CreateFI(FI); - SmallVector<std::pair<unsigned, unsigned>, 8> ArgRegsAndSizes; + SmallVector<std::pair<unsigned, TypeSize>, 8> ArgRegsAndSizes; if (!Op && N.getNode()) { getUnderlyingArgRegs(ArgRegsAndSizes, N); Register Reg; @@ -5510,8 +5489,8 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (!Op) { // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg - auto splitMultiRegDbgValue - = [&](ArrayRef<std::pair<unsigned, unsigned>> SplitRegs) { + auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>> + SplitRegs) { unsigned Offset = 0; for (auto RegAndSize : SplitRegs) { // If the expression is already a fragment, the current register @@ -5555,7 +5534,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (VMI != FuncInfo.ValueMap.end()) { const auto &TLI = DAG.getTargetLoweringInfo(); RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second, - V->getType(), getABIRegCopyCC(V)); + V->getType(), None); if (RFV.occupiesMultipleRegs()) { splitMultiRegDbgValue(RFV.getRegsAndSizes()); return true; @@ -5665,6 +5644,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DebugLoc dl = getCurDebugLoc(); SDValue Res; + SDNodeFlags Flags; + if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) + Flags.copyFMF(*FPOp); + switch (Intrinsic) { default: // By default, turn this into a target intrinsic node. 
@@ -6079,23 +6062,26 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(1)), DAG)); return; case Intrinsic::log: - setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::log2: - setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, + expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::log10: - setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, + expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::exp: - setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::exp2: - setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, + expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::pow: setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)), DAG, TLI)); + getValue(I.getArgOperand(1)), DAG, TLI, Flags)); return; case Intrinsic::sqrt: case Intrinsic::fabs: @@ -6128,7 +6114,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, DAG.getNode(Opcode, sdl, getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)))); + getValue(I.getArgOperand(0)), Flags)); return; } case Intrinsic::lround: @@ -6153,44 +6139,47 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, DAG.getNode(ISD::FMINNUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::maxnum: setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - 
getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::minimum: setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::maximum: setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::copysign: setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::fma: - setValue(&I, DAG.getNode(ISD::FMA, sdl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)), - getValue(I.getArgOperand(2)))); + setValue(&I, DAG.getNode( + ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), Flags)); return; #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ case Intrinsic::INTRINSIC: #include "llvm/IR/ConstrainedOps.def" visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I)); return; +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) 
case Intrinsic::VPID: +#include "llvm/IR/VPIntrinsics.def" + visitVectorPredicationIntrinsic(cast<VPIntrinsic>(I)); + return; case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && @@ -6199,17 +6188,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), - getValue(I.getArgOperand(2)))); + getValue(I.getArgOperand(2)), Flags)); } else { // TODO: Intrinsic calls should have fast-math-flags. - SDValue Mul = DAG.getNode(ISD::FMUL, sdl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1))); + SDValue Mul = DAG.getNode( + ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags); SDValue Add = DAG.getNode(ISD::FADD, sdl, getValue(I.getArgOperand(0)).getValueType(), - Mul, - getValue(I.getArgOperand(2))); + Mul, getValue(I.getArgOperand(2)), Flags); setValue(&I, Add); } return; @@ -6227,6 +6214,20 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DAG.getNode(ISD::BITCAST, sdl, MVT::f16, getValue(I.getArgOperand(0))))); return; + case Intrinsic::fptosi_sat: { + EVT Type = TLI.getValueType(DAG.getDataLayout(), I.getType()); + SDValue SatW = DAG.getConstant(Type.getScalarSizeInBits(), sdl, MVT::i32); + setValue(&I, DAG.getNode(ISD::FP_TO_SINT_SAT, sdl, Type, + getValue(I.getArgOperand(0)), SatW)); + return; + } + case Intrinsic::fptoui_sat: { + EVT Type = TLI.getValueType(DAG.getDataLayout(), I.getType()); + SDValue SatW = DAG.getConstant(Type.getScalarSizeInBits(), sdl, MVT::i32); + setValue(&I, DAG.getNode(ISD::FP_TO_UINT_SAT, sdl, Type, + getValue(I.getArgOperand(0)), SatW)); + return; + } case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, 
getRoot(), Tmp)); @@ -6279,62 +6280,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Y = getValue(I.getArgOperand(1)); SDValue Z = getValue(I.getArgOperand(2)); EVT VT = X.getValueType(); - SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT); - SDValue Zero = DAG.getConstant(0, sdl, VT); - SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC); - - auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR; - if (TLI.isOperationLegalOrCustom(FunnelOpcode, VT)) { - setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z)); - return; - } - // When X == Y, this is rotate. If the data type has a power-of-2 size, we - // avoid the select that is necessary in the general case to filter out - // the 0-shift possibility that leads to UB. - if (X == Y && isPowerOf2_32(VT.getScalarSizeInBits())) { + if (X == Y) { auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR; - if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) { - setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z)); - return; - } - - // Some targets only rotate one way. Try the opposite direction. - RotateOpcode = IsFSHL ? ISD::ROTR : ISD::ROTL; - if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) { - // Negate the shift amount because it is safe to ignore the high bits. - SDValue NegShAmt = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z); - setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, NegShAmt)); - return; - } - - // fshl (rotl): (X << (Z % BW)) | (X >> ((0 - Z) % BW)) - // fshr (rotr): (X << ((0 - Z) % BW)) | (X >> (Z % BW)) - SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z); - SDValue NShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC); - SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : NShAmt); - SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, X, IsFSHL ? NShAmt : ShAmt); - setValue(&I, DAG.getNode(ISD::OR, sdl, VT, ShX, ShY)); - return; + setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z)); + } else { + auto FunnelOpcode = IsFSHL ? 
ISD::FSHL : ISD::FSHR; + setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z)); } - - // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) - // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) - SDValue InvShAmt = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, ShAmt); - SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : InvShAmt); - SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, Y, IsFSHL ? InvShAmt : ShAmt); - SDValue Or = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY); - - // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth, - // and that is undefined. We must compare and select to avoid UB. - EVT CCVT = MVT::i1; - if (VT.isVector()) - CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements()); - - // For fshl, 0-shift returns the 1st arg (X). - // For fshr, 0-shift returns the 2nd arg (Y). - SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ); - setValue(&I, DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Or)); return; } case Intrinsic::sadd_sat: { @@ -6361,6 +6314,18 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2)); return; } + case Intrinsic::sshl_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::SSHLSAT, sdl, Op1.getValueType(), Op1, Op2)); + return; + } + case Intrinsic::ushl_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::USHLSAT, sdl, Op1.getValueType(), Op1, Op2)); + return; + } case Intrinsic::smul_fix: case Intrinsic::umul_fix: case Intrinsic::smul_fix_sat: @@ -6383,6 +6348,36 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Op1, Op2, Op3, DAG, TLI)); return; } + case Intrinsic::smax: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::SMAX, sdl, 
Op1.getValueType(), Op1, Op2)); + return; + } + case Intrinsic::smin: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::SMIN, sdl, Op1.getValueType(), Op1, Op2)); + return; + } + case Intrinsic::umax: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::UMAX, sdl, Op1.getValueType(), Op1, Op2)); + return; + } + case Intrinsic::umin: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::UMIN, sdl, Op1.getValueType(), Op1, Op2)); + return; + } + case Intrinsic::abs: { + // TODO: Preserve "int min is poison" arg in SDAG? + SDValue Op1 = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::ABS, sdl, Op1.getValueType(), Op1)); + return; + } case Intrinsic::stacksave: { SDValue Op = getRoot(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); @@ -6401,7 +6396,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); // Result type for @llvm.get.dynamic.area.offset should match PtrTy for // target. 
- if (PtrTy.getSizeInBits() < ResTy.getSizeInBits()) + if (PtrTy.getFixedSizeInBits() < ResTy.getFixedSizeInBits()) report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset" " intrinsic!"); Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy), @@ -6419,7 +6414,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } else { EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); const Value *Global = TLI.getSDagStackGuard(M); - unsigned Align = DL->getPrefTypeAlignment(Global->getType()); + Align Align = DL->getPrefTypeAlign(Global->getType()); Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global), MachinePointerInfo(Global, 0), Align, MachineMemOperand::MOVolatile); @@ -6450,9 +6445,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue FIN = DAG.getFrameIndex(FI, PtrTy); // Store the stack protector onto the stack. - Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack( - DAG.getMachineFunction(), FI), - /* Alignment = */ 0, MachineMemOperand::MOVolatile); + Res = DAG.getStore( + Chain, sdl, Src, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), + MaybeAlign(), MachineMemOperand::MOVolatile); setValue(&I, Res); DAG.setRoot(Res); return; @@ -6470,10 +6466,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // Drop the intrinsic, but forward the value setValue(&I, getValue(I.getOperand(0))); return; + case Intrinsic::assume: + case Intrinsic::experimental_noalias_scope_decl: case Intrinsic::var_annotation: case Intrinsic::sideeffect: - // Discard annotate attributes, assumptions, and artificial side-effects. + // Discard annotate attributes, noalias scope declarations, assumptions, and + // artificial side-effects. 
return; case Intrinsic::codeview_annotation: { @@ -6534,6 +6533,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, getValue(I.getArgOperand(0))); return; + case Intrinsic::ubsantrap: case Intrinsic::debugtrap: case Intrinsic::trap: { StringRef TrapFuncName = @@ -6541,12 +6541,31 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, .getAttribute(AttributeList::FunctionIndex, "trap-func-name") .getValueAsString(); if (TrapFuncName.empty()) { - ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? - ISD::TRAP : ISD::DEBUGTRAP; - DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot())); + switch (Intrinsic) { + case Intrinsic::trap: + DAG.setRoot(DAG.getNode(ISD::TRAP, sdl, MVT::Other, getRoot())); + break; + case Intrinsic::debugtrap: + DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, sdl, MVT::Other, getRoot())); + break; + case Intrinsic::ubsantrap: + DAG.setRoot(DAG.getNode( + ISD::UBSANTRAP, sdl, MVT::Other, getRoot(), + DAG.getTargetConstant( + cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(), sdl, + MVT::i32))); + break; + default: llvm_unreachable("unknown trap intrinsic"); + } return; } TargetLowering::ArgListTy Args; + if (Intrinsic == Intrinsic::ubsantrap) { + Args.push_back(TargetLoweringBase::ArgListEntry()); + Args[0].Val = I.getArgOperand(0); + Args[0].Node = getValue(Args[0].Val); + Args[0].Ty = Args[0].Val->getType(); + } TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( @@ -6583,7 +6602,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, EVT OverflowVT = MVT::i1; if (ResultVT.isVector()) OverflowVT = EVT::getVectorVT( - *Context, OverflowVT, ResultVT.getVectorNumElements()); + *Context, OverflowVT, ResultVT.getVectorElementCount()); SDVTList VTs = DAG.getVTList(ResultVT, OverflowVT); setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2)); @@ -6621,7 +6640,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, 
cast<ConstantInt>(I.getArgOperand(0))->getSExtValue(); Value *const ObjectPtr = I.getArgOperand(1); SmallVector<const Value *, 4> Allocas; - GetUnderlyingObjects(ObjectPtr, Allocas, *DL); + getUnderlyingObjects(ObjectPtr, Allocas); for (SmallVectorImpl<const Value*>::iterator Object = Allocas.begin(), E = Allocas.end(); Object != E; ++Object) { @@ -6648,6 +6667,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } return; } + case Intrinsic::pseudoprobe: { + auto Guid = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(); + auto Index = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); + auto Attr = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue(); + Res = DAG.getPseudoProbeNode(sdl, getRoot(), Guid, Index, Attr); + DAG.setRoot(Res); + return; + } case Intrinsic::invariant_start: // Discard region information. setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); @@ -6758,7 +6785,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // specific calling convention, and only for x86_64. // FIXME: Support other platforms later. const auto &Triple = DAG.getTarget().getTargetTriple(); - if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux()) + if (Triple.getArch() != Triple::x86_64) return; SDLoc DL = getCurSDLoc(); @@ -6789,7 +6816,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // specific calling convention, and only for x86_64. // FIXME: Support other platforms later. 
const auto &Triple = DAG.getTarget().getTargetTriple(); - if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux()) + if (Triple.getArch() != Triple::x86_64) return; SDLoc DL = getCurSDLoc(); @@ -6823,19 +6850,19 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, LowerDeoptimizeCall(&I); return; - case Intrinsic::experimental_vector_reduce_v2_fadd: - case Intrinsic::experimental_vector_reduce_v2_fmul: - case Intrinsic::experimental_vector_reduce_add: - case Intrinsic::experimental_vector_reduce_mul: - case Intrinsic::experimental_vector_reduce_and: - case Intrinsic::experimental_vector_reduce_or: - case Intrinsic::experimental_vector_reduce_xor: - case Intrinsic::experimental_vector_reduce_smax: - case Intrinsic::experimental_vector_reduce_smin: - case Intrinsic::experimental_vector_reduce_umax: - case Intrinsic::experimental_vector_reduce_umin: - case Intrinsic::experimental_vector_reduce_fmax: - case Intrinsic::experimental_vector_reduce_fmin: + case Intrinsic::vector_reduce_fadd: + case Intrinsic::vector_reduce_fmul: + case Intrinsic::vector_reduce_add: + case Intrinsic::vector_reduce_mul: + case Intrinsic::vector_reduce_and: + case Intrinsic::vector_reduce_or: + case Intrinsic::vector_reduce_xor: + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_umax: + case Intrinsic::vector_reduce_umin: + case Intrinsic::vector_reduce_fmax: + case Intrinsic::vector_reduce_fmin: visitVectorReduce(I, Intrinsic); return; @@ -6923,36 +6950,57 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::get_active_lane_mask: { auto DL = getCurSDLoc(); SDValue Index = getValue(I.getOperand(0)); - SDValue BTC = getValue(I.getOperand(1)); + SDValue TripCount = getValue(I.getOperand(1)); Type *ElementTy = I.getOperand(0)->getType(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); unsigned VecWidth = VT.getVectorNumElements(); - SmallVector<SDValue, 16> OpsBTC; + 
SmallVector<SDValue, 16> OpsTripCount; SmallVector<SDValue, 16> OpsIndex; SmallVector<SDValue, 16> OpsStepConstants; for (unsigned i = 0; i < VecWidth; i++) { - OpsBTC.push_back(BTC); + OpsTripCount.push_back(TripCount); OpsIndex.push_back(Index); - OpsStepConstants.push_back(DAG.getConstant(i, DL, MVT::getVT(ElementTy))); + OpsStepConstants.push_back( + DAG.getConstant(i, DL, EVT::getEVT(ElementTy))); } - EVT CCVT = MVT::i1; - CCVT = EVT::getVectorVT(I.getContext(), CCVT, VecWidth); + EVT CCVT = EVT::getVectorVT(I.getContext(), MVT::i1, VecWidth); - auto VecTy = MVT::getVT(FixedVectorType::get(ElementTy, VecWidth)); + auto VecTy = EVT::getEVT(FixedVectorType::get(ElementTy, VecWidth)); SDValue VectorIndex = DAG.getBuildVector(VecTy, DL, OpsIndex); SDValue VectorStep = DAG.getBuildVector(VecTy, DL, OpsStepConstants); SDValue VectorInduction = DAG.getNode( ISD::UADDO, DL, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep); - SDValue VectorBTC = DAG.getBuildVector(VecTy, DL, OpsBTC); + SDValue VectorTripCount = DAG.getBuildVector(VecTy, DL, OpsTripCount); SDValue SetCC = DAG.getSetCC(DL, CCVT, VectorInduction.getValue(0), - VectorBTC, ISD::CondCode::SETULE); + VectorTripCount, ISD::CondCode::SETULT); setValue(&I, DAG.getNode(ISD::AND, DL, CCVT, DAG.getNOT(DL, VectorInduction.getValue(1), CCVT), SetCC)); return; } + case Intrinsic::experimental_vector_insert: { + auto DL = getCurSDLoc(); + + SDValue Vec = getValue(I.getOperand(0)); + SDValue SubVec = getValue(I.getOperand(1)); + SDValue Index = getValue(I.getOperand(2)); + EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResultVT, Vec, SubVec, + Index)); + return; + } + case Intrinsic::experimental_vector_extract: { + auto DL = getCurSDLoc(); + + SDValue Vec = getValue(I.getOperand(0)); + SDValue Index = getValue(I.getOperand(1)); + EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + + setValue(&I, 
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index)); + return; + } } } @@ -7068,6 +7116,41 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( setValue(&FPI, FPResult); } +static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) { + Optional<unsigned> ResOPC; + switch (VPIntrin.getIntrinsicID()) { +#define BEGIN_REGISTER_VP_INTRINSIC(INTRIN, ...) case Intrinsic::INTRIN: +#define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) ResOPC = ISD::VPSDID; +#define END_REGISTER_VP_INTRINSIC(...) break; +#include "llvm/IR/VPIntrinsics.def" + } + + if (!ResOPC.hasValue()) + llvm_unreachable( + "Inconsistency: no SDNode available for this VPIntrinsic!"); + + return ResOPC.getValue(); +} + +void SelectionDAGBuilder::visitVectorPredicationIntrinsic( + const VPIntrinsic &VPIntrin) { + unsigned Opcode = getISDForVPIntrinsic(VPIntrin); + + SmallVector<EVT, 4> ValueVTs; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + ComputeValueVTs(TLI, DAG.getDataLayout(), VPIntrin.getType(), ValueVTs); + SDVTList VTs = DAG.getVTList(ValueVTs); + + // Request operands. 
+ SmallVector<SDValue, 7> OpValues; + for (int i = 0; i < (int)VPIntrin.getNumArgOperands(); ++i) + OpValues.push_back(getValue(VPIntrin.getArgOperand(i))); + + SDLoc DL = getCurSDLoc(); + SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues); + setValue(&VPIntrin, Result); +} + std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, const BasicBlock *EHPadBB) { @@ -7284,9 +7367,9 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, } SDValue Ptr = Builder.getValue(PtrVal); - SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, - Ptr, MachinePointerInfo(PtrVal), - /* Alignment = */ 1); + SDValue LoadVal = + Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr, + MachinePointerInfo(PtrVal), Align(1)); if (!ConstantMemory) Builder.PendingLoads.push_back(LoadVal.getValue(1)); @@ -7307,12 +7390,12 @@ void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, setValue(&I, Value); } -/// See if we can lower a memcmp call into an optimized form. If so, return +/// See if we can lower a memcmp/bcmp call into an optimized form. If so, return /// true and lower it. Otherwise return false, and it will be lowered like a /// normal call. /// The caller already checked that \p I calls the appropriate LibFunc with a /// correct prototype. 
-bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { +bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) { const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1); const Value *Size = I.getArgOperand(2); const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); @@ -7563,8 +7646,12 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, if (!I.onlyReadsMemory()) return false; + SDNodeFlags Flags; + Flags.copyFMF(cast<FPMathOperator>(I)); + SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp)); + setValue(&I, + DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp, Flags)); return true; } @@ -7579,10 +7666,13 @@ bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, if (!I.onlyReadsMemory()) return false; + SDNodeFlags Flags; + Flags.copyFMF(cast<FPMathOperator>(I)); + SDValue Tmp0 = getValue(I.getArgOperand(0)); SDValue Tmp1 = getValue(I.getArgOperand(1)); EVT VT = Tmp0.getValueType(); - setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1)); + setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1, Flags)); return true; } @@ -7616,6 +7706,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { LibInfo->hasOptimizedCodeGen(Func)) { switch (Func) { default: break; + case LibFunc_bcmp: + if (visitMemCmpBCmpCall(I)) + return; + break; case LibFunc_copysign: case LibFunc_copysignf: case LibFunc_copysignl: @@ -7717,7 +7811,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; break; case LibFunc_memcmp: - if (visitMemCmpCall(I)) + if (visitMemCmpBCmpCall(I)) return; break; case LibFunc_mempcpy: @@ -8137,10 +8231,9 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call) { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpInfo.ConstraintVT = - OpInfo - .getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout()) - .getSimpleVT(); + EVT VT = 
OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, + DAG.getDataLayout()); + OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other; } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) { // The return value of the call is this value. As such, there is no // corresponding argument. @@ -8402,7 +8495,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call) { InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant( ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); - AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); + llvm::append_range(AsmNodeOperands, Ops); break; } @@ -8982,57 +9075,59 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, SDLoc dl = getCurSDLoc(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDValue Res; - FastMathFlags FMF; - if (isa<FPMathOperator>(I)) - FMF = I.getFastMathFlags(); + SDNodeFlags SDFlags; + if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) + SDFlags.copyFMF(*FPMO); switch (Intrinsic) { - case Intrinsic::experimental_vector_reduce_v2_fadd: - if (FMF.allowReassoc()) + case Intrinsic::vector_reduce_fadd: + if (SDFlags.hasAllowReassociation()) Res = DAG.getNode(ISD::FADD, dl, VT, Op1, - DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2)); + DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2, SDFlags), + SDFlags); else - Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2); + Res = DAG.getNode(ISD::VECREDUCE_SEQ_FADD, dl, VT, Op1, Op2, SDFlags); break; - case Intrinsic::experimental_vector_reduce_v2_fmul: - if (FMF.allowReassoc()) + case Intrinsic::vector_reduce_fmul: + if (SDFlags.hasAllowReassociation()) Res = DAG.getNode(ISD::FMUL, dl, VT, Op1, - DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2)); + DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2, SDFlags), + SDFlags); else - Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2); + Res = DAG.getNode(ISD::VECREDUCE_SEQ_FMUL, dl, 
VT, Op1, Op2, SDFlags); break; - case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::vector_reduce_add: Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::vector_reduce_mul: Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::vector_reduce_and: Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::vector_reduce_or: Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::vector_reduce_xor: Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::vector_reduce_smax: Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::vector_reduce_smin: Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::vector_reduce_umax: Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::vector_reduce_umin: Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_fmax: - Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1); + case Intrinsic::vector_reduce_fmax: + Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags); break; - case Intrinsic::experimental_vector_reduce_fmin: - Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1); + case Intrinsic::vector_reduce_fmin: + Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags); break; default: llvm_unreachable("Unhandled vector reduce intrinsic"); @@ -9119,6 +9214,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Entry.IsSRet = true; Entry.IsNest = false; 
Entry.IsByVal = false; + Entry.IsByRef = false; Entry.IsReturned = false; Entry.IsSwiftSelf = false; Entry.IsSwiftError = false; @@ -9239,6 +9335,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setCFGuardTarget(); if (Args[i].IsByVal) Flags.setByVal(); + if (Args[i].IsByRef) + Flags.setByRef(); if (Args[i].IsPreallocated) { Flags.setPreallocated(); // Set the byval flag for CCAssignFn callbacks that don't know about @@ -9444,11 +9542,33 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { return std::make_pair(Res, CLI.Chain); } +/// Places new result values for the node in Results (their number +/// and types must exactly match those of the original return values of +/// the node), or leaves Results empty, which indicates that the node is not +/// to be custom lowered after all. void TargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { - if (SDValue Res = LowerOperation(SDValue(N, 0), DAG)) + SDValue Res = LowerOperation(SDValue(N, 0), DAG); + + if (!Res.getNode()) + return; + + // If the original node has one result, take the return value from + // LowerOperation as is. It might not be result number 0. + if (N->getNumValues() == 1) { Results.push_back(Res); + return; + } + + // If the original node has multiple results, then the return node should + // have the same number of results. + assert((N->getNumValues() == Res->getNumValues()) && + "Lowering returned the wrong number of results!"); + + // Places new result values base on N result number. + for (unsigned I = 0, E = N->getNumValues(); I != E; ++I) + Results.push_back(Res.getValue(I)); } SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -9571,7 +9691,7 @@ findArgumentCopyElisionCandidates(const DataLayout &DL, // initializes the alloca. Don't elide copies from the same argument twice. 
const Value *Val = SI->getValueOperand()->stripPointerCasts(); const auto *Arg = dyn_cast<Argument>(Val); - if (!Arg || Arg->hasPassPointeeByValueAttr() || + if (!Arg || Arg->hasPassPointeeByValueCopyAttr() || Arg->getType()->isEmptyTy() || DL.getTypeStoreSize(Arg->getType()) != DL.getTypeAllocSize(AI->getAllocatedType()) || @@ -9752,6 +9872,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Flags.setSwiftError(); if (Arg.hasAttribute(Attribute::ByVal)) Flags.setByVal(); + if (Arg.hasAttribute(Attribute::ByRef)) + Flags.setByRef(); if (Arg.hasAttribute(Attribute::InAlloca)) { Flags.setInAlloca(); // Set the byval flag for CCAssignFn callbacks that don't know about @@ -9770,27 +9892,31 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // preallocated handling in the various CC lowering callbacks. Flags.setByVal(); } - if (F.getCallingConv() == CallingConv::X86_INTR) { - // IA Interrupt passes frame (1st parameter) by value in the stack. - if (ArgNo == 0) - Flags.setByVal(); - } - if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) { - Type *ElementTy = Arg.getParamByValType(); - // For ByVal, size and alignment should be passed from FE. BE will - // guess if this info is not there but there are cases it cannot get - // right. - unsigned FrameSize = DL.getTypeAllocSize(Arg.getParamByValType()); - Flags.setByValSize(FrameSize); + Type *ArgMemTy = nullptr; + if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() || + Flags.isByRef()) { + if (!ArgMemTy) + ArgMemTy = Arg.getPointeeInMemoryValueType(); - unsigned FrameAlign; - if (Arg.getParamAlignment()) - FrameAlign = Arg.getParamAlignment(); - else - FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); - Flags.setByValAlign(Align(FrameAlign)); + uint64_t MemSize = DL.getTypeAllocSize(ArgMemTy); + + // For in-memory arguments, size and alignment should be passed from FE. + // BE will guess if this info is not there but there are cases it cannot + // get right. 
+ MaybeAlign MemAlign = Arg.getParamAlign(); + if (!MemAlign) + MemAlign = Align(TLI->getByValTypeAlignment(ArgMemTy, DL)); + + if (Flags.isByRef()) { + Flags.setByRefSize(MemSize); + Flags.setByRefAlign(*MemAlign); + } else { + Flags.setByValSize(MemSize); + Flags.setByValAlign(*MemAlign); + } } + if (Arg.hasAttribute(Attribute::Nest)) Flags.setNest(); if (NeedsRegBlock) @@ -10667,8 +10793,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { {PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb}); while (!WorkList.empty()) { - SwitchWorkListItem W = WorkList.back(); - WorkList.pop_back(); + SwitchWorkListItem W = WorkList.pop_back_val(); unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None && diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index f0b7fb0d5229..8f6e98c40161 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -18,7 +18,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SwitchLoweringUtils.h" @@ -26,7 +25,6 @@ #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Instruction.h" -#include "llvm/IR/Statepoint.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" @@ -39,6 +37,7 @@ namespace llvm { +class AAResults; class AllocaInst; class AtomicCmpXchgInst; class AtomicRMWInst; @@ -63,6 +62,7 @@ class FunctionLoweringInfo; class GCFunctionInfo; class GCRelocateInst; class GCResultInst; +class GCStatepointInst; class IndirectBrInst; class InvokeInst; class LandingPadInst; @@ -388,7 +388,7 @@ public: SelectionDAG &DAG; const DataLayout *DL 
= nullptr; - AliasAnalysis *AA = nullptr; + AAResults *AA = nullptr; const TargetLibraryInfo *LibInfo; class SDAGSwitchLowering : public SwitchCG::SwitchLowering { @@ -442,7 +442,7 @@ public: SL(std::make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo), SwiftError(swifterror) {} - void init(GCFunctionInfo *gfi, AliasAnalysis *AA, + void init(GCFunctionInfo *gfi, AAResults *AA, const TargetLibraryInfo *li); /// Clear out the current SelectionDAG and the associated state and prepare @@ -518,13 +518,6 @@ public: SDValue getValue(const Value *V); - /// Return the SDNode for the specified IR value if it exists. - SDNode *getNodeForIRValue(const Value *V) { - if (NodeMap.find(V) == NodeMap.end()) - return nullptr; - return NodeMap[V].getNode(); - } - SDValue getNonRegisterValue(const Value *V); SDValue getValueImpl(const Value *V); @@ -692,7 +685,7 @@ private: void visitAdd(const User &I) { visitBinary(I, ISD::ADD); } void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); } void visitSub(const User &I) { visitBinary(I, ISD::SUB); } - void visitFSub(const User &I); + void visitFSub(const User &I) { visitBinary(I, ISD::FSUB); } void visitMul(const User &I) { visitBinary(I, ISD::MUL); } void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); } void visitURem(const User &I) { visitBinary(I, ISD::UREM); } @@ -747,7 +740,7 @@ private: void visitFence(const FenceInst &I); void visitPHI(const PHINode &I); void visitCall(const CallInst &I); - bool visitMemCmpCall(const CallInst &I); + bool visitMemCmpBCmpCall(const CallInst &I); bool visitMemPCpyCall(const CallInst &I); bool visitMemChrCall(const CallInst &I); bool visitStrCpyCall(const CallInst &I, bool isStpcpy); @@ -766,6 +759,7 @@ private: void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI); + void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin); 
void visitVAStart(const CallInst &I); void visitVAArg(const VAArgInst &I); @@ -902,7 +896,7 @@ struct RegsForValue { } /// Return a list of registers and their sizes. - SmallVector<std::pair<unsigned, unsigned>, 4> getRegsAndSizes() const; + SmallVector<std::pair<unsigned, TypeSize>, 4> getRegsAndSizes() const; }; } // end namespace llvm diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 42e3016e65b8..d867f3e09e9c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -293,6 +293,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ADDC: return "addc"; case ISD::ADDE: return "adde"; case ISD::ADDCARRY: return "addcarry"; + case ISD::SADDO_CARRY: return "saddo_carry"; case ISD::SADDO: return "saddo"; case ISD::UADDO: return "uaddo"; case ISD::SSUBO: return "ssubo"; @@ -302,6 +303,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SUBC: return "subc"; case ISD::SUBE: return "sube"; case ISD::SUBCARRY: return "subcarry"; + case ISD::SSUBO_CARRY: return "ssubo_carry"; case ISD::SHL_PARTS: return "shl_parts"; case ISD::SRA_PARTS: return "sra_parts"; case ISD::SRL_PARTS: return "srl_parts"; @@ -310,6 +312,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::UADDSAT: return "uaddsat"; case ISD::SSUBSAT: return "ssubsat"; case ISD::USUBSAT: return "usubsat"; + case ISD::SSHLSAT: return "sshlsat"; + case ISD::USHLSAT: return "ushlsat"; case ISD::SMULFIX: return "smulfix"; case ISD::SMULFIXSAT: return "smulfixsat"; @@ -344,6 +348,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; case ISD::STRICT_FP_TO_UINT: return "strict_fp_to_uint"; + case ISD::FP_TO_SINT_SAT: return "fp_to_sint_sat"; + case ISD::FP_TO_UINT_SAT: 
return "fp_to_uint_sat"; case ISD::BITCAST: return "bitcast"; case ISD::ADDRSPACECAST: return "addrspacecast"; case ISD::FP16_TO_FP: return "fp16_to_fp"; @@ -390,8 +396,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STACKRESTORE: return "stackrestore"; case ISD::TRAP: return "trap"; case ISD::DEBUGTRAP: return "debugtrap"; + case ISD::UBSANTRAP: return "ubsantrap"; case ISD::LIFETIME_START: return "lifetime.start"; case ISD::LIFETIME_END: return "lifetime.end"; + case ISD::PSEUDO_PROBE: + return "pseudoprobe"; case ISD::GC_TRANSITION_START: return "gc_transition.start"; case ISD::GC_TRANSITION_END: return "gc_transition.end"; case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset"; @@ -410,6 +419,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef"; case ISD::CTLZ: return "ctlz"; case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef"; + case ISD::PARITY: return "parity"; // Trampolines case ISD::INIT_TRAMPOLINE: return "init_trampoline"; @@ -447,9 +457,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SETFALSE2: return "setfalse2"; } case ISD::VECREDUCE_FADD: return "vecreduce_fadd"; - case ISD::VECREDUCE_STRICT_FADD: return "vecreduce_strict_fadd"; + case ISD::VECREDUCE_SEQ_FADD: return "vecreduce_seq_fadd"; case ISD::VECREDUCE_FMUL: return "vecreduce_fmul"; - case ISD::VECREDUCE_STRICT_FMUL: return "vecreduce_strict_fmul"; + case ISD::VECREDUCE_SEQ_FMUL: return "vecreduce_seq_fmul"; case ISD::VECREDUCE_ADD: return "vecreduce_add"; case ISD::VECREDUCE_MUL: return "vecreduce_mul"; case ISD::VECREDUCE_AND: return "vecreduce_and"; @@ -461,6 +471,12 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::VECREDUCE_UMIN: return "vecreduce_umin"; case ISD::VECREDUCE_FMAX: return "vecreduce_fmax"; case ISD::VECREDUCE_FMIN: return "vecreduce_fmin"; + + // Vector Predication +#define 
BEGIN_REGISTER_VP_SDNODE(SDID, LEGALARG, NAME, ...) \ + case ISD::SDID: \ + return #NAME; +#include "llvm/IR/VPIntrinsics.def" } } @@ -730,7 +746,38 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << ", compressing"; OS << ">"; - } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { + } else if (const auto *MGather = dyn_cast<MaskedGatherSDNode>(this)) { + OS << "<"; + printMemOperand(OS, *MGather->getMemOperand(), G); + + bool doExt = true; + switch (MGather->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: OS << ", anyext"; break; + case ISD::SEXTLOAD: OS << ", sext"; break; + case ISD::ZEXTLOAD: OS << ", zext"; break; + } + if (doExt) + OS << " from " << MGather->getMemoryVT().getEVTString(); + + auto Signed = MGather->isIndexSigned() ? "signed" : "unsigned"; + auto Scaled = MGather->isIndexScaled() ? "scaled" : "unscaled"; + OS << ", " << Signed << " " << Scaled << " offset"; + + OS << ">"; + } else if (const auto *MScatter = dyn_cast<MaskedScatterSDNode>(this)) { + OS << "<"; + printMemOperand(OS, *MScatter->getMemOperand(), G); + + if (MScatter->isTruncatingStore()) + OS << ", trunc to " << MScatter->getMemoryVT().getEVTString(); + + auto Signed = MScatter->isIndexSigned() ? "signed" : "unsigned"; + auto Scaled = MScatter->isIndexScaled() ? 
"scaled" : "unscaled"; + OS << ", " << Signed << " " << Scaled << " offset"; + + OS << ">"; + } else if (const MemSDNode *M = dyn_cast<MemSDNode>(this)) { OS << "<"; printMemOperand(OS, *M->getMemOperand(), G); OS << ">"; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 1f0432196a2d..7bae5048fc0e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -75,6 +75,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Statepoint.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -778,6 +779,11 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { << "'\n"; CurDAG->dump()); +#ifndef NDEBUG + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); +#endif + if (ViewDAGCombine1 && MatchFilterBB) CurDAG->viewGraph("dag-combine1 input for " + BlockName); @@ -788,16 +794,16 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel); } -#ifndef NDEBUG - if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); -#endif - LLVM_DEBUG(dbgs() << "Optimized lowered selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; CurDAG->dump()); +#ifndef NDEBUG + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); +#endif + // Second step, hack on the DAG until it only uses operations and types that // the target supports. 
if (ViewLegalizeTypesDAGs && MatchFilterBB) @@ -810,16 +816,16 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Changed = CurDAG->LegalizeTypes(); } -#ifndef NDEBUG - if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); -#endif - LLVM_DEBUG(dbgs() << "Type-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; CurDAG->dump()); +#ifndef NDEBUG + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); +#endif + // Only allow creation of legal node types. CurDAG->NewNodesMustHaveLegalTypes = true; @@ -834,15 +840,15 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel); } -#ifndef NDEBUG - if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); -#endif - LLVM_DEBUG(dbgs() << "Optimized type-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; CurDAG->dump()); + +#ifndef NDEBUG + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); +#endif } { @@ -857,6 +863,11 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { << "'\n"; CurDAG->dump()); +#ifndef NDEBUG + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); +#endif + { NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName, GroupDescription, TimePassesIsEnabled); @@ -868,6 +879,11 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { << "'\n"; CurDAG->dump()); +#ifndef NDEBUG + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); +#endif + if (ViewDAGCombineLT && MatchFilterBB) CurDAG->viewGraph("dag-combine-lv input for " + BlockName); @@ -898,16 +914,16 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Legalize(); } -#ifndef NDEBUG - if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); -#endif - LLVM_DEBUG(dbgs() << "Legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; CurDAG->dump()); +#ifndef NDEBUG + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); +#endif + if 
(ViewDAGCombine2 && MatchFilterBB) CurDAG->viewGraph("dag-combine2 input for " + BlockName); @@ -918,16 +934,16 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel); } -#ifndef NDEBUG - if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); -#endif - LLVM_DEBUG(dbgs() << "Optimized legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; CurDAG->dump()); +#ifndef NDEBUG + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); +#endif + if (OptLevel != CodeGenOpt::None) ComputeLiveOutVRegInfo(); @@ -1251,6 +1267,12 @@ bool SelectionDAGISel::PrepareEHLandingPad() { BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); + // If the unwinder does not preserve all registers, ensure that the + // function marks the clobbered registers as used. + const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); + if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF)) + MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask); + if (Pers == EHPersonality::Wasm_CXX) { if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) mapWasmLandingPadIndex(MBB, CPI); @@ -2072,7 +2094,7 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size()); NewFlags = InlineAsm::getFlagWordForMem(NewFlags, ConstraintID); Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32)); - Ops.insert(Ops.end(), SelOps.begin(), SelOps.end()); + llvm::append_range(Ops, SelOps); i += 2; } } @@ -2272,7 +2294,7 @@ void SelectionDAGISel::Select_FREEZE(SDNode *N) { } /// GetVBR - decode a vbr encoding whose top bit is set. -LLVM_ATTRIBUTE_ALWAYS_INLINE static inline uint64_t +LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { assert(Val >= 128 && "Not a VBR"); Val &= 127; // Remove first vbr bit. 
@@ -2331,7 +2353,7 @@ void SelectionDAGISel::UpdateChains( // If the node became dead and we haven't already seen it, delete it. if (ChainNode != NodeToMatch && ChainNode->use_empty() && - !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode)) + !llvm::is_contained(NowDeadNodes, ChainNode)) NowDeadNodes.push_back(ChainNode); } } @@ -2469,10 +2491,9 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, } /// CheckSame - Implements OP_CheckSame. -LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool -CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, - const SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes) { +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, + const SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes) { // Accept if it is exactly the same as a previously recorded node. unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); @@ -2480,11 +2501,10 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, } /// CheckChildSame - Implements OP_CheckChildXSame. -LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool -CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, - const SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes, - unsigned ChildNo) { +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildSame( + const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, + const SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes, + unsigned ChildNo) { if (ChildNo >= N.getNumOperands()) return false; // Match fails if out of range child #. return ::CheckSame(MatcherTable, MatcherIndex, N.getOperand(ChildNo), @@ -2492,20 +2512,20 @@ CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, } /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. 
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, const SelectionDAGISel &SDISel) { return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]); } /// CheckNodePredicate - Implements OP_CheckNodePredicate. -LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, const SelectionDAGISel &SDISel, SDNode *N) { return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDNode *N) { uint16_t Opc = MatcherTable[MatcherIndex++]; @@ -2513,7 +2533,7 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, return N->getOpcode() == Opc; } -LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; @@ -2523,7 +2543,7 @@ CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL, unsigned ChildNo) { @@ -2533,14 +2553,14 @@ CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, DL); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N) { return cast<CondCodeSDNode>(N)->get() 
== (ISD::CondCode)MatcherTable[MatcherIndex++]; } -LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChild2CondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N) { if (2 >= N.getNumOperands()) @@ -2548,7 +2568,7 @@ CheckChild2CondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, return ::CheckCondCode(MatcherTable, MatcherIndex, N.getOperand(2)); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; @@ -2559,7 +2579,7 @@ CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(DL); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N) { int64_t Val = MatcherTable[MatcherIndex++]; @@ -2570,7 +2590,7 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, return C && C->getSExtValue() == Val; } -LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, unsigned ChildNo) { if (ChildNo >= N.getNumOperands()) @@ -2578,7 +2598,7 @@ CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo)); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const SelectionDAGISel &SDISel) { int64_t Val = MatcherTable[MatcherIndex++]; @@ -2591,9 +2611,9 @@ CheckAndImm(const unsigned char *MatcherTable, 
unsigned &MatcherIndex, return C && SDISel.CheckAndMask(N.getOperand(0), C, Val); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool -CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, const SelectionDAGISel &SDISel) { +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, + const SelectionDAGISel &SDISel) { int64_t Val = MatcherTable[MatcherIndex++]; if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); @@ -2786,6 +2806,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::ANNOTATION_LABEL: case ISD::LIFETIME_START: case ISD::LIFETIME_END: + case ISD::PSEUDO_PROBE: NodeToMatch->setNodeId(-1); // Mark selected. return; case ISD::AssertSext: @@ -3181,10 +3202,12 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break; continue; case OPC_CheckImmAllOnesV: - if (!ISD::isBuildVectorAllOnes(N.getNode())) break; + if (!ISD::isConstantSplatVectorAllOnes(N.getNode())) + break; continue; case OPC_CheckImmAllZerosV: - if (!ISD::isBuildVectorAllZeros(N.getNode())) break; + if (!ISD::isConstantSplatVectorAllZeros(N.getNode())) + break; continue; case OPC_CheckFoldableChainNode: { @@ -3489,7 +3512,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, auto &Chain = ChainNodesMatched; assert((!E || !is_contained(Chain, N)) && "Chain node replaced during MorphNode"); - Chain.erase(std::remove(Chain.begin(), Chain.end(), N), Chain.end()); + llvm::erase_value(Chain, N); }); Res = cast<MachineSDNode>(MorphNode(NodeToMatch, TargetOpc, VTList, Ops, EmitNodeInfo)); diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 2cb57c1d1ccc..0172646c22ec 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -14,12 +14,10 @@ #include 
"StatepointLowering.h" #include "SelectionDAGBuilder.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -30,7 +28,6 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" @@ -67,6 +64,20 @@ cl::opt<bool> UseRegistersForDeoptValues( "use-registers-for-deopt-values", cl::Hidden, cl::init(false), cl::desc("Allow using registers for non pointer deopt args")); +cl::opt<bool> UseRegistersForGCPointersInLandingPad( + "use-registers-for-gc-values-in-landing-pad", cl::Hidden, cl::init(false), + cl::desc("Allow using registers for gc pointer in landing pad")); + +cl::opt<unsigned> MaxRegistersForGCPointers( + "max-registers-for-gc-values", cl::Hidden, cl::init(0), + cl::desc("Max number of VRegs allowed to pass GC pointer meta args in")); + +cl::opt<bool> AlwaysSpillBase("statepoint-always-spill-base", cl::Hidden, + cl::init(true), + cl::desc("Force spilling of base GC pointers")); + +typedef FunctionLoweringInfo::StatepointRelocationRecord RecordType; + static void pushStackMapConstant(SmallVectorImpl<SDValue>& Ops, SelectionDAGBuilder &Builder, uint64_t Value) { SDLoc L = Builder.getCurSDLoc(); @@ -156,14 +167,18 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, // Spill location is known for gc relocates if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) { - const auto &SpillMap = - Builder.FuncInfo.StatepointSpillMaps[Relocate->getStatepoint()]; + const auto &RelocationMap = + 
Builder.FuncInfo.StatepointRelocationMaps[Relocate->getStatepoint()]; - auto It = SpillMap.find(Relocate->getDerivedPtr()); - if (It == SpillMap.end()) + auto It = RelocationMap.find(Relocate->getDerivedPtr()); + if (It == RelocationMap.end()) return None; - return It->second; + auto &Record = It->second; + if (Record.type != RecordType::Spill) + return None; + + return Record.payload.FI; } // Look through bitcast instructions. @@ -221,7 +236,6 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, return None; } - /// Return true if-and-only-if the given SDValue can be lowered as either a /// constant argument or a stack reference. The key point is that the value /// doesn't need to be spilled or tracked as a vreg use. @@ -242,7 +256,6 @@ static bool willLowerDirectly(SDValue Incoming) { Incoming.isUndef()); } - /// Try to find existing copies of the incoming values in stack slots used for /// statepoint spilling. If we can find a spill slot for the incoming value, /// mark that slot as allocated, and reuse the same slot for this safepoint. @@ -388,7 +401,7 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, StoreMMO); MMO = getMachineMemOperand(MF, *cast<FrameIndexSDNode>(Loc)); - + Builder.StatepointLowering.setLocation(Incoming, Loc); } @@ -485,7 +498,10 @@ lowerIncomingStatepointValue(SDValue Incoming, bool RequireSpillSlot, /// will be set to the last value spilled (if any were). static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, - SmallVectorImpl<MachineMemOperand*> &MemRefs, SelectionDAGBuilder::StatepointLoweringInfo &SI, + SmallVectorImpl<MachineMemOperand *> &MemRefs, + SmallVectorImpl<SDValue> &GCPtrs, + DenseMap<SDValue, int> &LowerAsVReg, + SelectionDAGBuilder::StatepointLoweringInfo &SI, SelectionDAGBuilder &Builder) { // Lower the deopt and gc arguments for this statepoint. Layout will be: // deopt argument length, deopt arguments.., gc arguments... 
@@ -531,6 +547,66 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, const bool LiveInDeopt = SI.StatepointFlags & (uint64_t)StatepointFlags::DeoptLiveIn; + // Decide which deriver pointers will go on VRegs + unsigned MaxVRegPtrs = MaxRegistersForGCPointers.getValue(); + + // Pointers used on exceptional path of invoke statepoint. + // We cannot assing them to VRegs. + SmallSet<SDValue, 8> LPadPointers; + if (!UseRegistersForGCPointersInLandingPad) + if (auto *StInvoke = dyn_cast_or_null<InvokeInst>(SI.StatepointInstr)) { + LandingPadInst *LPI = StInvoke->getLandingPadInst(); + for (auto *Relocate : SI.GCRelocates) + if (Relocate->getOperand(0) == LPI) { + LPadPointers.insert(Builder.getValue(Relocate->getBasePtr())); + LPadPointers.insert(Builder.getValue(Relocate->getDerivedPtr())); + } + } + + LLVM_DEBUG(dbgs() << "Deciding how to lower GC Pointers:\n"); + + // List of unique lowered GC Pointer values. + SmallSetVector<SDValue, 16> LoweredGCPtrs; + // Map lowered GC Pointer value to the index in above vector + DenseMap<SDValue, unsigned> GCPtrIndexMap; + + unsigned CurNumVRegs = 0; + + auto canPassGCPtrOnVReg = [&](SDValue SD) { + if (SD.getValueType().isVector()) + return false; + if (LPadPointers.count(SD)) + return false; + return !willLowerDirectly(SD); + }; + + auto processGCPtr = [&](const Value *V) { + SDValue PtrSD = Builder.getValue(V); + if (!LoweredGCPtrs.insert(PtrSD)) + return; // skip duplicates + GCPtrIndexMap[PtrSD] = LoweredGCPtrs.size() - 1; + + assert(!LowerAsVReg.count(PtrSD) && "must not have been seen"); + if (LowerAsVReg.size() == MaxVRegPtrs) + return; + assert(V->getType()->isVectorTy() == PtrSD.getValueType().isVector() && + "IR and SD types disagree"); + if (!canPassGCPtrOnVReg(PtrSD)) { + LLVM_DEBUG(dbgs() << "direct/spill "; PtrSD.dump(&Builder.DAG)); + return; + } + LLVM_DEBUG(dbgs() << "vreg "; PtrSD.dump(&Builder.DAG)); + LowerAsVReg[PtrSD] = CurNumVRegs++; + }; + + // Process derived pointers first to give them more chance 
to go on VReg. + for (const Value *V : SI.Ptrs) + processGCPtr(V); + for (const Value *V : SI.Bases) + processGCPtr(V); + + LLVM_DEBUG(dbgs() << LowerAsVReg.size() << " pointers will go in vregs\n"); + auto isGCValue = [&](const Value *V) { auto *Ty = V->getType(); if (!Ty->isPtrOrPtrVectorTy()) @@ -542,7 +618,9 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, }; auto requireSpillSlot = [&](const Value *V) { - return !(LiveInDeopt || UseRegistersForDeoptValues) || isGCValue(V); + if (isGCValue(V)) + return !LowerAsVReg.count(Builder.getValue(V)); + return !(LiveInDeopt || UseRegistersForDeoptValues); }; // Before we actually start lowering (and allocating spill slots for values), @@ -554,9 +632,17 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, if (requireSpillSlot(V)) reservePreviousStackSlotForValue(V, Builder); } - for (unsigned i = 0; i < SI.Bases.size(); ++i) { - reservePreviousStackSlotForValue(SI.Bases[i], Builder); - reservePreviousStackSlotForValue(SI.Ptrs[i], Builder); + + for (const Value *V : SI.Ptrs) { + SDValue SDV = Builder.getValue(V); + if (!LowerAsVReg.count(SDV)) + reservePreviousStackSlotForValue(V, Builder); + } + + for (const Value *V : SI.Bases) { + SDValue SDV = Builder.getValue(V); + if (!LowerAsVReg.count(SDV)) + reservePreviousStackSlotForValue(V, Builder); } // First, prefix the list with the number of unique values to be @@ -567,6 +653,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // The vm state arguments are lowered in an opaque manner. We do not know // what type of values are contained within. 
+ LLVM_DEBUG(dbgs() << "Lowering deopt state\n"); for (const Value *V : SI.DeoptState) { SDValue Incoming; // If this is a function argument at a static frame index, generate it as @@ -578,78 +665,56 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, } if (!Incoming.getNode()) Incoming = Builder.getValue(V); + LLVM_DEBUG(dbgs() << "Value " << *V + << " requireSpillSlot = " << requireSpillSlot(V) << "\n"); lowerIncomingStatepointValue(Incoming, requireSpillSlot(V), Ops, MemRefs, Builder); } - // Finally, go ahead and lower all the gc arguments. There's no prefixed - // length for this one. After lowering, we'll have the base and pointer - // arrays interwoven with each (lowered) base pointer immediately followed by - // it's (lowered) derived pointer. i.e - // (base[0], ptr[0], base[1], ptr[1], ...) - for (unsigned i = 0; i < SI.Bases.size(); ++i) { - const Value *Base = SI.Bases[i]; - lowerIncomingStatepointValue(Builder.getValue(Base), - /*RequireSpillSlot*/ true, Ops, MemRefs, + // Finally, go ahead and lower all the gc arguments. + pushStackMapConstant(Ops, Builder, LoweredGCPtrs.size()); + for (SDValue SDV : LoweredGCPtrs) + lowerIncomingStatepointValue(SDV, !LowerAsVReg.count(SDV), Ops, MemRefs, Builder); - const Value *Ptr = SI.Ptrs[i]; - lowerIncomingStatepointValue(Builder.getValue(Ptr), - /*RequireSpillSlot*/ true, Ops, MemRefs, - Builder); - } + // Copy to out vector. LoweredGCPtrs will be empty after this point. + GCPtrs = LoweredGCPtrs.takeVector(); // If there are any explicit spill slots passed to the statepoint, record // them, but otherwise do not do anything special. These are user provided // allocas and give control over placement to the consumer. 
In this case, // it is the contents of the slot which may get updated, not the pointer to // the alloca + SmallVector<SDValue, 4> Allocas; for (Value *V : SI.GCArgs) { SDValue Incoming = Builder.getValue(V); if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { // This handles allocas as arguments to the statepoint assert(Incoming.getValueType() == Builder.getFrameIndexTy() && "Incoming value is a frame index!"); - Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), - Builder.getFrameIndexTy())); + Allocas.push_back(Builder.DAG.getTargetFrameIndex( + FI->getIndex(), Builder.getFrameIndexTy())); auto &MF = Builder.DAG.getMachineFunction(); auto *MMO = getMachineMemOperand(MF, *FI); MemRefs.push_back(MMO); } } + pushStackMapConstant(Ops, Builder, Allocas.size()); + Ops.append(Allocas.begin(), Allocas.end()); - // Record computed locations for all lowered values. - // This can not be embedded in lowering loops as we need to record *all* - // values, while previous loops account only values with unique SDValues. - const Instruction *StatepointInstr = SI.StatepointInstr; - auto &SpillMap = Builder.FuncInfo.StatepointSpillMaps[StatepointInstr]; - - for (const GCRelocateInst *Relocate : SI.GCRelocates) { - const Value *V = Relocate->getDerivedPtr(); - SDValue SDV = Builder.getValue(V); - SDValue Loc = Builder.StatepointLowering.getLocation(SDV); - - if (Loc.getNode()) { - SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex(); - } else { - // Record value as visited, but not spilled. This is case for allocas - // and constants. For this values we can avoid emitting spill load while - // visiting corresponding gc_relocate. - // Actually we do not need to record them in this map at all. - // We do this only to check that we are not relocating any unvisited - // value. - SpillMap[V] = None; - - // Default llvm mechanisms for exporting values which are used in - // different basic blocks does not work for gc relocates. 
- // Note that it would be incorrect to teach llvm that all relocates are - // uses of the corresponding values so that it would automatically - // export them. Relocates of the spilled values does not use original - // value. - if (Relocate->getParent() != StatepointInstr->getParent()) - Builder.ExportFromCurrentBlock(V); - } + // Now construct GC base/derived map; + pushStackMapConstant(Ops, Builder, SI.Ptrs.size()); + SDLoc L = Builder.getCurSDLoc(); + for (unsigned i = 0; i < SI.Ptrs.size(); ++i) { + SDValue Base = Builder.getValue(SI.Bases[i]); + assert(GCPtrIndexMap.count(Base) && "base not found in index map"); + Ops.push_back( + Builder.DAG.getTargetConstant(GCPtrIndexMap[Base], L, MVT::i64)); + SDValue Derived = Builder.getValue(SI.Ptrs[i]); + assert(GCPtrIndexMap.count(Derived) && "derived not found in index map"); + Ops.push_back( + Builder.DAG.getTargetConstant(GCPtrIndexMap[Derived], L, MVT::i64)); } } @@ -665,6 +730,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( assert(SI.Bases.size() == SI.Ptrs.size() && SI.Ptrs.size() <= SI.GCRelocates.size()); + LLVM_DEBUG(dbgs() << "Lowering statepoint " << *SI.StatepointInstr << "\n"); #ifndef NDEBUG for (auto *Reloc : SI.GCRelocates) if (Reloc->getParent() == SI.StatepointInstr->getParent()) @@ -672,9 +738,16 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( #endif // Lower statepoint vmstate and gcstate arguments + + // All lowered meta args. SmallVector<SDValue, 10> LoweredMetaArgs; + // Lowered GC pointers (subset of above). + SmallVector<SDValue, 16> LoweredGCArgs; SmallVector<MachineMemOperand*, 16> MemRefs; - lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, SI, *this); + // Maps derived pointer SDValue to statepoint result of relocated pointer. + DenseMap<SDValue, int> LowerAsVReg; + lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, LoweredGCArgs, LowerAsVReg, + SI, *this); // Now that we've emitted the spills, we need to update the root so that the // call sequence is ordered correctly. 
@@ -774,7 +847,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( pushStackMapConstant(Ops, *this, Flags); // Insert all vmstate and gcstate arguments - Ops.insert(Ops.end(), LoweredMetaArgs.begin(), LoweredMetaArgs.end()); + llvm::append_range(Ops, LoweredMetaArgs); // Add register mask from call node Ops.push_back(*RegMaskIt); @@ -788,12 +861,79 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( // Compute return values. Provide a glue output since we consume one as // input. This allows someone else to chain off us as needed. - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SmallVector<EVT, 8> NodeTys; + for (auto SD : LoweredGCArgs) { + if (!LowerAsVReg.count(SD)) + continue; + NodeTys.push_back(SD.getValueType()); + } + LLVM_DEBUG(dbgs() << "Statepoint has " << NodeTys.size() << " results\n"); + assert(NodeTys.size() == LowerAsVReg.size() && "Inconsistent GC Ptr lowering"); + NodeTys.push_back(MVT::Other); + NodeTys.push_back(MVT::Glue); + unsigned NumResults = NodeTys.size(); MachineSDNode *StatepointMCNode = DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops); DAG.setNodeMemRefs(StatepointMCNode, MemRefs); + // For values lowered to tied-defs, create the virtual registers. Note that + // for simplicity, we *always* create a vreg even within a single block. + DenseMap<SDValue, Register> VirtRegs; + for (const auto *Relocate : SI.GCRelocates) { + Value *Derived = Relocate->getDerivedPtr(); + SDValue SD = getValue(Derived); + if (!LowerAsVReg.count(SD)) + continue; + + // Handle multiple gc.relocates of the same input efficiently. 
+ if (VirtRegs.count(SD)) + continue; + + SDValue Relocated = SDValue(StatepointMCNode, LowerAsVReg[SD]); + + auto *RetTy = Relocate->getType(); + Register Reg = FuncInfo.CreateRegs(RetTy); + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), + DAG.getDataLayout(), Reg, RetTy, None); + SDValue Chain = DAG.getRoot(); + RFV.getCopyToRegs(Relocated, DAG, getCurSDLoc(), Chain, nullptr); + PendingExports.push_back(Chain); + + VirtRegs[SD] = Reg; + } + + // Record for later use how each relocation was lowered. This is needed to + // allow later gc.relocates to mirror the lowering chosen. + const Instruction *StatepointInstr = SI.StatepointInstr; + auto &RelocationMap = FuncInfo.StatepointRelocationMaps[StatepointInstr]; + for (const GCRelocateInst *Relocate : SI.GCRelocates) { + const Value *V = Relocate->getDerivedPtr(); + SDValue SDV = getValue(V); + SDValue Loc = StatepointLowering.getLocation(SDV); + + RecordType Record; + if (LowerAsVReg.count(SDV)) { + Record.type = RecordType::VReg; + assert(VirtRegs.count(SDV)); + Record.payload.Reg = VirtRegs[SDV]; + } else if (Loc.getNode()) { + Record.type = RecordType::Spill; + Record.payload.FI = cast<FrameIndexSDNode>(Loc)->getIndex(); + } else { + Record.type = RecordType::NoRelocate; + // If we didn't relocate a value, we'll essentialy end up inserting an + // additional use of the original value when lowering the gc.relocate. + // We need to make sure the value is available at the new use, which + // might be in another block. + if (Relocate->getParent() != StatepointInstr->getParent()) + ExportFromCurrentBlock(V); + } + RelocationMap[V] = Record; + } + + + SDNode *SinkNode = StatepointMCNode; // Build the GC_TRANSITION_END node if necessary. 
@@ -804,7 +944,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( SmallVector<SDValue, 8> TEOps; // Add chain - TEOps.push_back(SDValue(StatepointMCNode, 0)); + TEOps.push_back(SDValue(StatepointMCNode, NumResults - 2)); // Add GC transition arguments for (const Value *V : SI.GCTransitionArgs) { @@ -814,7 +954,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( } // Add glue - TEOps.push_back(SDValue(StatepointMCNode, 1)); + TEOps.push_back(SDValue(StatepointMCNode, NumResults - 1)); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -825,12 +965,18 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( } // Replace original call - DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root + // Call: ch,glue = CALL ... + // Statepoint: [gc relocates],ch,glue = STATEPOINT ... + unsigned NumSinkValues = SinkNode->getNumValues(); + SDValue StatepointValues[2] = {SDValue(SinkNode, NumSinkValues - 2), + SDValue(SinkNode, NumSinkValues - 1)}; + DAG.ReplaceAllUsesWith(CallNode, StatepointValues); // Remove original call node DAG.DeleteNode(CallNode); - // DON'T set the root - under the assumption that it's already set past the - // inserted node we created. + // Since we always emit CopyToRegs (even for local relocates), we must + // update root, so that they are emitted before any local uses. + (void)getControlRoot(); // TODO: A better future implementation would be to emit a single variable // argument, variable return value STATEPOINT node here and then hookup the @@ -927,7 +1073,7 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I, setValue(&I, ReturnValue); return; } - + // Result value will be used in a different basic block so we need to export // it now. Default exporting mechanism will not work here because statepoint // call has a different type than the actual call. 
It means that by default @@ -1024,6 +1170,28 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { #endif const Value *DerivedPtr = Relocate.getDerivedPtr(); + auto &RelocationMap = + FuncInfo.StatepointRelocationMaps[Relocate.getStatepoint()]; + auto SlotIt = RelocationMap.find(DerivedPtr); + assert(SlotIt != RelocationMap.end() && "Relocating not lowered gc value"); + const RecordType &Record = SlotIt->second; + + // If relocation was done via virtual register.. + if (Record.type == RecordType::VReg) { + Register InReg = Record.payload.Reg; + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), + DAG.getDataLayout(), InReg, Relocate.getType(), + None); // This is not an ABI copy. + // We generate copy to/from regs even for local uses, hence we must + // chain with current root to ensure proper ordering of copies w.r.t. + // statepoint. + SDValue Chain = DAG.getRoot(); + SDValue Relocation = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), + Chain, nullptr, nullptr); + setValue(&Relocate, Relocation); + return; + } + SDValue SD = getValue(DerivedPtr); if (SD.isUndef() && SD.getValueType().getSizeInBits() <= 64) { @@ -1033,19 +1201,17 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { return; } - auto &SpillMap = FuncInfo.StatepointSpillMaps[Relocate.getStatepoint()]; - auto SlotIt = SpillMap.find(DerivedPtr); - assert(SlotIt != SpillMap.end() && "Relocating not lowered gc value"); - Optional<int> DerivedPtrLocation = SlotIt->second; // We didn't need to spill these special cases (constants and allocas). // See the handling in spillIncomingValueForStatepoint for detail. 
- if (!DerivedPtrLocation) { + if (Record.type == RecordType::NoRelocate) { setValue(&Relocate, SD); return; } - unsigned Index = *DerivedPtrLocation; + assert(Record.type == RecordType::Spill); + + unsigned Index = Record.payload.FI;; SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy()); // All the reloads are independent and are reading memory only modified by diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 96df20039b15..5760132e44a0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -93,7 +93,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, SDValue Value = OutVals[I]; if (Value->getOpcode() != ISD::CopyFromReg) return false; - MCRegister ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg(); + Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg(); if (MRI.getLiveInPhysReg(ArgReg) != Reg) return false; } @@ -250,7 +250,7 @@ bool TargetLowering::findOptimalMemOpLowering( bool Fast; if (NumMemOps && Op.allowOverlap() && NewVTSize < Size && allowsMisalignedMemoryAccesses( - VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0, + VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 1, MachineMemOperand::MONone, &Fast) && Fast) VTSize = Size; @@ -912,8 +912,14 @@ bool TargetLowering::SimplifyDemandedBits( if (Op.getOpcode() == ISD::Constant) { // We know all of the bits for a constant! - Known.One = cast<ConstantSDNode>(Op)->getAPIntValue(); - Known.Zero = ~Known.One; + Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue()); + return false; + } + + if (Op.getOpcode() == ISD::ConstantFP) { + // We know all of the bits for a floating point constant! 
+ Known = KnownBits::makeConstant( + cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt()); return false; } @@ -1009,10 +1015,8 @@ bool TargetLowering::SimplifyDemandedBits( Depth + 1)) return true; - if (!!DemandedVecElts) { - Known.One &= KnownVec.One; - Known.Zero &= KnownVec.Zero; - } + if (!!DemandedVecElts) + Known = KnownBits::commonBits(Known, KnownVec); return false; } @@ -1037,14 +1041,10 @@ bool TargetLowering::SimplifyDemandedBits( Known.Zero.setAllBits(); Known.One.setAllBits(); - if (!!DemandedSubElts) { - Known.One &= KnownSub.One; - Known.Zero &= KnownSub.Zero; - } - if (!!DemandedSrcElts) { - Known.One &= KnownSrc.One; - Known.Zero &= KnownSrc.Zero; - } + if (!!DemandedSubElts) + Known = KnownBits::commonBits(Known, KnownSub); + if (!!DemandedSrcElts) + Known = KnownBits::commonBits(Known, KnownSrc); // Attempt to avoid multi-use src if we don't need anything from it. if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() || @@ -1101,10 +1101,8 @@ bool TargetLowering::SimplifyDemandedBits( Known2, TLO, Depth + 1)) return true; // Known bits are shared by every demanded subvector element. - if (!!DemandedSubElts) { - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; - } + if (!!DemandedSubElts) + Known = KnownBits::commonBits(Known, Known2); } break; } @@ -1142,15 +1140,13 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO, Depth + 1)) return true; - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; + Known = KnownBits::commonBits(Known, Known2); } if (!!DemandedRHS) { if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO, Depth + 1)) return true; - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; + Known = KnownBits::commonBits(Known, Known2); } // Attempt to avoid multi-use ops if we don't need anything from them. 
@@ -1325,15 +1321,15 @@ bool TargetLowering::SimplifyDemandedBits( return true; // If all of the unknown bits are known to be zero on one side or the other - // (but not both) turn this into an *inclusive* or. + // turn this into an *inclusive* or. // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero)) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1)); ConstantSDNode* C = isConstOrConstSplat(Op1, DemandedElts); if (C) { - // If one side is a constant, and all of the known set bits on the other - // side are also set in the constant, turn this into an AND, as we know + // If one side is a constant, and all of the set bits in the constant are + // also known set on the other side, turn this into an AND, as we know // the bits will be cleared. // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 // NB: it is okay if more bits are known than are requested @@ -1377,8 +1373,7 @@ bool TargetLowering::SimplifyDemandedBits( return true; // Only known if known in both the LHS and RHS. - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; + Known = KnownBits::commonBits(Known, Known2); break; case ISD::SELECT_CC: if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO, @@ -1395,8 +1390,7 @@ bool TargetLowering::SimplifyDemandedBits( return true; // Only known if known in both the LHS and RHS. - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; + Known = KnownBits::commonBits(Known, Known2); break; case ISD::SETCC: { SDValue Op0 = Op.getOperand(0); @@ -1728,6 +1722,32 @@ bool TargetLowering::SimplifyDemandedBits( } break; } + case ISD::UMIN: { + // Check if one arg is always less than (or equal) to the other arg. 
+ SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1); + KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1); + Known = KnownBits::umin(Known0, Known1); + if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1)) + return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1); + if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1)) + return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1); + break; + } + case ISD::UMAX: { + // Check if one arg is always greater than (or equal) to the other arg. + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1); + KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1); + Known = KnownBits::umax(Known0, Known1); + if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1)) + return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1); + if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1)) + return TLO.CombineTo(Op, IsUGT.getValue() ? Op0 : Op1); + break; + } case ISD::BITREVERSE: { SDValue Src = Op.getOperand(0); APInt DemandedSrcBits = DemandedBits.reverseBits(); @@ -1748,6 +1768,17 @@ bool TargetLowering::SimplifyDemandedBits( Known.Zero = Known2.Zero.byteSwap(); break; } + case ISD::CTPOP: { + // If only 1 bit is demanded, replace with PARITY as long as we're before + // op legalization. + // FIXME: Limit to scalars for now. 
+ if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector()) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT, + Op.getOperand(0))); + + Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); + break; + } case ISD::SIGN_EXTEND_INREG: { SDValue Op0 = Op.getOperand(0); EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); @@ -1858,6 +1889,11 @@ bool TargetLowering::SimplifyDemandedBits( assert(!Known.hasConflict() && "Bits known to be one AND zero?"); assert(Known.getBitWidth() == InBits && "Src width has changed?"); Known = Known.zext(BitWidth); + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (SDValue NewSrc = SimplifyMultipleUseDemandedBits( + Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1)) + return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc)); break; } case ISD::SIGN_EXTEND: @@ -1906,6 +1942,11 @@ bool TargetLowering::SimplifyDemandedBits( if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src)); } + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (SDValue NewSrc = SimplifyMultipleUseDemandedBits( + Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1)) + return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc)); break; } case ISD::ANY_EXTEND: @@ -1945,7 +1986,8 @@ bool TargetLowering::SimplifyDemandedBits( // zero/one bits live out. unsigned OperandBitWidth = Src.getScalarValueSizeInBits(); APInt TruncMask = DemandedBits.zext(OperandBitWidth); - if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1)) + if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO, + Depth + 1)) return true; Known = Known.trunc(BitWidth); @@ -1968,9 +2010,9 @@ bool TargetLowering::SimplifyDemandedBits( // undesirable. 
break; - SDValue ShAmt = Src.getOperand(1); - auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt); - if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth)) + const APInt *ShAmtC = + TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts); + if (!ShAmtC) break; uint64_t ShVal = ShAmtC->getZExtValue(); @@ -1982,12 +2024,12 @@ bool TargetLowering::SimplifyDemandedBits( if (!(HighBits & DemandedBits)) { // None of the shifted in bits are needed. Add a truncate of the // shift input, then shift it. - if (TLO.LegalTypes()) - ShAmt = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL)); + SDValue NewShAmt = TLO.DAG.getConstant( + ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes())); SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0)); return TLO.CombineTo( - Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, ShAmt)); + Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt)); } break; } @@ -2012,10 +2054,14 @@ bool TargetLowering::SimplifyDemandedBits( case ISD::EXTRACT_VECTOR_ELT: { SDValue Src = Op.getOperand(0); SDValue Idx = Op.getOperand(1); - unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount(); unsigned EltBitWidth = Src.getScalarValueSizeInBits(); + if (SrcEltCnt.isScalable()) + return false; + // Demand the bits from every vector element without a constant index. + unsigned NumSrcElts = SrcEltCnt.getFixedValue(); APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) if (CIdx->getAPIntValue().ult(NumSrcElts)) @@ -2229,9 +2275,13 @@ bool TargetLowering::SimplifyDemandedBits( if (C->isOpaque()) return false; } - // TODO: Handle float bits as well. 
if (VT.isInteger()) return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT)); + if (VT.isFloatingPoint()) + return TLO.CombineTo( + Op, + TLO.DAG.getConstantFP( + APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT)); } return false; @@ -2593,13 +2643,9 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownZero, TLO, Depth + 1)) return true; - KnownUndef.clearBit(Idx); - if (Scl.isUndef()) - KnownUndef.setBit(Idx); + KnownUndef.setBitVal(Idx, Scl.isUndef()); - KnownZero.clearBit(Idx); - if (isNullConstant(Scl) || isNullFPConstant(Scl)) - KnownZero.setBit(Idx); + KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl)); break; } @@ -3347,6 +3393,74 @@ SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(DL, VT, X, YShl1, Cond); } +static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, + SDValue N0, const APInt &C1, + ISD::CondCode Cond, const SDLoc &dl, + SelectionDAG &DAG) { + // Look through truncs that don't change the value of a ctpop. + // FIXME: Add vector support? Need to be careful with setcc result type below. + SDValue CTPOP = N0; + if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() && + N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits())) + CTPOP = N0.getOperand(0); + + if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse()) + return SDValue(); + + EVT CTVT = CTPOP.getValueType(); + SDValue CTOp = CTPOP.getOperand(0); + + // If this is a vector CTPOP, keep the CTPOP if it is legal. + // TODO: Should we check if CTPOP is legal(or custom) for scalars? 
+ if (VT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT)) + return SDValue(); + + // (ctpop x) u< 2 -> (x & x-1) == 0 + // (ctpop x) u> 1 -> (x & x-1) != 0 + if (Cond == ISD::SETULT || Cond == ISD::SETUGT) { + unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond); + if (C1.ugt(CostLimit + (Cond == ISD::SETULT))) + return SDValue(); + if (C1 == 0 && (Cond == ISD::SETULT)) + return SDValue(); // This is handled elsewhere. + + unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT); + + SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); + SDValue Result = CTOp; + for (unsigned i = 0; i < Passes; i++) { + SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne); + Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add); + } + ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE; + return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC); + } + + // If ctpop is not supported, expand a power-of-2 comparison based on it. + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) { + // For scalars, keep CTPOP if it is legal or custom. + if (!VT.isVector() && TLI.isOperationLegalOrCustom(ISD::CTPOP, CTVT)) + return SDValue(); + // This is based on X86's custom lowering for CTPOP which produces more + // instructions than the expansion here. + + // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0) + // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0) + SDValue Zero = DAG.getConstant(0, dl, CTVT); + SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); + assert(CTVT.isInteger()); + ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT); + SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); + SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); + SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond); + SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond); + unsigned LogicOpcode = Cond == ISD::SETEQ ? 
ISD::AND : ISD::OR; + return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS); + } + + return SDValue(); +} + /// Try to simplify a setcc built with the specified operands and cc. If it is /// unable to simplify it, return a null SDValue. SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, @@ -3363,8 +3477,11 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Ensure that the constant occurs on the RHS and fold constant comparisons. // TODO: Handle non-splat vector constants. All undef causes trouble. + // FIXME: We can't yet fold constant scalable vector splats, so avoid an + // infinite loop here when we encounter one. ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond); if (isConstOrConstSplat(N0) && + (!OpVT.isScalableVector() || !isConstOrConstSplat(N1)) && (DCI.isBeforeLegalizeOps() || isCondCodeLegal(SwappedCC, N0.getSimpleValueType()))) return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); @@ -3376,75 +3493,46 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) && (DCI.isBeforeLegalizeOps() || isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) && - DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N1, N0 } ) && - !DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N0, N1 } )) + DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) && + !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1})) return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); - if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + if (auto *N1C = isConstOrConstSplat(N1)) { const APInt &C1 = N1C->getAPIntValue(); + // Optimize some CTPOP cases. + if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG)) + return V; + // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an // equality comparison, then we're just comparing whether X itself is // zero. 
if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) && N0.getOperand(0).getOpcode() == ISD::CTLZ && - N0.getOperand(1).getOpcode() == ISD::Constant) { - const APInt &ShAmt = N0.getConstantOperandAPInt(1); - if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && - ShAmt == Log2_32(N0.getValueSizeInBits())) { - if ((C1 == 0) == (Cond == ISD::SETEQ)) { - // (srl (ctlz x), 5) == 0 -> X != 0 - // (srl (ctlz x), 5) != 1 -> X != 0 - Cond = ISD::SETNE; - } else { - // (srl (ctlz x), 5) != 0 -> X == 0 - // (srl (ctlz x), 5) == 1 -> X == 0 - Cond = ISD::SETEQ; + isPowerOf2_32(N0.getScalarValueSizeInBits())) { + if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) { + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) { + if ((C1 == 0) == (Cond == ISD::SETEQ)) { + // (srl (ctlz x), 5) == 0 -> X != 0 + // (srl (ctlz x), 5) != 1 -> X != 0 + Cond = ISD::SETNE; + } else { + // (srl (ctlz x), 5) != 0 -> X == 0 + // (srl (ctlz x), 5) == 1 -> X == 0 + Cond = ISD::SETEQ; + } + SDValue Zero = DAG.getConstant(0, dl, N0.getValueType()); + return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero, + Cond); } - SDValue Zero = DAG.getConstant(0, dl, N0.getValueType()); - return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), - Zero, Cond); } } + } - SDValue CTPOP = N0; - // Look through truncs that don't change the value of a ctpop. 
- if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE) - CTPOP = N0.getOperand(0); - - if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP && - (N0 == CTPOP || - N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) { - EVT CTVT = CTPOP.getValueType(); - SDValue CTOp = CTPOP.getOperand(0); - - // (ctpop x) u< 2 -> (x & x-1) == 0 - // (ctpop x) u> 1 -> (x & x-1) != 0 - if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){ - SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); - SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); - SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); - ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE; - return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC); - } - - // If ctpop is not supported, expand a power-of-2 comparison based on it. - if (C1 == 1 && !isOperationLegalOrCustom(ISD::CTPOP, CTVT) && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { - // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0) - // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0) - SDValue Zero = DAG.getConstant(0, dl, CTVT); - SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); - assert(CTVT.isInteger()); - ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT); - SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); - SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); - SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond); - SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond); - unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR; - return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS); - } - } + // FIXME: Support vectors. 
+ if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + const APInt &C1 = N1C->getAPIntValue(); // (zext x) == C --> x == (trunc C) // (sext x) == C --> x == (trunc C) @@ -3578,11 +3666,12 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) { SDValue Ptr = Lod->getBasePtr(); if (bestOffset != 0) - Ptr = DAG.getMemBasePlusOffset(Ptr, bestOffset, dl); - unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); - SDValue NewLoad = DAG.getLoad( - newVT, dl, Lod->getChain(), Ptr, - Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign); + Ptr = + DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl); + SDValue NewLoad = + DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, + Lod->getPointerInfo().getWithOffset(bestOffset), + Lod->getOriginalAlign()); return DAG.getSetCC(dl, VT, DAG.getNode(ISD::AND, dl, newVT, NewLoad, DAG.getConstant(bestMask.trunc(bestWidth), @@ -3647,7 +3736,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, break; // todo, be more careful with signed comparisons } } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + (Cond == ISD::SETEQ || Cond == ISD::SETNE) && + !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(), + OpVT)) { EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT(); unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits(); EVT ExtDstTy = N0.getValueType(); @@ -3656,26 +3747,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // If the constant doesn't fit into the number of bits for the source of // the sign extension, it is impossible for both sides to be equal. 
if (C1.getMinSignedBits() > ExtSrcTyBits) - return DAG.getConstant(Cond == ISD::SETNE, dl, VT); + return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT); - SDValue ZextOp; - EVT Op0Ty = N0.getOperand(0).getValueType(); - if (Op0Ty == ExtSrcTy) { - ZextOp = N0.getOperand(0); - } else { - APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); - ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0), - DAG.getConstant(Imm, dl, Op0Ty)); - } + assert(ExtDstTy == N0.getOperand(0).getValueType() && + ExtDstTy != ExtSrcTy && "Unexpected types!"); + APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); + SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0), + DAG.getConstant(Imm, dl, ExtDstTy)); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(ZextOp.getNode()); // Otherwise, make this a use of a zext. return DAG.getSetCC(dl, VT, ZextOp, - DAG.getConstant(C1 & APInt::getLowBitsSet( - ExtDstTyBits, - ExtSrcTyBits), - dl, ExtDstTy), - Cond); + DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond); } else if ((N1C->isNullValue() || N1C->isOne()) && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC @@ -3699,8 +3782,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::XOR && N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && - isa<ConstantSDNode>(N0.getOperand(1)) && - cast<ConstantSDNode>(N0.getOperand(1))->isOne()) { + isOneConstant(N0.getOperand(1))) { // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We // can only do this if the top bits are known zero. 
unsigned BitWidth = N0.getValueSizeInBits(); @@ -3744,9 +3826,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond); } } - if (Op0.getOpcode() == ISD::AND && - isa<ConstantSDNode>(Op0.getOperand(1)) && - cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) { + if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) { // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0. if (Op0.getValueType().bitsGT(VT)) Op0 = DAG.getNode(ISD::AND, dl, VT, @@ -3884,6 +3964,67 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift( VT, N0, N1, Cond, DCI, dl)) return CC; + + // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y). + // For example, when high 32-bits of i64 X are known clear: + // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0 + // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1 + bool CmpZero = N1C->getAPIntValue().isNullValue(); + bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue(); + if ((CmpZero || CmpNegOne) && N0.hasOneUse()) { + // Match or(lo,shl(hi,bw/2)) pattern. + auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) { + unsigned EltBits = V.getScalarValueSizeInBits(); + if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0) + return false; + SDValue LHS = V.getOperand(0); + SDValue RHS = V.getOperand(1); + APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2); + // Unshifted element must have zero upperbits. 
+ if (RHS.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(RHS.getOperand(1)) && + RHS.getConstantOperandAPInt(1) == (EltBits / 2) && + DAG.MaskedValueIsZero(LHS, HiBits)) { + Lo = LHS; + Hi = RHS.getOperand(0); + return true; + } + if (LHS.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(LHS.getOperand(1)) && + LHS.getConstantOperandAPInt(1) == (EltBits / 2) && + DAG.MaskedValueIsZero(RHS, HiBits)) { + Lo = RHS; + Hi = LHS.getOperand(0); + return true; + } + return false; + }; + + auto MergeConcat = [&](SDValue Lo, SDValue Hi) { + unsigned EltBits = N0.getScalarValueSizeInBits(); + unsigned HalfBits = EltBits / 2; + APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits); + SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT); + SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits); + SDValue NewN0 = + DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask); + SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits; + return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond); + }; + + SDValue Lo, Hi; + if (IsConcat(N0, Lo, Hi)) + return MergeConcat(Lo, Hi); + + if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) { + SDValue Lo0, Lo1, Hi0, Hi1; + if (IsConcat(N0.getOperand(0), Lo0, Hi0) && + IsConcat(N0.getOperand(1), Lo1, Hi1)) { + return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1), + DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1)); + } + } + } } // If we have "setcc X, C0", check to see if we can shrink the immediate @@ -3891,20 +4032,20 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // TODO: Support this for vectors after legalize ops. 
if (!VT.isVector() || DCI.isBeforeLegalizeOps()) { // SETUGT X, SINTMAX -> SETLT X, 0 - if (Cond == ISD::SETUGT && - C1 == APInt::getSignedMaxValue(OperandBitSize)) + // SETUGE X, SINTMIN -> SETLT X, 0 + if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) || + (Cond == ISD::SETUGE && C1.isMinSignedValue())) return DAG.getSetCC(dl, VT, N0, DAG.getConstant(0, dl, N1.getValueType()), ISD::SETLT); // SETULT X, SINTMIN -> SETGT X, -1 - if (Cond == ISD::SETULT && - C1 == APInt::getSignedMinValue(OperandBitSize)) { - SDValue ConstMinusOne = - DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl, - N1.getValueType()); - return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); - } + // SETULE X, SINTMAX -> SETGT X, -1 + if ((Cond == ISD::SETULT && C1.isMinSignedValue()) || + (Cond == ISD::SETULE && C1.isMaxSignedValue())) + return DAG.getSetCC(dl, VT, N0, + DAG.getAllOnesConstant(dl, N1.getValueType()), + ISD::SETGT); } } @@ -3915,8 +4056,13 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, const APInt &C1 = N1C->getAPIntValue(); EVT ShValTy = N0.getValueType(); - // Fold bit comparisons when we can. - if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + // Fold bit comparisons when we can. This will result in an + // incorrect value when boolean false is negative one, unless + // the bitsize is 1 in which case the false value is the same + // in practice regardless of the representation. 
+ if ((VT.getSizeInBits() == 1 || + getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE) && (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) && N0.getOpcode() == ISD::AND) { if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { @@ -4312,8 +4458,8 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const { } SDValue TargetLowering::LowerAsmOutputForConstraint( - SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo, - SelectionDAG &DAG) const { + SDValue &Chain, SDValue &Flag, const SDLoc &DL, + const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const { return SDValue(); } @@ -4887,9 +5033,15 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, return SDValue(); SDValue Shift, Factor; - if (VT.isVector()) { + if (VT.isFixedLengthVector()) { Shift = DAG.getBuildVector(ShVT, dl, Shifts); Factor = DAG.getBuildVector(VT, dl, Factors); + } else if (VT.isScalableVector()) { + assert(Shifts.size() == 1 && Factors.size() == 1 && + "Expected matchUnaryPredicate to return one element for scalable " + "vectors"); + Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]); + Factor = DAG.getSplatVector(VT, dl, Factors[0]); } else { Shift = Shifts[0]; Factor = Factors[0]; @@ -4982,11 +5134,20 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, return SDValue(); SDValue MagicFactor, Factor, Shift, ShiftMask; - if (VT.isVector()) { + if (VT.isFixedLengthVector()) { MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors); Factor = DAG.getBuildVector(VT, dl, Factors); Shift = DAG.getBuildVector(ShVT, dl, Shifts); ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks); + } else if (VT.isScalableVector()) { + assert(MagicFactors.size() == 1 && Factors.size() == 1 && + Shifts.size() == 1 && ShiftMasks.size() == 1 && + "Expected matchUnaryPredicate to return one element for scalable " + "vectors"); + MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]); + 
Factor = DAG.getSplatVector(VT, dl, Factors[0]); + Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]); + ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]); } else { MagicFactor = MagicFactors[0]; Factor = Factors[0]; @@ -5100,11 +5261,19 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, return SDValue(); SDValue PreShift, PostShift, MagicFactor, NPQFactor; - if (VT.isVector()) { + if (VT.isFixedLengthVector()) { PreShift = DAG.getBuildVector(ShVT, dl, PreShifts); MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors); NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors); PostShift = DAG.getBuildVector(ShVT, dl, PostShifts); + } else if (VT.isScalableVector()) { + assert(PreShifts.size() == 1 && MagicFactors.size() == 1 && + NPQFactors.size() == 1 && PostShifts.size() == 1 && + "Expected matchUnaryPredicate to return one for scalable vectors"); + PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]); + MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]); + NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]); + PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]); } else { PreShift = PreShifts[0]; MagicFactor = MagicFactors[0]; @@ -5156,8 +5325,10 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift); Created.push_back(Q.getNode()); + EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue One = DAG.getConstant(1, dl, VT); - SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ); + SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ); return DAG.getSelect(dl, VT, IsOne, N0, Q); } @@ -5584,7 +5755,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, return SDValue(); SDValue PVal, AVal, KVal, QVal; - if (VT.isVector()) { + if (VT.isFixedLengthVector()) { if (HadOneDivisor) { // Try to turn PAmts into a splat, since we don't care about the values // that are currently '0'. If we can't, just keep '0'`s. 
@@ -5603,6 +5774,15 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, AVal = DAG.getBuildVector(VT, DL, AAmts); KVal = DAG.getBuildVector(ShVT, DL, KAmts); QVal = DAG.getBuildVector(VT, DL, QAmts); + } else if (VT.isScalableVector()) { + assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 && + QAmts.size() == 1 && + "Expected matchUnaryPredicate to return one element for scalable " + "vectors"); + PVal = DAG.getSplatVector(VT, DL, PAmts[0]); + AVal = DAG.getSplatVector(VT, DL, AAmts[0]); + KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]); + QVal = DAG.getSplatVector(VT, DL, QAmts[0]); } else { PVal = PAmts[0]; AVal = AAmts[0]; @@ -5697,6 +5877,28 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { return false; } +SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG, + const DenormalMode &Mode) const { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); + // Testing it with denormal inputs to avoid wrong estimate. + if (Mode.Input == DenormalMode::IEEE) { + // This is specifically a check for the handling of denormal inputs, + // not the result. 
+ + // Test = fabs(X) < SmallestNormal + const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); + APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem); + SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT); + SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op); + return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT); + } + // Test = X == 0.0 + return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); +} + SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, @@ -5726,6 +5928,11 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, return SDValue(); } + auto RemoveDeadNode = [&](SDValue N) { + if (N && N.getNode()->use_empty()) + DAG.RemoveDeadNode(N.getNode()); + }; + SDLoc DL(Op); switch (Opcode) { @@ -5804,13 +6011,19 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, // Negate the X if its cost is less or equal than Y. if (NegX && (CostX <= CostY)) { Cost = CostX; - return DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags); + SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags); + if (NegY != N) + RemoveDeadNode(NegY); + return N; } // Negate the Y if it is not expensive. if (NegY) { Cost = CostY; - return DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags); + SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags); + if (NegX != N) + RemoveDeadNode(NegX); + return N; } break; } @@ -5847,7 +6060,10 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, // Negate the X if its cost is less or equal than Y. if (NegX && (CostX <= CostY)) { Cost = CostX; - return DAG.getNode(Opcode, DL, VT, NegX, Y, Flags); + SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags); + if (NegY != N) + RemoveDeadNode(NegY); + return N; } // Ignore X * 2.0 because that is expected to be canonicalized to X + X. 
@@ -5858,7 +6074,10 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, // Negate the Y if it is not expensive. if (NegY) { Cost = CostY; - return DAG.getNode(Opcode, DL, VT, X, NegY, Flags); + SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags); + if (NegX != N) + RemoveDeadNode(NegX); + return N; } break; } @@ -5887,13 +6106,19 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, // Negate the X if its cost is less or equal than Y. if (NegX && (CostX <= CostY)) { Cost = std::min(CostX, CostZ); - return DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags); + SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags); + if (NegY != N) + RemoveDeadNode(NegY); + return N; } // Negate the Y if it is not expensive. if (NegY) { Cost = std::min(CostY, CostZ); - return DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags); + SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags); + if (NegX != N) + RemoveDeadNode(NegX); + return N; } break; } @@ -5918,7 +6143,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, // Legalization Utilities //===----------------------------------------------------------------------===// -bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, +bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT, SelectionDAG &DAG, @@ -5941,8 +6166,6 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, unsigned OuterBitSize = VT.getScalarSizeInBits(); unsigned InnerBitSize = HiLoVT.getScalarSizeInBits(); - unsigned LHSSB = DAG.ComputeNumSignBits(LHS); - unsigned RHSSB = DAG.ComputeNumSignBits(RHS); // LL, LH, RL, and RH must be either all NULL or all set to a value. 
assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) || @@ -5991,8 +6214,9 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, } } - if (!VT.isVector() && Opcode == ISD::MUL && LHSSB > InnerBitSize && - RHSSB > InnerBitSize) { + if (!VT.isVector() && Opcode == ISD::MUL && + DAG.ComputeNumSignBits(LHS) > InnerBitSize && + DAG.ComputeNumSignBits(RHS) > InnerBitSize) { // The input values are both sign-extended. // TODO non-MUL case? if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) { @@ -6106,7 +6330,7 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SDValue LL, SDValue LH, SDValue RL, SDValue RH) const { SmallVector<SDValue, 2> Result; - bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N, + bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N), N->getOperand(0), N->getOperand(1), Result, HiLoVT, DAG, Kind, LL, LH, RL, RH); if (Ok) { @@ -6118,7 +6342,7 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, } // Check that (every element of) Z is undef or not an exact multiple of BW. -static bool isNonZeroModBitWidth(SDValue Z, unsigned BW) { +static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) { return ISD::matchUnaryPredicate( Z, [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; }, @@ -6145,9 +6369,35 @@ bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result, EVT ShVT = Z.getValueType(); + // If a funnel shift in the other direction is more supported, use it. + unsigned RevOpcode = IsFSHL ? 
ISD::FSHR : ISD::FSHL; + if (!isOperationLegalOrCustom(Node->getOpcode(), VT) && + isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) { + if (isNonZeroModBitWidthOrUndef(Z, BW)) { + // fshl X, Y, Z -> fshr X, Y, -Z + // fshr X, Y, Z -> fshl X, Y, -Z + SDValue Zero = DAG.getConstant(0, DL, ShVT); + Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z); + } else { + // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z + // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z + SDValue One = DAG.getConstant(1, DL, ShVT); + if (IsFSHL) { + Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One); + X = DAG.getNode(ISD::SRL, DL, VT, X, One); + } else { + X = DAG.getNode(RevOpcode, DL, VT, X, Y, One); + Y = DAG.getNode(ISD::SHL, DL, VT, Y, One); + } + Z = DAG.getNOT(DL, Z, ShVT); + } + Result = DAG.getNode(RevOpcode, DL, VT, X, Y, Z); + return true; + } + SDValue ShX, ShY; SDValue ShAmt, InvShAmt; - if (isNonZeroModBitWidth(Z, BW)) { + if (isNonZeroModBitWidthOrUndef(Z, BW)) { // fshl: X << C | Y >> (BW - C) // fshr: X << (BW - C) | Y >> C // where C = Z % BW is not zero @@ -6187,8 +6437,8 @@ bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result, } // TODO: Merge with expandFunnelShift. -bool TargetLowering::expandROT(SDNode *Node, SDValue &Result, - SelectionDAG &DAG) const { +bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps, + SDValue &Result, SelectionDAG &DAG) const { EVT VT = Node->getValueType(0); unsigned EltSizeInBits = VT.getScalarSizeInBits(); bool IsLeft = Node->getOpcode() == ISD::ROTL; @@ -6199,36 +6449,47 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result, EVT ShVT = Op1.getValueType(); SDValue Zero = DAG.getConstant(0, DL, ShVT); - assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 && - "Expecting the type bitwidth to be a power of 2"); - // If a rotate in the other direction is supported, use it. unsigned RevRot = IsLeft ? 
ISD::ROTR : ISD::ROTL; - if (isOperationLegalOrCustom(RevRot, VT)) { + if (isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) { SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1); Result = DAG.getNode(RevRot, DL, VT, Op0, Sub); return true; } - if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) || - !isOperationLegalOrCustom(ISD::SRL, VT) || - !isOperationLegalOrCustom(ISD::SUB, VT) || - !isOperationLegalOrCustomOrPromote(ISD::OR, VT) || - !isOperationLegalOrCustomOrPromote(ISD::AND, VT))) + if (!AllowVectorOps && VT.isVector() && + (!isOperationLegalOrCustom(ISD::SHL, VT) || + !isOperationLegalOrCustom(ISD::SRL, VT) || + !isOperationLegalOrCustom(ISD::SUB, VT) || + !isOperationLegalOrCustomOrPromote(ISD::OR, VT) || + !isOperationLegalOrCustomOrPromote(ISD::AND, VT))) return false; - // Otherwise, - // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and -c, w-1))) - // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and -c, w-1))) - // unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL; unsigned HsOpc = IsLeft ? 
ISD::SRL : ISD::SHL; SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT); - SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1); - SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC); - SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC); - Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0), - DAG.getNode(HsOpc, DL, VT, Op0, And1)); + SDValue ShVal; + SDValue HsVal; + if (isPowerOf2_32(EltSizeInBits)) { + // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1)) + // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1)) + SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1); + SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC); + ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt); + SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC); + HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt); + } else { + // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w)) + // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w)) + SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT); + SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC); + ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt); + SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt); + SDValue One = DAG.getConstant(1, DL, ShVT); + HsVal = + DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt); + } + Result = DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal); return true; } @@ -6247,7 +6508,7 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, if (Node->isStrictFPOpcode()) // When a NaN is converted to an integer a trap is allowed. We can't // use this expansion here because it would eliminate that trap. Other - // traps are also allowed and cannot be eliminated. See + // traps are also allowed and cannot be eliminated. See // IEEE 754-2008 sec 5.8. 
return false; @@ -6318,7 +6579,7 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT); // Only expand vector types if we have the appropriate vector bit operations. - unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT : + unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT : ISD::FP_TO_SINT; if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) || !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT))) @@ -6333,14 +6594,19 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, if (APFloat::opOverflow & APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) { if (Node->isStrictFPOpcode()) { - Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, - { Node->getOperand(0), Src }); + Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, + { Node->getOperand(0), Src }); Chain = Result.getValue(1); } else Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src); return true; } + // Don't expand it if there isn't cheap fsub instruction. + if (!isOperationLegalOrCustom( + Node->isStrictFPOpcode() ? 
ISD::STRICT_FSUB : ISD::FSUB, SrcVT)) + return false; + SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT); SDValue Sel; @@ -6372,9 +6638,9 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, DAG.getConstant(SignMask, dl, DstVT)); SDValue SInt; if (Node->isStrictFPOpcode()) { - SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, + SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, { Chain, Src, FltOfs }); - SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, + SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, { Val.getValue(1), Val }); Chain = SInt.getValue(1); } else { @@ -6403,8 +6669,13 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const { - unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; - SDValue Src = Node->getOperand(OpNo); + // This transform is not correct for converting 0 when rounding mode is set + // to round toward negative infinity which will produce -0.0. So disable under + // strictfp. + if (Node->isStrictFPOpcode()) + return false; + + SDValue Src = Node->getOperand(0); EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); @@ -6423,9 +6694,10 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout()); // Implementation of unsigned i64 to f64 following the algorithm in - // __floatundidf in compiler_rt. This implementation has the advantage - // of performing rounding correctly, both in the default rounding mode - // and in all alternate rounding modes. + // __floatundidf in compiler_rt. This implementation performs rounding + // correctly in all rounding modes with the exception of converting 0 + // when rounding toward negative infinity. In that case the fsub will produce + // -0.0. 
This will be added to +0.0 and produce -0.0 which is incorrect. SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT); SDValue TwoP84PlusTwoP52 = DAG.getConstantFP( BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT); @@ -6439,18 +6711,9 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84); SDValue LoFlt = DAG.getBitcast(DstVT, LoOr); SDValue HiFlt = DAG.getBitcast(DstVT, HiOr); - if (Node->isStrictFPOpcode()) { - SDValue HiSub = - DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other}, - {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52}); - Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other}, - {HiSub.getValue(1), LoFlt, HiSub}); - Chain = Result.getValue(1); - } else { - SDValue HiSub = - DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52); - Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub); - } + SDValue HiSub = + DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52); + Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub); return true; } @@ -6460,6 +6723,11 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ? 
ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE; EVT VT = Node->getValueType(0); + + if (VT.isScalableVector()) + report_fatal_error( + "Expanding fminnum/fmaxnum for scalable vectors is undefined."); + if (isOperationLegalOrCustom(NewOp, VT)) { SDValue Quiet0 = Node->getOperand(0); SDValue Quiet1 = Node->getOperand(1); @@ -6683,23 +6951,58 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result, } bool TargetLowering::expandABS(SDNode *N, SDValue &Result, - SelectionDAG &DAG) const { + SelectionDAG &DAG, bool IsNegative) const { SDLoc dl(N); EVT VT = N->getValueType(0); EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); SDValue Op = N->getOperand(0); + // abs(x) -> smax(x,sub(0,x)) + if (!IsNegative && isOperationLegal(ISD::SUB, VT) && + isOperationLegal(ISD::SMAX, VT)) { + SDValue Zero = DAG.getConstant(0, dl, VT); + Result = DAG.getNode(ISD::SMAX, dl, VT, Op, + DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); + return true; + } + + // abs(x) -> umin(x,sub(0,x)) + if (!IsNegative && isOperationLegal(ISD::SUB, VT) && + isOperationLegal(ISD::UMIN, VT)) { + SDValue Zero = DAG.getConstant(0, dl, VT); + Result = DAG.getNode(ISD::UMIN, dl, VT, Op, + DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); + return true; + } + + // 0 - abs(x) -> smin(x, sub(0,x)) + if (IsNegative && isOperationLegal(ISD::SUB, VT) && + isOperationLegal(ISD::SMIN, VT)) { + SDValue Zero = DAG.getConstant(0, dl, VT); + Result = DAG.getNode(ISD::SMIN, dl, VT, Op, + DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); + return true; + } + // Only expand vector types if we have the appropriate vector operations. 
- if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) || - !isOperationLegalOrCustom(ISD::ADD, VT) || - !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) + if (VT.isVector() && + (!isOperationLegalOrCustom(ISD::SRA, VT) || + (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) || + (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) || + !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) return false; SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, Op, DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT)); - SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift); - Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift); + if (!IsNegative) { + SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift); + Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift); + } else { + // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y)) + SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift); + Result = DAG.getNode(ISD::SUB, dl, VT, Shift, Xor); + } return true; } @@ -6713,6 +7016,9 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, EVT DstVT = LD->getValueType(0); ISD::LoadExtType ExtType = LD->getExtensionType(); + if (SrcVT.isScalableVector()) + report_fatal_error("Cannot scalarize scalable vector loads"); + unsigned NumElem = SrcVT.getVectorNumElements(); EVT SrcEltVT = SrcVT.getScalarType(); @@ -6739,7 +7045,7 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, // the codegen worse. 
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR, - LD->getPointerInfo(), SrcIntVT, LD->getAlignment(), + LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo()); SmallVector<SDValue, 8> Vals; @@ -6776,10 +7082,10 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, SDValue ScalarLoad = DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), - SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride), + SrcEltVT, LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo()); - BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, Stride); + BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::Fixed(Stride)); Vals.push_back(ScalarLoad.getValue(0)); LoadChains.push_back(ScalarLoad.getValue(1)); @@ -6800,6 +7106,9 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, SDValue Value = ST->getValue(); EVT StVT = ST->getMemoryVT(); + if (StVT.isScalableVector()) + report_fatal_error("Cannot scalarize scalable vector stores"); + // The type of the data we want to save EVT RegVT = Value.getValueType(); EVT RegSclVT = RegVT.getScalarType(); @@ -6836,7 +7145,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, } return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(), - ST->getAlignment(), ST->getMemOperand()->getFlags(), + ST->getOriginalAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo()); } @@ -6850,13 +7159,14 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value, DAG.getVectorIdxConstant(Idx, SL)); - SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride); + SDValue Ptr = + DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Idx * Stride)); // This scalar TruncStore may be illegal, but we legalize it later. 
SDValue Store = DAG.getTruncStore( Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride), - MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride), - ST->getMemOperand()->getFlags(), ST->getAAInfo()); + MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(), + ST->getAAInfo()); Stores.push_back(Store); } @@ -6921,7 +7231,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { // Load one integer register's worth from the original location. SDValue Load = DAG.getLoad( RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset), - MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(), + LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo()); // Follow the load with a store to the stack slot. Remember the store. Stores.push_back(DAG.getStore( @@ -6940,8 +7250,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset), MemVT, - MinAlign(LD->getAlignment(), Offset), - LD->getMemOperand()->getFlags(), LD->getAAInfo()); + LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), + LD->getAAInfo()); // Follow the load with a store to the stack slot. Remember the store. // On big-endian machines this requires a truncating store to ensure // that the bits end up in the right place. 
@@ -6971,7 +7281,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2); NumBits >>= 1; - unsigned Alignment = LD->getAlignment(); + Align Alignment = LD->getOriginalAlign(); unsigned IncrementSize = NumBits / 8; ISD::LoadExtType HiExtType = LD->getExtensionType(); @@ -6986,21 +7296,21 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), LD->getAAInfo()); - Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize)); Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - NewLoadedVT, MinAlign(Alignment, IncrementSize), - LD->getMemOperand()->getFlags(), LD->getAAInfo()); + NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), + LD->getAAInfo()); } else { Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), LD->getAAInfo()); - Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize)); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - NewLoadedVT, MinAlign(Alignment, IncrementSize), - LD->getMemOperand()->getFlags(), LD->getAAInfo()); + NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), + LD->getAAInfo()); } // aggregate the two parts @@ -7024,7 +7334,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, SDValue Ptr = ST->getBasePtr(); SDValue Val = ST->getValue(); EVT VT = Val.getValueType(); - int Alignment = ST->getAlignment(); + Align Alignment = ST->getOriginalAlign(); auto &MF = DAG.getMachineFunction(); EVT StoreMemVT = ST->getMemoryVT(); @@ -7081,7 +7391,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, // Store it to the final 
location. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, ST->getPointerInfo().getWithOffset(Offset), - MinAlign(ST->getAlignment(), Offset), + ST->getOriginalAlign(), ST->getMemOperand()->getFlags())); // Increment the pointers. Offset += RegBytes; @@ -7103,7 +7413,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, Stores.push_back( DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, ST->getPointerInfo().getWithOffset(Offset), LoadMemVT, - MinAlign(ST->getAlignment(), Offset), + ST->getOriginalAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo())); // The order of the stores doesn't matter - say it with a TokenFactor. SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); @@ -7114,8 +7424,8 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, "Unaligned store of unknown type."); // Get the half-size VT EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext()); - int NumBits = NewStoredVT.getSizeInBits(); - int IncrementSize = NumBits / 8; + unsigned NumBits = NewStoredVT.getFixedSizeInBits(); + unsigned IncrementSize = NumBits / 8; // Divide the stored value in two parts. SDValue ShiftAmount = DAG.getConstant( @@ -7130,8 +7440,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, Ptr, ST->getPointerInfo(), NewStoredVT, Alignment, ST->getMemOperand()->getFlags()); - Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); - Alignment = MinAlign(Alignment, IncrementSize); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize)); Store2 = DAG.getTruncStore( Chain, dl, DAG.getDataLayout().isLittleEndian() ? 
Hi : Lo, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment, @@ -7150,9 +7459,12 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, SDValue Increment; EVT AddrVT = Addr.getValueType(); EVT MaskVT = Mask.getValueType(); - assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() && + assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() && "Incompatible types of Data and Mask"); if (IsCompressedMemory) { + if (DataVT.isScalableVector()) + report_fatal_error( + "Cannot currently handle compressed memory with scalable vectors"); // Incrementing the pointer according to number of '1's in the mask. EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits()); SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask); @@ -7168,6 +7480,10 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL, AddrVT); Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale); + } else if (DataVT.isScalableVector()) { + Increment = DAG.getVScale(DL, AddrVT, + APInt(AddrVT.getFixedSizeInBits(), + DataVT.getStoreSize().getKnownMinSize())); } else Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT); @@ -7178,16 +7494,26 @@ static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl) { - if (isa<ConstantSDNode>(Idx)) + if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx)) return Idx; EVT IdxVT = Idx.getValueType(); - unsigned NElts = VecVT.getVectorNumElements(); - if (isPowerOf2_32(NElts)) { - APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), - Log2_32(NElts)); - return DAG.getNode(ISD::AND, dl, IdxVT, Idx, - DAG.getConstant(Imm, dl, IdxVT)); + unsigned NElts = VecVT.getVectorMinNumElements(); + if (VecVT.isScalableVector()) { + SDValue VS = DAG.getVScale(dl, IdxVT, + APInt(IdxVT.getFixedSizeInBits(), + NElts)); + SDValue Sub = DAG.getNode(ISD::SUB, dl, 
IdxVT, VS, + DAG.getConstant(1, dl, IdxVT)); + + return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub); + } else { + if (isPowerOf2_32(NElts)) { + APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), + Log2_32(NElts)); + return DAG.getNode(ISD::AND, dl, IdxVT, Idx, + DAG.getConstant(Imm, dl, IdxVT)); + } } return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, @@ -7204,8 +7530,8 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG, EVT EltVT = VecVT.getVectorElementType(); // Calculate the element offset and add it to the pointer. - unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. - assert(EltSize * 8 == EltVT.getSizeInBits() && + unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size. + assert(EltSize * 8 == EltVT.getFixedSizeInBits() && "Converting bits to bytes lost precision"); Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl); @@ -7283,6 +7609,65 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op, return SDValue(); } +// Convert redundant addressing modes (e.g. scaling is redundant +// when accessing bytes). +ISD::MemIndexType +TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT, + SDValue Offsets) const { + bool IsScaledIndex = + (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::UNSIGNED_SCALED); + bool IsSignedIndex = + (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED); + + // Scaling is unimportant for bytes, canonicalize to unscaled. + if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) { + IsScaledIndex = false; + IndexType = IsSignedIndex ? 
ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED; + } + + return IndexType; +} + +SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const { + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + EVT VT = Op0.getValueType(); + unsigned Opcode = Node->getOpcode(); + SDLoc DL(Node); + + // umin(x,y) -> sub(x,usubsat(x,y)) + if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) && + isOperationLegal(ISD::USUBSAT, VT)) { + return DAG.getNode(ISD::SUB, DL, VT, Op0, + DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1)); + } + + // umax(x,y) -> add(x,usubsat(y,x)) + if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) && + isOperationLegal(ISD::USUBSAT, VT)) { + return DAG.getNode(ISD::ADD, DL, VT, Op0, + DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0)); + } + + // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B + ISD::CondCode CC; + switch (Opcode) { + default: llvm_unreachable("How did we get here?"); + case ISD::SMAX: CC = ISD::SETGT; break; + case ISD::SMIN: CC = ISD::SETLT; break; + case ISD::UMAX: CC = ISD::SETUGT; break; + case ISD::UMIN: CC = ISD::SETULT; break; + } + + // FIXME: Should really try to split the vector in case it's legal on a + // subvector. 
+ if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT)) + return DAG.UnrollVectorOp(Node); + + SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC); + return DAG.getSelect(DL, VT, Cond, Op0, Op1); +} + SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { unsigned Opcode = Node->getOpcode(); SDValue LHS = Node->getOperand(0); @@ -7294,12 +7679,13 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { assert(VT.isInteger() && "Expected operands to be integers"); // usub.sat(a, b) -> umax(a, b) - b - if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) { + if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) { SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS); return DAG.getNode(ISD::SUB, dl, VT, Max, RHS); } - if (Opcode == ISD::UADDSAT && isOperationLegalOrCustom(ISD::UMIN, VT)) { + // uadd.sat(a, b) -> umin(a, ~b) + b + if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) { SDValue InvRHS = DAG.getNOT(dl, RHS, VT); SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS); return DAG.getNode(ISD::ADD, dl, VT, Min, RHS); @@ -7324,6 +7710,11 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { "addition or subtraction node."); } + // FIXME: Should really try to split the vector in case it's legal on a + // subvector. 
+ if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT)) + return DAG.UnrollVectorOp(Node); + unsigned BitWidth = LHS.getScalarValueSizeInBits(); EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), @@ -7363,6 +7754,41 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { } } +SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const { + unsigned Opcode = Node->getOpcode(); + bool IsSigned = Opcode == ISD::SSHLSAT; + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + EVT VT = LHS.getValueType(); + SDLoc dl(Node); + + assert((Node->getOpcode() == ISD::SSHLSAT || + Node->getOpcode() == ISD::USHLSAT) && + "Expected a SHLSAT opcode"); + assert(VT == RHS.getValueType() && "Expected operands to be the same type"); + assert(VT.isInteger() && "Expected operands to be integers"); + + // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate. + + unsigned BW = VT.getScalarSizeInBits(); + SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS); + SDValue Orig = + DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS); + + SDValue SatVal; + if (IsSigned) { + SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT); + SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT); + SatVal = DAG.getSelectCC(dl, LHS, DAG.getConstant(0, dl, VT), + SatMin, SatMax, ISD::SETLT); + } else { + SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT); + } + Result = DAG.getSelectCC(dl, LHS, Orig, SatVal, Result, ISD::SETNE); + + return Result; +} + SDValue TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { assert((Node->getOpcode() == ISD::SMULFIX || @@ -7736,7 +8162,7 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result, if (isSigned) { // The high part is obtained by SRA'ing all but one of the bits of low // part. 
- unsigned LoSize = VT.getSizeInBits(); + unsigned LoSize = VT.getFixedSizeInBits(); HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, DAG.getConstant(LoSize - 1, dl, @@ -7795,7 +8221,7 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result, // Truncate the result if SetCC returns a larger type than needed. EVT RType = Node->getValueType(1); - if (RType.getSizeInBits() < Overflow.getValueSizeInBits()) + if (RType.bitsLT(Overflow.getValueType())) Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow); assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() && @@ -7805,32 +8231,14 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result, SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const { SDLoc dl(Node); - bool NoNaN = Node->getFlags().hasNoNaNs(); - unsigned BaseOpcode = 0; - switch (Node->getOpcode()) { - default: llvm_unreachable("Expected VECREDUCE opcode"); - case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break; - case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break; - case ISD::VECREDUCE_ADD: BaseOpcode = ISD::ADD; break; - case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break; - case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break; - case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break; - case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break; - case ISD::VECREDUCE_SMAX: BaseOpcode = ISD::SMAX; break; - case ISD::VECREDUCE_SMIN: BaseOpcode = ISD::SMIN; break; - case ISD::VECREDUCE_UMAX: BaseOpcode = ISD::UMAX; break; - case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break; - case ISD::VECREDUCE_FMAX: - BaseOpcode = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM; - break; - case ISD::VECREDUCE_FMIN: - BaseOpcode = NoNaN ? 
ISD::FMINNUM : ISD::FMINIMUM; - break; - } - + unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode()); SDValue Op = Node->getOperand(0); EVT VT = Op.getValueType(); + if (VT.isScalableVector()) + report_fatal_error( + "Expanding reductions for scalable vectors is undefined."); + // Try to use a shuffle reduction for power of two vectors. if (VT.isPow2VectorType()) { while (VT.getVectorNumElements() > 1) { @@ -7861,6 +8269,33 @@ SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const { return Res; } +SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const { + SDLoc dl(Node); + SDValue AccOp = Node->getOperand(0); + SDValue VecOp = Node->getOperand(1); + SDNodeFlags Flags = Node->getFlags(); + + EVT VT = VecOp.getValueType(); + EVT EltVT = VT.getVectorElementType(); + + if (VT.isScalableVector()) + report_fatal_error( + "Expanding reductions for scalable vectors is undefined."); + + unsigned NumElts = VT.getVectorNumElements(); + + SmallVector<SDValue, 8> Ops; + DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts); + + unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode()); + + SDValue Res = AccOp; + for (unsigned i = 0; i < NumElts; i++) + Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags); + + return Res; +} + bool TargetLowering::expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const { EVT VT = Node->getValueType(0); @@ -7883,3 +8318,105 @@ bool TargetLowering::expandREM(SDNode *Node, SDValue &Result, } return false; } + +SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node, + SelectionDAG &DAG) const { + bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT; + SDLoc dl(SDValue(Node, 0)); + SDValue Src = Node->getOperand(0); + + // DstVT is the result type, while SatVT is the size to which we saturate + EVT SrcVT = Src.getValueType(); + EVT DstVT = Node->getValueType(0); + + unsigned SatWidth = Node->getConstantOperandVal(1); + unsigned DstWidth = 
DstVT.getScalarSizeInBits(); + assert(SatWidth <= DstWidth && + "Expected saturation width smaller than result width"); + + // Determine minimum and maximum integer values and their corresponding + // floating-point values. + APInt MinInt, MaxInt; + if (IsSigned) { + MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth); + MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth); + } else { + MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth); + MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth); + } + + // We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as + // libcall emission cannot handle this. Large result types will fail. + if (SrcVT == MVT::f16) { + Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src); + SrcVT = Src.getValueType(); + } + + APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT)); + APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT)); + + APFloat::opStatus MinStatus = + MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero); + APFloat::opStatus MaxStatus = + MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero); + bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) && + !(MaxStatus & APFloat::opStatus::opInexact); + + SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT); + SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT); + + // If the integer bounds are exactly representable as floats and min/max are + // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence + // of comparisons and selects. + bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) && + isOperationLegal(ISD::FMAXNUM, SrcVT); + if (AreExactFloatBounds && MinMaxLegal) { + SDValue Clamped = Src; + + // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat. + Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode); + // Clamp by MaxFloat from above. NaN cannot occur. 
+ Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode); + // Convert clamped value to integer. + SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, + dl, DstVT, Clamped); + + // In the unsigned case we're done, because we mapped NaN to MinFloat, + // which will cast to zero. + if (!IsSigned) + return FpToInt; + + // Otherwise, select 0 if Src is NaN. + SDValue ZeroInt = DAG.getConstant(0, dl, DstVT); + return DAG.getSelectCC(dl, Src, Src, ZeroInt, FpToInt, + ISD::CondCode::SETUO); + } + + SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT); + SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT); + + // Result of direct conversion. The assumption here is that the operation is + // non-trapping and it's fine to apply it to an out-of-range value if we + // select it away later. + SDValue FpToInt = + DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src); + + SDValue Select = FpToInt; + + // If Src ULT MinFloat, select MinInt. In particular, this also selects + // MinInt if Src is NaN. + Select = DAG.getSelectCC(dl, Src, MinFloatNode, MinIntNode, Select, + ISD::CondCode::SETULT); + // If Src OGT MaxFloat, select MaxInt. + Select = DAG.getSelectCC(dl, Src, MaxFloatNode, MaxIntNode, Select, + ISD::CondCode::SETOGT); + + // In the unsigned case we are done, because we mapped NaN to MinInt, which + // is already zero. + if (!IsSigned) + return Select; + + // Otherwise, select 0 if Src is NaN. 
+ SDValue ZeroInt = DAG.getConstant(0, dl, DstVT); + return DAG.getSelectCC(dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO); +} diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp index ce43fb1fbd4b..f89069e9f728 100644 --- a/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -144,7 +144,7 @@ class ShrinkWrap : public MachineFunctionPass { unsigned FrameDestroyOpcode; /// Stack pointer register, used by llvm.{savestack,restorestack} - unsigned SP; + Register SP; /// Entry block. const MachineBasicBlock *Entry; @@ -331,11 +331,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, Save = &MBB; else Save = MDT->findNearestCommonDominator(Save, &MBB); - - if (!Save) { - LLVM_DEBUG(dbgs() << "Found a block that is not reachable from Entry\n"); - return; - } + assert(Save); if (!Restore) Restore = &MBB; @@ -381,7 +377,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, // C. Save and Restore are in the same loop. bool SaveDominatesRestore = false; bool RestorePostDominatesSave = false; - while (Save && Restore && + while (Restore && (!(SaveDominatesRestore = MDT->dominates(Save, Restore)) || !(RestorePostDominatesSave = MPDT->dominates(Restore, Save)) || // Post-dominance is not enough in loops to ensure that all uses/defs @@ -412,8 +408,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, Restore = MPDT->findNearestCommonDominator(Restore, Save); // Fix (C). - if (Save && Restore && - (MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) { + if (Restore && (MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) { if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) { // Push Save outside of this loop if immediate dominator is different // from save block. If immediate dominator is not different, bail out. 
diff --git a/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/llvm/lib/CodeGen/SjLjEHPrepare.cpp index 0683058f177e..d2fd4a6d8fd9 100644 --- a/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -142,7 +142,7 @@ static void MarkBlocksLiveIn(BasicBlock *BB, /// instruction with those returned by the personality function. void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Value *SelVal) { - SmallVector<Value *, 8> UseWorkList(LPI->user_begin(), LPI->user_end()); + SmallVector<Value *, 8> UseWorkList(LPI->users()); while (!UseWorkList.empty()) { Value *Val = UseWorkList.pop_back_val(); auto *EVI = dyn_cast<ExtractValueInst>(Val); diff --git a/llvm/lib/CodeGen/SpillPlacement.cpp b/llvm/lib/CodeGen/SpillPlacement.cpp index 36a0ddf67b19..4bb50a285497 100644 --- a/llvm/lib/CodeGen/SpillPlacement.cpp +++ b/llvm/lib/CodeGen/SpillPlacement.cpp @@ -27,10 +27,7 @@ //===----------------------------------------------------------------------===// #include "SpillPlacement.h" -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SparseSet.h" #include "llvm/CodeGen/EdgeBundles.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -39,7 +36,6 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/Support/BlockFrequency.h" #include <algorithm> #include <cassert> #include <cstdint> diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp index 8dec620536a7..a6a3149ae25b 100644 --- a/llvm/lib/CodeGen/SplitKit.cpp +++ b/llvm/lib/CodeGen/SplitKit.cpp @@ -12,28 +12,18 @@ //===----------------------------------------------------------------------===// #include "SplitKit.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" 
#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalCalc.h" -#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRangeEdit.h" -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -41,10 +31,8 @@ #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugLoc.h" -#include "llvm/MC/LaneBitmask.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/BlockFrequency.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -53,7 +41,6 @@ #include <iterator> #include <limits> #include <tuple> -#include <utility> using namespace llvm; @@ -181,7 +168,7 @@ void SplitAnalysis::analyzeUses() { // Get use slots form the use-def chain. 
const MachineRegisterInfo &MRI = MF.getRegInfo(); - for (MachineOperand &MO : MRI.use_nodbg_operands(CurLI->reg)) + for (MachineOperand &MO : MRI.use_nodbg_operands(CurLI->reg())) if (!MO.isUndef()) UseSlots.push_back(LIS.getInstructionIndex(*MO.getParent()).getRegSlot()); @@ -346,7 +333,7 @@ unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const { } bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const { - unsigned OrigReg = VRM.getOriginal(CurLI->reg); + unsigned OrigReg = VRM.getOriginal(CurLI->reg()); const LiveInterval &Orig = LIS.getInterval(OrigReg); assert(!Orig.empty() && "Splitting empty interval?"); LiveInterval::const_iterator I = Orig.find(Idx); @@ -412,10 +399,18 @@ LLVM_DUMP_METHOD void SplitEditor::dump() const { } #endif +LiveInterval::SubRange &SplitEditor::getSubRangeForMaskExact(LaneBitmask LM, + LiveInterval &LI) { + for (LiveInterval::SubRange &S : LI.subranges()) + if (S.LaneMask == LM) + return S; + llvm_unreachable("SubRange for this mask not found"); +} + LiveInterval::SubRange &SplitEditor::getSubRangeForMask(LaneBitmask LM, LiveInterval &LI) { for (LiveInterval::SubRange &S : LI.subranges()) - if (S.LaneMask == LM) + if ((S.LaneMask & LM) == LM) return S; llvm_unreachable("SubRange for this mask not found"); } @@ -446,7 +441,7 @@ void SplitEditor::addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original) { LaneBitmask LM; for (const MachineOperand &DefOp : DefMI->defs()) { Register R = DefOp.getReg(); - if (R != LI.reg) + if (R != LI.reg()) continue; if (unsigned SR = DefOp.getSubReg()) LM |= TRI.getSubRegIndexLaneMask(SR); @@ -517,7 +512,7 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo &ParentVNI) { VFP = ValueForcePair(nullptr, true); } -SlotIndex SplitEditor::buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg, +SlotIndex SplitEditor::buildSingleSubRegCopy(Register FromReg, Register ToReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, unsigned SubIdx, LiveInterval &DestLI, 
bool Late, SlotIndex Def) { const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY); @@ -543,7 +538,7 @@ SlotIndex SplitEditor::buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg, return Def; } -SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg, +SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg, LaneBitmask LaneMask, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx) { const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY); @@ -649,7 +644,7 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, LiveInterval &OrigLI = LIS.getInterval(Original); VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx); - unsigned Reg = LI->reg; + Register Reg = LI->reg(); bool DidRemat = false; if (OrigVNI) { LiveRangeEdit::Remat RM(ParentVNI); @@ -662,16 +657,25 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, } if (!DidRemat) { LaneBitmask LaneMask; - if (LI->hasSubRanges()) { + if (OrigLI.hasSubRanges()) { LaneMask = LaneBitmask::getNone(); - for (LiveInterval::SubRange &S : LI->subranges()) - LaneMask |= S.LaneMask; + for (LiveInterval::SubRange &S : OrigLI.subranges()) { + if (S.liveAt(UseIdx)) + LaneMask |= S.LaneMask; + } } else { LaneMask = LaneBitmask::getAll(); } - ++NumCopies; - Def = buildCopy(Edit->getReg(), Reg, LaneMask, MBB, I, Late, RegIdx); + if (LaneMask.none()) { + const MCInstrDesc &Desc = TII.get(TargetOpcode::IMPLICIT_DEF); + MachineInstr *ImplicitDef = BuildMI(MBB, I, DebugLoc(), Desc, Reg); + SlotIndexes &Indexes = *LIS.getSlotIndexes(); + Def = Indexes.insertMachineInstrInMaps(*ImplicitDef, Late).getRegSlot(); + } else { + ++NumCopies; + Def = buildCopy(Edit->getReg(), Reg, LaneMask, MBB, I, Late, RegIdx); + } } // Define the value in Reg. 
@@ -994,9 +998,7 @@ void SplitEditor::computeRedundantBackCopies( } if (!DominatedVNIs.empty()) { forceRecompute(0, *ParentVNI); - for (auto VNI : DominatedVNIs) { - BackCopies.push_back(VNI); - } + append_range(BackCopies, DominatedVNIs); DominatedVNIs.clear(); } } @@ -1257,8 +1259,8 @@ void SplitEditor::extendPHIRange(MachineBasicBlock &B, LiveIntervalCalc &LIC, LiveInterval &PLI = Edit->getParent(); // Need the cast because the inputs to ?: would otherwise be deemed // "incompatible": SubRange vs LiveInterval. - LiveRange &PSR = !LM.all() ? getSubRangeForMask(LM, PLI) - : static_cast<LiveRange&>(PLI); + LiveRange &PSR = !LM.all() ? getSubRangeForMaskExact(LM, PLI) + : static_cast<LiveRange &>(PLI); if (PSR.liveAt(LastUse)) LIC.extend(LR, End, /*PhysReg=*/0, Undefs); } @@ -1293,7 +1295,7 @@ void SplitEditor::extendPHIKillRanges() { continue; unsigned RegIdx = RegAssign.lookup(V->def); LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx)); - LiveInterval::SubRange &S = getSubRangeForMask(PS.LaneMask, LI); + LiveInterval::SubRange &S = getSubRangeForMaskExact(PS.LaneMask, LI); if (removeDeadSegment(V->def, S)) continue; @@ -1342,7 +1344,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { // Rewrite to the mapped register at Idx. 
unsigned RegIdx = RegAssign.lookup(Idx); LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx)); - MO.setReg(LI.reg); + MO.setReg(LI.reg()); LLVM_DEBUG(dbgs() << " rewr " << printMBBReference(*MI->getParent()) << '\t' << Idx << ':' << RegIdx << '\t' << *MI); @@ -1402,7 +1404,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { } } - for (unsigned R : *Edit) { + for (Register R : *Edit) { LiveInterval &LI = LIS.getInterval(R); if (!LI.hasSubRanges()) continue; @@ -1424,7 +1426,7 @@ void SplitEditor::deleteRematVictims() { continue; MachineInstr *MI = LIS.getInstructionFromIndex(S.valno->def); assert(MI && "Missing instruction for dead def"); - MI->addRegisterDead(LI->reg, &TRI); + MI->addRegisterDead(LI->reg(), &TRI); if (!MI->allDefsAreDead()) continue; @@ -1521,7 +1523,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { deleteRematVictims(); // Get rid of unused values and set phi-kill flags. - for (unsigned Reg : *Edit) { + for (Register Reg : *Edit) { LiveInterval &LI = LIS.getInterval(Reg); LI.removeEmptySubRanges(); LI.RenumberValues(); @@ -1538,13 +1540,13 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { ConnectedVNInfoEqClasses ConEQ(LIS); for (unsigned i = 0, e = Edit->size(); i != e; ++i) { // Don't use iterators, they are invalidated by create() below. - unsigned VReg = Edit->get(i); + Register VReg = Edit->get(i); LiveInterval &LI = LIS.getInterval(VReg); SmallVector<LiveInterval*, 8> SplitLIs; LIS.splitSeparateComponents(LI, SplitLIs); - unsigned Original = VRM.getOriginal(VReg); + Register Original = VRM.getOriginal(VReg); for (LiveInterval *SplitLI : SplitLIs) - VRM.setIsSplitFromReg(SplitLI->reg, Original); + VRM.setIsSplitFromReg(SplitLI->reg(), Original); // The new intervals all map back to i. 
if (LRMap) diff --git a/llvm/lib/CodeGen/SplitKit.h b/llvm/lib/CodeGen/SplitKit.h index 3ab5f2585f34..a94518f5a4fc 100644 --- a/llvm/lib/CodeGen/SplitKit.h +++ b/llvm/lib/CodeGen/SplitKit.h @@ -345,10 +345,17 @@ private: return LICalc[SpillMode != SM_Partition && RegIdx != 0]; } - /// Find a subrange corresponding to the lane mask @p LM in the live + /// Find a subrange corresponding to the exact lane mask @p LM in the live /// interval @p LI. The interval @p LI is assumed to contain such a subrange. /// This function is used to find corresponding subranges between the /// original interval and the new intervals. + LiveInterval::SubRange &getSubRangeForMaskExact(LaneBitmask LM, + LiveInterval &LI); + + /// Find a subrange corresponding to the lane mask @p LM, or a superset of it, + /// in the live interval @p LI. The interval @p LI is assumed to contain such + /// a subrange. This function is used to find corresponding subranges between + /// the original interval and the new intervals. LiveInterval::SubRange &getSubRangeForMask(LaneBitmask LM, LiveInterval &LI); /// Add a segment to the interval LI for the value number VNI. If LI has @@ -432,11 +439,11 @@ private: /// Add a copy instruction copying \p FromReg to \p ToReg before /// \p InsertBefore. This can be invoked with a \p LaneMask which may make it /// necessary to construct a sequence of copies to cover it exactly. 
- SlotIndex buildCopy(unsigned FromReg, unsigned ToReg, LaneBitmask LaneMask, + SlotIndex buildCopy(Register FromReg, Register ToReg, LaneBitmask LaneMask, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx); - SlotIndex buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg, + SlotIndex buildSingleSubRegCopy(Register FromReg, Register ToReg, MachineBasicBlock &MB, MachineBasicBlock::iterator InsertBefore, unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def); diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp index d720d93c306d..af58204f6db5 100644 --- a/llvm/lib/CodeGen/StackColoring.cpp +++ b/llvm/lib/CodeGen/StackColoring.cpp @@ -373,6 +373,36 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); // before visiting the memcpy block (which will contain the lifetime start // for "b" then it will appear that 'b' has a degenerate lifetime. // +// Handle Windows Exception with LifetimeStartOnFirstUse: +// ----------------- +// +// There was a bug for using LifetimeStartOnFirstUse in win32. +// class Type1 { +// ... +// ~Type1(){ write memory;} +// } +// ... +// try{ +// Type1 V +// ... +// } catch (Type2 X){ +// ... +// } +// For variable X in catch(X), we put point pX=&(&X) into ConservativeSlots +// to prevent using LifetimeStartOnFirstUse. Because pX may merged with +// object V which may call destructor after implicitly writing pX. All these +// are done in C++ EH runtime libs (through CxxThrowException), and can't +// obviously check it in IR level. +// +// The loader of pX, without obvious writing IR, is usually the first LOAD MI +// in EHPad, Some like: +// bb.x.catch.i (landing-pad, ehfunclet-entry): +// ; predecessors: %bb... +// successors: %bb... +// %n:gr32 = MOV32rm %stack.pX ... +// ... +// The Type2** %stack.pX will only be written in EH runtime libs, so we +// check the StoreSlots to screen it out. 
namespace { @@ -434,6 +464,9 @@ class StackColoring : public MachineFunctionPass { /// slots lifetime-start-on-first-use is disabled). BitVector ConservativeSlots; + /// Record the FI slots referenced by a 'may write to memory'. + BitVector StoreSlots; + /// Number of iterations taken during data flow analysis. unsigned NumIterations; @@ -629,10 +662,13 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { InterestingSlots.resize(NumSlot); ConservativeSlots.clear(); ConservativeSlots.resize(NumSlot); + StoreSlots.clear(); + StoreSlots.resize(NumSlot); // number of start and end lifetime ops for each slot SmallVector<int, 8> NumStartLifetimes(NumSlot, 0); SmallVector<int, 8> NumEndLifetimes(NumSlot, 0); + SmallVector<int, 8> NumLoadInCatchPad(NumSlot, 0); // Step 1: collect markers and populate the "InterestingSlots" // and "ConservativeSlots" sets. @@ -687,6 +723,13 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { if (! BetweenStartEnd.test(Slot)) { ConservativeSlots.set(Slot); } + // Here we check the StoreSlots to screen catch point out. For more + // information, please refer "Handle Windows Exception with + // LifetimeStartOnFirstUse" at the head of this file. + if (MI.mayStore()) + StoreSlots.set(Slot); + if (MF->getWinEHFuncInfo() && MBB->isEHPad() && MI.mayLoad()) + NumLoadInCatchPad[Slot] += 1; } } } @@ -697,11 +740,14 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { return 0; } - // PR27903: slots with multiple start or end lifetime ops are not + // 1) PR27903: slots with multiple start or end lifetime ops are not // safe to enable for "lifetime-start-on-first-use". - for (unsigned slot = 0; slot < NumSlot; ++slot) - if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1) + // 2) And also not safe for variable X in catch(X) in windows. 
+ for (unsigned slot = 0; slot < NumSlot; ++slot) { + if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1 || + (NumLoadInCatchPad[slot] > 1 && !StoreSlots.test(slot))) ConservativeSlots.set(slot); + } LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots)); // Step 2: compute begin/end sets for each block @@ -1048,7 +1094,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { if (MMO->getAAInfo()) { if (const Value *MMOV = MMO->getValue()) { SmallVector<Value *, 4> Objs; - getUnderlyingObjectsForCodeGen(MMOV, Objs, MF->getDataLayout()); + getUnderlyingObjectsForCodeGen(MMOV, Objs); if (Objs.empty()) MayHaveConflictingAAMD = true; @@ -1241,7 +1287,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { // This is a simple greedy algorithm for merging allocas. First, sort the // slots, placing the largest slots first. Next, perform an n^2 scan and look - // for disjoint slots. When you find disjoint slots, merge the samller one + // for disjoint slots. When you find disjoint slots, merge the smaller one // into the bigger one and update the live interval. Remove the small alloca // and continue. 
diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp index 1e060ecbeb43..faf07e90c39c 100644 --- a/llvm/lib/CodeGen/StackMaps.cpp +++ b/llvm/lib/CodeGen/StackMaps.cpp @@ -45,6 +45,14 @@ static cl::opt<int> StackMapVersion( const char *StackMaps::WSMP = "Stack Maps: "; +static uint64_t getConstMetaVal(const MachineInstr &MI, unsigned Idx) { + assert(MI.getOperand(Idx).isImm() && + MI.getOperand(Idx).getImm() == StackMaps::ConstantOp); + const auto &MO = MI.getOperand(Idx + 1); + assert(MO.isImm()); + return MO.getImm(); +} + StackMapOpers::StackMapOpers(const MachineInstr *MI) : MI(MI) { assert(getVarIdx() <= MI->getNumOperands() && @@ -83,11 +91,89 @@ unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const { return ScratchIdx; } +unsigned StatepointOpers::getNumGcMapEntriesIdx() { + // Take index of num of allocas and skip all allocas records. + unsigned CurIdx = getNumAllocaIdx(); + unsigned NumAllocas = getConstMetaVal(*MI, CurIdx - 1); + CurIdx++; + while (NumAllocas--) + CurIdx = StackMaps::getNextMetaArgIdx(MI, CurIdx); + return CurIdx + 1; // skip <StackMaps::ConstantOp> +} + +unsigned StatepointOpers::getNumAllocaIdx() { + // Take index of num of gc ptrs and skip all gc ptr records. + unsigned CurIdx = getNumGCPtrIdx(); + unsigned NumGCPtrs = getConstMetaVal(*MI, CurIdx - 1); + CurIdx++; + while (NumGCPtrs--) + CurIdx = StackMaps::getNextMetaArgIdx(MI, CurIdx); + return CurIdx + 1; // skip <StackMaps::ConstantOp> +} + +unsigned StatepointOpers::getNumGCPtrIdx() { + // Take index of num of deopt args and skip all deopt records. 
+ unsigned CurIdx = getNumDeoptArgsIdx(); + unsigned NumDeoptArgs = getConstMetaVal(*MI, CurIdx - 1); + CurIdx++; + while (NumDeoptArgs--) { + CurIdx = StackMaps::getNextMetaArgIdx(MI, CurIdx); + } + return CurIdx + 1; // skip <StackMaps::ConstantOp> +} + +int StatepointOpers::getFirstGCPtrIdx() { + unsigned NumGCPtrsIdx = getNumGCPtrIdx(); + unsigned NumGCPtrs = getConstMetaVal(*MI, NumGCPtrsIdx - 1); + if (NumGCPtrs == 0) + return -1; + ++NumGCPtrsIdx; // skip <num gc ptrs> + assert(NumGCPtrsIdx < MI->getNumOperands()); + return (int)NumGCPtrsIdx; +} + +unsigned StatepointOpers::getGCPointerMap( + SmallVectorImpl<std::pair<unsigned, unsigned>> &GCMap) { + unsigned CurIdx = getNumGcMapEntriesIdx(); + unsigned GCMapSize = getConstMetaVal(*MI, CurIdx - 1); + CurIdx++; + for (unsigned N = 0; N < GCMapSize; ++N) { + unsigned B = MI->getOperand(CurIdx++).getImm(); + unsigned D = MI->getOperand(CurIdx++).getImm(); + GCMap.push_back(std::make_pair(B, D)); + } + + return GCMapSize; +} + StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) { if (StackMapVersion != 3) llvm_unreachable("Unsupported stackmap version!"); } +unsigned StackMaps::getNextMetaArgIdx(const MachineInstr *MI, unsigned CurIdx) { + assert(CurIdx < MI->getNumOperands() && "Bad meta arg index"); + const auto &MO = MI->getOperand(CurIdx); + if (MO.isImm()) { + switch (MO.getImm()) { + default: + llvm_unreachable("Unrecognized operand type."); + case StackMaps::DirectMemRefOp: + CurIdx += 2; + break; + case StackMaps::IndirectMemRefOp: + CurIdx += 3; + break; + case StackMaps::ConstantOp: + ++CurIdx; + break; + } + } + ++CurIdx; + assert(CurIdx < MI->getNumOperands() && "points past operand list"); + return CurIdx; +} + /// Go up the super-register chain until we hit a valid dwarf register number. 
static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) { int RegNum = TRI->getDwarfRegNum(Reg, false); @@ -148,6 +234,12 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, if (MOI->isImplicit()) return ++MOI; + if (MOI->isUndef()) { + // Record `undef` register as constant. Use same value as ISel uses. + Locs.emplace_back(Location::Constant, sizeof(int64_t), 0, 0xFEFEFEFE); + return ++MOI; + } + assert(Register::isPhysicalRegister(MOI->getReg()) && "Virtreg operands should have been rewritten before now."); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MOI->getReg()); @@ -286,14 +378,82 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { } } - LiveOuts.erase( - llvm::remove_if(LiveOuts, - [](const LiveOutReg &LO) { return LO.Reg == 0; }), - LiveOuts.end()); + llvm::erase_if(LiveOuts, [](const LiveOutReg &LO) { return LO.Reg == 0; }); return LiveOuts; } +// See statepoint MI format description in StatepointOpers' class comment +// in include/llvm/CodeGen/StackMaps.h +void StackMaps::parseStatepointOpers(const MachineInstr &MI, + MachineInstr::const_mop_iterator MOI, + MachineInstr::const_mop_iterator MOE, + LocationVec &Locations, + LiveOutVec &LiveOuts) { + LLVM_DEBUG(dbgs() << "record statepoint : " << MI << "\n"); + StatepointOpers SO(&MI); + MOI = parseOperand(MOI, MOE, Locations, LiveOuts); // CC + MOI = parseOperand(MOI, MOE, Locations, LiveOuts); // Flags + MOI = parseOperand(MOI, MOE, Locations, LiveOuts); // Num Deopts + + // Record Deopt Args. 
+ unsigned NumDeoptArgs = Locations.back().Offset; + assert(Locations.back().Type == Location::Constant); + assert(NumDeoptArgs == SO.getNumDeoptArgs()); + + while (NumDeoptArgs--) + MOI = parseOperand(MOI, MOE, Locations, LiveOuts); + + // Record gc base/derived pairs + assert(MOI->isImm() && MOI->getImm() == StackMaps::ConstantOp); + ++MOI; + assert(MOI->isImm()); + unsigned NumGCPointers = MOI->getImm(); + ++MOI; + if (NumGCPointers) { + // Map logical index of GC ptr to MI operand index. + SmallVector<unsigned, 8> GCPtrIndices; + unsigned GCPtrIdx = (unsigned)SO.getFirstGCPtrIdx(); + assert((int)GCPtrIdx != -1); + assert(MOI - MI.operands_begin() == GCPtrIdx + 0LL); + while (NumGCPointers--) { + GCPtrIndices.push_back(GCPtrIdx); + GCPtrIdx = StackMaps::getNextMetaArgIdx(&MI, GCPtrIdx); + } + + SmallVector<std::pair<unsigned, unsigned>, 8> GCPairs; + unsigned NumGCPairs = SO.getGCPointerMap(GCPairs); + (void)NumGCPairs; + LLVM_DEBUG(dbgs() << "NumGCPairs = " << NumGCPairs << "\n"); + + auto MOB = MI.operands_begin(); + for (auto &P : GCPairs) { + assert(P.first < GCPtrIndices.size() && "base pointer index not found"); + assert(P.second < GCPtrIndices.size() && + "derived pointer index not found"); + unsigned BaseIdx = GCPtrIndices[P.first]; + unsigned DerivedIdx = GCPtrIndices[P.second]; + LLVM_DEBUG(dbgs() << "Base : " << BaseIdx << " Derived : " << DerivedIdx + << "\n"); + (void)parseOperand(MOB + BaseIdx, MOE, Locations, LiveOuts); + (void)parseOperand(MOB + DerivedIdx, MOE, Locations, LiveOuts); + } + + MOI = MOB + GCPtrIdx; + } + + // Record gc allocas + assert(MOI < MOE); + assert(MOI->isImm() && MOI->getImm() == StackMaps::ConstantOp); + ++MOI; + unsigned NumAllocas = MOI->getImm(); + ++MOI; + while (NumAllocas--) { + MOI = parseOperand(MOI, MOE, Locations, LiveOuts); + assert(MOI < MOE); + } +} + void StackMaps::recordStackMapOpers(const MCSymbol &MILabel, const MachineInstr &MI, uint64_t ID, MachineInstr::const_mop_iterator MOI, @@ -311,9 +471,11 @@ 
void StackMaps::recordStackMapOpers(const MCSymbol &MILabel, } // Parse operands. - while (MOI != MOE) { - MOI = parseOperand(MOI, MOE, Locations, LiveOuts); - } + if (MI.getOpcode() == TargetOpcode::STATEPOINT) + parseStatepointOpers(MI, MOI, MOE, Locations, LiveOuts); + else + while (MOI != MOE) + MOI = parseOperand(MOI, MOE, Locations, LiveOuts); // Move large constants into the constant pool. for (auto &Loc : Locations) { @@ -394,8 +556,6 @@ void StackMaps::recordStatepoint(const MCSymbol &L, const MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::STATEPOINT && "expected statepoint"); StatepointOpers opers(&MI); - // Record all the deopt and gc operands (they're contiguous and run from the - // initial index to the end of the operand list) const unsigned StartIdx = opers.getVarIdx(); recordStackMapOpers(L, MI, opers.getID(), MI.operands_begin() + StartIdx, MI.operands_end(), false); @@ -404,7 +564,7 @@ void StackMaps::recordStatepoint(const MCSymbol &L, const MachineInstr &MI) { /// Emit the stackmap header. /// /// Header { -/// uint8 : Stack Map Version (currently 2) +/// uint8 : Stack Map Version (currently 3) /// uint8 : Reserved (expected to be 0) /// uint16 : Reserved (expected to be 0) /// } diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp index a343791807e6..0411faabbcc3 100644 --- a/llvm/lib/CodeGen/StackProtector.cpp +++ b/llvm/lib/CodeGen/StackProtector.cpp @@ -170,7 +170,8 @@ bool StackProtector::HasAddressTaken(const Instruction *AI, // If this instruction accesses memory make sure it doesn't access beyond // the bounds of the allocated object. 
Optional<MemoryLocation> MemLoc = MemoryLocation::getOrNone(I); - if (MemLoc.hasValue() && MemLoc->Size.getValue() > AllocSize) + if (MemLoc.hasValue() && MemLoc->Size.hasValue() && + MemLoc->Size.getValue() > AllocSize) return true; switch (I->getOpcode()) { case Instruction::Store: @@ -251,10 +252,9 @@ bool StackProtector::HasAddressTaken(const Instruction *AI, static const CallInst *findStackProtectorIntrinsic(Function &F) { for (const BasicBlock &BB : F) for (const Instruction &I : BB) - if (const CallInst *CI = dyn_cast<CallInst>(&I)) - if (CI->getCalledFunction() == - Intrinsic::getDeclaration(F.getParent(), Intrinsic::stackprotector)) - return CI; + if (const auto *II = dyn_cast<IntrinsicInst>(&I)) + if (II->getIntrinsicID() == Intrinsic::stackprotector) + return II; return nullptr; } @@ -274,7 +274,6 @@ static const CallInst *findStackProtectorIntrinsic(Function &F) { bool StackProtector::RequiresStackProtector() { bool Strong = false; bool NeedsProtector = false; - HasPrologue = findStackProtectorIntrinsic(*F); if (F->hasFnAttribute(Attribute::SafeStack)) return false; @@ -295,8 +294,6 @@ bool StackProtector::RequiresStackProtector() { Strong = true; // Use the same heuristic as strong to determine SSPLayout } else if (F->hasFnAttribute(Attribute::StackProtectStrong)) Strong = true; - else if (HasPrologue) - NeedsProtector = true; else if (!F->hasFnAttribute(Attribute::StackProtect)) return false; @@ -381,7 +378,10 @@ bool StackProtector::RequiresStackProtector() { static Value *getStackGuard(const TargetLoweringBase *TLI, Module *M, IRBuilder<> &B, bool *SupportsSelectionDAGSP = nullptr) { - if (Value *Guard = TLI->getIRStackGuard(B)) + Value *Guard = TLI->getIRStackGuard(B); + auto GuardMode = TLI->getTargetMachine().Options.StackProtectorGuard; + if ((GuardMode == llvm::StackProtectorGuards::TLS || + GuardMode == llvm::StackProtectorGuards::None) && Guard) return B.CreateLoad(B.getInt8PtrTy(), Guard, true, "StackGuard"); // Use SelectionDAG SSP 
handling, since there isn't an IR guard. @@ -556,7 +556,9 @@ BasicBlock *StackProtector::CreateFailBB() { LLVMContext &Context = F->getContext(); BasicBlock *FailBB = BasicBlock::Create(Context, "CallStackCheckFailBlk", F); IRBuilder<> B(FailBB); - B.SetCurrentDebugLocation(DebugLoc::get(0, 0, F->getSubprogram())); + if (F->getSubprogram()) + B.SetCurrentDebugLocation( + DILocation::get(Context, 0, 0, F->getSubprogram())); if (Trip.isOSOpenBSD()) { FunctionCallee StackChkFail = M->getOrInsertFunction( "__stack_smash_handler", Type::getVoidTy(Context), diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp index 3cc5d30ebad7..a6f8974f3343 100644 --- a/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -145,7 +145,7 @@ namespace { // their weight. struct IntervalSorter { bool operator()(LiveInterval* LHS, LiveInterval* RHS) const { - return LHS->weight > RHS->weight; + return LHS->weight() > RHS->weight(); } }; @@ -174,7 +174,8 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { continue; LiveInterval &li = LS->getInterval(FI); if (!MI.isDebugValue()) - li.weight += LiveIntervals::getSpillWeight(false, true, MBFI, MI); + li.incrementWeight( + LiveIntervals::getSpillWeight(false, true, MBFI, MI)); } for (MachineInstr::mmo_iterator MMOI = MI.memoperands_begin(), EE = MI.memoperands_end(); @@ -222,7 +223,7 @@ void StackSlotColoring::InitializeSlots() { for (auto *I : Intervals) { LiveInterval &li = I->second; LLVM_DEBUG(li.dump()); - int FI = Register::stackSlot2Index(li.reg); + int FI = Register::stackSlot2Index(li.reg()); if (MFI->isDeadObjectIndex(FI)) continue; @@ -269,7 +270,7 @@ StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const { int StackSlotColoring::ColorSlot(LiveInterval *li) { int Color = -1; bool Share = false; - int FI = Register::stackSlot2Index(li->reg); + int FI = Register::stackSlot2Index(li->reg()); uint8_t StackID = 
MFI->getStackID(FI); if (!DisableSharing) { @@ -331,12 +332,12 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { bool Changed = false; for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; - int SS = Register::stackSlot2Index(li->reg); + int SS = Register::stackSlot2Index(li->reg()); int NewSS = ColorSlot(li); assert(NewSS >= 0 && "Stack coloring failed?"); SlotMapping[SS] = NewSS; RevMap[NewSS].push_back(SS); - SlotWeights[NewSS] += li->weight; + SlotWeights[NewSS] += li->weight(); UsedColors.set(NewSS); Changed |= (SS != NewSS); } @@ -344,8 +345,8 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "\nSpill slots after coloring:\n"); for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; - int SS = Register::stackSlot2Index(li->reg); - li->weight = SlotWeights[SS]; + int SS = Register::stackSlot2Index(li->reg()); + li->setWeight(SlotWeights[SS]); } // Sort them by new weight. llvm::stable_sort(SSIntervals, IntervalSorter()); diff --git a/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp b/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp index dd0b9d4c2e48..4408011c95c0 100644 --- a/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp +++ b/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp @@ -202,8 +202,8 @@ void SwiftErrorValueTracking::propagateVRegs() { // downward defs. 
bool needPHI = VRegs.size() >= 1 && - std::find_if( - VRegs.begin(), VRegs.end(), + llvm::find_if( + VRegs, [&](const std::pair<const MachineBasicBlock *, Register> &V) -> bool { return V.second != VRegs[0].second; }) != VRegs.end(); diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp index 078c9691f8dc..dfcec32d9537 100644 --- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp +++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp @@ -11,8 +11,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/SwitchLoweringUtils.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index bd554189f12b..575bf555c489 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -627,6 +627,14 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, if (PreRegAlloc && MI.isCall()) return false; + // TailDuplicator::appendCopies will erroneously place COPYs after + // INLINEASM_BR instructions after 4b0aa5724fea, which demonstrates the same + // bug that was fixed in f7a53d82c090. + // FIXME: Use findPHICopyInsertPoint() to find the correct insertion point + // for the COPY when replacing PHIs. 
+ if (MI.getOpcode() == TargetOpcode::INLINEASM_BR) + return false; + if (MI.isBundle()) InstrCount += MI.getBundleSize(); else if (!MI.isPHI() && !MI.isMetaInstruction()) @@ -712,8 +720,7 @@ bool TailDuplicator::duplicateSimpleBB( SmallVectorImpl<MachineInstr *> &Copies) { SmallPtrSet<MachineBasicBlock *, 8> Succs(TailBB->succ_begin(), TailBB->succ_end()); - SmallVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), - TailBB->pred_end()); + SmallVector<MachineBasicBlock *, 8> Preds(TailBB->predecessors()); bool Changed = false; for (MachineBasicBlock *PredBB : Preds) { if (PredBB->hasEHPadSuccessor() || PredBB->mayHaveInlineAsmBr()) diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index f8b482c04a58..b0594ec086b2 100644 --- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -41,9 +41,9 @@ bool TargetFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const /// frame of the specified index, along with the frame register used /// (in output arg FrameReg). This is the default implementation which /// is overridden for some targets. -int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, - int FI, - Register &FrameReg) const { +StackOffset +TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, + Register &FrameReg) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); @@ -52,8 +52,9 @@ int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, // something different. 
FrameReg = RI->getFrameRegister(MF); - return MFI.getObjectOffset(FI) + MFI.getStackSize() - - getOffsetOfLocalArea() + MFI.getOffsetAdjustment(); + return StackOffset::getFixed(MFI.getObjectOffset(FI) + MFI.getStackSize() - + getOffsetOfLocalArea() + + MFI.getOffsetAdjustment()); } bool TargetFrameLowering::needsFrameIndexResolution( diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 24f3f96d0b1d..165860ef1aa8 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -69,6 +69,15 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB, llvm_unreachable("Target didn't implement insertNoop!"); } +/// insertNoops - Insert noops into the instruction stream at the specified +/// point. +void TargetInstrInfo::insertNoops(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned Quantity) const { + for (unsigned i = 0; i < Quantity; ++i) + insertNoop(MBB, MI); +} + static bool isAsmComment(const char *Str, const MCAsmInfo &MAI) { return strncmp(Str, MAI.getCommentString().data(), MAI.getCommentString().size()) == 0; @@ -471,6 +480,7 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, int FrameIndex, const TargetInstrInfo &TII) { unsigned StartIdx = 0; + unsigned NumDefs = 0; switch (MI.getOpcode()) { case TargetOpcode::STACKMAP: { // StackMapLiveValues are foldable @@ -486,16 +496,25 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI, case TargetOpcode::STATEPOINT: { // For statepoints, fold deopt and gc arguments, but not call arguments. StartIdx = StatepointOpers(&MI).getVarIdx(); + NumDefs = MI.getNumDefs(); break; } default: llvm_unreachable("unexpected stackmap opcode"); } + unsigned DefToFoldIdx = MI.getNumOperands(); + // Return false if any operands requested for folding are not foldable (not // part of the stackmap's live values). 
for (unsigned Op : Ops) { - if (Op < StartIdx) + if (Op < NumDefs) { + assert(DefToFoldIdx == MI.getNumOperands() && "Folding multiple defs"); + DefToFoldIdx = Op; + } else if (Op < StartIdx) { + return nullptr; + } + if (MI.getOperand(Op).isTied()) return nullptr; } @@ -505,11 +524,16 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI, // No need to fold return, the meta data, and function arguments for (unsigned i = 0; i < StartIdx; ++i) - MIB.add(MI.getOperand(i)); + if (i != DefToFoldIdx) + MIB.add(MI.getOperand(i)); - for (unsigned i = StartIdx; i < MI.getNumOperands(); ++i) { + for (unsigned i = StartIdx, e = MI.getNumOperands(); i < e; ++i) { MachineOperand &MO = MI.getOperand(i); + unsigned TiedTo = e; + (void)MI.isRegTiedToDefOperand(i, &TiedTo); + if (is_contained(Ops, i)) { + assert(TiedTo == e && "Cannot fold tied operands"); unsigned SpillSize; unsigned SpillOffset; // Compute the spill slot size and offset. @@ -523,9 +547,15 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI, MIB.addImm(SpillSize); MIB.addFrameIndex(FrameIndex); MIB.addImm(SpillOffset); - } - else + } else { MIB.add(MO); + if (TiedTo < e) { + assert(TiedTo < NumDefs && "Bad tied operand"); + if (TiedTo > DefToFoldIdx) + --TiedTo; + NewMI->tieOperands(TiedTo, NewMI->getNumOperands() - 1); + } + } } return NewMI; } @@ -748,8 +778,8 @@ bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst, // instruction is known to not increase the critical path, then don't match // that pattern. 
bool TargetInstrInfo::getMachineCombinerPatterns( - MachineInstr &Root, - SmallVectorImpl<MachineCombinerPattern> &Patterns) const { + MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns, + bool DoRegPressureReduce) const { bool Commute; if (isReassociationCandidate(Root, Commute)) { // We found a sequence of instructions that may be suitable for a diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 2c94c2c62e5f..28c8bd0a7ded 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -135,23 +135,28 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) { setLibcallCallingConv((RTLIB::Libcall)LC, CallingConv::C); // For IEEE quad-precision libcall names, PPC uses "kf" instead of "tf". - if (TT.getArch() == Triple::ppc || TT.isPPC64()) { + if (TT.isPPC()) { setLibcallName(RTLIB::ADD_F128, "__addkf3"); setLibcallName(RTLIB::SUB_F128, "__subkf3"); setLibcallName(RTLIB::MUL_F128, "__mulkf3"); setLibcallName(RTLIB::DIV_F128, "__divkf3"); + setLibcallName(RTLIB::POWI_F128, "__powikf2"); setLibcallName(RTLIB::FPEXT_F32_F128, "__extendsfkf2"); setLibcallName(RTLIB::FPEXT_F64_F128, "__extenddfkf2"); setLibcallName(RTLIB::FPROUND_F128_F32, "__trunckfsf2"); setLibcallName(RTLIB::FPROUND_F128_F64, "__trunckfdf2"); setLibcallName(RTLIB::FPTOSINT_F128_I32, "__fixkfsi"); setLibcallName(RTLIB::FPTOSINT_F128_I64, "__fixkfdi"); + setLibcallName(RTLIB::FPTOSINT_F128_I128, "__fixkfti"); setLibcallName(RTLIB::FPTOUINT_F128_I32, "__fixunskfsi"); setLibcallName(RTLIB::FPTOUINT_F128_I64, "__fixunskfdi"); + setLibcallName(RTLIB::FPTOUINT_F128_I128, "__fixunskfti"); setLibcallName(RTLIB::SINTTOFP_I32_F128, "__floatsikf"); setLibcallName(RTLIB::SINTTOFP_I64_F128, "__floatdikf"); + setLibcallName(RTLIB::SINTTOFP_I128_F128, "__floattikf"); setLibcallName(RTLIB::UINTTOFP_I32_F128, "__floatunsikf"); setLibcallName(RTLIB::UINTTOFP_I64_F128, "__floatundikf"); + 
setLibcallName(RTLIB::UINTTOFP_I128_F128, "__floatuntikf"); setLibcallName(RTLIB::OEQ_F128, "__eqkf2"); setLibcallName(RTLIB::UNE_F128, "__nekf2"); setLibcallName(RTLIB::OGE_F128, "__gekf2"); @@ -224,6 +229,10 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { if (OpVT == MVT::f16) { if (RetVT == MVT::f32) return FPEXT_F16_F32; + if (RetVT == MVT::f64) + return FPEXT_F16_F64; + if (RetVT == MVT::f128) + return FPEXT_F16_F128; } else if (OpVT == MVT::f32) { if (RetVT == MVT::f64) return FPEXT_F32_F64; @@ -285,7 +294,14 @@ RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { /// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { - if (OpVT == MVT::f32) { + if (OpVT == MVT::f16) { + if (RetVT == MVT::i32) + return FPTOSINT_F16_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F16_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F16_I128; + } else if (OpVT == MVT::f32) { if (RetVT == MVT::i32) return FPTOSINT_F32_I32; if (RetVT == MVT::i64) @@ -327,7 +343,14 @@ RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { /// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { - if (OpVT == MVT::f32) { + if (OpVT == MVT::f16) { + if (RetVT == MVT::i32) + return FPTOUINT_F16_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F16_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F16_I128; + } else if (OpVT == MVT::f32) { if (RetVT == MVT::i32) return FPTOUINT_F32_I32; if (RetVT == MVT::i64) @@ -370,6 +393,8 @@ RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { /// UNKNOWN_LIBCALL if there is none. 
RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) { if (OpVT == MVT::i32) { + if (RetVT == MVT::f16) + return SINTTOFP_I32_F16; if (RetVT == MVT::f32) return SINTTOFP_I32_F32; if (RetVT == MVT::f64) @@ -381,6 +406,8 @@ RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) { if (RetVT == MVT::ppcf128) return SINTTOFP_I32_PPCF128; } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f16) + return SINTTOFP_I64_F16; if (RetVT == MVT::f32) return SINTTOFP_I64_F32; if (RetVT == MVT::f64) @@ -392,6 +419,8 @@ RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) { if (RetVT == MVT::ppcf128) return SINTTOFP_I64_PPCF128; } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f16) + return SINTTOFP_I128_F16; if (RetVT == MVT::f32) return SINTTOFP_I128_F32; if (RetVT == MVT::f64) @@ -410,6 +439,8 @@ RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) { /// UNKNOWN_LIBCALL if there is none. RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { if (OpVT == MVT::i32) { + if (RetVT == MVT::f16) + return UINTTOFP_I32_F16; if (RetVT == MVT::f32) return UINTTOFP_I32_F32; if (RetVT == MVT::f64) @@ -421,6 +452,8 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { if (RetVT == MVT::ppcf128) return UINTTOFP_I32_PPCF128; } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f16) + return UINTTOFP_I64_F16; if (RetVT == MVT::f32) return UINTTOFP_I64_F32; if (RetVT == MVT::f64) @@ -432,6 +465,8 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { if (RetVT == MVT::ppcf128) return UINTTOFP_I64_PPCF128; } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f16) + return UINTTOFP_I128_F16; if (RetVT == MVT::f32) return UINTTOFP_I128_F32; if (RetVT == MVT::f64) @@ -446,6 +481,83 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { return UNKNOWN_LIBCALL; } +RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order, + MVT VT) { + unsigned ModeN, ModelN; + switch (VT.SimpleTy) { + case MVT::i8: + ModeN = 0; + break; + case MVT::i16: + ModeN = 1; + break; + 
case MVT::i32: + ModeN = 2; + break; + case MVT::i64: + ModeN = 3; + break; + case MVT::i128: + ModeN = 4; + break; + default: + return UNKNOWN_LIBCALL; + } + + switch (Order) { + case AtomicOrdering::Monotonic: + ModelN = 0; + break; + case AtomicOrdering::Acquire: + ModelN = 1; + break; + case AtomicOrdering::Release: + ModelN = 2; + break; + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: + ModelN = 3; + break; + default: + return UNKNOWN_LIBCALL; + } + +#define LCALLS(A, B) \ + { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL } +#define LCALL5(A) \ + LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16) + switch (Opc) { + case ISD::ATOMIC_CMP_SWAP: { + const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_CAS)}; + return LC[ModeN][ModelN]; + } + case ISD::ATOMIC_SWAP: { + const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_SWP)}; + return LC[ModeN][ModelN]; + } + case ISD::ATOMIC_LOAD_ADD: { + const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDADD)}; + return LC[ModeN][ModelN]; + } + case ISD::ATOMIC_LOAD_OR: { + const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDSET)}; + return LC[ModeN][ModelN]; + } + case ISD::ATOMIC_LOAD_CLR: { + const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDCLR)}; + return LC[ModeN][ModelN]; + } + case ISD::ATOMIC_LOAD_XOR: { + const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDEOR)}; + return LC[ModeN][ModelN]; + } + default: + return UNKNOWN_LIBCALL; + } +#undef LCALLS +#undef LCALL5 +} + RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) { #define OP_TO_LIBCALL(Name, Enum) \ case Name: \ @@ -615,7 +727,7 @@ void TargetLoweringBase::initActions() { std::end(TargetDAGCombineArray), 0); for (MVT VT : MVT::fp_valuetypes()) { - MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits().getFixedSize()); + MVT IntVT = MVT::getIntegerVT(VT.getFixedSizeInBits()); if (IntVT.isValid()) { setOperationAction(ISD::ATOMIC_SWAP, VT, Promote); AddPromotedToType(ISD::ATOMIC_SWAP, VT, IntVT); @@ -657,6 +769,8 @@ 
void TargetLoweringBase::initActions() { setOperationAction(ISD::UADDSAT, VT, Expand); setOperationAction(ISD::SSUBSAT, VT, Expand); setOperationAction(ISD::USUBSAT, VT, Expand); + setOperationAction(ISD::SSHLSAT, VT, Expand); + setOperationAction(ISD::USHLSAT, VT, Expand); setOperationAction(ISD::SMULFIX, VT, Expand); setOperationAction(ISD::SMULFIXSAT, VT, Expand); setOperationAction(ISD::UMULFIX, VT, Expand); @@ -665,6 +779,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::SDIVFIXSAT, VT, Expand); setOperationAction(ISD::UDIVFIX, VT, Expand); setOperationAction(ISD::UDIVFIXSAT, VT, Expand); + setOperationAction(ISD::FP_TO_SINT_SAT, VT, Expand); + setOperationAction(ISD::FP_TO_UINT_SAT, VT, Expand); // Overflow operations default to expand setOperationAction(ISD::SADDO, VT, Expand); @@ -678,6 +794,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::ADDCARRY, VT, Expand); setOperationAction(ISD::SUBCARRY, VT, Expand); setOperationAction(ISD::SETCCCARRY, VT, Expand); + setOperationAction(ISD::SADDO_CARRY, VT, Expand); + setOperationAction(ISD::SSUBO_CARRY, VT, Expand); // ADDC/ADDE/SUBC/SUBE default to expand. setOperationAction(ISD::ADDC, VT, Expand); @@ -690,6 +808,7 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::BITREVERSE, VT, Expand); + setOperationAction(ISD::PARITY, VT, Expand); // These library functions default to expand. setOperationAction(ISD::FROUND, VT, Expand); @@ -728,6 +847,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::VECREDUCE_UMIN, VT, Expand); setOperationAction(ISD::VECREDUCE_FMAX, VT, Expand); setOperationAction(ISD::VECREDUCE_FMIN, VT, Expand); + setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Expand); + setOperationAction(ISD::VECREDUCE_SEQ_FMUL, VT, Expand); } // Most targets ignore the @llvm.prefetch intrinsic. 
@@ -772,6 +893,8 @@ void TargetLoweringBase::initActions() { // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand" // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP. setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); + + setOperationAction(ISD::UBSANTRAP, MVT::Other, Expand); } MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL, @@ -801,6 +924,11 @@ bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const { } } +bool TargetLoweringBase::isFreeAddrSpaceCast(unsigned SrcAS, + unsigned DestAS) const { + return TM.isNoopAddrSpaceCast(SrcAS, DestAS); +} + void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) { // If the command-line option was specified, ignore this request. if (!JumpIsExpensiveOverride.getNumOccurrences()) @@ -823,9 +951,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { "Promote may not follow Expand or Promote"); if (LA == TypeSplitVector) - return LegalizeKind(LA, - EVT::getVectorVT(Context, SVT.getVectorElementType(), - SVT.getVectorElementCount() / 2)); + return LegalizeKind(LA, EVT(SVT).getHalfNumVectorElementsVT(Context)); if (LA == TypeScalarizeVector) return LegalizeKind(LA, SVT.getVectorElementType()); return LegalizeKind(LA, NVT); @@ -856,10 +982,10 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { EVT EltVT = VT.getVectorElementType(); // Vectors with only one element are always scalarized. 
- if (NumElts == 1) + if (NumElts.isScalar()) return LegalizeKind(TypeScalarizeVector, EltVT); - if (VT.getVectorElementCount() == ElementCount(1, true)) + if (VT.getVectorElementCount() == ElementCount::getScalable(1)) report_fatal_error("Cannot legalize this vector"); // Try to widen vector elements until the element type is a power of two and @@ -869,7 +995,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { // Vectors with a number of elements that is not a power of two are always // widened, for example <3 x i8> -> <4 x i8>. if (!VT.isPow2VectorType()) { - NumElts = NumElts.NextPowerOf2(); + NumElts = NumElts.coefficientNextPowerOf2(); EVT NVT = EVT::getVectorVT(Context, EltVT, NumElts); return LegalizeKind(TypeWidenVector, NVT); } @@ -881,7 +1007,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { // <4 x i140> -> <2 x i140> if (LK.first == TypeExpandInteger) return LegalizeKind(TypeSplitVector, - EVT::getVectorVT(Context, EltVT, NumElts / 2)); + VT.getHalfNumVectorElementsVT(Context)); // Promote the integer element types until a legal vector type is found // or until the element integer type is too big. If a legal type was not @@ -918,7 +1044,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { // If there is no wider legal type, split the vector. while (true) { // Round up to the next power of 2. - NumElts = NumElts.NextPowerOf2(); + NumElts = NumElts.coefficientNextPowerOf2(); // If there is no simple vector type with this many elements then there // cannot be a larger legal vector type. Note that this assumes that @@ -941,7 +1067,8 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { } // Vectors with illegal element types are expanded. 
- EVT NVT = EVT::getVectorVT(Context, EltVT, VT.getVectorElementCount() / 2); + EVT NVT = EVT::getVectorVT(Context, EltVT, + VT.getVectorElementCount().divideCoefficientBy(2)); return LegalizeKind(TypeSplitVector, NVT); } @@ -957,23 +1084,24 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, // Scalable vectors cannot be scalarized, so splitting or widening is // required. - if (VT.isScalableVector() && !isPowerOf2_32(EC.Min)) + if (VT.isScalableVector() && !isPowerOf2_32(EC.getKnownMinValue())) llvm_unreachable( "Splitting or widening of non-power-of-2 MVTs is not implemented."); // FIXME: We don't support non-power-of-2-sized vectors for now. // Ideally we could break down into LHS/RHS like LegalizeDAG does. - if (!isPowerOf2_32(EC.Min)) { + if (!isPowerOf2_32(EC.getKnownMinValue())) { // Split EC to unit size (scalable property is preserved). - NumVectorRegs = EC.Min; - EC = EC / NumVectorRegs; + NumVectorRegs = EC.getKnownMinValue(); + EC = ElementCount::getFixed(1); } // Divide the input until we get to a supported size. This will // always end up with an EC that represent a scalar or a scalable // scalar. - while (EC.Min > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, EC))) { - EC.Min >>= 1; + while (EC.getKnownMinValue() > 1 && + !TLI->isTypeLegal(MVT::getVectorVT(EltTy, EC))) { + EC = EC.divideCoefficientBy(2); NumVectorRegs <<= 1; } @@ -984,7 +1112,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, NewVT = EltTy; IntermediateVT = NewVT; - unsigned LaneSizeInBits = NewVT.getScalarSizeInBits().getFixedSize(); + unsigned LaneSizeInBits = NewVT.getScalarSizeInBits(); // Convert sizes such as i33 to i64. if (!isPowerOf2_32(LaneSizeInBits)) @@ -993,8 +1121,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, MVT DestVT = TLI->getRegisterType(NewVT); RegisterVT = DestVT; if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. 
- return NumVectorRegs * - (LaneSizeInBits / DestVT.getScalarSizeInBits().getFixedSize()); + return NumVectorRegs * (LaneSizeInBits / DestVT.getScalarSizeInBits()); // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. @@ -1041,9 +1168,19 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI, // Inherit previous memory operands. MIB.cloneMemRefs(*MI); - for (auto &MO : MI->operands()) { + for (unsigned i = 0; i < MI->getNumOperands(); ++i) { + MachineOperand &MO = MI->getOperand(i); if (!MO.isFI()) { + // Index of Def operand this Use it tied to. + // Since Defs are coming before Uses, if Use is tied, then + // index of Def must be smaller that index of that Use. + // Also, Defs preserve their position in new MI. + unsigned TiedTo = i; + if (MO.isReg() && MO.isTied()) + TiedTo = MI->findTiedOperandIdx(i); MIB.add(MO); + if (TiedTo < i) + MIB->tieOperands(TiedTo, MIB->getNumOperands() - 1); continue; } @@ -1090,36 +1227,6 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI, return MBB; } -MachineBasicBlock * -TargetLoweringBase::emitXRayCustomEvent(MachineInstr &MI, - MachineBasicBlock *MBB) const { - assert(MI.getOpcode() == TargetOpcode::PATCHABLE_EVENT_CALL && - "Called emitXRayCustomEvent on the wrong MI!"); - auto &MF = *MI.getMF(); - auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc()); - for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx) - MIB.add(MI.getOperand(OpIdx)); - - MBB->insert(MachineBasicBlock::iterator(MI), MIB); - MI.eraseFromParent(); - return MBB; -} - -MachineBasicBlock * -TargetLoweringBase::emitXRayTypedEvent(MachineInstr &MI, - MachineBasicBlock *MBB) const { - assert(MI.getOpcode() == TargetOpcode::PATCHABLE_TYPED_EVENT_CALL && - "Called emitXRayTypedEvent on the wrong MI!"); - auto &MF = *MI.getMF(); - auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc()); - for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx) - 
MIB.add(MI.getOperand(OpIdx)); - - MBB->insert(MachineBasicBlock::iterator(MI), MIB); - MI.eraseFromParent(); - return MBB; -} - /// findRepresentativeClass - Return the largest legal super-reg register class /// of the register class for the specified type and its associated "cost". // This function is in TargetLowering because it uses RegClassForVT which would @@ -1282,7 +1389,7 @@ void TargetLoweringBase::computeRegisterProperties( MVT SVT = (MVT::SimpleValueType) nVT; // Promote vectors of integers to vectors with the same number // of elements, with a wider element type. - if (SVT.getScalarSizeInBits() > EltVT.getSizeInBits() && + if (SVT.getScalarSizeInBits() > EltVT.getFixedSizeInBits() && SVT.getVectorElementCount() == EC && isTypeLegal(SVT)) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; @@ -1298,13 +1405,15 @@ void TargetLoweringBase::computeRegisterProperties( } case TypeWidenVector: - if (isPowerOf2_32(EC.Min)) { + if (isPowerOf2_32(EC.getKnownMinValue())) { // Try to widen the vector. for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { MVT SVT = (MVT::SimpleValueType) nVT; if (SVT.getVectorElementType() == EltVT && SVT.isScalableVector() == IsScalable && - SVT.getVectorElementCount().Min > EC.Min && isTypeLegal(SVT)) { + SVT.getVectorElementCount().getKnownMinValue() > + EC.getKnownMinValue() && + isTypeLegal(SVT)) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; @@ -1348,10 +1457,10 @@ void TargetLoweringBase::computeRegisterProperties( ValueTypeActions.setTypeAction(VT, TypeScalarizeVector); else if (PreferredAction == TypeSplitVector) ValueTypeActions.setTypeAction(VT, TypeSplitVector); - else if (EC.Min > 1) + else if (EC.getKnownMinValue() > 1) ValueTypeActions.setTypeAction(VT, TypeSplitVector); else - ValueTypeActions.setTypeAction(VT, EC.Scalable + ValueTypeActions.setTypeAction(VT, EC.isScalable() ? 
TypeScalarizeScalableVector : TypeScalarizeVector); } else { @@ -1409,7 +1518,8 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT // This handles things like <2 x float> -> <4 x float> and // <4 x i1> -> <4 x i32>. LegalizeTypeAction TA = getTypeAction(Context, VT); - if (EltCnt.Min != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) { + if (EltCnt.getKnownMinValue() != 1 && + (TA == TypeWidenVector || TA == TypePromoteInteger)) { EVT RegisterEVT = getTypeToTransformTo(Context, VT); if (isTypeLegal(RegisterEVT)) { IntermediateVT = RegisterEVT; @@ -1426,7 +1536,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT // Scalable vectors cannot be scalarized, so handle the legalisation of the // types like done elsewhere in SelectionDAG. - if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.Min)) { + if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.getKnownMinValue())) { LegalizeKind LK; EVT PartVT = VT; do { @@ -1435,15 +1545,15 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT PartVT = LK.second; } while (LK.first != TypeLegal); - NumIntermediates = - VT.getVectorElementCount().Min / PartVT.getVectorElementCount().Min; + NumIntermediates = VT.getVectorElementCount().getKnownMinValue() / + PartVT.getVectorElementCount().getKnownMinValue(); // FIXME: This code needs to be extended to handle more complex vector // breakdowns, like nxv7i64 -> nxv8i64 -> 4 x nxv2i64. Currently the only // supported cases are vectors that are broken down into equal parts // such as nxv6i64 -> 3 x nxv2i64. 
- assert(NumIntermediates * PartVT.getVectorElementCount().Min == - VT.getVectorElementCount().Min && + assert((PartVT.getVectorElementCount() * NumIntermediates) == + VT.getVectorElementCount() && "Expected an integer multiple of PartVT"); IntermediateVT = PartVT; RegisterVT = getRegisterType(Context, IntermediateVT); @@ -1452,16 +1562,16 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally // we could break down into LHS/RHS like LegalizeDAG does. - if (!isPowerOf2_32(EltCnt.Min)) { - NumVectorRegs = EltCnt.Min; - EltCnt.Min = 1; + if (!isPowerOf2_32(EltCnt.getKnownMinValue())) { + NumVectorRegs = EltCnt.getKnownMinValue(); + EltCnt = ElementCount::getFixed(1); } // Divide the input until we get to a supported size. This will always // end with a scalar if the target doesn't support vectors. - while (EltCnt.Min > 1 && + while (EltCnt.getKnownMinValue() > 1 && !isTypeLegal(EVT::getVectorVT(Context, EltTy, EltCnt))) { - EltCnt.Min >>= 1; + EltCnt = EltCnt.divideCoefficientBy(2); NumVectorRegs <<= 1; } @@ -1479,7 +1589,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT TypeSize NewVTSize = NewVT.getSizeInBits(); // Convert sizes such as i33 to i64. 
if (!isPowerOf2_32(NewVTSize.getKnownMinSize())) - NewVTSize = NewVTSize.NextPowerOf2(); + NewVTSize = NewVTSize.coefficientNextPowerOf2(); return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); } @@ -1616,6 +1726,14 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, MMO.getFlags(), Fast); } +bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, + const DataLayout &DL, LLT Ty, + const MachineMemOperand &MMO, + bool *Fast) const { + return allowsMemoryAccess(Context, DL, getMVTForLLT(Ty), MMO.getAddrSpace(), + MMO.getAlign(), MMO.getFlags(), Fast); +} + BranchProbability TargetLoweringBase::getPredictableBranchThreshold() const { return BranchProbability(MinPercentageForPredictableBranch, 100); } @@ -1827,7 +1945,10 @@ Value *TargetLoweringBase::getIRStackGuard(IRBuilder<> &IRB) const { if (getTargetMachine().getTargetTriple().isOSOpenBSD()) { Module &M = *IRB.GetInsertBlock()->getParent()->getParent(); PointerType *PtrTy = Type::getInt8PtrTy(M.getContext()); - return M.getOrInsertGlobal("__guard_local", PtrTy); + Constant *C = M.getOrInsertGlobal("__guard_local", PtrTy); + if (GlobalVariable *G = dyn_cast_or_null<GlobalVariable>(C)) + G->setVisibility(GlobalValue::HiddenVisibility); + return C; } return nullptr; } @@ -1835,10 +1956,14 @@ Value *TargetLoweringBase::getIRStackGuard(IRBuilder<> &IRB) const { // Currently only support "standard" __stack_chk_guard. // TODO: add LOAD_STACK_GUARD support. 
void TargetLoweringBase::insertSSPDeclarations(Module &M) const { - if (!M.getNamedValue("__stack_chk_guard")) - new GlobalVariable(M, Type::getInt8PtrTy(M.getContext()), false, - GlobalVariable::ExternalLinkage, - nullptr, "__stack_chk_guard"); + if (!M.getNamedValue("__stack_chk_guard")) { + auto *GV = new GlobalVariable(M, Type::getInt8PtrTy(M.getContext()), false, + GlobalVariable::ExternalLinkage, nullptr, + "__stack_chk_guard"); + if (TM.getRelocationModel() == Reloc::Static && + !TM.getTargetTriple().isWindowsGNUEnvironment()) + GV->setDSOLocal(true); + } } // Currently only support "standard" __stack_chk_guard. @@ -1922,7 +2047,7 @@ static bool parseRefinementStep(StringRef In, size_t &Position, // step parameter. if (RefStepString.size() == 1) { char RefStepChar = RefStepString[0]; - if (RefStepChar >= '0' && RefStepChar <= '9') { + if (isDigit(RefStepChar)) { Value = RefStepChar - '0'; return true; } diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 27bebe503ce6..fe64b38cf0be 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -21,6 +21,7 @@ #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/CodeGen/BasicBlockSectionUtils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -39,6 +40,7 @@ #include "llvm/IR/Mangler.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PseudoProbe.h" #include "llvm/IR/Type.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -104,10 +106,14 @@ static void GetObjCImageInfo(Module &M, unsigned &Version, unsigned &Flags, // ELF //===----------------------------------------------------------------------===// +TargetLoweringObjectFileELF::TargetLoweringObjectFileELF() + : 
TargetLoweringObjectFile() { + SupportDSOLocalEquivalentLowering = true; +} + void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, const TargetMachine &TgtM) { TargetLoweringObjectFile::Initialize(Ctx, TgtM); - TM = &TgtM; CodeModel::Model CM = TgtM.getCodeModel(); InitializeELF(TgtM.Options.UseInitArray); @@ -122,6 +128,7 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, // Fallthrough if not using EHABI LLVM_FALLTHROUGH; case Triple::ppc: + case Triple::ppcle: case Triple::x86: PersonalityEncoding = isPositionIndependent() ? dwarf::DW_EH_PE_indirect | @@ -174,11 +181,20 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, // will be in memory. Most of these could end up >2GB away so even a signed // pc-relative 32-bit address is insufficient, theoretically. if (isPositionIndependent()) { - PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | - dwarf::DW_EH_PE_sdata8; - LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8; - TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | - dwarf::DW_EH_PE_sdata8; + // ILP32 uses sdata4 instead of sdata8 + if (TgtM.getTargetTriple().getEnvironment() == Triple::GNUILP32) { + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + } else { + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata8; + LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8; + TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata8; + } } else { PersonalityEncoding = dwarf::DW_EH_PE_absptr; LSDAEncoding = dwarf::DW_EH_PE_absptr; @@ -310,6 +326,29 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, } } + if (NamedMDNode *FuncInfo = 
M.getNamedMetadata(PseudoProbeDescMetadataName)) { + // Emit a descriptor for every function including functions that have an + // available external linkage. We may not want this for imported functions + // that has code in another thinLTO module but we don't have a good way to + // tell them apart from inline functions defined in header files. Therefore + // we put each descriptor in a separate comdat section and rely on the + // linker to deduplicate. + for (const auto *Operand : FuncInfo->operands()) { + const auto *MD = cast<MDNode>(Operand); + auto *GUID = mdconst::dyn_extract<ConstantInt>(MD->getOperand(0)); + auto *Hash = mdconst::dyn_extract<ConstantInt>(MD->getOperand(1)); + auto *Name = cast<MDString>(MD->getOperand(2)); + auto *S = C.getObjectFileInfo()->getPseudoProbeDescSection( + TM->getFunctionSections() ? Name->getString() : StringRef()); + + Streamer.SwitchSection(S); + Streamer.emitInt64(GUID->getZExtValue()); + Streamer.emitInt64(Hash->getZExtValue()); + Streamer.emitULEB128IntValue(Name->getString().size()); + Streamer.emitBytes(Name->getString()); + } + } + unsigned Version = 0; unsigned Flags = 0; StringRef Section; @@ -324,46 +363,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, Streamer.AddBlankLine(); } - SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags; - M.getModuleFlagsMetadata(ModuleFlags); - - MDNode *CFGProfile = nullptr; - - for (const auto &MFE : ModuleFlags) { - StringRef Key = MFE.Key->getString(); - if (Key == "CG Profile") { - CFGProfile = cast<MDNode>(MFE.Val); - break; - } - } - - if (!CFGProfile) - return; - - auto GetSym = [this](const MDOperand &MDO) -> MCSymbol * { - if (!MDO) - return nullptr; - auto V = cast<ValueAsMetadata>(MDO); - const Function *F = cast<Function>(V->getValue()); - return TM->getSymbol(F); - }; - - for (const auto &Edge : CFGProfile->operands()) { - MDNode *E = cast<MDNode>(Edge); - const MCSymbol *From = GetSym(E->getOperand(0)); - const MCSymbol *To = 
GetSym(E->getOperand(1)); - // Skip null functions. This can happen if functions are dead stripped after - // the CGProfile pass has been run. - if (!From || !To) - continue; - uint64_t Count = cast<ConstantAsMetadata>(E->getOperand(2)) - ->getValue() - ->getUniqueInteger() - .getZExtValue(); - Streamer.emitCGProfileEntry( - MCSymbolRefExpr::create(From, MCSymbolRefExpr::VK_None, C), - MCSymbolRefExpr::create(To, MCSymbolRefExpr::VK_None, C), Count); - } + emitCGProfileMetadata(Streamer, M); } MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol( @@ -436,7 +436,8 @@ static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { if (Name == getInstrProfSectionName(IPSK_covmap, Triple::ELF, /*AddSegmentInfo=*/false) || Name == getInstrProfSectionName(IPSK_covfun, Triple::ELF, - /*AddSegmentInfo=*/false)) + /*AddSegmentInfo=*/false) || + Name == ".llvmbc" || Name == ".llvmcmd") return SectionKind::getMetadata(); if (Name.empty() || Name[0] != '.') return K; @@ -614,7 +615,7 @@ getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind, bool HasPrefix = false; if (const auto *F = dyn_cast<Function>(GO)) { if (Optional<StringRef> Prefix = F->getSectionPrefix()) { - Name += *Prefix; + raw_svector_ostream(Name) << '.' << *Prefix; HasPrefix = true; } } @@ -680,11 +681,12 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( // MD_associated in a unique section. unsigned UniqueID = MCContext::GenericSectionID; const MCSymbolELF *LinkedToSym = getLinkedToSymbol(GO, TM); - if (LinkedToSym) { + if (GO->getMetadata(LLVMContext::MD_associated)) { UniqueID = NextUniqueID++; Flags |= ELF::SHF_LINK_ORDER; } else { - if (getContext().getAsmInfo()->useIntegratedAssembler()) { + if (getContext().getAsmInfo()->useIntegratedAssembler() || + getContext().getAsmInfo()->binutilsIsAtLeast(2, 35)) { // Symbols must be placed into sections with compatible entry // sizes. 
Generate unique sections for symbols that have not // been assigned to compatible sections. @@ -735,8 +737,9 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( assert(Section->getLinkedToSymbol() == LinkedToSym && "Associated symbol mismatch between sections"); - if (!getContext().getAsmInfo()->useIntegratedAssembler()) { - // If we are not using the integrated assembler then this symbol might have + if (!(getContext().getAsmInfo()->useIntegratedAssembler() || + getContext().getAsmInfo()->binutilsIsAtLeast(2, 35))) { + // If we are using GNU as before 2.35, then this symbol might have // been placed in an incompatible mergeable section. Emit an error if this // is the case to avoid creating broken output. if ((Section->getFlags() & ELF::SHF_MERGE) && @@ -831,6 +834,43 @@ MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable( /* AssociatedSymbol */ nullptr); } +MCSection * +TargetLoweringObjectFileELF::getSectionForLSDA(const Function &F, + const TargetMachine &TM) const { + // If neither COMDAT nor function sections, use the monolithic LSDA section. + // Re-use this path if LSDASection is null as in the Arm EHABI. + if (!LSDASection || (!F.hasComdat() && !TM.getFunctionSections())) + return LSDASection; + + const auto *LSDA = cast<MCSectionELF>(LSDASection); + unsigned Flags = LSDA->getFlags(); + StringRef Group; + if (F.hasComdat()) { + Group = F.getComdat()->getName(); + Flags |= ELF::SHF_GROUP; + } + + // Append the function name as the suffix like GCC, assuming + // -funique-section-names applies to .gcc_except_table sections. + if (TM.getUniqueSectionNames()) + return getContext().getELFSection(LSDA->getName() + "." + F.getName(), + LSDA->getType(), Flags, 0, Group, + MCSection::NonUniqueID, nullptr); + + // Allocate a unique ID if function sections && (integrated assembler or GNU + // as>=2.35). 
Note we could use SHF_LINK_ORDER to facilitate --gc-sections but + // that would require that we know the linker is a modern LLD (12.0 or later). + // GNU ld as of 2.35 does not support mixed SHF_LINK_ORDER & + // non-SHF_LINK_ORDER components in an output section + // https://sourceware.org/bugzilla/show_bug.cgi?id=26256 + unsigned ID = TM.getFunctionSections() && + getContext().getAsmInfo()->useIntegratedAssembler() + ? NextUniqueID++ + : MCSection::NonUniqueID; + return getContext().getELFSection(LSDA->getName(), LSDA->getType(), Flags, 0, + Group, ID, nullptr); +} + bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection( bool UsesLabelDifference, const Function &F) const { // We can always create relative relocations, so use another section @@ -865,14 +905,14 @@ MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock( assert(MBB.isBeginSection() && "Basic block does not start a section!"); unsigned UniqueID = MCContext::GenericSectionID; - // For cold sections use the .text.unlikely prefix along with the parent + // For cold sections use the .text.split. prefix along with the parent // function name. All cold blocks for the same function go to the same // section. Similarly all exception blocks are grouped by symbol name // under the .text.eh prefix. For regular sections, we either use a unique // name, or a unique ID for the section. 
SmallString<128> Name; if (MBB.getSectionID() == MBBSectionID::ColdSectionID) { - Name += ".text.unlikely."; + Name += BBSectionsColdTextPrefix; Name += MBB.getParent()->getName(); } else if (MBB.getSectionID() == MBBSectionID::ExceptionSectionID) { Name += ".text.eh."; @@ -888,7 +928,7 @@ MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock( } unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_EXECINSTR; - std::string GroupName = ""; + std::string GroupName; if (F.hasComdat()) { Flags |= ELF::SHF_GROUP; GroupName = F.getComdat()->getName().str(); @@ -968,6 +1008,20 @@ const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference( MCSymbolRefExpr::create(TM.getSymbol(RHS), getContext()), getContext()); } +const MCExpr *TargetLoweringObjectFileELF::lowerDSOLocalEquivalent( + const DSOLocalEquivalent *Equiv, const TargetMachine &TM) const { + assert(supportDSOLocalEquivalentLowering()); + + const auto *GV = Equiv->getGlobalValue(); + + // A PLT entry is not needed for dso_local globals. + if (GV->isDSOLocal() || GV->isImplicitDSOLocal()) + return MCSymbolRefExpr::create(TM.getSymbol(GV), getContext()); + + return MCSymbolRefExpr::create(TM.getSymbol(GV), PLTRelativeVariantKind, + getContext()); +} + MCSection *TargetLoweringObjectFileELF::getSectionForCommandLines() const { // Use ".GCC.command.line" since this feature is to support clang's // -frecord-gcc-switches which in turn attempts to mimic GCC's switch of the @@ -1515,6 +1569,10 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( MCSymbol *Sym = TM.getSymbol(ComdatGV); StringRef COMDATSymName = Sym->getName(); + if (const auto *F = dyn_cast<Function>(GO)) + if (Optional<StringRef> Prefix = F->getSectionPrefix()) + raw_svector_ostream(Name) << '$' << *Prefix; + // Append "$symbol" to the section name *before* IR-level mangling is // applied when targetting mingw. This is what GCC does, and the ld.bfd // COFF linker will not properly handle comdats otherwise. 
@@ -1590,6 +1648,31 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable( void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer, Module &M) const { + emitLinkerDirectives(Streamer, M); + + unsigned Version = 0; + unsigned Flags = 0; + StringRef Section; + + GetObjCImageInfo(M, Version, Flags, Section); + if (!Section.empty()) { + auto &C = getContext(); + auto *S = C.getCOFFSection(Section, + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); + Streamer.SwitchSection(S); + Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); + Streamer.emitInt32(Version); + Streamer.emitInt32(Flags); + Streamer.AddBlankLine(); + } + + emitCGProfileMetadata(Streamer, M); +} + +void TargetLoweringObjectFileCOFF::emitLinkerDirectives( + MCStreamer &Streamer, Module &M) const { if (NamedMDNode *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) { // Emit the linker options to the linker .drectve section. According to the // spec, this section is a space-separated string containing flags for @@ -1606,28 +1689,51 @@ void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer, } } - unsigned Version = 0; - unsigned Flags = 0; - StringRef Section; - - GetObjCImageInfo(M, Version, Flags, Section); - if (Section.empty()) - return; + // Emit /EXPORT: flags for each exported global as necessary. 
+ std::string Flags; + for (const GlobalValue &GV : M.global_values()) { + raw_string_ostream OS(Flags); + emitLinkerFlagsForGlobalCOFF(OS, &GV, getTargetTriple(), getMangler()); + OS.flush(); + if (!Flags.empty()) { + Streamer.SwitchSection(getDrectveSection()); + Streamer.emitBytes(Flags); + } + Flags.clear(); + } - auto &C = getContext(); - auto *S = C.getCOFFSection( - Section, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, - SectionKind::getReadOnly()); - Streamer.SwitchSection(S); - Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); - Streamer.emitInt32(Version); - Streamer.emitInt32(Flags); - Streamer.AddBlankLine(); + // Emit /INCLUDE: flags for each used global as necessary. + if (const auto *LU = M.getNamedGlobal("llvm.used")) { + assert(LU->hasInitializer() && "expected llvm.used to have an initializer"); + assert(isa<ArrayType>(LU->getValueType()) && + "expected llvm.used to be an array type"); + if (const auto *A = cast<ConstantArray>(LU->getInitializer())) { + for (const Value *Op : A->operands()) { + const auto *GV = cast<GlobalValue>(Op->stripPointerCasts()); + // Global symbols with internal or private linkage are not visible to + // the linker, and thus would cause an error when the linker tried to + // preserve the symbol due to the `/include:` directive. 
+ if (GV->hasLocalLinkage()) + continue; + + raw_string_ostream OS(Flags); + emitLinkerFlagsForUsedCOFF(OS, GV, getTargetTriple(), getMangler()); + OS.flush(); + + if (!Flags.empty()) { + Streamer.SwitchSection(getDrectveSection()); + Streamer.emitBytes(Flags); + } + Flags.clear(); + } + } + } } void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFile::Initialize(Ctx, TM); + this->TM = &TM; const Triple &T = TM.getTargetTriple(); if (T.isWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) { StaticCtorSection = @@ -1702,16 +1808,6 @@ MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection( cast<MCSectionCOFF>(StaticDtorSection)); } -void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal( - raw_ostream &OS, const GlobalValue *GV) const { - emitLinkerFlagsForGlobalCOFF(OS, GV, getTargetTriple(), getMangler()); -} - -void TargetLoweringObjectFileCOFF::emitLinkerFlagsForUsed( - raw_ostream &OS, const GlobalValue *GV) const { - emitLinkerFlagsForUsedCOFF(OS, GV, getTargetTriple(), getMangler()); -} - const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference( const GlobalValue *LHS, const GlobalValue *RHS, const TargetMachine &TM) const { @@ -1882,7 +1978,7 @@ static MCSectionWasm *selectWasmSectionForGlobal( if (const auto *F = dyn_cast<Function>(GO)) { const auto &OptionalPrefix = F->getSectionPrefix(); if (OptionalPrefix) - Name += *OptionalPrefix; + raw_svector_ostream(Name) << '.' 
<< *OptionalPrefix; } if (EmitUniqueSection && UniqueSectionNames) { @@ -1970,14 +2066,36 @@ MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection( //===----------------------------------------------------------------------===// // XCOFF //===----------------------------------------------------------------------===// +bool TargetLoweringObjectFileXCOFF::ShouldEmitEHBlock( + const MachineFunction *MF) { + if (!MF->getLandingPads().empty()) + return true; + + const Function &F = MF->getFunction(); + if (!F.hasPersonalityFn() || !F.needsUnwindTableEntry()) + return false; + + const Function *Per = + dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()); + if (isNoOpWithoutInvoke(classifyEHPersonality(Per))) + return false; + + return true; +} + +MCSymbol * +TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(const MachineFunction *MF) { + return MF->getMMI().getContext().getOrCreateSymbol( + "__ehinfo." + Twine(MF->getFunctionNumber())); +} + MCSymbol * TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV, const TargetMachine &TM) const { - if (TM.getDataSections()) - report_fatal_error("XCOFF unique data sections not yet implemented"); - // We always use a qualname symbol for a GV that represents // a declaration, a function descriptor, or a common symbol. + // If a GV represents a GlobalVariable and -fdata-sections is enabled, we + // also return a qualname so that a label symbol could be avoided. // It is inherently ambiguous when the GO represents the address of a // function, as the GO could either represent a function descriptor or a // function entry point. 
We choose to always return a function descriptor @@ -1992,21 +2110,34 @@ TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV, return cast<MCSectionXCOFF>( getSectionForFunctionDescriptor(cast<Function>(GO), TM)) ->getQualNameSymbol(); - if (GOKind.isCommon() || GOKind.isBSSLocal()) + if ((TM.getDataSections() && !GO->hasSection()) || GOKind.isCommon() || + GOKind.isBSSLocal()) return cast<MCSectionXCOFF>(SectionForGlobal(GO, GOKind, TM)) ->getQualNameSymbol(); } // For all other cases, fall back to getSymbol to return the unqualified name. - // This could change for a GV that is a GlobalVariable when we decide to - // support -fdata-sections since we could avoid having label symbols if the - // linkage name is applied to the csect symbol. return nullptr; } MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { - report_fatal_error("XCOFF explicit sections not yet implemented."); + if (!GO->hasSection()) + report_fatal_error("#pragma clang section is not yet supported"); + + StringRef SectionName = GO->getSection(); + XCOFF::StorageMappingClass MappingClass; + if (Kind.isText()) + MappingClass = XCOFF::XMC_PR; + else if (Kind.isData() || Kind.isReadOnlyWithRel() || Kind.isBSS()) + MappingClass = XCOFF::XMC_RW; + else if (Kind.isReadOnly()) + MappingClass = XCOFF::XMC_RO; + else + report_fatal_error("XCOFF other section types not yet implemented."); + + return getContext().getXCOFFSection(SectionName, MappingClass, XCOFF::XTY_SD, + Kind, /* MultiSymbolsAllowed*/ true); } MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference( @@ -2016,30 +2147,23 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference( SmallString<128> Name; getNameWithPrefix(Name, GO, TM); - XCOFF::StorageClass SC = - TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO); // Externals go into a csect of type ER. 
return getContext().getXCOFFSection( Name, isa<Function>(GO) ? XCOFF::XMC_DS : XCOFF::XMC_UA, XCOFF::XTY_ER, - SC, SectionKind::getMetadata()); + SectionKind::getMetadata()); } MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { - assert(!TM.getFunctionSections() && !TM.getDataSections() && - "XCOFF unique sections not yet implemented."); - // Common symbols go into a csect with matching name which will get mapped // into the .bss section. if (Kind.isBSSLocal() || Kind.isCommon()) { SmallString<128> Name; getNameWithPrefix(Name, GO, TM); - XCOFF::StorageClass SC = - TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO); return getContext().getXCOFFSection( Name, Kind.isBSSLocal() ? XCOFF::XMC_BS : XCOFF::XMC_RW, XCOFF::XTY_CM, - SC, Kind, /* BeginSymbolName */ nullptr); + Kind); } if (Kind.isMergeableCString()) { @@ -2051,40 +2175,65 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal( SmallString<128> Name; Name = SizeSpec + utostr(Alignment.value()); + if (TM.getDataSections()) + getNameWithPrefix(Name, GO, TM); + return getContext().getXCOFFSection( - Name, XCOFF::XMC_RO, XCOFF::XTY_SD, - TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO), - Kind, /* BeginSymbolName */ nullptr); + Name, XCOFF::XMC_RO, XCOFF::XTY_SD, Kind, + /* MultiSymbolsAllowed*/ !TM.getDataSections()); } - if (Kind.isText()) + if (Kind.isText()) { + if (TM.getFunctionSections()) { + return cast<MCSymbolXCOFF>(getFunctionEntryPointSymbol(GO, TM)) + ->getRepresentedCsect(); + } return TextSection; + } - if (Kind.isData() || Kind.isReadOnlyWithRel()) - // TODO: We may put this under option control, because user may want to - // have read-only data with relocations placed into a read-only section by - // the compiler. 
- return DataSection; - - // Zero initialized data must be emitted to the .data section because external - // linkage control sections that get mapped to the .bss section will be linked - // as tentative defintions, which is only appropriate for SectionKind::Common. - if (Kind.isBSS()) + // TODO: We may put Kind.isReadOnlyWithRel() under option control, because + // user may want to have read-only data with relocations placed into a + // read-only section by the compiler. + // For BSS kind, zero initialized data must be emitted to the .data section + // because external linkage control sections that get mapped to the .bss + // section will be linked as tentative defintions, which is only appropriate + // for SectionKind::Common. + if (Kind.isData() || Kind.isReadOnlyWithRel() || Kind.isBSS()) { + if (TM.getDataSections()) { + SmallString<128> Name; + getNameWithPrefix(Name, GO, TM); + return getContext().getXCOFFSection(Name, XCOFF::XMC_RW, XCOFF::XTY_SD, + SectionKind::getData()); + } return DataSection; + } - if (Kind.isReadOnly()) + if (Kind.isReadOnly()) { + if (TM.getDataSections()) { + SmallString<128> Name; + getNameWithPrefix(Name, GO, TM); + return getContext().getXCOFFSection(Name, XCOFF::XMC_RO, XCOFF::XTY_SD, + SectionKind::getReadOnly()); + } return ReadOnlySection; + } report_fatal_error("XCOFF other section types not yet implemented."); } MCSection *TargetLoweringObjectFileXCOFF::getSectionForJumpTable( const Function &F, const TargetMachine &TM) const { - assert (!TM.getFunctionSections() && "Unique sections not supported on XCOFF" - " yet."); assert (!F.getComdat() && "Comdat not supported on XCOFF."); - //TODO: Enable emiting jump table to unique sections when we support it. - return ReadOnlySection; + + if (!TM.getFunctionSections()) + return ReadOnlySection; + + // If the function can be removed, produce a unique section so that + // the table doesn't prevent the removal. 
+ SmallString<128> NameStr(".rodata.jmp.."); + getNameWithPrefix(NameStr, &F, TM); + return getContext().getXCOFFSection(NameStr, XCOFF::XMC_RO, XCOFF::XTY_SD, + SectionKind::getReadOnly()); } bool TargetLoweringObjectFileXCOFF::shouldPutJumpTableInFunctionSection( @@ -2104,19 +2253,23 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForConstant( void TargetLoweringObjectFileXCOFF::Initialize(MCContext &Ctx, const TargetMachine &TgtM) { TargetLoweringObjectFile::Initialize(Ctx, TgtM); - TTypeEncoding = 0; + TTypeEncoding = + dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_datarel | + (TgtM.getTargetTriple().isArch32Bit() ? dwarf::DW_EH_PE_sdata4 + : dwarf::DW_EH_PE_sdata8); PersonalityEncoding = 0; LSDAEncoding = 0; + CallSiteEncoding = dwarf::DW_EH_PE_udata4; } MCSection *TargetLoweringObjectFileXCOFF::getStaticCtorSection( - unsigned Priority, const MCSymbol *KeySym) const { - report_fatal_error("XCOFF ctor section not yet implemented."); + unsigned Priority, const MCSymbol *KeySym) const { + report_fatal_error("no static constructor section on AIX"); } MCSection *TargetLoweringObjectFileXCOFF::getStaticDtorSection( - unsigned Priority, const MCSymbol *KeySym) const { - report_fatal_error("XCOFF dtor section not yet implemented."); + unsigned Priority, const MCSymbol *KeySym) const { + report_fatal_error("no static destructor section on AIX"); } const MCExpr *TargetLoweringObjectFileXCOFF::lowerRelativeReference( @@ -2125,9 +2278,11 @@ const MCExpr *TargetLoweringObjectFileXCOFF::lowerRelativeReference( report_fatal_error("XCOFF not yet implemented."); } -XCOFF::StorageClass TargetLoweringObjectFileXCOFF::getStorageClassForGlobal( - const GlobalObject *GO) { - switch (GO->getLinkage()) { +XCOFF::StorageClass +TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(const GlobalValue *GV) { + assert(!isa<GlobalIFunc>(GV) && "GlobalIFunc is not supported on AIX."); + + switch (GV->getLinkage()) { case GlobalValue::InternalLinkage: case GlobalValue::PrivateLinkage: 
return XCOFF::C_HIDEXT; @@ -2149,10 +2304,32 @@ XCOFF::StorageClass TargetLoweringObjectFileXCOFF::getStorageClassForGlobal( } MCSymbol *TargetLoweringObjectFileXCOFF::getFunctionEntryPointSymbol( - const Function *F, const TargetMachine &TM) const { + const GlobalValue *Func, const TargetMachine &TM) const { + assert( + (isa<Function>(Func) || + (isa<GlobalAlias>(Func) && + isa_and_nonnull<Function>(cast<GlobalAlias>(Func)->getBaseObject()))) && + "Func must be a function or an alias which has a function as base " + "object."); + SmallString<128> NameStr; NameStr.push_back('.'); - getNameWithPrefix(NameStr, F, TM); + getNameWithPrefix(NameStr, Func, TM); + + // When -function-sections is enabled and explicit section is not specified, + // it's not necessary to emit function entry point label any more. We will use + // function entry point csect instead. And for function delcarations, the + // undefined symbols gets treated as csect with XTY_ER property. + if (((TM.getFunctionSections() && !Func->hasSection()) || + Func->isDeclaration()) && + isa<Function>(Func)) { + return getContext() + .getXCOFFSection(NameStr, XCOFF::XMC_PR, + Func->isDeclaration() ? XCOFF::XTY_ER : XCOFF::XTY_SD, + SectionKind::getText()) + ->getQualNameSymbol(); + } + return getContext().getOrCreateSymbol(NameStr); } @@ -2161,13 +2338,15 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForFunctionDescriptor( SmallString<128> NameStr; getNameWithPrefix(NameStr, F, TM); return getContext().getXCOFFSection(NameStr, XCOFF::XMC_DS, XCOFF::XTY_SD, - getStorageClassForGlobal(F), SectionKind::getData()); } MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry( - const MCSymbol *Sym) const { + const MCSymbol *Sym, const TargetMachine &TM) const { + // Use TE storage-mapping class when large code model is enabled so that + // the chance of needing -bbigtoc is decreased. 
return getContext().getXCOFFSection( - cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), XCOFF::XMC_TC, - XCOFF::XTY_SD, XCOFF::C_HIDEXT, SectionKind::getData()); + cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), + TM.getCodeModel() == CodeModel::Large ? XCOFF::XMC_TE : XCOFF::XMC_TC, + XCOFF::XTY_SD, SectionKind::getData()); } diff --git a/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/llvm/lib/CodeGen/TargetOptionsImpl.cpp index 4866d4c171c0..0731cf9b28f4 100644 --- a/llvm/lib/CodeGen/TargetOptionsImpl.cpp +++ b/llvm/lib/CodeGen/TargetOptionsImpl.cpp @@ -47,7 +47,11 @@ bool TargetOptions::HonorSignDependentRoundingFPMath() const { } /// NOTE: There are targets that still do not support the debug entry values -/// production. +/// production and that is being controlled with the SupportsDebugEntryValues. +/// In addition, SCE debugger does not have the feature implemented, so prefer +/// not to emit the debug entry values in that case. +/// The EnableDebugEntryValues can be used for the testing purposes. 
bool TargetOptions::ShouldEmitDebugEntryValues() const { - return SupportsDebugEntryValues || EnableDebugEntryValues; + return (SupportsDebugEntryValues && DebuggerTuning != DebuggerKind::SCE) || + EnableDebugEntryValues; } diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index e0fdb0cefcb8..e844d03854e2 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/PassInstrumentation.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" #include "llvm/MC/MCAsmInfo.h" @@ -41,6 +42,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/Threading.h" +#include "llvm/Target/CGPassBuilderOption.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" @@ -120,16 +122,22 @@ static cl::opt<cl::boolOrDefault> DebugifyAndStripAll( "Debugify MIR before and Strip debug after " "each pass except those known to be unsafe when debug info is present"), cl::ZeroOrMore); -enum RunOutliner { AlwaysOutline, NeverOutline, TargetDefault }; +static cl::opt<cl::boolOrDefault> DebugifyCheckAndStripAll( + "debugify-check-and-strip-all-safe", cl::Hidden, + cl::desc( + "Debugify MIR before, by checking and stripping the debug info after, " + "each pass except those known to be unsafe when debug info is present"), + cl::ZeroOrMore); // Enable or disable the MachineOutliner. 
static cl::opt<RunOutliner> EnableMachineOutliner( "enable-machine-outliner", cl::desc("Enable the machine outliner"), - cl::Hidden, cl::ValueOptional, cl::init(TargetDefault), - cl::values(clEnumValN(AlwaysOutline, "always", + cl::Hidden, cl::ValueOptional, cl::init(RunOutliner::TargetDefault), + cl::values(clEnumValN(RunOutliner::AlwaysOutline, "always", "Run on all functions guaranteed to be beneficial"), - clEnumValN(NeverOutline, "never", "Disable all outlining"), + clEnumValN(RunOutliner::NeverOutline, "never", + "Disable all outlining"), // Sentinel value for unspecified option. - clEnumValN(AlwaysOutline, "", ""))); + clEnumValN(RunOutliner::AlwaysOutline, "", ""))); // Enable or disable FastISel. Both options are needed, because // FastISel is enabled by default with -fast, and we wish to be // able to enable or disable fast-isel independently from -O0. @@ -141,9 +149,11 @@ static cl::opt<cl::boolOrDefault> EnableGlobalISelOption( "global-isel", cl::Hidden, cl::desc("Enable the \"global\" instruction selector")); -static cl::opt<std::string> PrintMachineInstrs( - "print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"), - cl::value_desc("pass-name"), cl::init("option-unspecified"), cl::Hidden); +// FIXME: remove this after switching to NPM or GlobalISel, whichever gets there +// first... 
+static cl::opt<bool> + PrintAfterISel("print-after-isel", cl::init(false), cl::Hidden, + cl::desc("Print machine instrs after ISel")); static cl::opt<GlobalISelAbortMode> EnableGlobalISelAbort( "global-isel-abort", cl::Hidden, @@ -170,7 +180,6 @@ static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden, cl::desc("Run live interval analysis earlier in the pipeline")); // Experimental option to use CFL-AA in codegen -enum class CFLAAType { None, Steensgaard, Andersen, Both }; static cl::opt<CFLAAType> UseCFLAA( "use-cfl-aa-in-codegen", cl::init(CFLAAType::None), cl::Hidden, cl::desc("Enable the new, experimental CFL alias analysis in CodeGen"), @@ -210,6 +219,17 @@ static cl::opt<std::string> cl::desc("Stop compilation before a specific pass"), cl::value_desc("pass-name"), cl::init(""), cl::Hidden); +/// Enable the machine function splitter pass. +static cl::opt<bool> EnableMachineFunctionSplitter( + "enable-split-machine-functions", cl::Hidden, + cl::desc("Split out cold blocks from machine functions based on profile " + "information.")); + +/// Disable the expand reductions pass for testing. +static cl::opt<bool> DisableExpandReductions( + "disable-expand-reductions", cl::init(false), cl::Hidden, + cl::desc("Disable the expand reduction intrinsics pass from running")); + /// Allow standard passes to be disabled by command line options. This supports /// simple binary flags that either suppress the pass or do nothing. /// i.e. -disable-mypass=false has no effect. 
@@ -294,12 +314,11 @@ struct InsertedPass { AnalysisID TargetPassID; IdentifyingPassPtr InsertedPassID; bool VerifyAfter; - bool PrintAfter; InsertedPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID, - bool VerifyAfter, bool PrintAfter) + bool VerifyAfter) : TargetPassID(TargetPassID), InsertedPassID(InsertedPassID), - VerifyAfter(VerifyAfter), PrintAfter(PrintAfter) {} + VerifyAfter(VerifyAfter) {} Pass *getInsertedPass() const { assert(InsertedPassID.isValid() && "Illegal Pass ID!"); @@ -397,6 +416,145 @@ void TargetPassConfig::setStartStopPasses() { Started = (StartAfter == nullptr) && (StartBefore == nullptr); } +CGPassBuilderOption llvm::getCGPassBuilderOption() { + CGPassBuilderOption Opt; + +#define SET_OPTION(Option) \ + if (Option.getNumOccurrences()) \ + Opt.Option = Option; + + SET_OPTION(EnableFastISelOption) + SET_OPTION(EnableGlobalISelAbort) + SET_OPTION(EnableGlobalISelOption) + SET_OPTION(EnableIPRA) + SET_OPTION(OptimizeRegAlloc) + SET_OPTION(VerifyMachineCode) + +#define SET_BOOLEAN_OPTION(Option) Opt.Option = Option; + + SET_BOOLEAN_OPTION(EarlyLiveIntervals) + SET_BOOLEAN_OPTION(EnableBlockPlacementStats) + SET_BOOLEAN_OPTION(EnableImplicitNullChecks) + SET_BOOLEAN_OPTION(EnableMachineOutliner) + SET_BOOLEAN_OPTION(MISchedPostRA) + SET_BOOLEAN_OPTION(UseCFLAA) + SET_BOOLEAN_OPTION(DisableMergeICmps) + SET_BOOLEAN_OPTION(DisableLSR) + SET_BOOLEAN_OPTION(DisableConstantHoisting) + SET_BOOLEAN_OPTION(DisableCGP) + SET_BOOLEAN_OPTION(DisablePartialLibcallInlining) + SET_BOOLEAN_OPTION(PrintLSR) + SET_BOOLEAN_OPTION(PrintISelInput) + SET_BOOLEAN_OPTION(PrintGCInfo) + + return Opt; +} + +static void registerPartialPipelineCallback(PassInstrumentationCallbacks &PIC, + LLVMTargetMachine &LLVMTM) { + StringRef StartBefore; + StringRef StartAfter; + StringRef StopBefore; + StringRef StopAfter; + + unsigned StartBeforeInstanceNum = 0; + unsigned StartAfterInstanceNum = 0; + unsigned StopBeforeInstanceNum = 0; + unsigned StopAfterInstanceNum 
= 0; + + std::tie(StartBefore, StartBeforeInstanceNum) = + getPassNameAndInstanceNum(StartBeforeOpt); + std::tie(StartAfter, StartAfterInstanceNum) = + getPassNameAndInstanceNum(StartAfterOpt); + std::tie(StopBefore, StopBeforeInstanceNum) = + getPassNameAndInstanceNum(StopBeforeOpt); + std::tie(StopAfter, StopAfterInstanceNum) = + getPassNameAndInstanceNum(StopAfterOpt); + + if (StartBefore.empty() && StartAfter.empty() && StopBefore.empty() && + StopAfter.empty()) + return; + + std::tie(StartBefore, std::ignore) = + LLVMTM.getPassNameFromLegacyName(StartBefore); + std::tie(StartAfter, std::ignore) = + LLVMTM.getPassNameFromLegacyName(StartAfter); + std::tie(StopBefore, std::ignore) = + LLVMTM.getPassNameFromLegacyName(StopBefore); + std::tie(StopAfter, std::ignore) = + LLVMTM.getPassNameFromLegacyName(StopAfter); + if (!StartBefore.empty() && !StartAfter.empty()) + report_fatal_error(Twine(StartBeforeOptName) + Twine(" and ") + + Twine(StartAfterOptName) + Twine(" specified!")); + if (!StopBefore.empty() && !StopAfter.empty()) + report_fatal_error(Twine(StopBeforeOptName) + Twine(" and ") + + Twine(StopAfterOptName) + Twine(" specified!")); + + PIC.registerShouldRunOptionalPassCallback( + [=, EnableCurrent = StartBefore.empty() && StartAfter.empty(), + EnableNext = Optional<bool>(), StartBeforeCount = 0u, + StartAfterCount = 0u, StopBeforeCount = 0u, + StopAfterCount = 0u](StringRef P, Any) mutable { + bool StartBeforePass = !StartBefore.empty() && P.contains(StartBefore); + bool StartAfterPass = !StartAfter.empty() && P.contains(StartAfter); + bool StopBeforePass = !StopBefore.empty() && P.contains(StopBefore); + bool StopAfterPass = !StopAfter.empty() && P.contains(StopAfter); + + // Implement -start-after/-stop-after + if (EnableNext) { + EnableCurrent = *EnableNext; + EnableNext.reset(); + } + + // Using PIC.registerAfterPassCallback won't work because if this + // callback returns false, AfterPassCallback is also skipped. 
+ if (StartAfterPass && StartAfterCount++ == StartAfterInstanceNum) { + assert(!EnableNext && "Error: assign to EnableNext more than once"); + EnableNext = true; + } + if (StopAfterPass && StopAfterCount++ == StopAfterInstanceNum) { + assert(!EnableNext && "Error: assign to EnableNext more than once"); + EnableNext = false; + } + + if (StartBeforePass && StartBeforeCount++ == StartBeforeInstanceNum) + EnableCurrent = true; + if (StopBeforePass && StopBeforeCount++ == StopBeforeInstanceNum) + EnableCurrent = false; + return EnableCurrent; + }); +} + +void llvm::registerCodeGenCallback(PassInstrumentationCallbacks &PIC, + LLVMTargetMachine &LLVMTM) { + + // Register a callback for disabling passes. + PIC.registerShouldRunOptionalPassCallback([](StringRef P, Any) { + +#define DISABLE_PASS(Option, Name) \ + if (Option && P.contains(#Name)) \ + return false; + DISABLE_PASS(DisableBlockPlacement, MachineBlockPlacementPass) + DISABLE_PASS(DisableBranchFold, BranchFolderPass) + DISABLE_PASS(DisableCopyProp, MachineCopyPropagationPass) + DISABLE_PASS(DisableEarlyIfConversion, EarlyIfConverterPass) + DISABLE_PASS(DisableEarlyTailDup, EarlyTailDuplicatePass) + DISABLE_PASS(DisableMachineCSE, MachineCSEPass) + DISABLE_PASS(DisableMachineDCE, DeadMachineInstructionElimPass) + DISABLE_PASS(DisableMachineLICM, EarlyMachineLICMPass) + DISABLE_PASS(DisableMachineSink, MachineSinkingPass) + DISABLE_PASS(DisablePostRAMachineLICM, MachineLICMPass) + DISABLE_PASS(DisablePostRAMachineSink, PostRAMachineSinkingPass) + DISABLE_PASS(DisablePostRASched, PostRASchedulerPass) + DISABLE_PASS(DisableSSC, StackSlotColoringPass) + DISABLE_PASS(DisableTailDuplicate, TailDuplicatePass) + + return true; + }); + + registerPartialPipelineCallback(PIC, LLVMTM); +} + // Out of line constructor provides default values for pass options and // registers all common codegen passes. 
TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm) @@ -411,9 +569,6 @@ TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm) initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry()); initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); - if (StringRef(PrintMachineInstrs.getValue()).equals("")) - TM.Options.PrintMachineCode = true; - if (EnableIPRA.getNumOccurrences()) TM.Options.EnableIPRA = EnableIPRA; else { @@ -437,14 +592,13 @@ CodeGenOpt::Level TargetPassConfig::getOptLevel() const { /// Insert InsertedPassID pass after TargetPassID. void TargetPassConfig::insertPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID, - bool VerifyAfter, bool PrintAfter) { + bool VerifyAfter) { assert(((!InsertedPassID.isInstance() && TargetPassID != InsertedPassID.getID()) || (InsertedPassID.isInstance() && TargetPassID != InsertedPassID.getInstance()->getPassID())) && "Insert a pass after itself!"); - Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID, VerifyAfter, - PrintAfter); + Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID, VerifyAfter); } /// createPassConfig - Create a pass configuration object to be used by @@ -522,7 +676,7 @@ bool TargetPassConfig::isPassSubstitutedOrOverridden(AnalysisID ID) const { /// a later pass or that it should stop after an earlier pass, then do not add /// the pass. Finally, compare the current pass against the StartAfter /// and StopAfter options and change the Started/Stopped flags accordingly. 
-void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) { +void TargetPassConfig::addPass(Pass *P, bool verifyAfter) { assert(!Initialized && "PassConfig is immutable"); // Cache the Pass ID here in case the pass manager finds this pass is @@ -540,17 +694,16 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) { addMachinePrePasses(); std::string Banner; // Construct banner message before PM->add() as that may delete the pass. - if (AddingMachinePasses && (printAfter || verifyAfter)) + if (AddingMachinePasses && verifyAfter) Banner = std::string("After ") + std::string(P->getPassName()); PM->add(P); if (AddingMachinePasses) - addMachinePostPasses(Banner, /*AllowPrint*/ printAfter, - /*AllowVerify*/ verifyAfter); + addMachinePostPasses(Banner, /*AllowVerify*/ verifyAfter); // Add the passes after the pass P if there is any. - for (auto IP : Impl->InsertedPasses) { + for (const auto &IP : Impl->InsertedPasses) { if (IP.TargetPassID == PassID) - addPass(IP.getInsertedPass(), IP.VerifyAfter, IP.PrintAfter); + addPass(IP.getInsertedPass(), IP.VerifyAfter); } } else { delete P; @@ -570,8 +723,7 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) { /// /// addPass cannot return a pointer to the pass instance because is internal the /// PassManager and the instance we create here may already be freed. -AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter, - bool printAfter) { +AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter) { IdentifyingPassPtr TargetID = getPassSubstitution(PassID); IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID); if (!FinalPtr.isValid()) @@ -586,7 +738,7 @@ AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter, llvm_unreachable("Pass ID not registered"); } AnalysisID FinalID = P->getPassID(); - addPass(P, verifyAfter, printAfter); // Ends the lifetime of P. 
+ addPass(P, verifyAfter); // Ends the lifetime of P. return FinalID; } @@ -597,7 +749,7 @@ void TargetPassConfig::printAndVerify(const std::string &Banner) { } void TargetPassConfig::addPrintPass(const std::string &Banner) { - if (TM->shouldPrintMachineCode()) + if (PrintAfterISel) PM->add(createMachineFunctionPrinterPass(dbgs(), Banner)); } @@ -619,18 +771,26 @@ void TargetPassConfig::addStripDebugPass() { PM->add(createStripDebugMachineModulePass(/*OnlyDebugified=*/true)); } +void TargetPassConfig::addCheckDebugPass() { + PM->add(createCheckDebugMachineModulePass()); +} + void TargetPassConfig::addMachinePrePasses(bool AllowDebugify) { - if (AllowDebugify && DebugifyAndStripAll == cl::BOU_TRUE && DebugifyIsSafe) + if (AllowDebugify && DebugifyIsSafe && + (DebugifyAndStripAll == cl::BOU_TRUE || + DebugifyCheckAndStripAll == cl::BOU_TRUE)) addDebugifyPass(); } void TargetPassConfig::addMachinePostPasses(const std::string &Banner, - bool AllowPrint, bool AllowVerify, - bool AllowStrip) { - if (DebugifyAndStripAll == cl::BOU_TRUE && DebugifyIsSafe) - addStripDebugPass(); - if (AllowPrint) - addPrintPass(Banner); + bool AllowVerify, bool AllowStrip) { + if (DebugifyIsSafe) { + if (DebugifyCheckAndStripAll == cl::BOU_TRUE) { + addCheckDebugPass(); + addStripDebugPass(); + } else if (DebugifyAndStripAll == cl::BOU_TRUE) + addStripDebugPass(); + } if (AllowVerify) addVerifyPass(Banner); } @@ -707,10 +867,12 @@ void TargetPassConfig::addIRPasses() { // Add scalarization of target's unsupported masked memory intrinsics pass. // the unsupported intrinsic will be replaced with a chain of basic blocks, // that stores/loads element one-by-one if the appropriate mask bit is set. - addPass(createScalarizeMaskedMemIntrinPass()); + addPass(createScalarizeMaskedMemIntrinLegacyPass()); // Expand reduction intrinsics into shuffle sequences if the target wants to. - addPass(createExpandReductionsPass()); + // Allow disabling it for testing purposes. 
+ if (!DisableExpandReductions) + addPass(createExpandReductionsPass()); } /// Turn exception handling constructs into something the code generators can @@ -730,6 +892,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { LLVM_FALLTHROUGH; case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: + case ExceptionHandling::AIX: addPass(createDwarfEHPass(getOptLevel())); break; case ExceptionHandling::WinEH: @@ -879,7 +1042,7 @@ bool TargetPassConfig::addISelPasses() { addPass(createLowerEmuTLSPass()); addPass(createPreISelIntrinsicLoweringPass()); - addPass(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); + PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); addIRPasses(); addCodeGenPrepare(); addPassesToHandleExceptions(); @@ -916,20 +1079,6 @@ static cl::opt<RegisterRegAlloc::FunctionPassCtor, false, void TargetPassConfig::addMachinePasses() { AddingMachinePasses = true; - // Insert a machine instr printer pass after the specified pass. - StringRef PrintMachineInstrsPassName = PrintMachineInstrs.getValue(); - if (!PrintMachineInstrsPassName.equals("") && - !PrintMachineInstrsPassName.equals("option-unspecified")) { - if (const PassInfo *TPI = getPassInfo(PrintMachineInstrsPassName)) { - const PassRegistry *PR = PassRegistry::getPassRegistry(); - const PassInfo *IPI = PR->getPassInfo(StringRef("machineinstr-printer")); - assert(IPI && "failed to get \"machineinstr-printer\" PassInfo!"); - const char *TID = (const char *)(TPI->getTypeInfo()); - const char *IID = (const char *)(IPI->getTypeInfo()); - insertPass(TID, IID); - } - } - // Add passes that optimize machine instructions in SSA form. if (getOptLevel() != CodeGenOpt::None) { addMachineSSAOptimization(); @@ -1000,7 +1149,7 @@ void TargetPassConfig::addMachinePasses() { // GC if (addGCPasses()) { if (PrintGCInfo) - addPass(createGCInfoPrinter(dbgs()), false, false); + addPass(createGCInfoPrinter(dbgs()), false); } // Basic block placement. 
@@ -1028,20 +1177,31 @@ void TargetPassConfig::addMachinePasses() { addPass(&LiveDebugValuesID, false); if (TM->Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None && - EnableMachineOutliner != NeverOutline) { - bool RunOnAllFunctions = (EnableMachineOutliner == AlwaysOutline); - bool AddOutliner = RunOnAllFunctions || - TM->Options.SupportsDefaultOutlining; + EnableMachineOutliner != RunOutliner::NeverOutline) { + bool RunOnAllFunctions = + (EnableMachineOutliner == RunOutliner::AlwaysOutline); + bool AddOutliner = + RunOnAllFunctions || TM->Options.SupportsDefaultOutlining; if (AddOutliner) addPass(createMachineOutlinerPass(RunOnAllFunctions)); } - if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) - addPass(llvm::createBBSectionsPreparePass(TM->getBBSectionsFuncListBuf())); + // Machine function splitter uses the basic block sections feature. Both + // cannot be enabled at the same time. + if (TM->Options.EnableMachineFunctionSplitter || + EnableMachineFunctionSplitter) { + addPass(createMachineFunctionSplitterPass()); + } else if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) { + addPass(llvm::createBasicBlockSectionsPass(TM->getBBSectionsFuncListBuf())); + } // Add passes that directly emit MI after all other MI passes. 
addPreEmitPass2(); + // Insert pseudo probe annotation for callsite profiling + if (TM->Options.PseudoProbeForProfiling) + addPass(createPseudoProbeInserter()); + AddingMachinePasses = false; } @@ -1148,7 +1308,7 @@ FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) { return createTargetRegisterAllocator(Optimized); } -bool TargetPassConfig::addRegAssignmentFast() { +bool TargetPassConfig::addRegAssignAndRewriteFast() { if (RegAlloc != &useDefaultRegisterAllocator && RegAlloc != &createFastRegisterAllocator) report_fatal_error("Must use fast (default) register allocator for unoptimized regalloc."); @@ -1157,7 +1317,7 @@ bool TargetPassConfig::addRegAssignmentFast() { return true; } -bool TargetPassConfig::addRegAssignmentOptimized() { +bool TargetPassConfig::addRegAssignAndRewriteOptimized() { // Add the selected register allocation pass. addPass(createRegAllocPass(true)); @@ -1167,12 +1327,6 @@ bool TargetPassConfig::addRegAssignmentOptimized() { // Finally rewrite virtual registers. addPass(&VirtRegRewriterID); - // Perform stack slot coloring and post-ra machine LICM. - // - // FIXME: Re-enable coloring with register when it's capable of adding - // kill markers. - addPass(&StackSlotColoringID); - return true; } @@ -1188,7 +1342,7 @@ void TargetPassConfig::addFastRegAlloc() { addPass(&PHIEliminationID, false); addPass(&TwoAddressInstructionPassID, false); - addRegAssignmentFast(); + addRegAssignAndRewriteFast(); } /// Add standard target-independent passes that are tightly coupled with @@ -1205,6 +1359,11 @@ void TargetPassConfig::addOptimizedRegAlloc() { // LiveVariables can be removed completely, and LiveIntervals can be directly // computed. (We still either need to regenerate kill flags after regalloc, or // preferably fix the scavenger to not depend on them). + // FIXME: UnreachableMachineBlockElim is a dependant pass of LiveVariables. + // When LiveVariables is removed this has to be removed/moved either. 
+ // Explicit addition of UnreachableMachineBlockElim allows stopping before or + // after it with -stop-before/-stop-after. + addPass(&UnreachableMachineBlockElimID, false); addPass(&LiveVariablesID, false); // Edge splitting is smarter with machine loop info. @@ -1226,7 +1385,13 @@ void TargetPassConfig::addOptimizedRegAlloc() { // PreRA instruction scheduling. addPass(&MachineSchedulerID); - if (addRegAssignmentOptimized()) { + if (addRegAssignAndRewriteOptimized()) { + // Perform stack slot coloring and post-ra machine LICM. + // + // FIXME: Re-enable coloring with register when it's capable of adding + // kill markers. + addPass(&StackSlotColoringID); + // Allow targets to expand pseudo instructions depending on the choice of // registers before MachineCopyPropagation. addPostRewrite(); diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp index e2ef12d8ac77..5fd7eef5808f 100644 --- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CommandLine.h" @@ -68,7 +69,7 @@ bool TargetRegisterInfo::shouldRegionSplitForVirtReg( const MachineFunction &MF, const LiveInterval &VirtReg) const { const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); const MachineRegisterInfo &MRI = MF.getRegInfo(); - MachineInstr *MI = MRI.getUniqueVRegDef(VirtReg.reg); + MachineInstr *MI = MRI.getUniqueVRegDef(VirtReg.reg()); if (MI && TII->isTriviallyReMaterializable(*MI) && VirtReg.size() > HugeSizeForSplit) return false; @@ -532,6 +533,56 @@ TargetRegisterInfo::lookThruCopyLike(Register SrcReg, } } +Register TargetRegisterInfo::lookThruSingleUseCopyChain( + Register SrcReg, const MachineRegisterInfo *MRI) const { + while (true) { + const MachineInstr *MI 
= MRI->getVRegDef(SrcReg); + // Found the real definition, return it if it has a single use. + if (!MI->isCopyLike()) + return MRI->hasOneNonDBGUse(SrcReg) ? SrcReg : Register(); + + Register CopySrcReg; + if (MI->isCopy()) + CopySrcReg = MI->getOperand(1).getReg(); + else { + assert(MI->isSubregToReg() && "Bad opcode for lookThruCopyLike"); + CopySrcReg = MI->getOperand(2).getReg(); + } + + // Continue only if the next definition in the chain is for a virtual + // register that has a single use. + if (!CopySrcReg.isVirtual() || !MRI->hasOneNonDBGUse(CopySrcReg)) + return Register(); + + SrcReg = CopySrcReg; + } +} + +void TargetRegisterInfo::getOffsetOpcodes( + const StackOffset &Offset, SmallVectorImpl<uint64_t> &Ops) const { + assert(!Offset.getScalable() && "Scalable offsets are not handled"); + DIExpression::appendOffset(Ops, Offset.getFixed()); +} + +DIExpression * +TargetRegisterInfo::prependOffsetExpression(const DIExpression *Expr, + unsigned PrependFlags, + const StackOffset &Offset) const { + assert((PrependFlags & + ~(DIExpression::DerefBefore | DIExpression::DerefAfter | + DIExpression::StackValue | DIExpression::EntryValue)) == 0 && + "Unsupported prepend flag"); + SmallVector<uint64_t, 16> OffsetExpr; + if (PrependFlags & DIExpression::DerefBefore) + OffsetExpr.push_back(dwarf::DW_OP_deref); + getOffsetOpcodes(Offset, OffsetExpr); + if (PrependFlags & DIExpression::DerefAfter) + OffsetExpr.push_back(dwarf::DW_OP_deref); + return DIExpression::prependOpcodes(Expr, OffsetExpr, + PrependFlags & DIExpression::StackValue, + PrependFlags & DIExpression::EntryValue); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void TargetRegisterInfo::dumpReg(Register Reg, unsigned SubRegIndex, diff --git a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp index 63766df4d2be..e4520d8ccb1e 100644 --- a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp +++ b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp @@ -15,13 +15,12 @@ using 
namespace llvm; TargetSubtargetInfo::TargetSubtargetInfo( - const Triple &TT, StringRef CPU, StringRef FS, + const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS, ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetSubTypeKV> PD, - const MCWriteProcResEntry *WPR, - const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, - const InstrStage *IS, const unsigned *OC, const unsigned *FP) - : MCSubtargetInfo(TT, CPU, FS, PF, PD, WPR, WL, RA, IS, OC, FP) { -} + const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, + const MCReadAdvanceEntry *RA, const InstrStage *IS, const unsigned *OC, + const unsigned *FP) + : MCSubtargetInfo(TT, CPU, TuneCPU, FS, PF, PD, WPR, WL, RA, IS, OC, FP) {} TargetSubtargetInfo::~TargetSubtargetInfo() = default; diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index de336abe607a..ecee4aed7f88 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -70,7 +70,6 @@ STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions"); STATISTIC(NumCommuted , "Number of instructions commuted to coalesce"); STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted"); STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address"); -STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk"); STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up"); STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down"); @@ -109,47 +108,38 @@ class TwoAddressInstructionPass : public MachineFunctionPass { // Set of already processed instructions in the current block. SmallPtrSet<MachineInstr*, 8> Processed; - // Set of instructions converted to three-address by target and then sunk - // down current basic block. 
- SmallPtrSet<MachineInstr*, 8> SunkInstrs; - // A map from virtual registers to physical registers which are likely targets // to be coalesced to due to copies from physical registers to virtual // registers. e.g. v1024 = move r0. - DenseMap<unsigned, unsigned> SrcRegMap; + DenseMap<Register, Register> SrcRegMap; // A map from virtual registers to physical registers which are likely targets // to be coalesced to due to copies to physical registers from virtual // registers. e.g. r1 = move v1024. - DenseMap<unsigned, unsigned> DstRegMap; - - bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg, - MachineBasicBlock::iterator OldPos); + DenseMap<Register, Register> DstRegMap; - bool isRevCopyChain(unsigned FromReg, unsigned ToReg, int Maxlen); + bool isRevCopyChain(Register FromReg, Register ToReg, int Maxlen); - bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef); + bool noUseAfterLastDef(Register Reg, unsigned Dist, unsigned &LastDef); - bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, + bool isProfitableToCommute(Register RegA, Register RegB, Register RegC, MachineInstr *MI, unsigned Dist); bool commuteInstruction(MachineInstr *MI, unsigned DstIdx, unsigned RegBIdx, unsigned RegCIdx, unsigned Dist); - bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB); + bool isProfitableToConv3Addr(Register RegA, Register RegB); bool convertInstTo3Addr(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - unsigned RegA, unsigned RegB, unsigned Dist); + MachineBasicBlock::iterator &nmi, Register RegA, + Register RegB, unsigned Dist); - bool isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI); + bool isDefTooClose(Register Reg, unsigned Dist, MachineInstr *MI); bool rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - unsigned Reg); + MachineBasicBlock::iterator &nmi, Register Reg); bool rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, - 
MachineBasicBlock::iterator &nmi, - unsigned Reg); + MachineBasicBlock::iterator &nmi, Register Reg); bool tryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, @@ -161,7 +151,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass { unsigned BaseOpIdx, bool BaseOpKilled, unsigned Dist); - void scanUses(unsigned DstReg); + void scanUses(Register DstReg); void processCopy(MachineInstr *MI); @@ -207,140 +197,10 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(TwoAddressInstructionPass, DEBUG_TYPE, "Two-Address instruction pass", false, false) -static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS); - -/// A two-address instruction has been converted to a three-address instruction -/// to avoid clobbering a register. Try to sink it past the instruction that -/// would kill the above mentioned register to reduce register pressure. -bool TwoAddressInstructionPass:: -sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, - MachineBasicBlock::iterator OldPos) { - // FIXME: Shouldn't we be trying to do this before we three-addressify the - // instruction? After this transformation is done, we no longer need - // the instruction to be in three-address form. - - // Check if it's safe to move this instruction. - bool SeenStore = true; // Be conservative. - if (!MI->isSafeToMove(AA, SeenStore)) - return false; - - unsigned DefReg = 0; - SmallSet<unsigned, 4> UseRegs; - - for (const MachineOperand &MO : MI->operands()) { - if (!MO.isReg()) - continue; - Register MOReg = MO.getReg(); - if (!MOReg) - continue; - if (MO.isUse() && MOReg != SavedReg) - UseRegs.insert(MO.getReg()); - if (!MO.isDef()) - continue; - if (MO.isImplicit()) - // Don't try to move it if it implicitly defines a register. - return false; - if (DefReg) - // For now, don't move any instructions that define multiple registers. 
- return false; - DefReg = MO.getReg(); - } - - // Find the instruction that kills SavedReg. - MachineInstr *KillMI = nullptr; - if (LIS) { - LiveInterval &LI = LIS->getInterval(SavedReg); - assert(LI.end() != LI.begin() && - "Reg should not have empty live interval."); - - SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot(); - LiveInterval::const_iterator I = LI.find(MBBEndIdx); - if (I != LI.end() && I->start < MBBEndIdx) - return false; - - --I; - KillMI = LIS->getInstructionFromIndex(I->end); - } - if (!KillMI) { - for (MachineOperand &UseMO : MRI->use_nodbg_operands(SavedReg)) { - if (!UseMO.isKill()) - continue; - KillMI = UseMO.getParent(); - break; - } - } - - // If we find the instruction that kills SavedReg, and it is in an - // appropriate location, we can try to sink the current instruction - // past it. - if (!KillMI || KillMI->getParent() != MBB || KillMI == MI || - MachineBasicBlock::iterator(KillMI) == OldPos || KillMI->isTerminator()) - return false; - - // If any of the definitions are used by another instruction between the - // position and the kill use, then it's not safe to sink it. - // - // FIXME: This can be sped up if there is an easy way to query whether an - // instruction is before or after another instruction. Then we can use - // MachineRegisterInfo def / use instead. - MachineOperand *KillMO = nullptr; - MachineBasicBlock::iterator KillPos = KillMI; - ++KillPos; - - unsigned NumVisited = 0; - for (MachineInstr &OtherMI : make_range(std::next(OldPos), KillPos)) { - // Debug instructions cannot be counted against the limit. - if (OtherMI.isDebugInstr()) - continue; - if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost. 
- return false; - ++NumVisited; - for (unsigned i = 0, e = OtherMI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = OtherMI.getOperand(i); - if (!MO.isReg()) - continue; - Register MOReg = MO.getReg(); - if (!MOReg) - continue; - if (DefReg == MOReg) - return false; - - if (MO.isKill() || (LIS && isPlainlyKilled(&OtherMI, MOReg, LIS))) { - if (&OtherMI == KillMI && MOReg == SavedReg) - // Save the operand that kills the register. We want to unset the kill - // marker if we can sink MI past it. - KillMO = &MO; - else if (UseRegs.count(MOReg)) - // One of the uses is killed before the destination. - return false; - } - } - } - assert(KillMO && "Didn't find kill"); - - if (!LIS) { - // Update kill and LV information. - KillMO->setIsKill(false); - KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI); - KillMO->setIsKill(true); - - if (LV) - LV->replaceKillInstruction(SavedReg, *KillMI, *MI); - } - - // Move instruction to its destination. - MBB->remove(MI); - MBB->insert(KillPos, MI); - - if (LIS) - LIS->handleMove(*MI); - - ++Num3AddrSunk; - return true; -} +static bool isPlainlyKilled(MachineInstr *MI, Register Reg, LiveIntervals *LIS); /// Return the MachineInstr* if it is the single def of the Reg in current BB. -static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB, +static MachineInstr *getSingleDef(Register Reg, MachineBasicBlock *BB, const MachineRegisterInfo *MRI) { MachineInstr *Ret = nullptr; for (MachineInstr &DefMI : MRI->def_instructions(Reg)) { @@ -361,9 +221,9 @@ static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB, /// %Tmp2 = copy %ToReg; /// MaxLen specifies the maximum length of the copy chain the func /// can walk through. 
-bool TwoAddressInstructionPass::isRevCopyChain(unsigned FromReg, unsigned ToReg, +bool TwoAddressInstructionPass::isRevCopyChain(Register FromReg, Register ToReg, int Maxlen) { - unsigned TmpReg = FromReg; + Register TmpReg = FromReg; for (int i = 0; i < Maxlen; i++) { MachineInstr *Def = getSingleDef(TmpReg, MBB, MRI); if (!Def || !Def->isCopy()) @@ -381,7 +241,7 @@ bool TwoAddressInstructionPass::isRevCopyChain(unsigned FromReg, unsigned ToReg, /// in the MBB that defines the specified register and the two-address /// instruction which is being processed. It also returns the last def location /// by reference. -bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist, +bool TwoAddressInstructionPass::noUseAfterLastDef(Register Reg, unsigned Dist, unsigned &LastDef) { LastDef = 0; unsigned LastUse = Dist; @@ -405,8 +265,8 @@ bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist, /// instruction. It also returns the source and destination registers and /// whether they are physical registers by reference. static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, - unsigned &SrcReg, unsigned &DstReg, - bool &IsSrcPhys, bool &IsDstPhys) { + Register &SrcReg, Register &DstReg, bool &IsSrcPhys, + bool &IsDstPhys) { SrcReg = 0; DstReg = 0; if (MI.isCopy()) { @@ -415,19 +275,20 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, } else if (MI.isInsertSubreg() || MI.isSubregToReg()) { DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(2).getReg(); - } else + } else { return false; + } - IsSrcPhys = Register::isPhysicalRegister(SrcReg); - IsDstPhys = Register::isPhysicalRegister(DstReg); + IsSrcPhys = SrcReg.isPhysical(); + IsDstPhys = DstReg.isPhysical(); return true; } /// Test if the given register value, which is used by the /// given instruction, is killed by the given instruction. 
-static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, +static bool isPlainlyKilled(MachineInstr *MI, Register Reg, LiveIntervals *LIS) { - if (LIS && Register::isVirtualRegister(Reg) && !LIS->isNotInMIMap(*MI)) { + if (LIS && Reg.isVirtual() && !LIS->isNotInMIMap(*MI)) { // FIXME: Sometimes tryInstructionTransform() will add instructions and // test whether they can be folded before keeping them. In this case it // sets a kill before recursively calling tryInstructionTransform() again. @@ -466,20 +327,17 @@ static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, /// /// If allowFalsePositives is true then likely kills are treated as kills even /// if it can't be proven that they are kills. -static bool isKilled(MachineInstr &MI, unsigned Reg, - const MachineRegisterInfo *MRI, - const TargetInstrInfo *TII, - LiveIntervals *LIS, - bool allowFalsePositives) { +static bool isKilled(MachineInstr &MI, Register Reg, + const MachineRegisterInfo *MRI, const TargetInstrInfo *TII, + LiveIntervals *LIS, bool allowFalsePositives) { MachineInstr *DefMI = &MI; while (true) { // All uses of physical registers are likely to be kills. - if (Register::isPhysicalRegister(Reg) && - (allowFalsePositives || MRI->hasOneUse(Reg))) + if (Reg.isPhysical() && (allowFalsePositives || MRI->hasOneUse(Reg))) return true; if (!isPlainlyKilled(DefMI, Reg, LIS)) return false; - if (Register::isPhysicalRegister(Reg)) + if (Reg.isPhysical()) return true; MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg); // If there are multiple defs, we can't do a simple analysis, so just @@ -488,7 +346,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, return true; DefMI = Begin->getParent(); bool IsSrcPhys, IsDstPhys; - unsigned SrcReg, DstReg; + Register SrcReg, DstReg; // If the def is something other than a copy, then it isn't going to // be coalesced, so follow the kill flag. 
if (!isCopyToReg(*DefMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) @@ -499,7 +357,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, /// Return true if the specified MI uses the specified register as a two-address /// use. If so, return the destination register by reference. -static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { +static bool isTwoAddrUse(MachineInstr &MI, Register Reg, Register &DstReg) { for (unsigned i = 0, NumOps = MI.getNumOperands(); i != NumOps; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) @@ -515,19 +373,17 @@ static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { /// Given a register, if has a single in-basic block use, return the use /// instruction if it's a copy or a two-address use. -static -MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, - MachineRegisterInfo *MRI, - const TargetInstrInfo *TII, - bool &IsCopy, - unsigned &DstReg, bool &IsDstPhys) { +static MachineInstr * +findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB, + MachineRegisterInfo *MRI, const TargetInstrInfo *TII, + bool &IsCopy, Register &DstReg, bool &IsDstPhys) { if (!MRI->hasOneNonDBGUse(Reg)) // None or more than one use. 
return nullptr; MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(Reg); if (UseMI.getParent() != MBB) return nullptr; - unsigned SrcReg; + Register SrcReg; bool IsSrcPhys; if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) { IsCopy = true; @@ -535,7 +391,7 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, } IsDstPhys = false; if (isTwoAddrUse(UseMI, Reg, DstReg)) { - IsDstPhys = Register::isPhysicalRegister(DstReg); + IsDstPhys = DstReg.isPhysical(); return &UseMI; } return nullptr; @@ -543,22 +399,22 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, /// Return the physical register the specified virtual register might be mapped /// to. -static unsigned -getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) { - while (Register::isVirtualRegister(Reg)) { - DenseMap<unsigned, unsigned>::iterator SI = RegMap.find(Reg); +static MCRegister getMappedReg(Register Reg, + DenseMap<Register, Register> &RegMap) { + while (Reg.isVirtual()) { + DenseMap<Register, Register>::iterator SI = RegMap.find(Reg); if (SI == RegMap.end()) return 0; Reg = SI->second; } - if (Register::isPhysicalRegister(Reg)) + if (Reg.isPhysical()) return Reg; return 0; } /// Return true if the two registers are equal or aliased. -static bool -regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) { +static bool regsAreCompatible(Register RegA, Register RegB, + const TargetRegisterInfo *TRI) { if (RegA == RegB) return true; if (!RegA || !RegB) @@ -567,7 +423,7 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) { } // Returns true if Reg is equal or aliased to at least one register in Set. 
-static bool regOverlapsSet(const SmallVectorImpl<unsigned> &Set, unsigned Reg, +static bool regOverlapsSet(const SmallVectorImpl<Register> &Set, Register Reg, const TargetRegisterInfo *TRI) { for (unsigned R : Set) if (TRI->regsOverlap(R, Reg)) @@ -578,10 +434,11 @@ static bool regOverlapsSet(const SmallVectorImpl<unsigned> &Set, unsigned Reg, /// Return true if it's potentially profitable to commute the two-address /// instruction that's being processed. -bool -TwoAddressInstructionPass:: -isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, - MachineInstr *MI, unsigned Dist) { +bool TwoAddressInstructionPass::isProfitableToCommute(Register RegA, + Register RegB, + Register RegC, + MachineInstr *MI, + unsigned Dist) { if (OptLevel == CodeGenOpt::None) return false; @@ -603,7 +460,7 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, // insert => %reg1030 = COPY %reg1029 // %reg1030 = ADD8rr killed %reg1029, killed %reg1028, implicit dead %eflags - if (!isPlainlyKilled(MI, regC, LIS)) + if (!isPlainlyKilled(MI, RegC, LIS)) return false; // Ok, we have something like: @@ -616,10 +473,10 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, // %reg1026 = ADD %reg1024, %reg1025 // r0 = MOV %reg1026 // Commute the ADD to hopefully eliminate an otherwise unavoidable copy. 
- unsigned ToRegA = getMappedReg(regA, DstRegMap); + MCRegister ToRegA = getMappedReg(RegA, DstRegMap); if (ToRegA) { - unsigned FromRegB = getMappedReg(regB, SrcRegMap); - unsigned FromRegC = getMappedReg(regC, SrcRegMap); + MCRegister FromRegB = getMappedReg(RegB, SrcRegMap); + MCRegister FromRegC = getMappedReg(RegC, SrcRegMap); bool CompB = FromRegB && regsAreCompatible(FromRegB, ToRegA, TRI); bool CompC = FromRegC && regsAreCompatible(FromRegC, ToRegA, TRI); @@ -637,16 +494,16 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, return false; } - // If there is a use of regC between its last def (could be livein) and this + // If there is a use of RegC between its last def (could be livein) and this // instruction, then bail. unsigned LastDefC = 0; - if (!noUseAfterLastDef(regC, Dist, LastDefC)) + if (!noUseAfterLastDef(RegC, Dist, LastDefC)) return false; - // If there is a use of regB between its last def (could be livein) and this + // If there is a use of RegB between its last def (could be livein) and this // instruction, then go ahead and make this transformation. unsigned LastDefB = 0; - if (!noUseAfterLastDef(regB, Dist, LastDefB)) + if (!noUseAfterLastDef(RegB, Dist, LastDefB)) return true; // Look for situation like this: @@ -664,14 +521,14 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, // To more generally minimize register copies, ideally the logic of two addr // instruction pass should be integrated with register allocation pass where // interference graph is available. - if (isRevCopyChain(regC, regA, MaxDataFlowEdge)) + if (isRevCopyChain(RegC, RegA, MaxDataFlowEdge)) return true; - if (isRevCopyChain(regB, regA, MaxDataFlowEdge)) + if (isRevCopyChain(RegB, RegA, MaxDataFlowEdge)) return false; // Since there are no intervening uses for both registers, then commute - // if the def of regC is closer. Its live interval is shorter. + // if the def of RegC is closer. Its live interval is shorter. 
return LastDefB && LastDefC && LastDefC > LastDefB; } @@ -697,7 +554,7 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI, "instruction unless it was requested."); // Update source register map. - unsigned FromRegC = getMappedReg(RegC, SrcRegMap); + MCRegister FromRegC = getMappedReg(RegC, SrcRegMap); if (FromRegC) { Register RegA = MI->getOperand(DstIdx).getReg(); SrcRegMap[RegA] = FromRegC; @@ -708,28 +565,26 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI, /// Return true if it is profitable to convert the given 2-address instruction /// to a 3-address one. -bool -TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){ +bool TwoAddressInstructionPass::isProfitableToConv3Addr(Register RegA, + Register RegB) { // Look for situations like this: // %reg1024 = MOV r1 // %reg1025 = MOV r0 // %reg1026 = ADD %reg1024, %reg1025 // r2 = MOV %reg1026 // Turn ADD into a 3-address instruction to avoid a copy. - unsigned FromRegB = getMappedReg(RegB, SrcRegMap); + MCRegister FromRegB = getMappedReg(RegB, SrcRegMap); if (!FromRegB) return false; - unsigned ToRegA = getMappedReg(RegA, DstRegMap); + MCRegister ToRegA = getMappedReg(RegA, DstRegMap); return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI)); } /// Convert the specified two-address instruction into a three address one. /// Return true if this transformation was successful. -bool -TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - unsigned RegA, unsigned RegB, - unsigned Dist) { +bool TwoAddressInstructionPass::convertInstTo3Addr( + MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, + Register RegA, Register RegB, unsigned Dist) { // FIXME: Why does convertToThreeAddress() need an iterator reference? 
MachineFunction::iterator MFI = MBB->getIterator(); MachineInstr *NewMI = TII->convertToThreeAddress(MFI, *mi, LV); @@ -740,26 +595,33 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); - bool Sunk = false; if (LIS) LIS->ReplaceMachineInstrInMaps(*mi, *NewMI); - if (NewMI->findRegisterUseOperand(RegB, false, TRI)) - // FIXME: Temporary workaround. If the new instruction doesn't - // uses RegB, convertToThreeAddress must have created more - // then one instruction. - Sunk = sink3AddrInstruction(NewMI, RegB, mi); + // If the old instruction is debug value tracked, an update is required. + if (auto OldInstrNum = mi->peekDebugInstrNum()) { + // Sanity check. + assert(mi->getNumExplicitDefs() == 1); + assert(NewMI->getNumExplicitDefs() == 1); + + // Find the old and new def location. + auto OldIt = mi->defs().begin(); + auto NewIt = NewMI->defs().begin(); + unsigned OldIdx = mi->getOperandNo(OldIt); + unsigned NewIdx = NewMI->getOperandNo(NewIt); + + // Record that one def has been replaced by the other. + unsigned NewInstrNum = NewMI->getDebugInstrNum(); + MF->makeDebugValueSubstitution(std::make_pair(OldInstrNum, OldIdx), + std::make_pair(NewInstrNum, NewIdx)); + } MBB->erase(mi); // Nuke the old inst. - if (!Sunk) { - DistanceMap.insert(std::make_pair(NewMI, Dist)); - mi = NewMI; - nmi = std::next(mi); - } - else - SunkInstrs.insert(NewMI); + DistanceMap.insert(std::make_pair(NewMI, Dist)); + mi = NewMI; + nmi = std::next(mi); // Update source and destination register maps. SrcRegMap.erase(RegA); @@ -769,13 +631,12 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, /// Scan forward recursively for only uses, update maps if the use is a copy or /// a two-address instruction. 
-void -TwoAddressInstructionPass::scanUses(unsigned DstReg) { - SmallVector<unsigned, 4> VirtRegPairs; +void TwoAddressInstructionPass::scanUses(Register DstReg) { + SmallVector<Register, 4> VirtRegPairs; bool IsDstPhys; bool IsCopy = false; - unsigned NewReg = 0; - unsigned Reg = DstReg; + Register NewReg; + Register Reg = DstReg; while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy, NewReg, IsDstPhys)) { if (IsCopy && !Processed.insert(UseMI).second) @@ -831,13 +692,13 @@ void TwoAddressInstructionPass::processCopy(MachineInstr *MI) { return; bool IsSrcPhys, IsDstPhys; - unsigned SrcReg, DstReg; + Register SrcReg, DstReg; if (!isCopyToReg(*MI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) return; - if (IsDstPhys && !IsSrcPhys) + if (IsDstPhys && !IsSrcPhys) { DstRegMap.insert(std::make_pair(SrcReg, DstReg)); - else if (!IsDstPhys && IsSrcPhys) { + } else if (!IsDstPhys && IsSrcPhys) { bool isNew = SrcRegMap.insert(std::make_pair(DstReg, SrcReg)).second; if (!isNew) assert(SrcRegMap[DstReg] == SrcReg && @@ -852,10 +713,9 @@ void TwoAddressInstructionPass::processCopy(MachineInstr *MI) { /// If there is one more local instruction that reads 'Reg' and it kills 'Reg, /// consider moving the instruction below the kill instruction in order to /// eliminate the need for the copy. -bool TwoAddressInstructionPass:: -rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - unsigned Reg) { +bool TwoAddressInstructionPass::rescheduleMIBelowKill( + MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, + Register Reg) { // Bail immediately if we don't have LV or LIS available. We use them to find // kills efficiently. if (!LV && !LIS) @@ -892,7 +752,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, // Don't move pass calls, etc. 
return false; - unsigned DstReg; + Register DstReg; if (isTwoAddrUse(*KillMI, Reg, DstReg)) return false; @@ -904,9 +764,9 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, // FIXME: Needs more sophisticated heuristics. return false; - SmallVector<unsigned, 2> Uses; - SmallVector<unsigned, 2> Kills; - SmallVector<unsigned, 2> Defs; + SmallVector<Register, 2> Uses; + SmallVector<Register, 2> Kills; + SmallVector<Register, 2> Defs; for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; @@ -1021,7 +881,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, /// Return true if the re-scheduling will put the given instruction too close /// to the defs of its register dependencies. -bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, +bool TwoAddressInstructionPass::isDefTooClose(Register Reg, unsigned Dist, MachineInstr *MI) { for (MachineInstr &DefMI : MRI->def_instructions(Reg)) { if (DefMI.getParent() != MBB || DefMI.isCopy() || DefMI.isCopyLike()) @@ -1042,10 +902,9 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, /// If there is one more local instruction that reads 'Reg' and it kills 'Reg, /// consider moving the kill instruction above the current two-address /// instruction in order to eliminate the need for the copy. -bool TwoAddressInstructionPass:: -rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - unsigned Reg) { +bool TwoAddressInstructionPass::rescheduleKillAboveMI( + MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, + Register Reg) { // Bail immediately if we don't have LV or LIS available. We use them to find // kills efficiently. if (!LV && !LIS) @@ -1077,7 +936,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, // Don't mess with copies, they may be coalesced later. 
return false; - unsigned DstReg; + Register DstReg; if (isTwoAddrUse(*KillMI, Reg, DstReg)) return false; @@ -1085,10 +944,10 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, if (!KillMI->isSafeToMove(AA, SeenStore)) return false; - SmallSet<unsigned, 2> Uses; - SmallSet<unsigned, 2> Kills; - SmallSet<unsigned, 2> Defs; - SmallSet<unsigned, 2> LiveDefs; + SmallVector<Register, 2> Uses; + SmallVector<Register, 2> Kills; + SmallVector<Register, 2> Defs; + SmallVector<Register, 2> LiveDefs; for (const MachineOperand &MO : KillMI->operands()) { if (!MO.isReg()) continue; @@ -1101,13 +960,13 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, bool isKill = MO.isKill() || (LIS && isPlainlyKilled(KillMI, MOReg, LIS)); if (MOReg == Reg && !isKill) return false; - Uses.insert(MOReg); + Uses.push_back(MOReg); if (isKill && MOReg != Reg) - Kills.insert(MOReg); - } else if (Register::isPhysicalRegister(MOReg)) { - Defs.insert(MOReg); + Kills.push_back(MOReg); + } else if (MOReg.isPhysical()) { + Defs.push_back(MOReg); if (!MO.isDead()) - LiveDefs.insert(MOReg); + LiveDefs.push_back(MOReg); } } @@ -1125,7 +984,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, OtherMI.isBranch() || OtherMI.isTerminator()) // Don't move pass calls, etc. return false; - SmallVector<unsigned, 2> OtherDefs; + SmallVector<Register, 2> OtherDefs; for (const MachineOperand &MO : OtherMI.operands()) { if (!MO.isReg()) continue; @@ -1133,11 +992,11 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, if (!MOReg) continue; if (MO.isUse()) { - if (Defs.count(MOReg)) + if (regOverlapsSet(Defs, MOReg, TRI)) // Moving KillMI can clobber the physical register if the def has // not been seen. return false; - if (Kills.count(MOReg)) + if (regOverlapsSet(Kills, MOReg, TRI)) // Don't want to extend other live ranges and update kills. 
return false; if (&OtherMI != MI && MOReg == Reg && @@ -1150,13 +1009,13 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, } for (unsigned i = 0, e = OtherDefs.size(); i != e; ++i) { - unsigned MOReg = OtherDefs[i]; - if (Uses.count(MOReg)) + Register MOReg = OtherDefs[i]; + if (regOverlapsSet(Uses, MOReg, TRI)) return false; - if (Register::isPhysicalRegister(MOReg) && LiveDefs.count(MOReg)) + if (MOReg.isPhysical() && regOverlapsSet(LiveDefs, MOReg, TRI)) return false; // Physical register def is seen. - Defs.erase(MOReg); + llvm::erase_value(Defs, MOReg); } } @@ -1274,11 +1133,10 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, Register regA = MI.getOperand(DstIdx).getReg(); Register regB = MI.getOperand(SrcIdx).getReg(); - assert(Register::isVirtualRegister(regB) && - "cannot make instruction into two-address form"); + assert(regB.isVirtual() && "cannot make instruction into two-address form"); bool regBKilled = isKilled(MI, regB, MRI, TII, LIS, true); - if (Register::isVirtualRegister(regA)) + if (regA.isVirtual()) scanUses(regA); bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist); @@ -1394,7 +1252,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, if (LV) { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); - if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) { + if (MO.isReg() && MO.getReg().isVirtual()) { if (MO.isUse()) { if (MO.isKill()) { if (NewMIs[0]->killsRegister(MO.getReg())) @@ -1479,7 +1337,7 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { // Deal with undef uses immediately - simply rewrite the src operand. if (SrcMO.isUndef() && !DstMO.getSubReg()) { // Constrain the DstReg register class if required. 
- if (Register::isVirtualRegister(DstReg)) + if (DstReg.isVirtual()) if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx, TRI, *MF)) MRI->constrainRegClass(DstReg, RC); @@ -1509,7 +1367,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, bool AllUsesCopied = true; unsigned LastCopiedReg = 0; SlotIndex LastCopyIdx; - unsigned RegB = 0; + Register RegB = 0; unsigned SubRegB = 0; for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { unsigned SrcIdx = TiedPairs[tpi].first; @@ -1532,8 +1390,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, } LastCopiedReg = RegA; - assert(Register::isVirtualRegister(RegB) && - "cannot make instruction into two-address form"); + assert(RegB.isVirtual() && "cannot make instruction into two-address form"); #ifndef NDEBUG // First, verify that we don't have a use of "a" in the instruction @@ -1553,7 +1410,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, MIB.addReg(RegB, 0, SubRegB); const TargetRegisterClass *RC = MRI->getRegClass(RegB); if (SubRegB) { - if (Register::isVirtualRegister(RegA)) { + if (RegA.isVirtual()) { assert(TRI->getMatchingSuperRegClass(RC, MRI->getRegClass(RegA), SubRegB) && "tied subregister must be a truncation"); @@ -1574,7 +1431,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, if (LIS) { LastCopyIdx = LIS->InsertMachineInstrInMaps(*PrevMI).getRegSlot(); - if (Register::isVirtualRegister(RegA)) { + if (RegA.isVirtual()) { LiveInterval &LI = LIS->getInterval(RegA); VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator()); SlotIndex endIdx = @@ -1594,7 +1451,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, } // Make sure regA is a legal regclass for the SrcIdx operand. 
- if (Register::isVirtualRegister(RegA) && Register::isVirtualRegister(RegB)) + if (RegA.isVirtual() && RegB.isVirtual()) MRI->constrainRegClass(RegA, RC); MO.setReg(RegA); // The getMatchingSuper asserts guarantee that the register class projected @@ -1700,13 +1557,11 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { SrcRegMap.clear(); DstRegMap.clear(); Processed.clear(); - SunkInstrs.clear(); for (MachineBasicBlock::iterator mi = MBB->begin(), me = MBB->end(); mi != me; ) { MachineBasicBlock::iterator nmi = std::next(mi); - // Don't revisit an instruction previously converted by target. It may - // contain undef register operands (%noreg), which are not handled. - if (mi->isDebugInstr() || SunkInstrs.count(&*mi)) { + // Skip debug instructions. + if (mi->isDebugInstr()) { mi = nmi; continue; } @@ -1800,7 +1655,7 @@ void TwoAddressInstructionPass:: eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; Register DstReg = MI.getOperand(0).getReg(); - if (MI.getOperand(0).getSubReg() || Register::isPhysicalRegister(DstReg) || + if (MI.getOperand(0).getSubReg() || DstReg.isPhysical() || !(MI.getNumOperands() & 1)) { LLVM_DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI); llvm_unreachable(nullptr); @@ -1850,7 +1705,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { DefEmitted = true; // Update LiveVariables' kill info. 
- if (LV && isKill && !Register::isPhysicalRegister(SrcReg)) + if (LV && isKill && !SrcReg.isPhysical()) LV->replaceKillInstruction(SrcReg, MI, *CopyMI); LLVM_DEBUG(dbgs() << "Inserted: " << *CopyMI); diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp index 807babdcaf25..a42095d8718a 100644 --- a/llvm/lib/CodeGen/TypePromotion.cpp +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -134,8 +134,9 @@ public: Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited), Sources(sources), Sinks(sinks), SafeWrap(wrap) { ExtTy = IntegerType::get(Ctx, PromotedWidth); - assert(OrigTy->getPrimitiveSizeInBits() < ExtTy->getPrimitiveSizeInBits() - && "Original type not smaller than extended type"); + assert(OrigTy->getPrimitiveSizeInBits().getFixedSize() < + ExtTy->getPrimitiveSizeInBits().getFixedSize() && + "Original type not smaller than extended type"); } void Mutate(); @@ -809,7 +810,7 @@ bool TypePromotion::isLegalToPromote(Value *V) { bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) { Type *OrigTy = V->getType(); - TypeSize = OrigTy->getPrimitiveSizeInBits(); + TypeSize = OrigTy->getPrimitiveSizeInBits().getFixedSize(); SafeToPromote.clear(); SafeWrap.clear(); @@ -980,15 +981,14 @@ bool TypePromotion::runOnFunction(Function &F) { if (TLI->getTypeAction(ICmp->getContext(), SrcVT) != TargetLowering::TypePromoteInteger) break; - EVT PromotedVT = TLI->getTypeToTransformTo(ICmp->getContext(), SrcVT); - if (RegisterBitWidth < PromotedVT.getSizeInBits()) { + if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) { LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register " << "for promoted type\n"); break; } - MadeChange |= TryToPromote(I, PromotedVT.getSizeInBits()); + MadeChange |= TryToPromote(I, PromotedVT.getFixedSizeInBits()); break; } } diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp index 66bcdd9b2c4a..978357d8f539 100644 --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ 
b/llvm/lib/CodeGen/ValueTypes.cpp @@ -49,8 +49,7 @@ EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT, ElementCount EC) { EVT ResultVT; - ResultVT.LLVMTy = - VectorType::get(VT.getTypeForEVT(Context), {EC.Min, EC.Scalable}); + ResultVT.LLVMTy = VectorType::get(VT.getTypeForEVT(Context), EC); assert(ResultVT.isExtended() && "Type is not extended!"); return ResultVT; } @@ -123,13 +122,13 @@ EVT EVT::getExtendedVectorElementType() const { unsigned EVT::getExtendedVectorNumElements() const { assert(isExtended() && "Type is not extended!"); ElementCount EC = cast<VectorType>(LLVMTy)->getElementCount(); - if (EC.Scalable) { + if (EC.isScalable()) { WithColor::warning() << "The code that requested the fixed number of elements has made the " "assumption that this vector is not scalable. This assumption was " "not correct, and this may lead to broken code\n"; } - return EC.Min; + return EC.getKnownMinValue(); } ElementCount EVT::getExtendedVectorElementCount() const { @@ -151,23 +150,25 @@ std::string EVT::getEVTString() const { switch (V.SimpleTy) { default: if (isVector()) - return (isScalableVector() ? "nxv" : "v") - + utostr(getVectorElementCount().Min) - + getVectorElementType().getEVTString(); + return (isScalableVector() ? 
"nxv" : "v") + + utostr(getVectorElementCount().getKnownMinValue()) + + getVectorElementType().getEVTString(); if (isInteger()) return "i" + utostr(getSizeInBits()); if (isFloatingPoint()) return "f" + utostr(getSizeInBits()); llvm_unreachable("Invalid EVT!"); - case MVT::bf16: return "bf16"; - case MVT::ppcf128: return "ppcf128"; - case MVT::isVoid: return "isVoid"; - case MVT::Other: return "ch"; - case MVT::Glue: return "glue"; - case MVT::x86mmx: return "x86mmx"; - case MVT::Metadata:return "Metadata"; - case MVT::Untyped: return "Untyped"; - case MVT::exnref : return "exnref"; + case MVT::bf16: return "bf16"; + case MVT::ppcf128: return "ppcf128"; + case MVT::isVoid: return "isVoid"; + case MVT::Other: return "ch"; + case MVT::Glue: return "glue"; + case MVT::x86mmx: return "x86mmx"; + case MVT::x86amx: return "x86amx"; + case MVT::Metadata: return "Metadata"; + case MVT::Untyped: return "Untyped"; + case MVT::funcref: return "funcref"; + case MVT::externref: return "externref"; } } @@ -194,6 +195,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::f128: return Type::getFP128Ty(Context); case MVT::ppcf128: return Type::getPPC_FP128Ty(Context); case MVT::x86mmx: return Type::getX86_MMXTy(Context); + case MVT::x86amx: return Type::getX86_AMXTy(Context); case MVT::v1i1: return FixedVectorType::get(Type::getInt1Ty(Context), 1); case MVT::v2i1: @@ -292,6 +294,12 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { return FixedVectorType::get(Type::getInt64Ty(Context), 16); case MVT::v32i64: return FixedVectorType::get(Type::getInt64Ty(Context), 32); + case MVT::v64i64: + return FixedVectorType::get(Type::getInt64Ty(Context), 64); + case MVT::v128i64: + return FixedVectorType::get(Type::getInt64Ty(Context), 128); + case MVT::v256i64: + return FixedVectorType::get(Type::getInt64Ty(Context), 256); case MVT::v1i128: return FixedVectorType::get(Type::getInt128Ty(Context), 1); case MVT::v2f16: @@ -307,9 +315,9 @@ Type 
*EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v32f16: return FixedVectorType::get(Type::getHalfTy(Context), 32); case MVT::v64f16: - return FixedVectorType::get(Type::getBFloatTy(Context), 64); + return FixedVectorType::get(Type::getHalfTy(Context), 64); case MVT::v128f16: - return FixedVectorType::get(Type::getBFloatTy(Context), 128); + return FixedVectorType::get(Type::getHalfTy(Context), 128); case MVT::v2bf16: return FixedVectorType::get(Type::getBFloatTy(Context), 2); case MVT::v3bf16: @@ -366,6 +374,12 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { return FixedVectorType::get(Type::getDoubleTy(Context), 16); case MVT::v32f64: return FixedVectorType::get(Type::getDoubleTy(Context), 32); + case MVT::v64f64: + return FixedVectorType::get(Type::getDoubleTy(Context), 64); + case MVT::v128f64: + return FixedVectorType::get(Type::getDoubleTy(Context), 128); + case MVT::v256f64: + return FixedVectorType::get(Type::getDoubleTy(Context), 256); case MVT::nxv1i1: return ScalableVectorType::get(Type::getInt1Ty(Context), 1); case MVT::nxv2i1: @@ -488,6 +502,7 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){ case Type::DoubleTyID: return MVT(MVT::f64); case Type::X86_FP80TyID: return MVT(MVT::f80); case Type::X86_MMXTyID: return MVT(MVT::x86mmx); + case Type::X86_AMXTyID: return MVT(MVT::x86amx); case Type::FP128TyID: return MVT(MVT::f128); case Type::PPC_FP128TyID: return MVT(MVT::ppcf128); case Type::PointerTyID: return MVT(MVT::iPTR); diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp index 2c83f13b651b..5e0ff9d9092c 100644 --- a/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/llvm/lib/CodeGen/VirtRegMap.cpp @@ -68,6 +68,7 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { Virt2PhysMap.clear(); Virt2StackSlotMap.clear(); Virt2SplitMap.clear(); + Virt2ShapeMap.clear(); grow(); return false; @@ -104,7 +105,7 @@ bool VirtRegMap::hasPreferredPhys(Register VirtReg) { return false; if (Hint.isVirtual()) Hint = 
getPhys(Hint); - return getPhys(VirtReg) == Hint; + return Register(getPhys(VirtReg)) == Hint; } bool VirtRegMap::hasKnownPreference(Register VirtReg) { @@ -187,7 +188,7 @@ class VirtRegRewriter : public MachineFunctionPass { void addLiveInsForSubRanges(const LiveInterval &LI, Register PhysReg) const; void handleIdentityCopy(MachineInstr &MI) const; void expandCopyBundle(MachineInstr &MI) const; - bool subRegLiveThrough(const MachineInstr &MI, Register SuperPhysReg) const; + bool subRegLiveThrough(const MachineInstr &MI, MCRegister SuperPhysReg) const; public: static char ID; @@ -400,18 +401,18 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const { /// after processing the last in the bundle. Does not update LiveIntervals /// which we shouldn't need for this instruction anymore. void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const { - if (!MI.isCopy()) + if (!MI.isCopy() && !MI.isKill()) return; if (MI.isBundledWithPred() && !MI.isBundledWithSucc()) { SmallVector<MachineInstr *, 2> MIs({&MI}); - // Only do this when the complete bundle is made out of COPYs. + // Only do this when the complete bundle is made out of COPYs and KILLs. MachineBasicBlock &MBB = *MI.getParent(); for (MachineBasicBlock::reverse_instr_iterator I = std::next(MI.getReverseIterator()), E = MBB.instr_rend(); I != E && I->isBundledWithSucc(); ++I) { - if (!I->isCopy()) + if (!I->isCopy() && !I->isKill()) return; MIs.push_back(&*I); } @@ -452,7 +453,7 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const { // instruction, the bundle will have been completely undone. 
if (BundledMI != BundleStart) { BundledMI->removeFromBundle(); - MBB.insert(FirstMI, BundledMI); + MBB.insert(BundleStart, BundledMI); } else if (BundledMI->isBundledWithSucc()) { BundledMI->unbundleFromSucc(); BundleStart = &*std::next(BundledMI->getIterator()); @@ -468,7 +469,7 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const { /// \pre \p MI defines a subregister of a virtual register that /// has been assigned to \p SuperPhysReg. bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI, - Register SuperPhysReg) const { + MCRegister SuperPhysReg) const { SlotIndex MIIndex = LIS->getInstructionIndex(MI); SlotIndex BeforeMIUses = MIIndex.getBaseIndex(); SlotIndex AfterMIDefs = MIIndex.getBoundaryIndex(); @@ -515,7 +516,7 @@ void VirtRegRewriter::rewrite() { if (!MO.isReg() || !MO.getReg().isVirtual()) continue; Register VirtReg = MO.getReg(); - Register PhysReg = VRM->getPhys(VirtReg); + MCRegister PhysReg = VRM->getPhys(VirtReg); assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Instruction uses unmapped VirtReg"); assert(!MRI->isReserved(PhysReg) && "Reserved register assignment"); diff --git a/llvm/lib/CodeGen/WasmEHPrepare.cpp b/llvm/lib/CodeGen/WasmEHPrepare.cpp index 44f4fe2ff9b1..53424556682d 100644 --- a/llvm/lib/CodeGen/WasmEHPrepare.cpp +++ b/llvm/lib/CodeGen/WasmEHPrepare.cpp @@ -23,7 +23,7 @@ // // - After: // catchpad ... -// exn = wasm.extract.exception(); +// exn = wasm.catch(WebAssembly::CPP_EXCEPTION); // // Only add below in case it's not a single catch (...) 
// wasm.landingpad.index(index); // __wasm_lpad_context.lpad_index = index; @@ -112,7 +112,7 @@ class WasmEHPrepare : public FunctionPass { Function *LPadIndexF = nullptr; // wasm.landingpad.index() intrinsic Function *LSDAF = nullptr; // wasm.lsda() intrinsic Function *GetExnF = nullptr; // wasm.get.exception() intrinsic - Function *ExtractExnF = nullptr; // wasm.extract.exception() intrinsic + Function *CatchF = nullptr; // wasm.catch() intrinsic Function *GetSelectorF = nullptr; // wasm.get.ehselector() intrinsic FunctionCallee CallPersonalityF = nullptr; // _Unwind_CallPersonality() wrapper @@ -124,7 +124,6 @@ class WasmEHPrepare : public FunctionPass { void setupEHPadFunctions(Function &F); void prepareEHPad(BasicBlock *BB, bool NeedPersonality, bool NeedLSDA = false, unsigned Index = 0); - void prepareTerminateCleanupPad(BasicBlock *BB); public: static char ID; // Pass identification, replacement for typeid @@ -169,7 +168,7 @@ static void eraseDeadBBsAndChildren(const Container &BBs, DomTreeUpdater *DTU) { SmallVector<BasicBlock *, 8> WL(BBs.begin(), BBs.end()); while (!WL.empty()) { auto *BB = WL.pop_back_val(); - if (pred_begin(BB) != pred_end(BB)) + if (!pred_empty(BB)) continue; WL.append(succ_begin(BB), succ_end(BB)); DeleteDeadBlock(BB, DTU); @@ -205,7 +204,7 @@ bool WasmEHPrepare::prepareThrows(Function &F) { continue; Changed = true; auto *BB = ThrowI->getParent(); - SmallVector<BasicBlock *, 4> Succs(succ_begin(BB), succ_end(BB)); + SmallVector<BasicBlock *, 4> Succs(successors(BB)); auto &InstList = BB->getInstList(); InstList.erase(std::next(BasicBlock::iterator(ThrowI)), InstList.end()); IRB.SetInsertPoint(BB); @@ -328,12 +327,9 @@ void WasmEHPrepare::setupEHPadFunctions(Function &F) { GetExnF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_exception); GetSelectorF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_ehselector); - // wasm.extract.exception() is the same as wasm.get.exception() but it does - // not take a token argument. 
This will be lowered down to EXTRACT_EXCEPTION - // pseudo instruction in instruction selection, which will be expanded using - // 'br_on_exn' instruction later. - ExtractExnF = - Intrinsic::getDeclaration(&M, Intrinsic::wasm_extract_exception); + // wasm.catch() will be lowered down to wasm 'catch' instruction in + // instruction selection. + CatchF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_catch); // _Unwind_CallPersonality() wrapper function, which calls the personality CallPersonalityF = M.getOrInsertFunction( @@ -373,8 +369,13 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality, return; } - Instruction *ExtractExnCI = IRB.CreateCall(ExtractExnF, {}, "exn"); - GetExnCI->replaceAllUsesWith(ExtractExnCI); + // Replace wasm.get.exception intrinsic with wasm.catch intrinsic, which will + // be lowered to wasm 'catch' instruction. We do this mainly because + // instruction selection cannot handle wasm.get.exception intrinsic's token + // argument. + Instruction *CatchCI = + IRB.CreateCall(CatchF, {IRB.getInt32(WebAssembly::CPP_EXCEPTION)}, "exn"); + GetExnCI->replaceAllUsesWith(CatchCI); GetExnCI->eraseFromParent(); // In case it is a catchpad with single catch (...) or a cleanuppad, we don't @@ -387,7 +388,7 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality, } return; } - IRB.SetInsertPoint(ExtractExnCI->getNextNode()); + IRB.SetInsertPoint(CatchCI->getNextNode()); // This is to create a map of <landingpad EH label, landingpad index> in // SelectionDAGISel, which is to be used in EHStreamer to emit LSDA tables. 
@@ -403,7 +404,7 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality, IRB.CreateStore(IRB.CreateCall(LSDAF), LSDAField); // Pseudocode: _Unwind_CallPersonality(exn); - CallInst *PersCI = IRB.CreateCall(CallPersonalityF, ExtractExnCI, + CallInst *PersCI = IRB.CreateCall(CallPersonalityF, CatchCI, OperandBundleDef("funclet", CPI)); PersCI->setDoesNotThrow(); diff --git a/llvm/lib/CodeGen/WinEHPrepare.cpp b/llvm/lib/CodeGen/WinEHPrepare.cpp index 5a25234ba850..96d256ba57a3 100644 --- a/llvm/lib/CodeGen/WinEHPrepare.cpp +++ b/llvm/lib/CodeGen/WinEHPrepare.cpp @@ -804,13 +804,9 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) { << "\' to block \'" << NewBlock->getName() << "\'.\n"); - BlocksInFunclet.erase( - std::remove(BlocksInFunclet.begin(), BlocksInFunclet.end(), OldBlock), - BlocksInFunclet.end()); + llvm::erase_value(BlocksInFunclet, OldBlock); ColorVector &OldColors = BlockColors[OldBlock]; - OldColors.erase( - std::remove(OldColors.begin(), OldColors.end(), FuncletPadBB), - OldColors.end()); + llvm::erase_value(OldColors, FuncletPadBB); DEBUG_WITH_TYPE("winehprepare-coloring", dbgs() << " Removed color \'" << FuncletPadBB->getName() diff --git a/llvm/lib/CodeGen/XRayInstrumentation.cpp b/llvm/lib/CodeGen/XRayInstrumentation.cpp index ab9c0e81ebdc..11d1b309aa64 100644 --- a/llvm/lib/CodeGen/XRayInstrumentation.cpp +++ b/llvm/lib/CodeGen/XRayInstrumentation.cpp @@ -145,20 +145,22 @@ void XRayInstrumentation::prependRetWithPatchableExit( bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { auto &F = MF.getFunction(); auto InstrAttr = F.getFnAttribute("function-instrument"); - bool AlwaysInstrument = !InstrAttr.hasAttribute(Attribute::None) && - InstrAttr.isStringAttribute() && + bool AlwaysInstrument = InstrAttr.isStringAttribute() && InstrAttr.getValueAsString() == "xray-always"; + bool NeverInstrument = InstrAttr.isStringAttribute() && + InstrAttr.getValueAsString() == "xray-never"; + if (NeverInstrument && 
!AlwaysInstrument) + return false; auto ThresholdAttr = F.getFnAttribute("xray-instruction-threshold"); auto IgnoreLoopsAttr = F.getFnAttribute("xray-ignore-loops"); unsigned int XRayThreshold = 0; if (!AlwaysInstrument) { - if (ThresholdAttr.hasAttribute(Attribute::None) || - !ThresholdAttr.isStringAttribute()) + if (!ThresholdAttr.isStringAttribute()) return false; // XRay threshold attribute not found. if (ThresholdAttr.getValueAsString().getAsInteger(10, XRayThreshold)) return false; // Invalid value for threshold. - bool IgnoreLoops = !IgnoreLoopsAttr.hasAttribute(Attribute::None); + bool IgnoreLoops = IgnoreLoopsAttr.isValid(); // Count the number of MachineInstr`s in MachineFunction int64_t MICount = 0; |