author     Dimitry Andric <dim@FreeBSD.org>    2024-07-27 23:34:35 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2024-10-23 18:26:01 +0000
commit     0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583
tree       6cf5ab1f05330c6773b1f3f64799d56a9c7a1faa /contrib/llvm-project/llvm/lib/CodeGen
parent     6b9f7133aba44189d9625c352bc2c2a59baf18ef
parent     ac9a064cb179f3425b310fa2847f8764ac970a4d
Merge llvm-project main llvmorg-19-init-18630-gf2ccf80136a0
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvm-project main llvmorg-19-init-18630-gf2ccf80136a0, the
last commit before the upstream release/19.x branch was created.
PR: 280562
MFC after: 1 month
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen')
239 files changed, 17680 insertions, 7820 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index c5367221cae7..bccd9b04cd2c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -23,11 +23,11 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGenTypes/MachineValueType.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/CommandLine.h"
@@ -124,13 +124,13 @@ AggressiveAntiDepBreaker::AggressiveAntiDepBreaker(
       TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI) {
   /* Collect a bitset of all registers that are only broken if they
      are on the critical path. */
-  for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) {
-    BitVector CPSet = TRI->getAllocatableSet(MF, CriticalPathRCs[i]);
+  for (const TargetRegisterClass *RC : CriticalPathRCs) {
+    BitVector CPSet = TRI->getAllocatableSet(MF, RC);
     if (CriticalPathSet.none())
       CriticalPathSet = CPSet;
     else
       CriticalPathSet |= CPSet;
-   }
+  }
 
   LLVM_DEBUG(dbgs() << "AntiDep Critical-Path Registers:");
   LLVM_DEBUG(for (unsigned r
@@ -231,9 +231,9 @@ bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr &MI,
 
   MachineOperand *Op = nullptr;
   if (MO.isDef())
-    Op = MI.findRegisterUseOperand(Reg, true);
+    Op = MI.findRegisterUseOperand(Reg, /*TRI=*/nullptr, true);
   else
-    Op = MI.findRegisterDefOperand(Reg);
+    Op = MI.findRegisterDefOperand(Reg, /*TRI=*/nullptr);
 
   return(Op && Op->isImplicit());
 }
@@ -679,7 +679,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
       // defines 'NewReg' via an early-clobber operand.
       for (const auto &Q : make_range(RegRefs.equal_range(Reg))) {
         MachineInstr *UseMI = Q.second.Operand->getParent();
-        int Idx = UseMI->findRegisterDefOperandIdx(NewReg, false, true, TRI);
+        int Idx = UseMI->findRegisterDefOperandIdx(NewReg, TRI, false, true);
         if (Idx == -1)
           continue;
@@ -846,7 +846,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
           continue;
         } else {
           // No anti-dep breaking for implicit deps
-          MachineOperand *AntiDepOp = MI.findRegisterDefOperand(AntiDepReg);
+          MachineOperand *AntiDepOp =
+              MI.findRegisterDefOperand(AntiDepReg, /*TRI=*/nullptr);
           assert(AntiDepOp && "Can't find index for defined register operand");
           if (!AntiDepOp || AntiDepOp->isImplicit()) {
             LLVM_DEBUG(dbgs() << " (implicit)\n");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp
index 2aef1234ac0e..27a4a6cd8571 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp
@@ -39,15 +39,13 @@ AllocationOrder AllocationOrder::create(unsigned VirtReg, const VirtRegMap &VRM,
   LLVM_DEBUG({
     if (!Hints.empty()) {
       dbgs() << "hints:";
-      for (unsigned I = 0, E = Hints.size(); I != E; ++I)
-        dbgs() << ' ' << printReg(Hints[I], TRI);
+      for (MCPhysReg Hint : Hints)
+        dbgs() << ' ' << printReg(Hint, TRI);
       dbgs() << '\n';
     }
   });
-#ifndef NDEBUG
-  for (unsigned I = 0, E = Hints.size(); I != E; ++I)
-    assert(is_contained(Order, Hints[I]) &&
-           "Target hint is outside allocation order.");
-#endif
+  assert(all_of(Hints,
+                [&](MCPhysReg Hint) { return is_contained(Order, Hint); }) &&
+         "Target hint is outside allocation order.");
   return AllocationOrder(std::move(Hints), Order, HardHints);
 }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
index 1994e6aec84b..128060ec912c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
@@ -81,6 +81,9 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                            SmallVectorImpl<EVT> *MemVTs,
                            SmallVectorImpl<TypeSize> *Offsets,
                            TypeSize StartingOffset) {
+  assert((Ty->isScalableTy() == StartingOffset.isScalable() ||
+          StartingOffset.isZero()) &&
+         "Offset/TypeSize mismatch!");
   // Given a struct type, recursively traverse the elements.
   if (StructType *STy = dyn_cast<StructType>(Ty)) {
     // If the Offsets aren't needed, don't query the struct layout. This allows
@@ -92,8 +95,8 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
          EE = STy->element_end();
          EI != EE; ++EI) {
       // Don't compute the element offset if we didn't get a StructLayout above.
-      TypeSize EltOffset = SL ? SL->getElementOffset(EI - EB)
-                              : TypeSize::get(0, StartingOffset.isScalable());
+      TypeSize EltOffset =
+          SL ? SL->getElementOffset(EI - EB) : TypeSize::getZero();
       ComputeValueVTs(TLI, DL, *EI, ValueVTs, MemVTs, Offsets,
                       StartingOffset + EltOffset);
     }
@@ -121,50 +124,10 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
 
 void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                            Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
-                           SmallVectorImpl<TypeSize> *Offsets,
-                           TypeSize StartingOffset) {
-  return ComputeValueVTs(TLI, DL, Ty, ValueVTs, /*MemVTs=*/nullptr, Offsets,
-                         StartingOffset);
-}
-
-void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
-                           Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
-                           SmallVectorImpl<TypeSize> *Offsets,
-                           uint64_t StartingOffset) {
-  TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
-  return ComputeValueVTs(TLI, DL, Ty, ValueVTs, Offsets, Offset);
-}
-
-void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
-                           Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
-                           SmallVectorImpl<uint64_t> *FixedOffsets,
-                           uint64_t StartingOffset) {
-  TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
-  if (FixedOffsets) {
-    SmallVector<TypeSize, 4> Offsets;
-    ComputeValueVTs(TLI, DL, Ty, ValueVTs, &Offsets, Offset);
-    for (TypeSize Offset : Offsets)
-      FixedOffsets->push_back(Offset.getFixedValue());
-  } else {
-    ComputeValueVTs(TLI, DL, Ty, ValueVTs, nullptr, Offset);
-  }
-}
-
-void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
-                           Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
-                           SmallVectorImpl<EVT> *MemVTs,
-                           SmallVectorImpl<TypeSize> *Offsets,
-                           uint64_t StartingOffset) {
-  TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
-  return ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, Offsets, Offset);
-}
-
-void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
-                           Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
                            SmallVectorImpl<EVT> *MemVTs,
                            SmallVectorImpl<uint64_t> *FixedOffsets,
                            uint64_t StartingOffset) {
-  TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
+  TypeSize Offset = TypeSize::getFixed(StartingOffset);
   if (FixedOffsets) {
     SmallVector<TypeSize, 4> Offsets;
     ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, &Offsets, Offset);
@@ -569,7 +532,8 @@ static bool nextRealType(SmallVectorImpl<Type *> &SubTypes,
 /// between it and the return.
 ///
 /// This function only tests target-independent requirements.
-bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM) {
+bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM,
+                                bool ReturnsFirstArg) {
   const BasicBlock *ExitBB = Call.getParent();
   const Instruction *Term = ExitBB->getTerminator();
   const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
@@ -612,7 +576,8 @@ bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM) {
 
   const Function *F = ExitBB->getParent();
   return returnTypeIsEligibleForTailCall(
-      F, &Call, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering());
+      F, &Call, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering(),
+      ReturnsFirstArg);
 }
 
 bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
@@ -630,9 +595,10 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
 
   // Following attributes are completely benign as far as calling convention
   // goes, they shouldn't affect whether the call is a tail call.
-  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
-                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
-                           Attribute::NonNull, Attribute::NoUndef}) {
+  for (const auto &Attr :
+       {Attribute::Alignment, Attribute::Dereferenceable,
+        Attribute::DereferenceableOrNull, Attribute::NoAlias,
+        Attribute::NonNull, Attribute::NoUndef, Attribute::Range}) {
     CallerAttrs.removeAttribute(Attr);
     CalleeAttrs.removeAttribute(Attr);
   }
@@ -674,26 +640,11 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
   return CallerAttrs == CalleeAttrs;
 }
 
-/// Check whether B is a bitcast of a pointer type to another pointer type,
-/// which is equal to A.
-static bool isPointerBitcastEqualTo(const Value *A, const Value *B) {
-  assert(A && B && "Expected non-null inputs!");
-
-  auto *BitCastIn = dyn_cast<BitCastInst>(B);
-
-  if (!BitCastIn)
-    return false;
-
-  if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy())
-    return false;
-
-  return A == BitCastIn->getOperand(0);
-}
-
 bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
                                            const Instruction *I,
                                            const ReturnInst *Ret,
-                                           const TargetLoweringBase &TLI) {
+                                           const TargetLoweringBase &TLI,
+                                           bool ReturnsFirstArg) {
   // If the block ends with a void return or unreachable, it doesn't matter
   // what the call's return type is.
   if (!Ret || Ret->getNumOperands() == 0) return true;
@@ -707,26 +658,11 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
   if (!attributesPermitTailCall(F, I, Ret, TLI, &AllowDifferingSizes))
     return false;
 
-  const Value *RetVal = Ret->getOperand(0), *CallVal = I;
-  // Intrinsic like llvm.memcpy has no return value, but the expanded
-  // libcall may or may not have return value. On most platforms, it
-  // will be expanded as memcpy in libc, which returns the first
-  // argument. On other platforms like arm-none-eabi, memcpy may be
-  // expanded as library call without return value, like __aeabi_memcpy.
-  const CallInst *Call = cast<CallInst>(I);
-  if (Function *F = Call->getCalledFunction()) {
-    Intrinsic::ID IID = F->getIntrinsicID();
-    if (((IID == Intrinsic::memcpy &&
-          TLI.getLibcallName(RTLIB::MEMCPY) == StringRef("memcpy")) ||
-         (IID == Intrinsic::memmove &&
-          TLI.getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove")) ||
-         (IID == Intrinsic::memset &&
-          TLI.getLibcallName(RTLIB::MEMSET) == StringRef("memset"))) &&
-        (RetVal == Call->getArgOperand(0) ||
-         isPointerBitcastEqualTo(RetVal, Call->getArgOperand(0))))
-      return true;
-  }
+  // If the return value is the first argument of the call.
+  if (ReturnsFirstArg)
+    return true;
 
+  const Value *RetVal = Ret->getOperand(0), *CallVal = I;
   SmallVector<unsigned, 4> RetPath, CallPath;
   SmallVector<Type *, 4> RetSubTypes, CallSubTypes;
 
@@ -766,7 +702,7 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
     // index is compatible with the value we return.
     if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath,
                               AllowDifferingSizes, TLI,
-                              F->getParent()->getDataLayout()))
+                              F->getDataLayout()))
       return false;
 
     CallEmpty = !nextRealType(CallSubTypes, CallPath);
@@ -775,6 +711,15 @@
   return true;
 }
 
+bool llvm::funcReturnsFirstArgOfCall(const CallInst &CI) {
+  const ReturnInst *Ret = dyn_cast<ReturnInst>(CI.getParent()->getTerminator());
+  Value *RetVal = Ret ? Ret->getReturnValue() : nullptr;
+  bool ReturnsFirstArg = false;
+  if (RetVal && ((RetVal == CI.getArgOperand(0))))
+    ReturnsFirstArg = true;
+  return ReturnsFirstArg;
+}
+
 static void collectEHScopeMembers(
     DenseMap<const MachineBasicBlock *, int> &EHScopeMembership, int EHScope,
     const MachineBasicBlock *MBB) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
index 82b5ccdc70ea..5d7c97adcaca 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
@@ -14,6 +14,7 @@
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/Module.h"
 #include "llvm/MC/MCSectionXCOFF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 1024aabf2ab0..4957f70b23f0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -32,23 +32,13 @@ using namespace llvm;
 
 void AccelTableBase::computeBucketCount() {
-  // First get the number of unique hashes.
-  std::vector<uint32_t> Uniques;
+  SmallVector<uint32_t, 0> Uniques;
   Uniques.reserve(Entries.size());
   for (const auto &E : Entries)
     Uniques.push_back(E.second.HashValue);
-  array_pod_sort(Uniques.begin(), Uniques.end());
-  std::vector<uint32_t>::iterator P =
-      std::unique(Uniques.begin(), Uniques.end());
-
-  UniqueHashCount = std::distance(Uniques.begin(), P);
-
-  if (UniqueHashCount > 1024)
-    BucketCount = UniqueHashCount / 4;
-  else if (UniqueHashCount > 16)
-    BucketCount = UniqueHashCount / 2;
-  else
-    BucketCount = std::max<uint32_t>(UniqueHashCount, 1);
+  llvm::sort(Uniques);
+  UniqueHashCount = llvm::unique(Uniques) - Uniques.begin();
+  BucketCount = dwarf::getDebugNamesBucketCount(UniqueHashCount);
 }
 
 void AccelTableBase::finalize(AsmPrinter *Asm, StringRef Prefix) {
@@ -59,9 +49,7 @@ void AccelTableBase::finalize(AsmPrinter *Asm, StringRef Prefix) {
                [](const AccelTableData *A, const AccelTableData *B) {
                  return *A < *B;
                });
-    E.second.Values.erase(
-        std::unique(E.second.Values.begin(), E.second.Values.end()),
-        E.second.Values.end());
+    E.second.Values.erase(llvm::unique(E.second.Values), E.second.Values.end());
   }
 
   // Figure out how many buckets we need, then compute the bucket contents and
@@ -208,8 +196,13 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
   };
 
   Header Header;
-  DenseMap<uint32_t, SmallVector<DWARF5AccelTableData::AttributeEncoding, 3>>
-      Abbreviations;
+  /// FoldingSet that uniques the abbreviations.
+  FoldingSet<DebugNamesAbbrev> AbbreviationsSet;
+  /// Vector containing DebugNames abbreviations for iteration in order.
+  SmallVector<DebugNamesAbbrev *, 5> AbbreviationsVector;
+  /// The bump allocator to use when creating DIEAbbrev objects in the uniqued
+  /// storage container.
+  BumpPtrAllocator Alloc;
   ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits;
   ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits;
   llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
@@ -234,7 +227,7 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
   void emitEntry(
       const DWARF5AccelTableData &Entry,
       const DenseMap<OffsetAndUnitID, MCSymbol *> &DIEOffsetToAccelEntryLabel,
-      DenseSet<MCSymbol *> &EmittedAccelEntrySymbols) const;
+      DenseSet<MCSymbol *> &EmittedAccelEntrySymbols);
   void emitData();
 
 public:
@@ -246,7 +239,10 @@ public:
           const DWARF5AccelTableData &)>
           getIndexForEntry,
       bool IsSplitDwarf);
-
+  ~Dwarf5AccelTableWriter() {
+    for (DebugNamesAbbrev *Abbrev : AbbreviationsVector)
+      Abbrev->~DebugNamesAbbrev();
+  }
   void emit();
 };
 } // namespace
@@ -370,7 +366,8 @@ void AppleAccelTableWriter::emit() const {
 DWARF5AccelTableData::DWARF5AccelTableData(const DIE &Die,
                                            const uint32_t UnitID,
                                            const bool IsTU)
-    : OffsetVal(&Die), DieTag(Die.getTag()), UnitID(UnitID), IsTU(IsTU) {}
+    : OffsetVal(&Die), DieTag(Die.getTag()), AbbrevNumber(0), IsTU(IsTU),
+      UnitID(UnitID) {}
 
 void Dwarf5AccelTableWriter::Header::emit(Dwarf5AccelTableWriter &Ctx) {
   assert(CompUnitCount > 0 && "Index must have at least one CU.");
@@ -409,51 +406,6 @@
 DWARF5AccelTableData::getDefiningParentDieOffset(const DIE &Die) {
   return {};
 }
 
-enum IdxParentEncoding : uint8_t {
-  NoIndexedParent = 0, /// Parent information present but parent isn't indexed.
-  Ref4 = 1,            /// Parent information present and parent is indexed.
-  NoParent = 2,        /// Parent information missing.
-};
-
-static uint32_t constexpr NumBitsIdxParent = 2;
-
-uint8_t encodeIdxParent(const std::optional<dwarf::Form> MaybeParentForm) {
-  if (!MaybeParentForm)
-    return NoParent;
-  switch (*MaybeParentForm) {
-  case dwarf::Form::DW_FORM_flag_present:
-    return NoIndexedParent;
-  case dwarf::Form::DW_FORM_ref4:
-    return Ref4;
-  default:
-    // This is not crashing on bad input: we should only reach this if the
-    // internal compiler logic is faulty; see getFormForIdxParent.
-    llvm_unreachable("Bad form for IDX_parent");
-  }
-}
-
-static uint32_t constexpr ParentBitOffset = dwarf::DW_IDX_type_hash;
-static uint32_t constexpr TagBitOffset = ParentBitOffset + NumBitsIdxParent;
-static uint32_t getTagFromAbbreviationTag(const uint32_t AbbrvTag) {
-  return AbbrvTag >> TagBitOffset;
-}
-
-/// Constructs a unique AbbrevTag that captures what a DIE accesses.
-/// Using this tag we can emit a unique abbreviation for each DIE.
-static uint32_t constructAbbreviationTag(
-    const unsigned Tag,
-    const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> &EntryRet,
-    std::optional<dwarf::Form> MaybeParentForm) {
-  uint32_t AbbrvTag = 0;
-  if (EntryRet)
-    AbbrvTag |= 1 << EntryRet->Encoding.Index;
-  AbbrvTag |= 1 << dwarf::DW_IDX_die_offset;
-  AbbrvTag |= 1 << dwarf::DW_IDX_parent;
-  AbbrvTag |= encodeIdxParent(MaybeParentForm) << ParentBitOffset;
-  AbbrvTag |= Tag << TagBitOffset;
-  return AbbrvTag;
-}
-
 static std::optional<dwarf::Form>
 getFormForIdxParent(const DenseSet<OffsetAndUnitID> &IndexedOffsets,
                     std::optional<OffsetAndUnitID> ParentOffset) {
@@ -467,26 +419,42 @@ getFormForIdxParent(const DenseSet<OffsetAndUnitID> &IndexedOffsets,
   return dwarf::Form::DW_FORM_flag_present;
 }
 
+void DebugNamesAbbrev::Profile(FoldingSetNodeID &ID) const {
+  ID.AddInteger(DieTag);
+  for (const DebugNamesAbbrev::AttributeEncoding &Enc : AttrVect) {
+    ID.AddInteger(Enc.Index);
+    ID.AddInteger(Enc.Form);
+  }
+}
+
 void Dwarf5AccelTableWriter::populateAbbrevsMap() {
   for (auto &Bucket : Contents.getBuckets()) {
     for (auto *Hash : Bucket) {
       for (auto *Value : Hash->getValues<DWARF5AccelTableData *>()) {
         std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
             getIndexForEntry(*Value);
-        unsigned Tag = Value->getDieTag();
         std::optional<dwarf::Form> MaybeParentForm = getFormForIdxParent(
             IndexedOffsets, Value->getParentDieOffsetAndUnitID());
-        uint32_t AbbrvTag =
-            constructAbbreviationTag(Tag, EntryRet, MaybeParentForm);
-        if (Abbreviations.count(AbbrvTag) == 0) {
-          SmallVector<DWARF5AccelTableData::AttributeEncoding, 3> UA;
-          if (EntryRet)
-            UA.push_back(EntryRet->Encoding);
-          UA.push_back({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4});
-          if (MaybeParentForm)
-            UA.push_back({dwarf::DW_IDX_parent, *MaybeParentForm});
-          Abbreviations.try_emplace(AbbrvTag, UA);
+        DebugNamesAbbrev Abbrev(Value->getDieTag());
+        if (EntryRet)
+          Abbrev.addAttribute(EntryRet->Encoding);
+        Abbrev.addAttribute({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4});
+        if (MaybeParentForm)
+          Abbrev.addAttribute({dwarf::DW_IDX_parent, *MaybeParentForm});
+        FoldingSetNodeID ID;
+        Abbrev.Profile(ID);
+        void *InsertPos;
+        if (DebugNamesAbbrev *Existing =
+                AbbreviationsSet.FindNodeOrInsertPos(ID, InsertPos)) {
+          Value->setAbbrevNumber(Existing->getNumber());
+          continue;
         }
+        DebugNamesAbbrev *NewAbbrev =
+            new (Alloc) DebugNamesAbbrev(std::move(Abbrev));
+        AbbreviationsVector.push_back(NewAbbrev);
+        NewAbbrev->setNumber(AbbreviationsVector.size());
+        AbbreviationsSet.InsertNode(NewAbbrev, InsertPos);
+        Value->setAbbrevNumber(NewAbbrev->getNumber());
       }
     }
   }
@@ -536,14 +504,13 @@ void Dwarf5AccelTableWriter::emitStringOffsets() const {
 
 void Dwarf5AccelTableWriter::emitAbbrevs() const {
   Asm->OutStreamer->emitLabel(AbbrevStart);
-  for (const auto &Abbrev : Abbreviations) {
+  for (const DebugNamesAbbrev *Abbrev : AbbreviationsVector) {
     Asm->OutStreamer->AddComment("Abbrev code");
-    uint32_t Tag = getTagFromAbbreviationTag(Abbrev.first);
-    assert(Tag != 0);
-    Asm->emitULEB128(Abbrev.first);
-    Asm->OutStreamer->AddComment(dwarf::TagString(Tag));
-    Asm->emitULEB128(Tag);
-    for (const auto &AttrEnc : Abbrev.second) {
+    Asm->emitULEB128(Abbrev->getNumber());
+    Asm->OutStreamer->AddComment(dwarf::TagString(Abbrev->getDieTag()));
+    Asm->emitULEB128(Abbrev->getDieTag());
+    for (const DebugNamesAbbrev::AttributeEncoding &AttrEnc :
+         Abbrev->getAttributes()) {
       Asm->emitULEB128(AttrEnc.Index, dwarf::IndexString(AttrEnc.Index).data());
       Asm->emitULEB128(AttrEnc.Form,
                       dwarf::FormEncodingString(AttrEnc.Form).data());
@@ -558,21 +525,15 @@ void Dwarf5AccelTableWriter::emitAbbrevs() const {
 void Dwarf5AccelTableWriter::emitEntry(
     const DWARF5AccelTableData &Entry,
     const DenseMap<OffsetAndUnitID, MCSymbol *> &DIEOffsetToAccelEntryLabel,
-    DenseSet<MCSymbol *> &EmittedAccelEntrySymbols) const {
+    DenseSet<MCSymbol *> &EmittedAccelEntrySymbols) {
+  unsigned AbbrevIndex = Entry.getAbbrevNumber() - 1;
+  assert(AbbrevIndex < AbbreviationsVector.size() &&
+         "Entry abbrev index is outside of abbreviations vector range.");
+  DebugNamesAbbrev *Abbrev = AbbreviationsVector[AbbrevIndex];
   std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
       getIndexForEntry(Entry);
   std::optional<OffsetAndUnitID> MaybeParentOffset =
       Entry.getParentDieOffsetAndUnitID();
-  std::optional<dwarf::Form> MaybeParentForm =
-      getFormForIdxParent(IndexedOffsets, MaybeParentOffset);
-  uint32_t AbbrvTag =
-      constructAbbreviationTag(Entry.getDieTag(), EntryRet, MaybeParentForm);
-  auto AbbrevIt = Abbreviations.find(AbbrvTag);
-  assert(AbbrevIt != Abbreviations.end() &&
-         "Why wasn't this abbrev generated?");
-  assert(getTagFromAbbreviationTag(AbbrevIt->first) == Entry.getDieTag() &&
-         "Invalid Tag");
-
   auto EntrySymbolIt =
       DIEOffsetToAccelEntryLabel.find(Entry.getDieOffsetAndUnitID());
   assert(EntrySymbolIt != DIEOffsetToAccelEntryLabel.end());
@@ -584,9 +545,10 @@ void Dwarf5AccelTableWriter::emitEntry(
   if (EmittedAccelEntrySymbols.insert(EntrySymbol).second)
     Asm->OutStreamer->emitLabel(EntrySymbol);
 
-  Asm->emitULEB128(AbbrevIt->first, "Abbreviation code");
+  Asm->emitULEB128(Entry.getAbbrevNumber(), "Abbreviation code");
 
-  for (const auto &AttrEnc : AbbrevIt->second) {
+  for (const DebugNamesAbbrev::AttributeEncoding &AttrEnc :
+       Abbrev->getAttributes()) {
     Asm->OutStreamer->AddComment(dwarf::IndexString(AttrEnc.Index));
     switch (AttrEnc.Index) {
     case dwarf::DW_IDX_compile_unit:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 0d573562de96..2297b27ffdc0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -113,7 +113,7 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
-#include "llvm/Support/Timer.h"
+#include "llvm/Support/VCSRevision.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
@@ -151,23 +151,9 @@ static cl::bits<PGOMapFeaturesEnum> PgoAnalysisMapFeatures(
                           "Basic Block Frequency"),
                clEnumValN(PGOMapFeaturesEnum::BrProb, "br-prob",
                           "Branch Probability")),
-    cl::desc("Enable extended information within the BBAddrMap that is "
-             "extracted from PGO related analysis."));
-
-const char DWARFGroupName[] = "dwarf";
-const char DWARFGroupDescription[] = "DWARF Emission";
-const char DbgTimerName[] = "emit";
-const char DbgTimerDescription[] = "Debug Info Emission";
-const char EHTimerName[] = "write_exception";
-const char EHTimerDescription[] = "DWARF Exception Writer";
-const char CFGuardName[] = "Control Flow Guard";
-const char CFGuardDescription[] = "Control Flow Guard";
-const char CodeViewLineTablesGroupName[] = "linetables";
-const char CodeViewLineTablesGroupDescription[] = "CodeView Line Tables";
-const char PPTimerName[] = "emit";
-const char PPTimerDescription[] = "Pseudo Probe Emission";
-const char PPGroupName[] = "pseudo probe";
-const char PPGroupDescription[] = "Pseudo Probe Emission";
+    cl::desc(
+        "Enable extended information within the SHT_LLVM_BB_ADDR_MAP that is "
+        "extracted from PGO related analysis."));
 
 STATISTIC(EmittedInsts, "Number of machine instrs printed");
 
@@ -442,7 +428,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<MachineOptimizationRemarkEmitterPass>();
   AU.addRequired<GCModuleInfo>();
   AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
-  AU.addRequired<MachineBranchProbabilityInfo>();
+  AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
 }
 
 bool AsmPrinter::doInitialization(Module &M) {
@@ -476,11 +462,13 @@ bool AsmPrinter::doInitialization(Module &M) {
   // use the directive, where it would need the same conditionalization
   // anyway.
   const Triple &Target = TM.getTargetTriple();
-  Triple TVT(M.getDarwinTargetVariantTriple());
-  OutStreamer->emitVersionForTarget(
-      Target, M.getSDKVersion(),
-      M.getDarwinTargetVariantTriple().empty() ? nullptr : &TVT,
-      M.getDarwinTargetVariantSDKVersion());
+  if (Target.isOSBinFormatMachO() && Target.isOSDarwin()) {
+    Triple TVT(M.getDarwinTargetVariantTriple());
+    OutStreamer->emitVersionForTarget(
+        Target, M.getSDKVersion(),
+        M.getDarwinTargetVariantTriple().empty() ? nullptr : &TVT,
+        M.getDarwinTargetVariantSDKVersion());
+  }
 
   // Allow the target to emit any magic that it wants at the start of the file.
   emitStartOfAsmFile(M);
@@ -496,12 +484,15 @@ bool AsmPrinter::doInitialization(Module &M) {
   else
     FileName = M.getSourceFileName();
   if (MAI->hasFourStringsDotFile()) {
-#ifdef PACKAGE_VENDOR
     const char VerStr[] =
-        PACKAGE_VENDOR " " PACKAGE_NAME " version " PACKAGE_VERSION;
-#else
-    const char VerStr[] = PACKAGE_NAME " version " PACKAGE_VERSION;
+#ifdef PACKAGE_VENDOR
+        PACKAGE_VENDOR " "
+#endif
+        PACKAGE_NAME " version " PACKAGE_VERSION
+#ifdef LLVM_REVISION
+        " (" LLVM_REVISION ")"
 #endif
+        ;
     // TODO: Add timestamp and description.
     OutStreamer->emitFileDirective(FileName, VerStr, "", "");
   } else {
@@ -537,36 +528,29 @@ bool AsmPrinter::doInitialization(Module &M) {
   if (!M.getModuleInlineAsm().empty()) {
     OutStreamer->AddComment("Start of file scope inline assembly");
     OutStreamer->addBlankLine();
-    emitInlineAsm(M.getModuleInlineAsm() + "\n", *TM.getMCSubtargetInfo(),
-                  TM.Options.MCOptions);
+    emitInlineAsm(
+        M.getModuleInlineAsm() + "\n", *TM.getMCSubtargetInfo(),
+        TM.Options.MCOptions, nullptr,
+        InlineAsm::AsmDialect(TM.getMCAsmInfo()->getAssemblerDialect()));
     OutStreamer->AddComment("End of file scope inline assembly");
     OutStreamer->addBlankLine();
   }
 
   if (MAI->doesSupportDebugInformation()) {
     bool EmitCodeView = M.getCodeViewFlag();
-    if (EmitCodeView && TM.getTargetTriple().isOSWindows()) {
-      Handlers.emplace_back(std::make_unique<CodeViewDebug>(this),
-                            DbgTimerName, DbgTimerDescription,
-                            CodeViewLineTablesGroupName,
-                            CodeViewLineTablesGroupDescription);
-    }
+    if (EmitCodeView && TM.getTargetTriple().isOSWindows())
+      DebugHandlers.push_back(std::make_unique<CodeViewDebug>(this));
     if (!EmitCodeView || M.getDwarfVersion()) {
       assert(MMI && "MMI could not be nullptr here!");
       if (MMI->hasDebugInfo()) {
         DD = new DwarfDebug(this);
-        Handlers.emplace_back(std::unique_ptr<DwarfDebug>(DD), DbgTimerName,
-                              DbgTimerDescription, DWARFGroupName,
-                              DWARFGroupDescription);
+        DebugHandlers.push_back(std::unique_ptr<DwarfDebug>(DD));
       }
     }
   }
 
-  if (M.getNamedMetadata(PseudoProbeDescMetadataName)) {
-    PP = new PseudoProbeHandler(this);
-    Handlers.emplace_back(std::unique_ptr<PseudoProbeHandler>(PP), PPTimerName,
-                          PPTimerDescription, PPGroupName, PPGroupDescription);
-  }
+  if (M.getNamedMetadata(PseudoProbeDescMetadataName))
+    PP = std::make_unique<PseudoProbeHandler>(this);
 
   switch (MAI->getExceptionHandlingType()) {
   case ExceptionHandling::None:
@@ -623,21 +607,16 @@ bool AsmPrinter::doInitialization(Module &M) {
     break;
   }
   if (ES)
-    Handlers.emplace_back(std::unique_ptr<EHStreamer>(ES), EHTimerName,
-                          EHTimerDescription, DWARFGroupName,
-                          DWARFGroupDescription);
+    Handlers.push_back(std::unique_ptr<EHStreamer>(ES));
 
   // Emit tables for any value of cfguard flag (i.e. cfguard=1 or cfguard=2).
   if (mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("cfguard")))
-    Handlers.emplace_back(std::make_unique<WinCFGuard>(this), CFGuardName,
-                          CFGuardDescription, DWARFGroupName,
-                          DWARFGroupDescription);
+    Handlers.push_back(std::make_unique<WinCFGuard>(this));
 
-  for (const HandlerInfo &HI : Handlers) {
-    NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
-                       HI.TimerGroupDescription, TimePassesIsEnabled);
-    HI.Handler->beginModule(&M);
-  }
+  for (auto &Handler : DebugHandlers)
+    Handler->beginModule(&M);
+  for (auto &Handler : Handlers)
+    Handler->beginModule(&M);
 
   return false;
 }
@@ -776,7 +755,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
 
   SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
 
-  const DataLayout &DL = GV->getParent()->getDataLayout();
+  const DataLayout &DL = GV->getDataLayout();
   uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
 
   // If the alignment is specified, we *must* obey it. Overaligning a global
@@ -784,12 +763,8 @@
   // sections and expected to be contiguous (e.g. ObjC metadata).
   const Align Alignment = getGVAlignment(GV, DL);
 
-  for (const HandlerInfo &HI : Handlers) {
-    NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
-                       HI.TimerGroupName, HI.TimerGroupDescription,
-                       TimePassesIsEnabled);
-    HI.Handler->setSymbolSize(GVSym, Size);
-  }
+  for (auto &Handler : DebugHandlers)
+    Handler->setSymbolSize(GVSym, Size);
 
   // Handle common symbols
   if (GVKind.isCommon()) {
@@ -864,7 +839,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
     emitAlignment(Alignment, GV);
     OutStreamer->emitLabel(MangSym);
 
-    emitGlobalConstant(GV->getParent()->getDataLayout(),
+    emitGlobalConstant(GV->getDataLayout(),
                        GV->getInitializer());
   }
 
@@ -904,7 +879,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
   if (LocalAlias != EmittedInitSym)
     OutStreamer->emitLabel(LocalAlias);
 
-  emitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer());
+  emitGlobalConstant(GV->getDataLayout(), GV->getInitializer());
 
   if (MAI->hasDotTypeDotSizeDirective())
     // .size foo, 42
@@ -924,6 +899,27 @@ void AsmPrinter::emitDebugValue(const MCExpr *Value, unsigned Size) const {
 
 void AsmPrinter::emitFunctionHeaderComment() {}
 
+void AsmPrinter::emitFunctionPrefix(ArrayRef<const Constant *> Prefix) {
+  const Function &F = MF->getFunction();
+  if (!MAI->hasSubsectionsViaSymbols()) {
+    for (auto &C : Prefix)
+      emitGlobalConstant(F.getDataLayout(), C);
+    return;
+  }
+  // Preserving prefix-like data on platforms which use subsections-via-symbols
+  // is a bit tricky. Here we introduce a symbol for the prefix-like data
+  // and use the .alt_entry attribute to mark the function's real entry point
+  // as an alternative entry point to the symbol that precedes the function..
+  OutStreamer->emitLabel(OutContext.createLinkerPrivateTempSymbol());
+
+  for (auto &C : Prefix) {
+    emitGlobalConstant(F.getDataLayout(), C);
+  }
+
+  // Emit an .alt_entry directive for the actual function symbol.
+  OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_AltEntry);
+}
+
 /// EmitFunctionHeader - This method emits the header for the current
 /// function.
 void AsmPrinter::emitFunctionHeader() {
@@ -963,23 +959,8 @@ void AsmPrinter::emitFunctionHeader() {
     OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_Cold);
 
   // Emit the prefix data.
-  if (F.hasPrefixData()) {
-    if (MAI->hasSubsectionsViaSymbols()) {
-      // Preserving prefix data on platforms which use subsections-via-symbols
-      // is a bit tricky. Here we introduce a symbol for the prefix data
-      // and use the .alt_entry attribute to mark the function's real entry point
-      // as an alternative entry point to the prefix-data symbol.
-      MCSymbol *PrefixSym = OutContext.createLinkerPrivateTempSymbol();
-      OutStreamer->emitLabel(PrefixSym);
-
-      emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData());
-
-      // Emit an .alt_entry directive for the actual function symbol.
-      OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_AltEntry);
-    } else {
-      emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData());
-    }
-  }
+  if (F.hasPrefixData())
+    emitFunctionPrefix({F.getPrefixData()});
 
   // Emit KCFI type information before patchable-function-prefix nops.
   emitKCFITypeId(*MF);
@@ -1011,8 +992,7 @@ void AsmPrinter::emitFunctionHeader() {
     auto *PrologueSig = mdconst::extract<Constant>(MD->getOperand(0));
     auto *TypeHash = mdconst::extract<Constant>(MD->getOperand(1));
-    emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
-    emitGlobalConstant(F.getParent()->getDataLayout(), TypeHash);
+    emitFunctionPrefix({PrologueSig, TypeHash});
   }
 
   if (isVerbose()) {
@@ -1055,20 +1035,18 @@ void AsmPrinter::emitFunctionHeader() {
   }
 
   // Emit pre-function debug and/or EH information.
-  for (const HandlerInfo &HI : Handlers) {
-    NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
-                       HI.TimerGroupDescription, TimePassesIsEnabled);
-    HI.Handler->beginFunction(MF);
-  }
-  for (const HandlerInfo &HI : Handlers) {
-    NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
-                       HI.TimerGroupDescription, TimePassesIsEnabled);
-    HI.Handler->beginBasicBlockSection(MF->front());
+  for (auto &Handler : DebugHandlers) {
+    Handler->beginFunction(MF);
+    Handler->beginBasicBlockSection(MF->front());
   }
+  for (auto &Handler : Handlers)
+    Handler->beginFunction(MF);
+  for (auto &Handler : Handlers)
+    Handler->beginBasicBlockSection(MF->front());
 
   // Emit the prologue data.
   if (F.hasPrologueData())
-    emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData());
+    emitGlobalConstant(F.getDataLayout(), F.getPrologueData());
 }
 
 /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
@@ -1105,25 +1083,21 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
 
   // We assume a single instruction only has a spill or reload, not
   // both.
-  std::optional<unsigned> Size;
+  std::optional<LocationSize> Size;
   if ((Size = MI.getRestoreSize(TII))) {
-    CommentOS << *Size << "-byte Reload\n";
+    CommentOS << Size->getValue() << "-byte Reload\n";
   } else if ((Size = MI.getFoldedRestoreSize(TII))) {
-    if (*Size) {
-      if (*Size == unsigned(MemoryLocation::UnknownSize))
-        CommentOS << "Unknown-size Folded Reload\n";
-      else
-        CommentOS << *Size << "-byte Folded Reload\n";
-    }
+    if (!Size->hasValue())
+      CommentOS << "Unknown-size Folded Reload\n";
+    else if (Size->getValue())
+      CommentOS << Size->getValue() << "-byte Folded Reload\n";
   } else if ((Size = MI.getSpillSize(TII))) {
-    CommentOS << *Size << "-byte Spill\n";
+    CommentOS << Size->getValue() << "-byte Spill\n";
   } else if ((Size = MI.getFoldedSpillSize(TII))) {
-    if (*Size) {
-      if (*Size == unsigned(MemoryLocation::UnknownSize))
-        CommentOS << "Unknown-size Folded Spill\n";
-      else
-        CommentOS << *Size << "-byte Folded Spill\n";
-    }
+    if (!Size->hasValue())
+      CommentOS << "Unknown-size Folded Spill\n";
+    else if (Size->getValue())
+      CommentOS << Size->getValue() << "-byte Folded Spill\n";
   }
 
   // Check for spill-induced copies
@@ -1154,7 +1128,7 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
     OS << ' ' << (Op.isDef() ? "def " : "killed ")
        << printReg(Op.getReg(), AP.MF->getSubtarget().getRegisterInfo());
   }
-  AP.OutStreamer->AddComment(OS.str());
+  AP.OutStreamer->AddComment(Str);
   AP.OutStreamer->addBlankLine();
 }
 
@@ -1261,7 +1235,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
   }
 
   // NOTE: Want this comment at start of line, don't emit with AddComment.
-  AP.OutStreamer->emitRawComment(OS.str());
+  AP.OutStreamer->emitRawComment(Str);
   return true;
 }
@@ -1371,6 +1345,14 @@ static uint32_t getBBAddrMapMetadata(const MachineBasicBlock &MBB) {
       .encode();
 }
 
+static llvm::object::BBAddrMap::Features
+getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges) {
+  return {PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::FuncEntryCount),
+          PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::BBFreq),
+          PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::BrProb),
+          MF.hasBBSections() && NumMBBSectionRanges > 1};
+}
+
 void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
   MCSection *BBAddrMapSection =
       getObjFileLowering().getBBAddrMapSection(*MF.getSection());
@@ -1384,17 +1366,47 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
   uint8_t BBAddrMapVersion = OutStreamer->getContext().getBBAddrMapVersion();
   OutStreamer->emitInt8(BBAddrMapVersion);
   OutStreamer->AddComment("feature");
-  auto FeaturesBits = static_cast<uint8_t>(PgoAnalysisMapFeatures.getBits());
-  OutStreamer->emitInt8(FeaturesBits);
-  OutStreamer->AddComment("function address");
-  OutStreamer->emitSymbolValue(FunctionSymbol, getPointerSize());
-  OutStreamer->AddComment("number of basic blocks");
-  OutStreamer->emitULEB128IntValue(MF.size());
-  const MCSymbol *PrevMBBEndSymbol = FunctionSymbol;
+  auto Features = getBBAddrMapFeature(MF, MBBSectionRanges.size());
+  OutStreamer->emitInt8(Features.encode());
   // Emit BB Information for each basic block in the function.
+  if (Features.MultiBBRange) {
+    OutStreamer->AddComment("number of basic block ranges");
+    OutStreamer->emitULEB128IntValue(MBBSectionRanges.size());
+  }
+  // Number of blocks in each MBB section.
+  MapVector<MBBSectionID, unsigned> MBBSectionNumBlocks;
+  const MCSymbol *PrevMBBEndSymbol = nullptr;
+  if (!Features.MultiBBRange) {
+    OutStreamer->AddComment("function address");
+    OutStreamer->emitSymbolValue(FunctionSymbol, getPointerSize());
+    OutStreamer->AddComment("number of basic blocks");
+    OutStreamer->emitULEB128IntValue(MF.size());
+    PrevMBBEndSymbol = FunctionSymbol;
+  } else {
+    unsigned BBCount = 0;
+    for (const MachineBasicBlock &MBB : MF) {
+      BBCount++;
+      if (MBB.isEndSection()) {
+        // Store each section's basic block count when it ends.
+        MBBSectionNumBlocks[MBB.getSectionID()] = BBCount;
+        // Reset the count for the next section.
+        BBCount = 0;
+      }
+    }
+  }
+  // Emit the BB entry for each basic block in the function.
   for (const MachineBasicBlock &MBB : MF) {
     const MCSymbol *MBBSymbol =
         MBB.isEntryBlock() ? FunctionSymbol : MBB.getSymbol();
+    bool IsBeginSection =
+        Features.MultiBBRange && (MBB.isBeginSection() || MBB.isEntryBlock());
+    if (IsBeginSection) {
+      OutStreamer->AddComment("base address");
+      OutStreamer->emitSymbolValue(MBBSymbol, getPointerSize());
+      OutStreamer->AddComment("number of basic blocks");
+      OutStreamer->emitULEB128IntValue(MBBSectionNumBlocks[MBB.getSectionID()]);
+      PrevMBBEndSymbol = MBBSymbol;
+    }
     // TODO: Remove this check when version 1 is deprecated.
     if (BBAddrMapVersion > 1) {
       OutStreamer->AddComment("BB id");
@@ -1416,35 +1428,33 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
     PrevMBBEndSymbol = MBB.getEndSymbol();
   }
 
-  if (FeaturesBits != 0) {
+  if (Features.hasPGOAnalysis()) {
     assert(BBAddrMapVersion >= 2 &&
            "PGOAnalysisMap only supports version 2 or later");
 
-    auto FeatEnable =
-        cantFail(object::PGOAnalysisMap::Features::decode(FeaturesBits));
-
-    if (FeatEnable.FuncEntryCount) {
+    if (Features.FuncEntryCount) {
       OutStreamer->AddComment("function entry count");
       auto MaybeEntryCount = MF.getFunction().getEntryCount();
       OutStreamer->emitULEB128IntValue(
          MaybeEntryCount ? MaybeEntryCount->getCount() : 0);
     }
     const MachineBlockFrequencyInfo *MBFI =
-        FeatEnable.BBFreq
+        Features.BBFreq
             ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
             : nullptr;
     const MachineBranchProbabilityInfo *MBPI =
-        FeatEnable.BrProb ? &getAnalysis<MachineBranchProbabilityInfo>()
-                          : nullptr;
+        Features.BrProb
            ? &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI()
            : nullptr;
 
-    if (FeatEnable.BBFreq || FeatEnable.BrProb) {
+    if (Features.BBFreq || Features.BrProb) {
      for (const MachineBasicBlock &MBB : MF) {
-        if (FeatEnable.BBFreq) {
+        if (Features.BBFreq) {
          OutStreamer->AddComment("basic block frequency");
          OutStreamer->emitULEB128IntValue(
              MBFI->getBlockFreq(&MBB).getFrequency());
        }
-        if (FeatEnable.BrProb) {
+        if (Features.BrProb) {
          unsigned SuccCount = MBB.succ_size();
          OutStreamer->AddComment("basic block successor count");
          OutStreamer->emitULEB128IntValue(SuccCount);
@@ -1483,7 +1493,7 @@ void AsmPrinter::emitKCFITrapEntry(const MachineFunction &MF,
 void AsmPrinter::emitKCFITypeId(const MachineFunction &MF) {
   const Function &F = MF.getFunction();
   if (const MDNode *MD = F.getMetadata(LLVMContext::MD_kcfi_type))
-    emitGlobalConstant(F.getParent()->getDataLayout(),
+    emitGlobalConstant(F.getDataLayout(),
                       mdconst::extract<ConstantInt>(MD->getOperand(0)));
 }
 
@@ -1633,7 +1643,7 @@ void AsmPrinter::emitPCSections(const MachineFunction &MF) {
      for (const MDOperand &AuxMDO : AuxMDs->operands()) {
        assert(isa<ConstantAsMetadata>(AuxMDO) && "expecting a constant");
        const Constant *C = cast<ConstantAsMetadata>(AuxMDO)->getValue();
-        const DataLayout &DL = F.getParent()->getDataLayout();
+        const DataLayout &DL = F.getDataLayout();
        const uint64_t Size = DL.getTypeStoreSize(C->getType());
 
        if (auto *CI = dyn_cast<ConstantInt>(C);
@@ -1659,8 +1669,8 @@ void AsmPrinter::emitPCSections(const MachineFunction &MF) {
 }
 
 /// Returns true if function begin and end labels should be emitted.
-static bool needFuncLabels(const MachineFunction &MF) {
-  MachineModuleInfo &MMI = MF.getMMI();
+static bool needFuncLabels(const MachineFunction &MF,
+                           const MachineModuleInfo &MMI) {
   if (!MF.getLandingPads().empty() || MF.hasEHFunclets() ||
       MMI.hasDebugInfo() ||
       MF.getFunction().hasMetadata(LLVMContext::MD_pcsections))
@@ -1684,7 +1694,8 @@ void AsmPrinter::emitFunctionBody() {
 
   if (isVerbose()) {
     // Get MachineDominatorTree or compute it on the fly if it's unavailable
-    MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+    auto MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
+    MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
     if (!MDT) {
       OwnedMDT = std::make_unique<MachineDominatorTree>();
       OwnedMDT->getBase().recalculate(*MF);
@@ -1692,10 +1703,11 @@ void AsmPrinter::emitFunctionBody() {
    }
 
    // Get MachineLoopInfo or compute it on the fly if it's unavailable
-    MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+    auto *MLIWrapper = getAnalysisIfAvailable<MachineLoopInfoWrapperPass>();
+    MLI = MLIWrapper ? &MLIWrapper->getLI() : nullptr;
    if (!MLI) {
      OwnedMLI = std::make_unique<MachineLoopInfo>();
-      OwnedMLI->getBase().analyze(MDT->getBase());
+      OwnedMLI->analyze(MDT->getBase());
      MLI = OwnedMLI.get();
    }
  }
@@ -1725,11 +1737,8 @@ void AsmPrinter::emitFunctionBody() {
      if (MDNode *MD = MI.getPCSections())
        emitPCSectionsLabel(*MF, *MD);
 
-      for (const HandlerInfo &HI : Handlers) {
-        NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
-                           HI.TimerGroupDescription, TimePassesIsEnabled);
-        HI.Handler->beginInstruction(&MI);
-      }
+      for (auto &Handler : DebugHandlers)
+        Handler->beginInstruction(&MI);
 
      if (isVerbose())
        emitComments(MI, OutStreamer->getCommentOS());
@@ -1823,17 +1832,14 @@ void AsmPrinter::emitFunctionBody() {
      if (MCSymbol *S = MI.getPostInstrSymbol())
        OutStreamer->emitLabel(S);
 
-      for (const HandlerInfo &HI : Handlers) {
-        NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
-                           HI.TimerGroupDescription, TimePassesIsEnabled);
-        HI.Handler->endInstruction();
-      }
+      for (auto &Handler : DebugHandlers)
+        Handler->endInstruction();
    }
 
    // We must emit temporary symbol for the end of this basic block, if either
    // we have BBLabels enabled or if this basic blocks marks the end of a
    // section.
-    if (MF->hasBBLabels() ||
+    if (MF->hasBBLabels() || MF->getTarget().Options.BBAddrMap ||
        (MAI->hasDotTypeDotSizeDirective() && MBB.isEndSection()))
      OutStreamer->emitLabel(MBB.getEndSymbol());
 
@@ -1849,7 +1855,9 @@ void AsmPrinter::emitFunctionBody() {
                                 OutContext);
      OutStreamer->emitELFSize(CurrentSectionBeginSym, SizeExp);
    }
-    MBBSectionRanges[MBB.getSectionIDNum()] =
+    assert(!MBBSectionRanges.contains(MBB.getSectionID()) &&
+           "Overwrite section range");
+    MBBSectionRanges[MBB.getSectionID()] =
        MBBSectionRange{CurrentSectionBeginSym, MBB.getEndSymbol()};
  }
 }
@@ -1936,7 +1944,7 @@ void AsmPrinter::emitFunctionBody() {
  // are automatically sized.
  bool EmitFunctionSize = MAI->hasDotTypeDotSizeDirective() && !TT.isWasm();
 
-  if (needFuncLabels(*MF) || EmitFunctionSize) {
+  if (needFuncLabels(*MF, *MMI) || EmitFunctionSize) {
    // Create a symbol for the end of function.
    CurrentFnEnd = createTempSymbol("func_end");
    OutStreamer->emitLabel(CurrentFnEnd);
@@ -1958,35 +1966,32 @@ void AsmPrinter::emitFunctionBody() {
 
  // Call endBasicBlockSection on the last block now, if it wasn't already
  // called.
   if (!MF->back().isEndSection()) {
-    for (const HandlerInfo &HI : Handlers) {
-      NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
-                         HI.TimerGroupDescription, TimePassesIsEnabled);
-      HI.Handler->endBasicBlockSection(MF->back());
-    }
-  }
-  for (const HandlerInfo &HI : Handlers) {
-    NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
-                       HI.TimerGroupDescription, TimePassesIsEnabled);
-    HI.Handler->markFunctionEnd();
+    for (auto &Handler : DebugHandlers)
+      Handler->endBasicBlockSection(MF->back());
+    for (auto &Handler : Handlers)
+      Handler->endBasicBlockSection(MF->back());
   }
+  for (auto &Handler : Handlers)
+    Handler->markFunctionEnd();
 
-  MBBSectionRanges[MF->front().getSectionIDNum()] =
+  assert(!MBBSectionRanges.contains(MF->front().getSectionID()) &&
+         "Overwrite section range");
+  MBBSectionRanges[MF->front().getSectionID()] =
       MBBSectionRange{CurrentFnBegin, CurrentFnEnd};
 
  // Print out jump tables referenced by the function.
  emitJumpTableInfo();
 
  // Emit post-function debug and/or EH information.
-  for (const HandlerInfo &HI : Handlers) {
-    NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
-                       HI.TimerGroupDescription, TimePassesIsEnabled);
-    HI.Handler->endFunction(MF);
-  }
+  for (auto &Handler : DebugHandlers)
+    Handler->endFunction(MF);
+  for (auto &Handler : Handlers)
+    Handler->endFunction(MF);
 
  // Emit section containing BB address offsets and their metadata, when
  // BB labels are requested for this function. Skip empty functions.
  if (HasAnyRealCode) {
-    if (MF->hasBBLabels())
+    if (MF->hasBBLabels() || MF->getTarget().Options.BBAddrMap)
      emitBBAddrMapSection(*MF);
    else if (PgoAnalysisMapFeatures.getBits() != 0)
      MF->getContext().reportWarning(
@@ -2090,7 +2095,7 @@ void AsmPrinter::emitGlobalGOTEquivs() {
    emitGlobalVariable(GV);
 }
 
-void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) {
+void AsmPrinter::emitGlobalAlias(const Module &M, const GlobalAlias &GA) {
  MCSymbol *Name = getSymbol(&GA);
  bool IsFunction = GA.getValueType()->isFunctionTy();
  // Treat bitcasts of functions as functions also. This is important at least
@@ -2273,7 +2278,7 @@ void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) {
      OutContext.getObjectFileInfo()->getRemarksSection();
  OutStreamer->switchSection(RemarksSection);
 
-  OutStreamer->emitBinaryData(OS.str());
+  OutStreamer->emitBinaryData(Buf);
 }
 
 bool AsmPrinter::doFinalization(Module &M) {
@@ -2324,8 +2329,10 @@ bool AsmPrinter::doFinalization(Module &M) {
      // Emit linkage for the function entry point.
      emitLinkage(&F, FnEntryPointSym);
 
-      // Emit linkage for the function descriptor.
-      emitLinkage(&F, Name);
+      // If a function's address is taken, which means it may be called via a
+      // function pointer, we need the function descriptor for it.
+      if (F.hasAddressTaken())
+        emitLinkage(&F, Name);
    }
 
  // Emit the remarks section contents.
@@ -2370,8 +2377,7 @@ bool AsmPrinter::doFinalization(Module &M) {
              SectionName, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
                               COFF::IMAGE_SCN_MEM_READ |
                               COFF::IMAGE_SCN_LNK_COMDAT,
-              SectionKind::getReadOnly(), Stub.first->getName(),
-              COFF::IMAGE_COMDAT_SELECT_ANY));
+              Stub.first->getName(), COFF::IMAGE_COMDAT_SELECT_ANY));
          emitAlignment(Align(DL.getPointerSize()));
          OutStreamer->emitSymbolAttribute(Stub.first, MCSA_Global);
          OutStreamer->emitLabel(Stub.first);
@@ -2419,16 +2425,17 @@ bool AsmPrinter::doFinalization(Module &M) {
    emitGlobalIFunc(M, IFunc);
 
  // Finalize debug and EH information.
-  for (const HandlerInfo &HI : Handlers) {
-    NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
-                       HI.TimerGroupDescription, TimePassesIsEnabled);
-    HI.Handler->endModule();
-  }
+  for (auto &Handler : DebugHandlers)
+    Handler->endModule();
+  for (auto &Handler : Handlers)
+    Handler->endModule();
 
   // This deletes all the ephemeral handlers that AsmPrinter added, while
   // keeping all the user-added handlers alive until the AsmPrinter is
   // destroyed.
   Handlers.erase(Handlers.begin() + NumUserHandlers, Handlers.end());
+  DebugHandlers.erase(DebugHandlers.begin() + NumUserDebugHandlers,
+                      DebugHandlers.end());
   DD = nullptr;
 
   // If the target wants to know about weak references, print them all.
@@ -2535,7 +2542,7 @@ bool AsmPrinter::doFinalization(Module &M) {
 }
 
 MCSymbol *AsmPrinter::getMBBExceptionSym(const MachineBasicBlock &MBB) {
-  auto Res = MBBSectionExceptionSyms.try_emplace(MBB.getSectionIDNum());
+  auto Res = MBBSectionExceptionSyms.try_emplace(MBB.getSectionID());
   if (Res.second)
     Res.first->second = createTempSymbol("exception");
   return Res.first->second;
@@ -2581,8 +2588,9 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
   if (F.hasFnAttribute("patchable-function-entry") ||
       F.hasFnAttribute("function-instrument") ||
       F.hasFnAttribute("xray-instruction-threshold") ||
-      needFuncLabels(MF) || NeedsLocalForSize ||
-      MF.getTarget().Options.EmitStackSizeSection || MF.hasBBLabels()) {
+      needFuncLabels(MF, *MMI) || NeedsLocalForSize ||
+      MF.getTarget().Options.EmitStackSizeSection ||
+      MF.getTarget().Options.BBAddrMap || MF.hasBBLabels()) {
     CurrentFnBegin = createTempSymbol("func_begin");
     if (NeedsLocalForSize)
       CurrentFnSymForSize = CurrentFnBegin;
@@ -2753,6 +2761,8 @@ void AsmPrinter::emitJumpTableInfo() {
     MCSymbol* JTISymbol = GetJTISymbol(JTI);
     OutStreamer->emitLabel(JTISymbol);
 
+    // Defer MCAssembler based constant folding due to a performance issue. The
+    // label differences will be evaluated at write time.
     for (const MachineBasicBlock *MBB : JTBBs)
       emitJumpTableEntry(MJTI, MBB, JTI);
   }
@@ -2845,13 +2855,13 @@ bool AsmPrinter::emitSpecialLLVMGlobal(const GlobalVariable *GV) {
     // For ARM64EC, print the table that maps between symbols and the
     // corresponding thunks to translate between x64 and AArch64 code.
     // This table is generated by AArch64Arm64ECCallLowering.
-    OutStreamer->switchSection(OutContext.getCOFFSection(
-        ".hybmp$x", COFF::IMAGE_SCN_LNK_INFO, SectionKind::getMetadata()));
+    OutStreamer->switchSection(
+        OutContext.getCOFFSection(".hybmp$x", COFF::IMAGE_SCN_LNK_INFO));
     auto *Arr = cast<ConstantArray>(GV->getInitializer());
     for (auto &U : Arr->operands()) {
       auto *C = cast<Constant>(U);
-      auto *Src = cast<Function>(C->getOperand(0)->stripPointerCasts());
-      auto *Dst = cast<Function>(C->getOperand(1)->stripPointerCasts());
+      auto *Src = cast<GlobalValue>(C->getOperand(0)->stripPointerCasts());
+      auto *Dst = cast<GlobalValue>(C->getOperand(1)->stripPointerCasts());
       int Kind = cast<ConstantInt>(C->getOperand(2))->getZExtValue();
 
       if (Src->hasDLLImportStorageClass()) {
@@ -2879,20 +2889,20 @@ bool AsmPrinter::emitSpecialLLVMGlobal(const GlobalVariable *GV) {
   assert(GV->hasInitializer() && "Not a special LLVM global!");
 
   if (GV->getName() == "llvm.global_ctors") {
-    emitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(),
+    emitXXStructorList(GV->getDataLayout(), GV->getInitializer(),
                        /* isCtor */ true);
 
     return true;
   }
 
   if (GV->getName() == "llvm.global_dtors") {
-    emitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(),
+    emitXXStructorList(GV->getDataLayout(), GV->getInitializer(),
                        /* isCtor */ false);
 
     return true;
   }
 
-  report_fatal_error("unknown special variable");
+  report_fatal_error("unknown special variable with appending linkage");
 }
 
 /// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
@@ -2987,8 +2997,7 @@ void AsmPrinter::emitModuleIdents(Module &M) {
     return;
 
   if (const NamedMDNode *NMD = M.getNamedMetadata("llvm.ident")) {
-    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
-      const MDNode *N = NMD->getOperand(i);
+    for (const MDNode *N : NMD->operands()) {
       assert(N->getNumOperands() == 1 &&
              "llvm.ident metadata entry can have only one operand");
       const MDString *S = cast<MDString>(N->getOperand(0));
@@ -3009,8 +3018,7 @@ void AsmPrinter::emitModuleCommandLines(Module &M) {
 
   OutStreamer->pushSection();
   OutStreamer->switchSection(CommandLine);
   OutStreamer->emitZeros(1);
-  for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
-    const MDNode *N = NMD->getOperand(i);
+  for (const MDNode *N : NMD->operands()) {
     assert(N->getNumOperands() == 1 &&
            "llvm.commandline metadata entry can have only one operand");
     const MDString *S = cast<MDString>(N->getOperand(0));
@@ -3100,12 +3108,12 @@ void AsmPrinter::emitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
 void AsmPrinter::emitAlignment(Align Alignment, const GlobalObject *GV,
                                unsigned MaxBytesToEmit) const {
   if (GV)
-    Alignment = getGVAlignment(GV, GV->getParent()->getDataLayout(), Alignment);
+    Alignment = getGVAlignment(GV, GV->getDataLayout(), Alignment);
 
   if (Alignment == Align(1))
     return; // 1-byte aligned: no need to emit alignment.
-  if (getCurrentSection()->getKind().isText()) {
+  if (getCurrentSection()->isText()) {
     const MCSubtargetInfo *STI = nullptr;
     if (this->MF)
       STI = &getSubtargetInfo();
@@ -3129,11 +3137,14 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
     return MCConstantExpr::create(CI->getZExtValue(), Ctx);
 
+  if (const ConstantPtrAuth *CPA = dyn_cast<ConstantPtrAuth>(CV))
+    return lowerConstantPtrAuth(*CPA);
+
   if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
     return MCSymbolRefExpr::create(getSymbol(GV), Ctx);
 
   if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
-    return MCSymbolRefExpr::create(GetBlockAddressSymbol(BA), Ctx);
+    return lowerBlockAddressConstant(*BA);
 
   if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(CV))
     return getObjFileLowering().lowerDSOLocalEquivalent(Equiv, TM);
@@ -3276,7 +3287,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
   OS << "Unsupported expression in static initializer: ";
   CE->printAsOperand(OS, /*PrintType=*/false,
                      !MF ? nullptr : MF->getFunction().getParent());
-  report_fatal_error(Twine(OS.str()));
+  report_fatal_error(Twine(S));
 }
 
 static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C,
@@ -3457,7 +3468,7 @@ static void emitGlobalConstantStruct(const DataLayout &DL,
                                      const Constant *BaseCV, uint64_t Offset,
                                      AsmPrinter::AliasMapTy *AliasList) {
   // Print the fields in successive locations. Pad to align if needed!
-  unsigned Size = DL.getTypeAllocSize(CS->getType());
+  uint64_t Size = DL.getTypeAllocSize(CS->getType());
   const StructLayout *Layout = DL.getStructLayout(CS->getType());
   uint64_t SizeSoFar = 0;
   for (unsigned I = 0, E = CS->getNumOperands(); I != E; ++I) {
@@ -3815,6 +3826,10 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
   return const_cast<AsmPrinter *>(this)->getAddrLabelSymbol(BB);
 }
 
+const MCExpr *AsmPrinter::lowerBlockAddressConstant(const BlockAddress &BA) {
+  return MCSymbolRefExpr::create(GetBlockAddressSymbol(&BA), OutContext);
+}
+
 /// GetCPISymbol - Return the symbol for the specified constant pool entry.
 MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
   if (getSubtargetInfo().getTargetTriple().isWindowsMSVCEnvironment()) {
@@ -3938,9 +3953,9 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
 void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
   // End the previous funclet and start a new one.
   if (MBB.isEHFuncletEntry()) {
-    for (const HandlerInfo &HI : Handlers) {
-      HI.Handler->endFunclet();
-      HI.Handler->beginFunclet(MBB);
+    for (auto &Handler : Handlers) {
+      Handler->endFunclet();
+      Handler->beginFunclet(MBB);
     }
   }
 
@@ -4010,17 +4025,23 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
   // With BB sections, each basic block must handle CFI information on its own
   // if it begins a section (Entry block call is handled separately, next to
   // beginFunction).
-  if (MBB.isBeginSection() && !MBB.isEntryBlock())
-    for (const HandlerInfo &HI : Handlers)
-      HI.Handler->beginBasicBlockSection(MBB);
+  if (MBB.isBeginSection() && !MBB.isEntryBlock()) {
+    for (auto &Handler : DebugHandlers)
+      Handler->beginBasicBlockSection(MBB);
+    for (auto &Handler : Handlers)
+      Handler->beginBasicBlockSection(MBB);
+  }
 }
 
 void AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) {
   // Check if CFI information needs to be updated for this MBB with basic block
   // sections.
- if (MBB.isEndSection()) - for (const HandlerInfo &HI : Handlers) - HI.Handler->endBasicBlockSection(MBB); + if (MBB.isEndSection()) { + for (auto &Handler : DebugHandlers) + Handler->endBasicBlockSection(MBB); + for (auto &Handler : Handlers) + Handler->endBasicBlockSection(MBB); + } } void AsmPrinter::emitVisibility(MCSymbol *Sym, unsigned Visibility, @@ -4049,7 +4070,9 @@ bool AsmPrinter::shouldEmitLabelForBasicBlock( // With `-fbasic-block-sections=`, a label is needed for every non-entry block // in the labels mode (option `=labels`) and every section beginning in the // sections mode (`=all` and `=list=`). - if ((MF->hasBBLabels() || MBB.isBeginSection()) && !MBB.isEntryBlock()) + if ((MF->hasBBLabels() || MF->getTarget().Options.BBAddrMap || + MBB.isBeginSection()) && + !MBB.isEntryBlock()) return true; // A label is needed for any block with at least one predecessor (when that // predecessor is not the fallthrough predecessor, or if it is an EH funclet @@ -4145,6 +4168,17 @@ void AsmPrinter::emitStackMaps() { SM.serializeToStackMapSection(); } +void AsmPrinter::addAsmPrinterHandler( + std::unique_ptr<AsmPrinterHandler> Handler) { + Handlers.insert(Handlers.begin(), std::move(Handler)); + NumUserHandlers++; +} + +void AsmPrinter::addDebugHandler(std::unique_ptr<DebugHandlerBase> Handler) { + DebugHandlers.insert(DebugHandlers.begin(), std::move(Handler)); + NumUserDebugHandlers++; +} + /// Pin vtable to this file. AsmPrinterHandler::~AsmPrinterHandler() = default; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index d0ef3e5a1939..6fe8d0e0af99 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -102,9 +102,6 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, std::unique_ptr<MCAsmParser> Parser( createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI, BufNum)); - // Do not use assembler-level information for parsing inline assembly. - OutStreamer->setUseAssemblerInfoForParsing(false); - // We create a new MCInstrInfo here since we might be at the module level // and not have a MachineFunction to initialize the TargetInstrInfo from and // we only need MCInstrInfo for asm parsing. We create one unconditionally @@ -116,12 +113,16 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); - Parser->setAssemblerDialect(Dialect); + + // Respect inlineasm dialect on X86 targets only + if (TM.getTargetTriple().isX86()) { + Parser->setAssemblerDialect(Dialect); + // Enable lexing Masm binary and hex integer literals in intel inline + // assembly. + if (Dialect == InlineAsm::AD_Intel) + Parser->getLexer().setLexMasmIntegers(true); + } Parser->setTargetParser(*TAP); - // Enable lexing Masm binary and hex integer literals in intel inline - // assembly. - if (Dialect == InlineAsm::AD_Intel) - Parser->getLexer().setLexMasmIntegers(true); emitInlineAsmStart(); // Don't implicitly switch to the text section before the asm. 
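[Editor's note: a hedged illustration, not part of the commit. The hunk above makes the integrated assembler honor the inline-asm dialect only on X86, and turns on MASM-style integer lexing for the Intel dialect. Assuming a clang that supports MS-style asm blocks via -fasm-blocks on an x86 target, this is the kind of source the Intel-dialect path now parses; the function name is hypothetical.]

// Illustrative only: MASM-style hex literal inside an MS __asm block.
// Assumed build: clang -fasm-blocks on an x86/x86-64 target.
int masm_hex_literal() {
  int r;
  __asm {
    mov eax, 0ffh   // MASM notation for 0xff, lexed via setLexMasmIntegers
    mov r, eax      // MS inline asm can reference the local variable r
  }
  return r;
}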
@@ -314,7 +315,7 @@ static void EmitInlineAsmStr(const char *AsmStr, const MachineInstr *MI, std::string msg; raw_string_ostream Msg(msg); Msg << "invalid operand in inline asm: '" << AsmStr << "'"; - MMI->getModule()->getContext().emitError(LocCookie, Msg.str()); + MMI->getModule()->getContext().emitError(LocCookie, msg); } } break; @@ -414,7 +415,7 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { } } - emitInlineAsm(OS.str(), getSubtargetInfo(), TM.Options.MCOptions, LocMD, + emitInlineAsm(StringData, getSubtargetInfo(), TM.Options.MCOptions, LocMD, MI->getInlineAsmDialect()); // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index 4c03bf79d04d..7a138a0332b6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -220,7 +220,7 @@ private: // DIGlobalVariableExpression referencing the DIGlobalVariable. DenseMap<const DIGlobalVariable *, uint64_t> CVGlobalVariableOffsets; - // Map used to seperate variables according to the lexical scope they belong + // Map used to separate variables according to the lexical scope they belong // in. This is populated by recordLocalVariable() before // collectLexicalBlocks() separates the variables between the FunctionInfo // and LexicalBlocks. @@ -517,8 +517,6 @@ public: void beginModule(Module *M) override; - void setSymbolSize(const MCSymbol *, uint64_t) override {} - /// Emit the COFF section that holds the line table information. void endModule() override; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 619155cafe92..4bbf66206bfb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -53,8 +53,8 @@ void DIEAbbrev::Profile(FoldingSetNodeID &ID) const { ID.AddInteger(unsigned(Children)); // For each attribute description. - for (unsigned i = 0, N = Data.size(); i < N; ++i) - Data[i].Profile(ID); + for (const DIEAbbrevData &D : Data) + D.Profile(ID); } /// Emit - Print the abbreviation using the specified asm printer. @@ -67,9 +67,7 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const { AP->emitULEB128((unsigned)Children, dwarf::ChildrenString(Children).data()); // For each attribute description. - for (unsigned i = 0, N = Data.size(); i < N; ++i) { - const DIEAbbrevData &AttrData = Data[i]; - + for (const DIEAbbrevData &AttrData : Data) { // Emit attribute type. 
AP->emitULEB128(AttrData.getAttribute(), dwarf::AttributeString(AttrData.getAttribute()).data()); @@ -109,14 +107,12 @@ void DIEAbbrev::print(raw_ostream &O) const { << dwarf::ChildrenString(Children) << '\n'; - for (unsigned i = 0, N = Data.size(); i < N; ++i) { - O << " " - << dwarf::AttributeString(Data[i].getAttribute()) - << " " - << dwarf::FormEncodingString(Data[i].getForm()); + for (const DIEAbbrevData &D : Data) { + O << " " << dwarf::AttributeString(D.getAttribute()) << " " + << dwarf::FormEncodingString(D.getForm()); - if (Data[i].getForm() == dwarf::DW_FORM_implicit_const) - O << " " << Data[i].getValue(); + if (D.getForm() == dwarf::DW_FORM_implicit_const) + O << " " << D.getValue(); O << '\n'; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index eb2d992c7e75..6c70c47de882 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/CommandLine.h" @@ -99,6 +100,8 @@ DbgVariableLocation::extractFromMachineInstruction( DebugHandlerBase::DebugHandlerBase(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {} +DebugHandlerBase::~DebugHandlerBase() = default; + void DebugHandlerBase::beginModule(Module *M) { if (M->debug_compile_units().empty()) Asm = nullptr; @@ -154,7 +157,8 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) { if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_atomic_type && - Tag != dwarf::DW_TAG_immutable_type) + Tag != dwarf::DW_TAG_immutable_type && + Tag != dwarf::DW_TAG_template_alias) return DDTy->getSizeInBits(); DIType *BaseType = DDTy->getBaseType(); @@ -210,7 +214,8 @@ bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) { assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type || T == dwarf::DW_TAG_volatile_type || T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type || - T == dwarf::DW_TAG_immutable_type); + T == dwarf::DW_TAG_immutable_type || + T == dwarf::DW_TAG_template_alias); assert(DTy->getBaseType() && "Expected valid base type"); return isUnsignedDIType(DTy->getBaseType()); } @@ -224,12 +229,15 @@ bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) { Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || Encoding == dwarf::DW_ATE_complex_float || + Encoding == dwarf::DW_ATE_signed_fixed || + Encoding == dwarf::DW_ATE_unsigned_fixed || (Ty->getTag() == dwarf::DW_TAG_unspecified_type && Ty->getName() == "decltype(nullptr)")) && "Unsupported encoding"); return Encoding == dwarf::DW_ATE_unsigned || Encoding == dwarf::DW_ATE_unsigned_char || Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || + Encoding == llvm::dwarf::DW_ATE_unsigned_fixed || Ty->getTag() == dwarf::DW_TAG_unspecified_type; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h index 726aba18bb80..069766ccddc2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ 
b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -238,10 +238,10 @@ public: if (Values.size() == 1) return; llvm::sort(Values); - Values.erase(std::unique(Values.begin(), Values.end(), - [](const DbgValueLoc &A, const DbgValueLoc &B) { - return A.getExpression() == B.getExpression(); - }), + Values.erase(llvm::unique(Values, + [](const DbgValueLoc &A, const DbgValueLoc &B) { + return A.getExpression() == B.getExpression(); + }), Values.end()); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 10c844ddb14a..087ee02a7f2b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -15,6 +15,7 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" @@ -89,7 +90,7 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { shouldEmitLSDA = shouldEmitPersonality && LSDAEncoding != dwarf::DW_EH_PE_omit; - const MCAsmInfo &MAI = *MF->getMMI().getContext().getAsmInfo(); + const MCAsmInfo &MAI = *MF->getContext().getAsmInfo(); if (MAI.getExceptionHandlingType() != ExceptionHandling::None) shouldEmitCFI = MAI.usesCFIForEH() && (shouldEmitPersonality || shouldEmitMoves); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 14f2a363f9be..c1e8355353cf 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -32,6 +32,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolWasm.h" #include "llvm/MC/MachineLocation.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -42,6 +43,20 @@ using namespace llvm; +/// Query value using AddLinkageNamesToDeclCallOriginsForTuning. +cl::opt<cl::boolOrDefault> AddLinkageNamesToDeclCallOrigins( + "add-linkage-names-to-declaration-call-origins", cl::Hidden, + cl::desc("Add DW_AT_linkage_name to function declaration DIEs " + "referenced by DW_AT_call_origin attributes. Enabled by default " + "for -gsce debugger tuning.")); + +static bool AddLinkageNamesToDeclCallOriginsForTuning(const DwarfDebug *DD) { + bool EnabledByDefault = DD->tuneForSCE(); + if (EnabledByDefault) + return AddLinkageNamesToDeclCallOrigins != cl::boolOrDefault::BOU_FALSE; + return AddLinkageNamesToDeclCallOrigins == cl::boolOrDefault::BOU_TRUE; +} + static dwarf::Tag GetCompileUnitType(UnitKind Kind, DwarfDebug *DW) { // According to DWARF Debugging Information Format Version 5, @@ -669,7 +684,7 @@ void DwarfCompileUnit::attachRangesOrLowHighPC( // the order of blocks will be frozen beyond this point. do { if (MBB->sameSection(EndMBB) || MBB->isEndSection()) { - auto MBBSectionRange = Asm->MBBSectionRanges[MBB->getSectionIDNum()]; + auto MBBSectionRange = Asm->MBBSectionRanges[MBB->getSectionID()]; List.push_back( {MBB->sameSection(BeginMBB) ? 
BeginLabel : MBBSectionRange.BeginLabel, @@ -1260,6 +1275,12 @@ DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE, } else { DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP); assert(CalleeDIE && "Could not create DIE for call site entry origin"); + if (AddLinkageNamesToDeclCallOriginsForTuning(DD) && + !CalleeSP->isDefinition() && + !CalleeDIE->findAttribute(dwarf::DW_AT_linkage_name)) { + addLinkageName(*CalleeDIE, CalleeSP->getLinkageName()); + } + addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin), *CalleeDIE); } @@ -1518,8 +1539,8 @@ void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name, } /// Add a new global type to the unit. -void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die, - const DIScope *Context) { +void DwarfCompileUnit::addGlobalTypeImpl(const DIType *Ty, const DIE &Die, + const DIScope *Context) { if (!hasDwarfPubSections()) return; std::string FullName = getParentContextString(Context) + Ty->getName().str(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index dc772bb459c9..76584b3eb8e7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -335,8 +335,8 @@ public: void addGlobalNameForTypeUnit(StringRef Name, const DIScope *Context); /// Add a new global type to the compile unit. - void addGlobalType(const DIType *Ty, const DIE &Die, - const DIScope *Context) override; + void addGlobalTypeImpl(const DIType *Ty, const DIE &Die, + const DIScope *Context) override; /// Add a new global type present in a type unit to this compile unit. void addGlobalTypeUnitType(const DIType *Ty, const DIScope *Context); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 6b5ad62e083e..f88653146cc6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -798,10 +798,10 @@ static void collectCallSiteParameters(const MachineInstr *CallMI, ParamSet &Params) { const MachineFunction *MF = CallMI->getMF(); const auto &CalleesMap = MF->getCallSitesInfo(); - auto CallFwdRegsInfo = CalleesMap.find(CallMI); + auto CSInfo = CalleesMap.find(CallMI); // There is no information for the call instruction. - if (CallFwdRegsInfo == CalleesMap.end()) + if (CSInfo == CalleesMap.end()) return; const MachineBasicBlock *MBB = CallMI->getParent(); @@ -815,7 +815,7 @@ static void collectCallSiteParameters(const MachineInstr *CallMI, DIExpression::get(MF->getFunction().getContext(), {}); // Add all the forwarding registers into the ForwardedRegWorklist. 
- for (const auto &ArgReg : CallFwdRegsInfo->second) { + for (const auto &ArgReg : CSInfo->second.ArgRegPairs) { bool InsertedReg = ForwardedRegWorklist.insert({ArgReg.Reg, {{ArgReg.Reg, EmptyExpr}}}) .second; @@ -1130,11 +1130,11 @@ sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) { return !!FragmentB; return FragmentA->OffsetInBits < FragmentB->OffsetInBits; }); - GVEs.erase(std::unique(GVEs.begin(), GVEs.end(), - [](DwarfCompileUnit::GlobalExpr A, - DwarfCompileUnit::GlobalExpr B) { - return A.Expr == B.Expr; - }), + GVEs.erase(llvm::unique(GVEs, + [](DwarfCompileUnit::GlobalExpr A, + DwarfCompileUnit::GlobalExpr B) { + return A.Expr == B.Expr; + }), GVEs.end()); return GVEs; } @@ -1713,7 +1713,7 @@ bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, const MCSymbol *EndLabel; if (std::next(EI) == Entries.end()) { const MachineBasicBlock &EndMBB = Asm->MF->back(); - EndLabel = Asm->MBBSectionRanges[EndMBB.getSectionIDNum()].EndLabel; + EndLabel = Asm->MBBSectionRanges[EndMBB.getSectionID()].EndLabel; if (EI->isClobber()) EndMI = EI->getInstr(); } @@ -2064,7 +2064,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { bool PrevInstInSameSection = (!PrevInstBB || - PrevInstBB->getSectionIDNum() == MI->getParent()->getSectionIDNum()); + PrevInstBB->getSectionID() == MI->getParent()->getSectionID()); if (DL == PrevInstLoc && PrevInstInSameSection) { // If we have an ongoing unspecified location, nothing to do here. if (!DL) @@ -2483,6 +2483,7 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU, case dwarf::DW_TAG_typedef: case dwarf::DW_TAG_base_type: case dwarf::DW_TAG_subrange_type: + case dwarf::DW_TAG_template_alias: return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE, dwarf::GIEL_STATIC); case dwarf::DW_TAG_namespace: return dwarf::GIEK_TYPE; @@ -2989,6 +2990,9 @@ struct ArangeSpan { // Emit a debug aranges section, containing a CU lookup for any // address we can tie back to a CU. void DwarfDebug::emitDebugARanges() { + if (ArangeLabels.empty()) + return; + // Provides a unique id per text section. MapVector<MCSection *, SmallVector<SymbolCU, 8>> SectionMap; @@ -2997,8 +3001,7 @@ void DwarfDebug::emitDebugARanges() { if (SCU.Sym->isInSection()) { // Make a note of this symbol and it's section. MCSection *Section = &SCU.Sym->getSection(); - if (!Section->getKind().isMetadata()) - SectionMap[Section].push_back(SCU); + SectionMap[Section].push_back(SCU); } else { // Some symbols (e.g. common/bss on mach-o) can have no section but still // appear in the output. This sucks as we rely on sections to build @@ -3012,8 +3015,7 @@ void DwarfDebug::emitDebugARanges() { for (auto &I : SectionMap) { MCSection *Section = I.first; SmallVector<SymbolCU, 8> &List = I.second; - if (List.size() < 1) - continue; + assert(!List.empty()); // If we have no section (e.g. common), just write out // individual spans for each symbol. @@ -3028,20 +3030,6 @@ void DwarfDebug::emitDebugARanges() { continue; } - // Sort the symbols by offset within the section. - llvm::stable_sort(List, [&](const SymbolCU &A, const SymbolCU &B) { - unsigned IA = A.Sym ? Asm->OutStreamer->getSymbolOrder(A.Sym) : 0; - unsigned IB = B.Sym ? Asm->OutStreamer->getSymbolOrder(B.Sym) : 0; - - // Symbols with no order assigned should be placed at the end. - // (e.g. section end labels) - if (IA == 0) - return false; - if (IB == 0) - return true; - return IA < IB; - }); - // Insert a final terminator. 
List.push_back(SymbolCU(nullptr, Asm->OutStreamer->endSection(Section))); @@ -3563,7 +3551,8 @@ void DwarfDebug::addAccelNameImpl( const DwarfUnit &Unit, const DICompileUnit::DebugNameTableKind NameTableKind, AccelTable<DataT> &AppleAccel, StringRef Name, const DIE &Die) { - if (getAccelTableKind() == AccelTableKind::None || Name.empty()) + if (getAccelTableKind() == AccelTableKind::None || + Unit.getUnitDie().getTag() == dwarf::DW_TAG_skeleton_unit || Name.empty()) return; if (getAccelTableKind() != AccelTableKind::Apple && @@ -3590,7 +3579,8 @@ void DwarfDebug::addAccelNameImpl( "Kind is TU but CU is being processed."); // The type unit can be discarded, so need to add references to final // acceleration table once we know it's complete and we emit it. - Current.addName(Ref, Die, Unit.getUniqueID()); + Current.addName(Ref, Die, Unit.getUniqueID(), + Unit.getUnitDie().getTag() == dwarf::DW_TAG_type_unit); break; } case AccelTableKind::Default: diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index a74d43897d45..9d6e1bb367bc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/ErrorHandling.h" #include <algorithm> @@ -546,6 +547,41 @@ bool DwarfExpression::addExpression( LocationKind = Unknown; return true; } + case dwarf::DW_OP_LLVM_extract_bits_sext: + case dwarf::DW_OP_LLVM_extract_bits_zext: { + unsigned SizeInBits = Op->getArg(1); + unsigned BitOffset = Op->getArg(0); + + // If we have a memory location then dereference to get the value, though + // we have to make sure we don't dereference any bytes past the end of the + // object. + if (isMemoryLocation()) { + emitOp(dwarf::DW_OP_deref_size); + emitUnsigned(alignTo(BitOffset + SizeInBits, 8) / 8); + } + + // Extract the bits by a shift left (to shift out the bits after what we + // want to extract) followed by shift right (to shift the bits to position + // 0 and also sign/zero extend). These operations are done in the DWARF + // "generic type" whose size is the size of a pointer. + unsigned PtrSizeInBytes = CU.getAsmPrinter()->MAI->getCodePointerSize(); + unsigned LeftShift = PtrSizeInBytes * 8 - (SizeInBits + BitOffset); + unsigned RightShift = LeftShift + BitOffset; + if (LeftShift) { + emitOp(dwarf::DW_OP_constu); + emitUnsigned(LeftShift); + emitOp(dwarf::DW_OP_shl); + } + emitOp(dwarf::DW_OP_constu); + emitUnsigned(RightShift); + emitOp(OpNum == dwarf::DW_OP_LLVM_extract_bits_sext ? dwarf::DW_OP_shra + : dwarf::DW_OP_shr); + + // The value is now at the top of the stack, so set the location to + // implicit so that we get a stack_value at the end. 
+ LocationKind = Implicit; + break; + } case dwarf::DW_OP_plus_uconst: assert(!isRegisterLocation()); emitOp(dwarf::DW_OP_plus_uconst); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index 667a9efc6f6c..4daa78b15b8e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -31,67 +31,6 @@ class DIELoc; class TargetRegisterInfo; class MachineLocation; -/// Holds a DIExpression and keeps track of how many operands have been consumed -/// so far. -class DIExpressionCursor { - DIExpression::expr_op_iterator Start, End; - -public: - DIExpressionCursor(const DIExpression *Expr) { - if (!Expr) { - assert(Start == End); - return; - } - Start = Expr->expr_op_begin(); - End = Expr->expr_op_end(); - } - - DIExpressionCursor(ArrayRef<uint64_t> Expr) - : Start(Expr.begin()), End(Expr.end()) {} - - DIExpressionCursor(const DIExpressionCursor &) = default; - - /// Consume one operation. - std::optional<DIExpression::ExprOperand> take() { - if (Start == End) - return std::nullopt; - return *(Start++); - } - - /// Consume N operations. - void consume(unsigned N) { std::advance(Start, N); } - - /// Return the current operation. - std::optional<DIExpression::ExprOperand> peek() const { - if (Start == End) - return std::nullopt; - return *(Start); - } - - /// Return the next operation. - std::optional<DIExpression::ExprOperand> peekNext() const { - if (Start == End) - return std::nullopt; - - auto Next = Start.getNext(); - if (Next == End) - return std::nullopt; - - return *Next; - } - - /// Determine whether there are any operations left in this expression. - operator bool() const { return Start != End; } - - DIExpression::expr_op_iterator begin() const { return Start; } - DIExpression::expr_op_iterator end() const { return End; } - - /// Retrieve the fragment information, if any. - std::optional<DIExpression::FragmentInfo> getFragmentInfo() const { - return DIExpression::getFragmentInfo(Start, End); - } -}; - /// Base class containing the logic for constructing DWARF expressions /// independently of whether they are emitted into a DIE or into a .debug_loc /// entry. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index d462859e4894..e76b0fe2081c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -30,6 +30,7 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include <cassert> #include <cstdint> +#include <limits> #include <string> #include <utility> @@ -577,28 +578,33 @@ DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE, // Create new type. 
DIE &TyDIE = createAndAddDIE(Ty->getTag(), ContextDIE, Ty); - updateAcceleratorTables(Context, Ty, TyDIE); + auto construct = [&](const auto *Ty) { + updateAcceleratorTables(Context, Ty, TyDIE); + constructTypeDIE(TyDIE, Ty); + }; - if (auto *BT = dyn_cast<DIBasicType>(Ty)) - constructTypeDIE(TyDIE, BT); - else if (auto *ST = dyn_cast<DIStringType>(Ty)) - constructTypeDIE(TyDIE, ST); - else if (auto *STy = dyn_cast<DISubroutineType>(Ty)) - constructTypeDIE(TyDIE, STy); - else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) { + if (auto *CTy = dyn_cast<DICompositeType>(Ty)) { if (DD->generateTypeUnits() && !Ty->isForwardDecl() && (Ty->getRawName() || CTy->getRawIdentifier())) { // Skip updating the accelerator tables since this is not the full type. - if (MDString *TypeId = CTy->getRawIdentifier()) + if (MDString *TypeId = CTy->getRawIdentifier()) { + addGlobalType(Ty, TyDIE, Context); DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy); - else + } else { + updateAcceleratorTables(Context, Ty, TyDIE); finishNonUnitTypeDIE(TyDIE, CTy); + } return &TyDIE; } - constructTypeDIE(TyDIE, CTy); - } else { - constructTypeDIE(TyDIE, cast<DIDerivedType>(Ty)); - } + construct(CTy); + } else if (auto *BT = dyn_cast<DIBasicType>(Ty)) + construct(BT); + else if (auto *ST = dyn_cast<DIStringType>(Ty)) + construct(ST); + else if (auto *STy = dyn_cast<DISubroutineType>(Ty)) + construct(STy); + else + construct(cast<DIDerivedType>(Ty)); return &TyDIE; } @@ -632,21 +638,31 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { void DwarfUnit::updateAcceleratorTables(const DIScope *Context, const DIType *Ty, const DIE &TyDIE) { - if (!Ty->getName().empty() && !Ty->isForwardDecl()) { - bool IsImplementation = false; - if (auto *CT = dyn_cast<DICompositeType>(Ty)) { - // A runtime language of 0 actually means C/C++ and that any - // non-negative value is some version of Objective-C/C++. - IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete(); - } - unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0; - DD->addAccelType(*this, CUNode->getNameTableKind(), Ty->getName(), TyDIE, - Flags); + if (Ty->getName().empty()) + return; + if (Ty->isForwardDecl()) + return; - if (!Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) || - isa<DINamespace>(Context) || isa<DICommonBlock>(Context)) - addGlobalType(Ty, TyDIE, Context); + // add temporary record for this type to be added later + + bool IsImplementation = false; + if (auto *CT = dyn_cast<DICompositeType>(Ty)) { + // A runtime language of 0 actually means C/C++ and that any + // non-negative value is some version of Objective-C/C++. + IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete(); } + unsigned Flags = IsImplementation ? 
dwarf::DW_FLAG_type_implementation : 0; + DD->addAccelType(*this, CUNode->getNameTableKind(), Ty->getName(), TyDIE, + Flags); + + addGlobalType(Ty, TyDIE, Context); +} + +void DwarfUnit::addGlobalType(const DIType *Ty, const DIE &TyDIE, + const DIScope *Context) { + if (!Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) || + isa<DINamespace>(Context) || isa<DICommonBlock>(Context)) + addGlobalTypeImpl(Ty, TyDIE, Context); } void DwarfUnit::addType(DIE &Entity, const DIType *Ty, @@ -803,6 +819,23 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) { if (DTy->getDWARFAddressSpace()) addUInt(Buffer, dwarf::DW_AT_address_class, dwarf::DW_FORM_data4, *DTy->getDWARFAddressSpace()); + + // Add template alias template parameters. + if (Tag == dwarf::DW_TAG_template_alias) + addTemplateParams(Buffer, DTy->getTemplateParams()); + + if (auto PtrAuthData = DTy->getPtrAuthData()) { + addUInt(Buffer, dwarf::DW_AT_LLVM_ptrauth_key, dwarf::DW_FORM_data1, + PtrAuthData->key()); + if (PtrAuthData->isAddressDiscriminated()) + addFlag(Buffer, dwarf::DW_AT_LLVM_ptrauth_address_discriminated); + addUInt(Buffer, dwarf::DW_AT_LLVM_ptrauth_extra_discriminator, + dwarf::DW_FORM_data2, PtrAuthData->extraDiscriminator()); + if (PtrAuthData->isaPointer()) + addFlag(Buffer, dwarf::DW_AT_LLVM_ptrauth_isa_pointer); + if (PtrAuthData->authenticatesNullValues()) + addFlag(Buffer, dwarf::DW_AT_LLVM_ptrauth_authenticates_null_values); + } } void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) { @@ -1552,7 +1585,7 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) { const DIType *DTy = CTy->getBaseType(); bool IsUnsigned = DTy && DD->isUnsignedDIType(DTy); if (DTy) { - if (DD->getDwarfVersion() >= 3) + if (!Asm->TM.Options.DebugStrictDwarf || DD->getDwarfVersion() >= 3) addType(Buffer, DTy); if (DD->getDwarfVersion() >= 4 && (CTy->getFlags() & DINode::FlagEnumClass)) addFlag(Buffer, dwarf::DW_AT_enum_class); @@ -1632,7 +1665,9 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { addUInt(MemberDie, dwarf::DW_AT_byte_size, std::nullopt, FieldSize / 8); addUInt(MemberDie, dwarf::DW_AT_bit_size, std::nullopt, Size); - uint64_t Offset = DT->getOffsetInBits(); + assert(DT->getOffsetInBits() <= + (uint64_t)std::numeric_limits<int64_t>::max()); + int64_t Offset = DT->getOffsetInBits(); // We can't use DT->getAlignInBits() here: AlignInBits for member type // is non-zero if and only if alignment was forced (e.g. _Alignas()), // which can't be done with bitfields. Thus we use FieldSize here. 
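[Editor's note: the hunks on either side of this point change how constructMemberDIE emits the classic DW_AT_bit_offset: the offset is now computed as a signed value and emitted as DW_FORM_sdata when negative. A minimal stand-alone sketch of that computation, using local stand-in names rather than the DwarfUnit API, shows why the value can go negative on little-endian targets:]

#include <cassert>
#include <cstdint>
#include <iostream>

// The pre-DWARF-4 bit offset counts from the MSB of the anonymous storage
// unit, so on little-endian targets it is FieldSize - (Offset + Size) and
// goes negative when the bitfield extends past the chosen storage unit.
int64_t classicBitOffset(uint64_t OffsetInBits, uint64_t SizeInBits,
                         uint64_t FieldSizeInBits, bool LittleEndian) {
  // Mirrors the new assert that the raw offset fits in int64_t.
  assert(OffsetInBits <= (uint64_t)INT64_MAX && "Offset must fit in int64_t");
  int64_t Offset = (int64_t)OffsetInBits;
  if (LittleEndian)
    Offset = (int64_t)FieldSizeInBits - (Offset + (int64_t)SizeInBits);
  return Offset; // a negative result is what the DW_FORM_sdata path handles
}

int main() {
  // A 10-bit field starting at bit 30 of a 32-bit storage unit:
  // 32 - (30 + 10) = -8, previously mis-emitted as a huge unsigned value.
  std::cout << classicBitOffset(30, 10, 32, /*LittleEndian=*/true) << '\n';
}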
@@ -1652,7 +1687,12 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { if (Asm->getDataLayout().isLittleEndian()) Offset = FieldSize - (Offset + Size); - addUInt(MemberDie, dwarf::DW_AT_bit_offset, std::nullopt, Offset); + if (Offset < 0) + addSInt(MemberDie, dwarf::DW_AT_bit_offset, dwarf::DW_FORM_sdata, + Offset); + else + addUInt(MemberDie, dwarf::DW_AT_bit_offset, std::nullopt, + (uint64_t)Offset); OffsetInBytes = FieldOffset >> 3; } else { addUInt(MemberDie, dwarf::DW_AT_data_bit_offset, std::nullopt, Offset); @@ -1819,8 +1859,8 @@ void DwarfTypeUnit::addGlobalName(StringRef Name, const DIE &Die, getCU().addGlobalNameForTypeUnit(Name, Context); } -void DwarfTypeUnit::addGlobalType(const DIType *Ty, const DIE &Die, - const DIScope *Context) { +void DwarfTypeUnit::addGlobalTypeImpl(const DIType *Ty, const DIE &Die, + const DIScope *Context) { getCU().addGlobalTypeUnitType(Ty, Context); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 18f50f86ec87..02256546b6b8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -128,8 +128,10 @@ public: const DIScope *Context) = 0; /// Add a new global type to the compile unit. - virtual void addGlobalType(const DIType *Ty, const DIE &Die, - const DIScope *Context) = 0; + virtual void addGlobalTypeImpl(const DIType *Ty, const DIE &Die, + const DIScope *Context) = 0; + + void addGlobalType(const DIType *Ty, const DIE &Die, const DIScope *Context); /// Returns the DIE map slot for the specified debug variable. /// @@ -397,8 +399,8 @@ public: } void addGlobalName(StringRef Name, const DIE &Die, const DIScope *Context) override; - void addGlobalType(const DIType *Ty, const DIE &Die, - const DIScope *Context) override; + void addGlobalTypeImpl(const DIType *Ty, const DIE &Die, + const DIScope *Context) override; DwarfCompileUnit &getCU() override { return CU; } }; } // end llvm namespace diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 32239535e4d0..1c603f5988ad 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -253,8 +253,8 @@ void EHStreamer::computeCallSiteTable( // We start a call-site range upon function entry and at the beginning of // every basic block section. CallSiteRanges.push_back( - {Asm->MBBSectionRanges[MBB.getSectionIDNum()].BeginLabel, - Asm->MBBSectionRanges[MBB.getSectionIDNum()].EndLabel, + {Asm->MBBSectionRanges[MBB.getSectionID()].BeginLabel, + Asm->MBBSectionRanges[MBB.getSectionID()].EndLabel, Asm->getMBBExceptionSym(MBB), CallSites.size()}); PreviousIsInvoke = false; SawPotentiallyThrowing = false; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h index 234e62506a56..705a61fb827f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -150,11 +150,6 @@ public: EHStreamer(AsmPrinter *A); ~EHStreamer() override; - // Unused. 
- void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} - void beginInstruction(const MachineInstr *MI) override {} - void endInstruction() override {} - /// Return `true' if this is a call to a function marked `nounwind'. Return /// `false' otherwise. static bool callToNoUnwindFunction(const MachineInstr *MI); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp index 59c3fa15885e..5dda38383a65 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp @@ -20,8 +20,6 @@ using namespace llvm; -PseudoProbeHandler::~PseudoProbeHandler() = default; - void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attr, const DILocation *DebugLoc) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h index a92a89084cad..35461e53fbf1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h @@ -21,26 +21,17 @@ namespace llvm { class AsmPrinter; class DILocation; -class PseudoProbeHandler : public AsmPrinterHandler { +class PseudoProbeHandler { // Target of pseudo probe emission. AsmPrinter *Asm; // Name to GUID map, used as caching/memoization for speed. DenseMap<StringRef, uint64_t> NameGuidMap; public: - PseudoProbeHandler(AsmPrinter *A) : Asm(A){}; - ~PseudoProbeHandler() override; + PseudoProbeHandler(AsmPrinter *A) : Asm(A) {}; void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attr, const DILocation *DebugLoc); - - // Unused. - void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} - void endModule() override {} - void beginFunction(const MachineFunction *MF) override {} - void endFunction(const MachineFunction *MF) override {} - void beginInstruction(const MachineInstr *MI) override {} - void endInstruction() override {} }; } // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp index cd18703b359e..1a1e6f0117e2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCStreamer.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h index 0e472af52c8f..f94acc912483 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h @@ -30,8 +30,6 @@ public: WinCFGuard(AsmPrinter *A); ~WinCFGuard() override; - void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} - /// Emit the Control Flow Guard function ID table. void endModule() override; @@ -44,12 +42,6 @@ public: /// Please note that some AsmPrinter implementations may not call /// beginFunction at all. void endFunction(const MachineFunction *MF) override; - - /// Process beginning of an instruction. 
- void beginInstruction(const MachineInstr *MI) override {} - - /// Process end of an instruction. - void endInstruction() override {} }; } // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp index f8ce8f98864e..146276b4fd0b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp @@ -15,7 +15,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/UniqueVector.h" -#include "llvm/Analysis/Interval.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" @@ -24,6 +23,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PrintPasses.h" #include "llvm/InitializePasses.h" @@ -82,7 +82,7 @@ template <> struct llvm::DenseMapInfo<VariableID> { } }; -using VarLocInsertPt = PointerUnion<const Instruction *, const DPValue *>; +using VarLocInsertPt = PointerUnion<const Instruction *, const DbgRecord *>; namespace std { template <> struct hash<VarLocInsertPt> { @@ -215,22 +215,24 @@ void FunctionVarLocs::init(FunctionVarLocsBuilder &Builder) { // Insert a contiguous block of VarLocInfos for each instruction, mapping it // to the start and end position in the vector with VarLocsBeforeInst. This - // block includes VarLocs for any DPValues attached to that instruction. + // block includes VarLocs for any DbgVariableRecords attached to that + // instruction. for (auto &P : Builder.VarLocsBeforeInst) { - // Process VarLocs attached to a DPValue alongside their marker Instruction. - if (isa<const DPValue *>(P.first)) + // Process VarLocs attached to a DbgRecord alongside their marker + // Instruction. + if (isa<const DbgRecord *>(P.first)) continue; const Instruction *I = cast<const Instruction *>(P.first); unsigned BlockStart = VarLocRecords.size(); - // Any VarLocInfos attached to a DPValue should now be remapped to their - // marker Instruction, in order of DPValue appearance and prior to any + // Any VarLocInfos attached to a DbgRecord should now be remapped to their + // marker Instruction, in order of DbgRecord appearance and prior to any // VarLocInfos attached directly to that instruction. - for (const DPValue &DPV : I->getDbgValueRange()) { - // Even though DPV defines a variable location, VarLocsBeforeInst can + for (const DbgVariableRecord &DVR : filterDbgVars(I->getDbgRecordRange())) { + // Even though DVR defines a variable location, VarLocsBeforeInst can // still be empty if that VarLoc was redundant. - if (!Builder.VarLocsBeforeInst.count(&DPV)) + if (!Builder.VarLocsBeforeInst.count(&DVR)) continue; - for (const VarLocInfo &VarLoc : Builder.VarLocsBeforeInst[&DPV]) + for (const VarLocInfo &VarLoc : Builder.VarLocsBeforeInst[&DVR]) VarLocRecords.emplace_back(VarLoc); } for (const VarLocInfo &VarLoc : P.second) @@ -570,11 +572,10 @@ class MemLocFragmentFill { bool FirstMeet = true; // LiveIn locs for BB is the meet of the already-processed preds' LiveOut // locs. - for (auto I = pred_begin(&BB), E = pred_end(&BB); I != E; I++) { + for (const BasicBlock *Pred : predecessors(&BB)) { // Ignore preds that haven't been processed yet. This is essentially the // same as initialising all variables to implicit top value (⊤) which is // the identity value for the meet operation. 
- const BasicBlock *Pred = *I; if (!Visited.count(Pred)) continue; @@ -829,10 +830,10 @@ class MemLocFragmentFill { void process(BasicBlock &BB, VarFragMap &LiveSet) { BBInsertBeforeMap[&BB].clear(); for (auto &I : BB) { - for (auto &DPV : I.getDbgValueRange()) { - if (const auto *Locs = FnVarLocs->getWedge(&DPV)) { + for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) { + if (const auto *Locs = FnVarLocs->getWedge(&DVR)) { for (const VarLocInfo &Loc : *Locs) { - addDef(Loc, &DPV, *I.getParent(), LiveSet); + addDef(Loc, &DVR, *I.getParent(), LiveSet); } } } @@ -890,9 +891,9 @@ public: DenseMap<BasicBlock *, unsigned int> BBToOrder; { // Init OrderToBB and BBToOrder. unsigned int RPONumber = 0; - for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) { - OrderToBB[RPONumber] = *RI; - BBToOrder[*RI] = RPONumber; + for (BasicBlock *BB : RPOT) { + OrderToBB[RPONumber] = BB; + BBToOrder[BB] = RPONumber; Worklist.push(RPONumber); ++RPONumber; } @@ -939,10 +940,10 @@ public: LLVM_DEBUG(dbgs() << BB->getName() << " has new OutLocs, add succs to worklist: [ "); LiveOut[BB] = std::move(LiveSet); - for (auto I = succ_begin(BB), E = succ_end(BB); I != E; I++) { - if (OnPending.insert(*I).second) { - LLVM_DEBUG(dbgs() << I->getName() << " "); - Pending.push(BBToOrder[*I]); + for (BasicBlock *Succ : successors(BB)) { + if (OnPending.insert(Succ).second) { + LLVM_DEBUG(dbgs() << Succ->getName() << " "); + Pending.push(BBToOrder[Succ]); } } LLVM_DEBUG(dbgs() << "]\n"); @@ -1026,7 +1027,7 @@ public: /// i.e. for all values x and y where x != y: /// join(x, x) = x /// join(x, y) = NoneOrPhi - using AssignRecord = PointerUnion<DbgAssignIntrinsic *, DPValue *>; + using AssignRecord = PointerUnion<DbgAssignIntrinsic *, DbgVariableRecord *>; struct Assignment { enum S { Known, NoneOrPhi } Status; /// ID of the assignment. nullptr if Status is not Known. @@ -1053,16 +1054,16 @@ public: else if (isa<DbgAssignIntrinsic *>(Source)) OS << Source.get<DbgAssignIntrinsic *>(); else - OS << Source.get<DPValue *>(); + OS << Source.get<DbgVariableRecord *>(); OS << ")"; } static Assignment make(DIAssignID *ID, DbgAssignIntrinsic *Source) { return Assignment(Known, ID, Source); } - static Assignment make(DIAssignID *ID, DPValue *Source) { + static Assignment make(DIAssignID *ID, DbgVariableRecord *Source) { assert(Source->isDbgAssign() && - "Cannot make an assignment from a non-assign DPValue"); + "Cannot make an assignment from a non-assign DbgVariableRecord"); return Assignment(Known, ID, Source); } static Assignment make(DIAssignID *ID, AssignRecord Source) { @@ -1083,7 +1084,7 @@ public: // If the Status is Known then we expect there to be an assignment ID. assert(Status == NoneOrPhi || ID); } - Assignment(S Status, DIAssignID *ID, DPValue *Source) + Assignment(S Status, DIAssignID *ID, DbgVariableRecord *Source) : Status(Status), ID(ID), Source(Source) { // If the Status is Known then we expect there to be an assignment ID. assert(Status == NoneOrPhi || ID); @@ -1118,10 +1119,10 @@ private: /// Clear the location definitions currently cached for insertion after /p /// After. 
void resetInsertionPoint(Instruction &After); - void resetInsertionPoint(DPValue &After); + void resetInsertionPoint(DbgVariableRecord &After); // emitDbgValue can be called with: - // Source=[AssignRecord|DbgValueInst*|DbgAssignIntrinsic*|DPValue*] + // Source=[AssignRecord|DbgValueInst*|DbgAssignIntrinsic*|DbgVariableRecord*] // Since AssignRecord can be cast to one of the latter two types, and all // other types have a shared interface, we use a template to handle the latter // three types, and an explicit overload for AssignRecord that forwards to @@ -1354,9 +1355,10 @@ private: /// attachment, \p I. void processUntaggedInstruction(Instruction &I, BlockInfo *LiveSet); void processDbgAssign(AssignRecord Assign, BlockInfo *LiveSet); - void processDPValue(DPValue &DPV, BlockInfo *LiveSet); - void processDbgValue(PointerUnion<DbgValueInst *, DPValue *> DbgValueRecord, - BlockInfo *LiveSet); + void processDbgVariableRecord(DbgVariableRecord &DVR, BlockInfo *LiveSet); + void processDbgValue( + PointerUnion<DbgValueInst *, DbgVariableRecord *> DbgValueRecord, + BlockInfo *LiveSet); /// Add an assignment to memory for the variable /p Var. void addMemDef(BlockInfo *LiveSet, VariableID Var, const Assignment &AV); /// Add an assignment to the variable /p Var. @@ -1456,10 +1458,10 @@ static DIAssignID *getIDFromMarker(const DbgAssignIntrinsic &DAI) { return cast<DIAssignID>(DAI.getAssignID()); } -static DIAssignID *getIDFromMarker(const DPValue &DPV) { - assert(DPV.isDbgAssign() && - "Cannot get a DIAssignID from a non-assign DPValue!"); - return DPV.getAssignID(); +static DIAssignID *getIDFromMarker(const DbgVariableRecord &DVR) { + assert(DVR.isDbgAssign() && + "Cannot get a DIAssignID from a non-assign DbgVariableRecord!"); + return DVR.getAssignID(); } /// Return true if \p Var has an assignment in \p M matching \p AV. 
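[Editor's note: the comment above describes how emitDbgValue dispatches over a PointerUnion of two debug-record pointer types: one template covers the shared interface, and an explicit AssignRecord overload peels the union and forwards. A self-contained sketch of that pattern follows, using std::variant and stub types in place of the LLVM classes; every name here is a hypothetical stand-in, not the actual API.]

#include <iostream>
#include <variant>

struct DbgAssignIntrinsicStub {
  const char *name() const { return "dbg.assign intrinsic"; }
};
struct DbgVariableRecordStub {
  const char *name() const { return "DbgVariableRecord"; }
};
using AssignRecord =
    std::variant<DbgAssignIntrinsicStub *, DbgVariableRecordStub *>;

// One template serves every pointee type exposing the shared interface.
template <typename T> void emitDbgValueImpl(T *Source) {
  std::cout << "emitting from " << Source->name() << '\n';
}

// The AssignRecord overload peels the union once, then reuses the template.
void emitDbgValue(AssignRecord Source) {
  if (auto *P = std::get_if<DbgAssignIntrinsicStub *>(&Source))
    return emitDbgValueImpl(*P);
  emitDbgValueImpl(std::get<DbgVariableRecordStub *>(Source));
}

int main() {
  DbgAssignIntrinsicStub Old;
  DbgVariableRecordStub New;
  emitDbgValue(AssignRecord(&Old)); // both forms reach the same template
  emitDbgValue(AssignRecord(&New));
}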
@@ -1492,32 +1494,32 @@ const char *locStr(AssignmentTrackingLowering::LocKind Loc) { } #endif -VarLocInsertPt getNextNode(const DPValue *DPV) { - auto NextIt = ++(DPV->getIterator()); - if (NextIt == DPV->getMarker()->getDbgValueRange().end()) - return DPV->getMarker()->MarkedInstr; +VarLocInsertPt getNextNode(const DbgRecord *DVR) { + auto NextIt = ++(DVR->getIterator()); + if (NextIt == DVR->getMarker()->getDbgRecordRange().end()) + return DVR->getMarker()->MarkedInstr; return &*NextIt; } VarLocInsertPt getNextNode(const Instruction *Inst) { const Instruction *Next = Inst->getNextNode(); - if (!Next->hasDbgValues()) + if (!Next->hasDbgRecords()) return Next; - return &*Next->getDbgValueRange().begin(); + return &*Next->getDbgRecordRange().begin(); } VarLocInsertPt getNextNode(VarLocInsertPt InsertPt) { if (isa<const Instruction *>(InsertPt)) return getNextNode(cast<const Instruction *>(InsertPt)); - return getNextNode(cast<const DPValue *>(InsertPt)); + return getNextNode(cast<const DbgRecord *>(InsertPt)); } DbgAssignIntrinsic *CastToDbgAssign(DbgVariableIntrinsic *DVI) { return cast<DbgAssignIntrinsic>(DVI); } -DPValue *CastToDbgAssign(DPValue *DPV) { - assert(DPV->isDbgAssign() && - "Attempted to cast non-assign DPValue to DPVAssign."); - return DPV; +DbgVariableRecord *CastToDbgAssign(DbgVariableRecord *DVR) { + assert(DVR->isDbgAssign() && + "Attempted to cast non-assign DbgVariableRecord to DVRAssign."); + return DVR; } void AssignmentTrackingLowering::emitDbgValue( @@ -1526,7 +1528,7 @@ void AssignmentTrackingLowering::emitDbgValue( if (isa<DbgAssignIntrinsic *>(Source)) emitDbgValue(Kind, cast<DbgAssignIntrinsic *>(Source), After); else - emitDbgValue(Kind, cast<DPValue *>(Source), After); + emitDbgValue(Kind, cast<DbgVariableRecord *>(Source), After); } template <typename T> void AssignmentTrackingLowering::emitDbgValue( @@ -1649,7 +1651,7 @@ void AssignmentTrackingLowering::processUntaggedInstruction( Ops.push_back(dwarf::DW_OP_deref); DIE = DIExpression::prependOpcodes(DIE, Ops, /*StackValue=*/false, /*EntryValue=*/false); - // Find a suitable insert point, before the next instruction or DPValue + // Find a suitable insert point, before the next instruction or DbgRecord // after I. auto InsertBefore = getNextNode(&I); assert(InsertBefore && "Shouldn't be inserting after a terminator"); @@ -1673,7 +1675,7 @@ void AssignmentTrackingLowering::processUntaggedInstruction( void AssignmentTrackingLowering::processTaggedInstruction( Instruction &I, AssignmentTrackingLowering::BlockInfo *LiveSet) { auto Linked = at::getAssignmentMarkers(&I); - auto LinkedDPAssigns = at::getDPVAssignmentMarkers(&I); + auto LinkedDPAssigns = at::getDVRAssignmentMarkers(&I); // No dbg.assign intrinsics linked. 
// FIXME: All vars that have a stack slot this store modifies that don't have // a dbg.assign linked to it should probably treat this like an untagged @@ -1756,8 +1758,8 @@ void AssignmentTrackingLowering::processTaggedInstruction( }; for (DbgAssignIntrinsic *DAI : Linked) ProcessLinkedAssign(DAI); - for (DPValue *DPV : LinkedDPAssigns) - ProcessLinkedAssign(DPV); + for (DbgVariableRecord *DVR : LinkedDPAssigns) + ProcessLinkedAssign(DVR); } void AssignmentTrackingLowering::processDbgAssign(AssignRecord Assign, @@ -1802,13 +1804,13 @@ void AssignmentTrackingLowering::processDbgAssign(AssignRecord Assign, emitDbgValue(LocKind::Val, DbgAssign, DbgAssign); } }; - if (isa<DPValue *>(Assign)) - return ProcessDbgAssignImpl(cast<DPValue *>(Assign)); + if (isa<DbgVariableRecord *>(Assign)) + return ProcessDbgAssignImpl(cast<DbgVariableRecord *>(Assign)); return ProcessDbgAssignImpl(cast<DbgAssignIntrinsic *>(Assign)); } void AssignmentTrackingLowering::processDbgValue( - PointerUnion<DbgValueInst *, DPValue *> DbgValueRecord, + PointerUnion<DbgValueInst *, DbgVariableRecord *> DbgValueRecord, BlockInfo *LiveSet) { auto ProcessDbgValueImpl = [&](auto *DbgValue) { // Only other tracking variables that are at some point stack homed. @@ -1833,8 +1835,8 @@ void AssignmentTrackingLowering::processDbgValue( setLocKind(LiveSet, Var, LocKind::Val); emitDbgValue(LocKind::Val, DbgValue, DbgValue); }; - if (isa<DPValue *>(DbgValueRecord)) - return ProcessDbgValueImpl(cast<DPValue *>(DbgValueRecord)); + if (isa<DbgVariableRecord *>(DbgValueRecord)) + return ProcessDbgValueImpl(cast<DbgVariableRecord *>(DbgValueRecord)); return ProcessDbgValueImpl(cast<DbgValueInst *>(DbgValueRecord)); } @@ -1859,16 +1861,16 @@ void AssignmentTrackingLowering::processDbgInstruction( else if (auto *DVI = dyn_cast<DbgValueInst>(&I)) processDbgValue(DVI, LiveSet); } -void AssignmentTrackingLowering::processDPValue( - DPValue &DPV, AssignmentTrackingLowering::BlockInfo *LiveSet) { +void AssignmentTrackingLowering::processDbgVariableRecord( + DbgVariableRecord &DVR, AssignmentTrackingLowering::BlockInfo *LiveSet) { // Ignore assignments to zero bits of the variable. - if (hasZeroSizedFragment(DPV)) + if (hasZeroSizedFragment(DVR)) return; - if (DPV.isDbgAssign()) - processDbgAssign(&DPV, LiveSet); - else if (DPV.isDbgValue()) - processDbgValue(&DPV, LiveSet); + if (DVR.isDbgAssign()) + processDbgAssign(&DVR, LiveSet); + else if (DVR.isDbgValue()) + processDbgValue(&DVR, LiveSet); } void AssignmentTrackingLowering::resetInsertionPoint(Instruction &After) { @@ -1878,7 +1880,7 @@ void AssignmentTrackingLowering::resetInsertionPoint(Instruction &After) { return; R->second.clear(); } -void AssignmentTrackingLowering::resetInsertionPoint(DPValue &After) { +void AssignmentTrackingLowering::resetInsertionPoint(DbgVariableRecord &After) { auto *R = InsertBeforeMap.find(getNextNode(&After)); if (R == InsertBeforeMap.end()) return; @@ -1886,21 +1888,21 @@ void AssignmentTrackingLowering::resetInsertionPoint(DPValue &After) { } void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) { - // If the block starts with DPValues, we need to process those DPValues as + // If the block starts with DbgRecords, we need to process those DbgRecords as // their own frame without processing any instructions first. 
- bool ProcessedLeadingDPValues = !BB.begin()->hasDbgValues(); + bool ProcessedLeadingDbgRecords = !BB.begin()->hasDbgRecords(); for (auto II = BB.begin(), EI = BB.end(); II != EI;) { assert(VarsTouchedThisFrame.empty()); // Process the instructions in "frames". A "frame" includes a single // non-debug instruction followed any debug instructions before the // next non-debug instruction. - // Skip the current instruction if it has unprocessed DPValues attached (see - // comment above `ProcessedLeadingDPValues`). - if (ProcessedLeadingDPValues) { + // Skip the current instruction if it has unprocessed DbgRecords attached + // (see comment above `ProcessedLeadingDbgRecords`). + if (ProcessedLeadingDbgRecords) { // II is now either a debug intrinsic, a non-debug instruction with no - // attached DPValues, or a non-debug instruction with attached processed - // DPValues. + // attached DbgRecords, or a non-debug instruction with attached processed + // DbgRecords. // II has not been processed. if (!isa<DbgInfoIntrinsic>(&*II)) { if (II->isTerminator()) @@ -1912,16 +1914,19 @@ void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) { } } // II is now either a debug intrinsic, a non-debug instruction with no - // attached DPValues, or a non-debug instruction with attached unprocessed - // DPValues. - if (II != EI && II->hasDbgValues()) { - for (DPValue &DPV : II->getDbgValueRange()) { - resetInsertionPoint(DPV); - processDPValue(DPV, LiveSet); + // attached DbgRecords, or a non-debug instruction with attached unprocessed + // DbgRecords. + if (II != EI && II->hasDbgRecords()) { + // Skip over non-variable debug records (i.e., labels). They're going to + // be read from IR (possibly re-ordering them within the debug record + // range) rather than from the analysis results. + for (DbgVariableRecord &DVR : filterDbgVars(II->getDbgRecordRange())) { + resetInsertionPoint(DVR); + processDbgVariableRecord(DVR, LiveSet); assert(LiveSet->isValid()); } } - ProcessedLeadingDPValues = true; + ProcessedLeadingDbgRecords = true; while (II != EI) { auto *Dbg = dyn_cast<DbgInfoIntrinsic>(&*II); if (!Dbg) @@ -1931,9 +1936,9 @@ void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) { assert(LiveSet->isValid()); ++II; } - // II is now a non-debug instruction either with no attached DPValues, or - // with attached processed DPValues. II has not been processed, and all - // debug instructions or DPValues in the frame preceding II have been + // II is now a non-debug instruction either with no attached DbgRecords, or + // with attached processed DbgRecords. II has not been processed, and all + // debug instructions or DbgRecords in the frame preceding II have been // processed. // We've processed everything in the "frame". 
Now determine which variables @@ -1996,9 +2001,11 @@ AssignmentTrackingLowering::joinAssignment(const Assignment &A, return A.Source; if (!A.Source || !B.Source) return AssignRecord(); - assert(isa<DPValue *>(A.Source) == isa<DPValue *>(B.Source)); - if (isa<DPValue *>(A.Source) && - cast<DPValue *>(A.Source)->isEquivalentTo(*cast<DPValue *>(B.Source))) + assert(isa<DbgVariableRecord *>(A.Source) == + isa<DbgVariableRecord *>(B.Source)); + if (isa<DbgVariableRecord *>(A.Source) && + cast<DbgVariableRecord *>(A.Source)->isEquivalentTo( + *cast<DbgVariableRecord *>(B.Source))) return A.Source; if (isa<DbgAssignIntrinsic *>(A.Source) && cast<DbgAssignIntrinsic *>(A.Source)->isIdenticalTo( @@ -2119,8 +2126,8 @@ DbgDeclareInst *DynCastToDbgDeclare(DbgVariableIntrinsic *DVI) { return dyn_cast<DbgDeclareInst>(DVI); } -DPValue *DynCastToDbgDeclare(DPValue *DPV) { - return DPV->isDbgDeclare() ? DPV : nullptr; +DbgVariableRecord *DynCastToDbgDeclare(DbgVariableRecord *DVR) { + return DVR->isDbgDeclare() ? DVR : nullptr; } /// Build a map of {Variable x: Variables y} where all variable fragments @@ -2157,7 +2164,7 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares( // We need to add fragments for untagged stores too so that we can correctly // clobber overlapped fragment locations later. SmallVector<DbgDeclareInst *> InstDeclares; - SmallVector<DPValue *> DPDeclares; + SmallVector<DbgVariableRecord *> DPDeclares; auto ProcessDbgRecord = [&](auto *Record, auto &DeclareList) { if (auto *Declare = DynCastToDbgDeclare(Record)) { DeclareList.push_back(Declare); @@ -2172,12 +2179,12 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares( }; for (auto &BB : Fn) { for (auto &I : BB) { - for (auto &DPV : I.getDbgValueRange()) - ProcessDbgRecord(&DPV, DPDeclares); + for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) + ProcessDbgRecord(&DVR, DPDeclares); if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I)) { ProcessDbgRecord(DII, InstDeclares); } else if (auto Info = getUntaggedStoreAssignmentInfo( - I, Fn.getParent()->getDataLayout())) { + I, Fn.getDataLayout())) { // Find markers linked to this alloca. auto HandleDbgAssignForStore = [&](auto *Assign) { std::optional<DIExpression::FragmentInfo> FragInfo; @@ -2185,7 +2192,7 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares( // Skip this assignment if the affected bits are outside of the // variable fragment. 
if (!at::calculateFragmentIntersect( - I.getModule()->getDataLayout(), Info->Base, + I.getDataLayout(), Info->Base, Info->OffsetInBits, Info->SizeInBits, Assign, FragInfo) || (FragInfo && FragInfo->SizeInBits == 0)) return; @@ -2214,8 +2221,8 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares( }; for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(Info->Base)) HandleDbgAssignForStore(DAI); - for (DPValue *DPV : at::getDPVAssignmentMarkers(Info->Base)) - HandleDbgAssignForStore(DPV); + for (DbgVariableRecord *DVR : at::getDVRAssignmentMarkers(Info->Base)) + HandleDbgAssignForStore(DVR); } } } @@ -2265,10 +2272,10 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares( for (auto *DDI : InstDeclares) FnVarLocs->addSingleLocVar(DebugVariable(DDI), DDI->getExpression(), DDI->getDebugLoc(), DDI->getWrappedLocation()); - for (auto *DPV : DPDeclares) - FnVarLocs->addSingleLocVar(DebugVariable(DPV), DPV->getExpression(), - DPV->getDebugLoc(), - RawLocationWrapper(DPV->getRawLocation())); + for (auto *DVR : DPDeclares) + FnVarLocs->addSingleLocVar(DebugVariable(DVR), DVR->getExpression(), + DVR->getDebugLoc(), + RawLocationWrapper(DVR->getRawLocation())); return Map; } @@ -2305,9 +2312,9 @@ bool AssignmentTrackingLowering::run(FunctionVarLocsBuilder *FnVarLocsBuilder) { DenseMap<BasicBlock *, unsigned int> BBToOrder; { // Init OrderToBB and BBToOrder. unsigned int RPONumber = 0; - for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) { - OrderToBB[RPONumber] = *RI; - BBToOrder[*RI] = RPONumber; + for (BasicBlock *BB : RPOT) { + OrderToBB[RPONumber] = BB; + BBToOrder[BB] = RPONumber; Worklist.push(RPONumber); ++RPONumber; } @@ -2352,10 +2359,10 @@ bool AssignmentTrackingLowering::run(FunctionVarLocsBuilder *FnVarLocsBuilder) { LLVM_DEBUG(dbgs() << BB->getName() << " has new OutLocs, add succs to worklist: [ "); LiveOut[BB] = std::move(LiveSet); - for (auto I = succ_begin(BB), E = succ_end(BB); I != E; I++) { - if (OnPending.insert(*I).second) { - LLVM_DEBUG(dbgs() << I->getName() << " "); - Pending.push(BBToOrder[*I]); + for (BasicBlock *Succ : successors(BB)) { + if (OnPending.insert(Succ).second) { + LLVM_DEBUG(dbgs() << Succ->getName() << " "); + Pending.push(BBToOrder[Succ]); } } LLVM_DEBUG(dbgs() << "]\n"); @@ -2462,9 +2469,9 @@ bool AssignmentTrackingLowering::emitPromotedVarLocs( for (auto &BB : Fn) { for (auto &I : BB) { // Skip instructions other than dbg.values and dbg.assigns. - for (DPValue &DPV : I.getDbgValueRange()) - if (DPV.isDbgValue() || DPV.isDbgAssign()) - TranslateDbgRecord(&DPV); + for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) + if (DVR.isDbgValue() || DVR.isDbgAssign()) + TranslateDbgRecord(&DVR); auto *DVI = dyn_cast<DbgValueInst>(&I); if (DVI) TranslateDbgRecord(DVI); @@ -2486,7 +2493,7 @@ removeRedundantDbgLocsUsingBackwardScan(const BasicBlock *BB, bool Changed = false; SmallDenseMap<DebugAggregate, BitVector> VariableDefinedBytes; // Scan over the entire block, not just over the instructions mapped by - // FnVarLocs, because wedges in FnVarLocs may only be seperated by debug + // FnVarLocs, because wedges in FnVarLocs may only be separated by debug // instructions. 
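// ==== [Editor's example: illustrative sketch, not part of this commit] ====
// The loops modernized above form the driver of a standard forward
// dataflow analysis: number blocks densely in reverse post-order once,
// then run a priority worklist over those indices so blocks are visited
// in a convergence-friendly order. Simplified skeleton (the real pass
// also deduplicates pending blocks with a set):
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"
#include <functional>
#include <queue>
#include <vector>
using namespace llvm;

static void runForwardDataflow(Function &F,
                               function_ref<bool(BasicBlock *)> Transfer) {
  ReversePostOrderTraversal<Function *> RPOT(&F);
  std::vector<BasicBlock *> OrderToBB;
  DenseMap<BasicBlock *, unsigned> BBToOrder;
  std::priority_queue<unsigned, std::vector<unsigned>, std::greater<>> Work;
  for (BasicBlock *BB : RPOT) {
    BBToOrder[BB] = OrderToBB.size();
    Work.push(OrderToBB.size());
    OrderToBB.push_back(BB);
  }
  while (!Work.empty()) {
    BasicBlock *BB = OrderToBB[Work.top()];
    Work.pop();
    // Transfer returns true when BB's out-state changed; with a monotone
    // transfer function on a finite lattice this loop terminates.
    if (Transfer(BB))
      for (BasicBlock *Succ : successors(BB))
        Work.push(BBToOrder[Succ]);
  }
}
// ==== [end editor's example] ====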
for (const Instruction &I : reverse(*BB)) { if (!isa<DbgVariableIntrinsic>(I)) { @@ -2564,8 +2571,8 @@ removeRedundantDbgLocsUsingBackwardScan(const BasicBlock *BB, } }; HandleLocsForWedge(&I); - for (DPValue &DPV : reverse(I.getDbgValueRange())) - HandleLocsForWedge(&DPV); + for (DbgVariableRecord &DVR : reverse(filterDbgVars(I.getDbgRecordRange()))) + HandleLocsForWedge(&DVR); } return Changed; @@ -2586,7 +2593,7 @@ removeRedundantDbgLocsUsingForwardScan(const BasicBlock *BB, VariableMap; // Scan over the entire block, not just over the instructions mapped by - // FnVarLocs, because wedges in FnVarLocs may only be seperated by debug + // FnVarLocs, because wedges in FnVarLocs may only be separated by debug // instructions. for (const Instruction &I : *BB) { // Get the defs that come just before this instruction. @@ -2629,8 +2636,8 @@ removeRedundantDbgLocsUsingForwardScan(const BasicBlock *BB, } }; - for (DPValue &DPV : I.getDbgValueRange()) - HandleLocsForWedge(&DPV); + for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) + HandleLocsForWedge(&DVR); HandleLocsForWedge(&I); } @@ -2674,7 +2681,7 @@ removeUndefDbgLocsFromEntryBlock(const BasicBlock *BB, DenseMap<DebugVariable, std::pair<Value *, DIExpression *>> VariableMap; // Scan over the entire block, not just over the instructions mapped by - // FnVarLocs, because wedges in FnVarLocs may only be seperated by debug + // FnVarLocs, because wedges in FnVarLocs may only be separated by debug // instructions. for (const Instruction &I : *BB) { // Get the defs that come just before this instruction. @@ -2715,8 +2722,8 @@ removeUndefDbgLocsFromEntryBlock(const BasicBlock *BB, Changed = true; } }; - for (DPValue &DPV : I.getDbgValueRange()) - HandleLocsForWedge(&DPV); + for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) + HandleLocsForWedge(&DVR); HandleLocsForWedge(&I); } @@ -2749,8 +2756,8 @@ static DenseSet<DebugAggregate> findVarsWithStackSlot(Function &Fn) { for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(&I)) { Result.insert({DAI->getVariable(), DAI->getDebugLoc().getInlinedAt()}); } - for (DPValue *DPV : at::getDPVAssignmentMarkers(&I)) { - Result.insert({DPV->getVariable(), DPV->getDebugLoc().getInlinedAt()}); + for (DbgVariableRecord *DVR : at::getDVRAssignmentMarkers(&I)) { + Result.insert({DVR->getVariable(), DVR->getDebugLoc().getInlinedAt()}); } } } @@ -2793,7 +2800,7 @@ DebugAssignmentTrackingAnalysis::run(Function &F, if (!isAssignmentTrackingEnabled(*F.getParent())) return FunctionVarLocs(); - auto &DL = F.getParent()->getDataLayout(); + auto &DL = F.getDataLayout(); FunctionVarLocsBuilder Builder; analyzeFunction(F, DL, &Builder); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp index ccf3e9ec6492..ebcf76175a36 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -19,8 +19,9 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/InstSimplifyFolder.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/CodeGen/AtomicExpand.h" #include "llvm/CodeGen/AtomicExpandUtils.h" -#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/RuntimeLibcallUtil.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -36,6 +37,8 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include 
"llvm/IR/Instructions.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/MemoryModelRelaxationAnnotations.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" @@ -59,19 +62,10 @@ using namespace llvm; namespace { -class AtomicExpand : public FunctionPass { +class AtomicExpandImpl { const TargetLowering *TLI = nullptr; const DataLayout *DL = nullptr; -public: - static char ID; // Pass identification, replacement for typeid - - AtomicExpand() : FunctionPass(ID) { - initializeAtomicExpandPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - private: bool bracketInstWithFences(Instruction *I, AtomicOrdering Order); IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL); @@ -124,47 +118,77 @@ private: friend bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg); + +public: + bool run(Function &F, const TargetMachine *TM); +}; + +class AtomicExpandLegacy : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + + AtomicExpandLegacy() : FunctionPass(ID) { + initializeAtomicExpandLegacyPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; }; // IRBuilder to be used for replacement atomic instructions. -struct ReplacementIRBuilder : IRBuilder<InstSimplifyFolder> { +struct ReplacementIRBuilder + : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> { + MDNode *MMRAMD = nullptr; + // Preserves the DebugLoc from I, and preserves still valid metadata. + // Enable StrictFP builder mode when appropriate. explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL) - : IRBuilder(I->getContext(), DL) { + : IRBuilder(I->getContext(), DL, + IRBuilderCallbackInserter( + [this](Instruction *I) { addMMRAMD(I); })) { SetInsertPoint(I); this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections}); + if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP)) + this->setIsFPConstrained(true); + + MMRAMD = I->getMetadata(LLVMContext::MD_mmra); + } + + void addMMRAMD(Instruction *I) { + if (canInstructionHaveMMRAs(*I)) + I->setMetadata(LLVMContext::MD_mmra, MMRAMD); } }; } // end anonymous namespace -char AtomicExpand::ID = 0; +char AtomicExpandLegacy::ID = 0; -char &llvm::AtomicExpandID = AtomicExpand::ID; +char &llvm::AtomicExpandID = AtomicExpandLegacy::ID; -INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", false, - false) - -FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); } +INITIALIZE_PASS_BEGIN(AtomicExpandLegacy, DEBUG_TYPE, + "Expand Atomic instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE, + "Expand Atomic instructions", false, false) // Helper functions to retrieve the size of atomic instructions. 
static unsigned getAtomicOpSize(LoadInst *LI) { - const DataLayout &DL = LI->getModule()->getDataLayout(); + const DataLayout &DL = LI->getDataLayout(); return DL.getTypeStoreSize(LI->getType()); } static unsigned getAtomicOpSize(StoreInst *SI) { - const DataLayout &DL = SI->getModule()->getDataLayout(); + const DataLayout &DL = SI->getDataLayout(); return DL.getTypeStoreSize(SI->getValueOperand()->getType()); } static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) { - const DataLayout &DL = RMWI->getModule()->getDataLayout(); + const DataLayout &DL = RMWI->getDataLayout(); return DL.getTypeStoreSize(RMWI->getValOperand()->getType()); } static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) { - const DataLayout &DL = CASI->getModule()->getDataLayout(); + const DataLayout &DL = CASI->getDataLayout(); return DL.getTypeStoreSize(CASI->getCompareOperand()->getType()); } @@ -179,17 +203,12 @@ static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) { Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8; } -bool AtomicExpand::runOnFunction(Function &F) { - auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); - if (!TPC) - return false; - - auto &TM = TPC->getTM<TargetMachine>(); - const auto *Subtarget = TM.getSubtargetImpl(F); +bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) { + const auto *Subtarget = TM->getSubtargetImpl(F); if (!Subtarget->enableAtomicExpand()) return false; TLI = Subtarget->getTargetLowering(); - DL = &F.getParent()->getDataLayout(); + DL = &F.getDataLayout(); SmallVector<Instruction *, 1> AtomicInsts; @@ -322,16 +341,6 @@ bool AtomicExpand::runOnFunction(Function &F) { if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) { MadeChange = true; } else { - AtomicRMWInst::BinOp Op = RMWI->getOperation(); - unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; - unsigned ValueSize = getAtomicOpSize(RMWI); - if (ValueSize < MinCASSize && - (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || - Op == AtomicRMWInst::And)) { - RMWI = widenPartwordAtomicRMW(RMWI); - MadeChange = true; - } - MadeChange |= tryExpandAtomicRMW(RMWI); } } else if (CASI) @@ -340,7 +349,33 @@ bool AtomicExpand::runOnFunction(Function &F) { return MadeChange; } -bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) { +bool AtomicExpandLegacy::runOnFunction(Function &F) { + + auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); + if (!TPC) + return false; + auto *TM = &TPC->getTM<TargetMachine>(); + AtomicExpandImpl AE; + return AE.run(F, TM); +} + +FunctionPass *llvm::createAtomicExpandLegacyPass() { + return new AtomicExpandLegacy(); +} + +PreservedAnalyses AtomicExpandPass::run(Function &F, + FunctionAnalysisManager &AM) { + AtomicExpandImpl AE; + + bool Changed = AE.run(F, TM); + if (!Changed) + return PreservedAnalyses::all(); + + return PreservedAnalyses::none(); +} + +bool AtomicExpandImpl::bracketInstWithFences(Instruction *I, + AtomicOrdering Order) { ReplacementIRBuilder Builder(I, *DL); auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order); @@ -355,8 +390,8 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) { } /// Get the iX type with the same bitwidth as T. 
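// ==== [Editor's note: not part of this commit] ====
// The recurring cleanup in the hunks above: Function and Instruction now
// expose getDataLayout() directly (forwarding to the parent Module), so
//   const DataLayout &DL = I->getModule()->getDataLayout();
// shortens to
//   const DataLayout &DL = I->getDataLayout();
// with no change in behavior. The same substitution repeats throughout
// this diff.
// ==== [end note] ====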
-IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T, - const DataLayout &DL) { +IntegerType * +AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) { EVT VT = TLI->getMemValueType(DL, T); unsigned BitWidth = VT.getStoreSizeInBits(); assert(BitWidth == VT.getSizeInBits() && "must be a power of two"); @@ -366,7 +401,7 @@ IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T, /// Convert an atomic load of a non-integral type to an integer load of the /// equivalent bitwidth. See the function comment on /// convertAtomicStoreToIntegerType for background. -LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { +LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) { auto *M = LI->getModule(); Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout()); @@ -387,7 +422,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { } AtomicRMWInst * -AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) { +AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) { auto *M = RMWI->getModule(); Type *NewTy = getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout()); @@ -400,9 +435,9 @@ AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) { ? Builder.CreatePtrToInt(Val, NewTy) : Builder.CreateBitCast(Val, NewTy); - auto *NewRMWI = - Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal, - RMWI->getAlign(), RMWI->getOrdering()); + auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal, + RMWI->getAlign(), RMWI->getOrdering(), + RMWI->getSyncScopeID()); NewRMWI->setVolatile(RMWI->isVolatile()); LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n"); @@ -414,7 +449,7 @@ AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) { return NewRMWI; } -bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) { +bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) { switch (TLI->shouldExpandAtomicLoadInIR(LI)) { case TargetLoweringBase::AtomicExpansionKind::None: return false; @@ -436,7 +471,7 @@ bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) { } } -bool AtomicExpand::tryExpandAtomicStore(StoreInst *SI) { +bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) { switch (TLI->shouldExpandAtomicStoreInIR(SI)) { case TargetLoweringBase::AtomicExpansionKind::None: return false; @@ -451,7 +486,7 @@ bool AtomicExpand::tryExpandAtomicStore(StoreInst *SI) { } } -bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { +bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) { ReplacementIRBuilder Builder(LI, *DL); // On some architectures, load-linked instructions are atomic for larger @@ -467,7 +502,7 @@ bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { return true; } -bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) { +bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) { ReplacementIRBuilder Builder(LI, *DL); AtomicOrdering Order = LI->getOrdering(); if (Order == AtomicOrdering::Unordered) @@ -496,7 +531,7 @@ bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) { /// instruction select from the original atomic store, but as a migration /// mechanism, we convert back to the old format which the backends understand. /// Each backend will need individual work to recognize the new format. 
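// ==== [Editor's example: illustrative sketch, not part of this commit] ====
// What the convertAtomic*ToIntegerType helpers (such as the store variant
// below) boil down to: an atomic operation on a float becomes the same
// operation on the bitcast i32, preserving ordering, scope, alignment and
// volatility. Reduced sketch with a plain IRBuilder (the pass itself uses
// ReplacementIRBuilder):
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static void storeFloatAsInt(StoreInst *SI) {
  IRBuilder<> Builder(SI);
  Type *IntTy = Builder.getIntNTy(
      SI->getValueOperand()->getType()->getPrimitiveSizeInBits());
  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), IntTy);
  StoreInst *NewSI = Builder.CreateStore(NewVal, SI->getPointerOperand());
  NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
  NewSI->setAlignment(SI->getAlign());
  NewSI->setVolatile(SI->isVolatile());
  SI->eraseFromParent();
}
// ==== [end editor's example] ====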
-StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { +StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) { ReplacementIRBuilder Builder(SI, *DL); auto *M = SI->getModule(); Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(), @@ -514,7 +549,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { return NewSI; } -void AtomicExpand::expandAtomicStore(StoreInst *SI) { +void AtomicExpandImpl::expandAtomicStore(StoreInst *SI) { // This function is only called on atomic stores that are too large to be // atomic if implemented as a native store. So we replace them by an // atomic swap, that can be implemented for example as a ldrex/strex on ARM @@ -542,9 +577,9 @@ static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr, Value *&Success, Value *&NewLoaded) { Type *OrigTy = NewVal->getType(); - // This code can go away when cmpxchg supports FP types. + // This code can go away when cmpxchg supports FP and vector types. assert(!OrigTy->isPointerTy()); - bool NeedBitcast = OrigTy->isFloatingPointTy(); + bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy(); if (NeedBitcast) { IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits()); NewVal = Builder.CreateBitCast(NewVal, IntTy); @@ -561,7 +596,7 @@ static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr, NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy); } -bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { +bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) { LLVMContext &Ctx = AI->getModule()->getContext(); TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI); switch (Kind) { @@ -607,6 +642,17 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { return true; } case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: { + unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; + unsigned ValueSize = getAtomicOpSize(AI); + if (ValueSize < MinCASSize) { + AtomicRMWInst::BinOp Op = AI->getOperation(); + // Widen And/Or/Xor and give the target another chance at expanding it. + if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || + Op == AtomicRMWInst::And) { + tryExpandAtomicRMW(widenPartwordAtomicRMW(AI)); + return true; + } + } expandAtomicRMWToMaskedIntrinsic(AI); return true; } @@ -700,7 +746,7 @@ static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder, unsigned ValueSize = DL.getTypeStoreSize(ValueType); PMV.ValueType = PMV.IntValueType = ValueType; - if (PMV.ValueType->isFloatingPointTy()) + if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy()) PMV.IntValueType = Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits()); @@ -719,7 +765,7 @@ static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder, assert(ValueSize < MinWordSize); PointerType *PtrTy = cast<PointerType>(Addr->getType()); - IntegerType *IntTy = DL.getIntPtrType(Ctx, PtrTy->getAddressSpace()); + IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace()); Value *PtrLSB; if (AddrAlign < MinWordSize) { @@ -843,8 +889,15 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op, /// way as a typical atomicrmw expansion. The only difference here is /// that the operation inside of the loop may operate upon only a /// part of the value. 
-void AtomicExpand::expandPartwordAtomicRMW( +void AtomicExpandImpl::expandPartwordAtomicRMW( AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) { + // Widen And/Or/Xor and give the target another chance at expanding it. + AtomicRMWInst::BinOp Op = AI->getOperation(); + if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || + Op == AtomicRMWInst::And) { + tryExpandAtomicRMW(widenPartwordAtomicRMW(AI)); + return; + } AtomicOrdering MemOpOrder = AI->getOrdering(); SyncScope::ID SSID = AI->getSyncScopeID(); @@ -855,18 +908,17 @@ void AtomicExpand::expandPartwordAtomicRMW( AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); Value *ValOperand_Shifted = nullptr; - if (AI->getOperation() == AtomicRMWInst::Xchg || - AI->getOperation() == AtomicRMWInst::Add || - AI->getOperation() == AtomicRMWInst::Sub || - AI->getOperation() == AtomicRMWInst::Nand) { + if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add || + Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) { + Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType); ValOperand_Shifted = - Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType), - PMV.ShiftAmt, "ValOperand_Shifted"); + Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt, + "ValOperand_Shifted"); } auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) { - return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded, - ValOperand_Shifted, AI->getValOperand(), PMV); + return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted, + AI->getValOperand(), PMV); }; Value *OldResult; @@ -886,8 +938,38 @@ void AtomicExpand::expandPartwordAtomicRMW( AI->eraseFromParent(); } +/// Copy metadata that's safe to preserve when widening atomics. +static void copyMetadataForAtomic(Instruction &Dest, + const Instruction &Source) { + SmallVector<std::pair<unsigned, MDNode *>, 8> MD; + Source.getAllMetadata(MD); + LLVMContext &Ctx = Dest.getContext(); + MDBuilder MDB(Ctx); + + for (auto [ID, N] : MD) { + switch (ID) { + case LLVMContext::MD_dbg: + case LLVMContext::MD_tbaa: + case LLVMContext::MD_tbaa_struct: + case LLVMContext::MD_alias_scope: + case LLVMContext::MD_noalias: + case LLVMContext::MD_access_group: + case LLVMContext::MD_mmra: + Dest.setMetadata(ID, N); + break; + default: + if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory")) + Dest.setMetadata(ID, N); + else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory")) + Dest.setMetadata(ID, N); + + break; + } + } +} + // Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width. 
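// ==== [Editor's example: illustrative sketch, not part of this commit] ====
// The arithmetic behind the widening, for a little-endian target with a
// 32-bit minimum cmpxchg width: an i8 operand is operated on inside its
// containing aligned word. For 'and', the widened operand must leave the
// other three bytes intact, which is why the shifted value is OR'ed with
// Inv_Mask in widenPartwordAtomicRMW below. Plain C++ model of one step:
static unsigned widenedAndWord(unsigned Word, unsigned char Val,
                               unsigned ByteOffset) {
  unsigned ShiftAmt = ByteOffset * 8;                     // PMV.ShiftAmt
  unsigned Mask = 0xFFu << ShiftAmt;                      // PMV.Mask
  unsigned Widened = ((unsigned)Val << ShiftAmt) | ~Mask; // OR Inv_Mask
  return Word & Widened; // bytes outside Mask are ANDed with 1s: unchanged
}
// ==== [end editor's example] ====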
-AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) { +AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) { ReplacementIRBuilder Builder(AI, *DL); AtomicRMWInst::BinOp Op = AI->getOperation(); @@ -907,14 +989,15 @@ AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) { if (Op == AtomicRMWInst::And) NewOperand = - Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand"); + Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand"); else NewOperand = ValOperand_Shifted; AtomicRMWInst *NewAI = Builder.CreateAtomicRMW( Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment, AI->getOrdering(), AI->getSyncScopeID()); - // TODO: Preserve metadata + + copyMetadataForAtomic(*NewAI, *AI); Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV); AI->replaceAllUsesWith(FinalOldResult); @@ -922,7 +1005,7 @@ AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) { return NewAI; } -bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { +bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { // The basic idea here is that we're expanding a cmpxchg of a // smaller memory size up to a word-sized cmpxchg. To do this, we // need to add a retry-loop for strong cmpxchg, so that @@ -1047,7 +1130,7 @@ bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { return true; } -void AtomicExpand::expandAtomicOpToLLSC( +void AtomicExpandImpl::expandAtomicOpToLLSC( Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign, AtomicOrdering MemOpOrder, function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) { @@ -1059,7 +1142,7 @@ void AtomicExpand::expandAtomicOpToLLSC( I->eraseFromParent(); } -void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) { +void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) { ReplacementIRBuilder Builder(AI, *DL); PartwordMaskValues PMV = @@ -1085,7 +1168,8 @@ void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) { AI->eraseFromParent(); } -void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) { +void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic( + AtomicCmpXchgInst *CI) { ReplacementIRBuilder Builder(CI, *DL); PartwordMaskValues PMV = createMaskInstrs( @@ -1112,7 +1196,7 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) { CI->eraseFromParent(); } -Value *AtomicExpand::insertRMWLLSCLoop( +Value *AtomicExpandImpl::insertRMWLLSCLoop( IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign, AtomicOrdering MemOpOrder, function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) { @@ -1121,7 +1205,7 @@ Value *AtomicExpand::insertRMWLLSCLoop( Function *F = BB->getParent(); assert(AddrAlign >= - F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) && + F->getDataLayout().getTypeStoreSize(ResultTy) && "Expected at least natural alignment at this point."); // Given: atomicrmw some_op iN* %addr, iN %incr ordering @@ -1168,7 +1252,7 @@ Value *AtomicExpand::insertRMWLLSCLoop( /// way to represent a pointer cmpxchg so that we can update backends one by /// one. 
AtomicCmpXchgInst * -AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) { +AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) { auto *M = CI->getModule(); Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(), M->getDataLayout()); @@ -1201,7 +1285,7 @@ AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) { return NewCI; } -bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { +bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); AtomicOrdering FailureOrder = CI->getFailureOrdering(); Value *Addr = CI->getPointerOperand(); @@ -1447,7 +1531,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { return true; } -bool AtomicExpand::isIdempotentRMW(AtomicRMWInst *RMWI) { +bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) { auto C = dyn_cast<ConstantInt>(RMWI->getValOperand()); if (!C) return false; @@ -1467,7 +1551,7 @@ bool AtomicExpand::isIdempotentRMW(AtomicRMWInst *RMWI) { } } -bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) { +bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) { if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) { tryExpandAtomicLoad(ResultingLoad); return true; @@ -1475,7 +1559,7 @@ bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) { return false; } -Value *AtomicExpand::insertRMWCmpXchgLoop( +Value *AtomicExpandImpl::insertRMWCmpXchgLoop( IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign, AtomicOrdering MemOpOrder, SyncScope::ID SSID, function_ref<Value *(IRBuilderBase &, Value *)> PerformOp, @@ -1536,7 +1620,7 @@ Value *AtomicExpand::insertRMWCmpXchgLoop( return NewLoaded; } -bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) { +bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) { unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; unsigned ValueSize = getAtomicOpSize(CI); @@ -1561,13 +1645,13 @@ bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // Note: This function is exposed externally by AtomicExpandUtils.h bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg) { - ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout()); + ReplacementIRBuilder Builder(AI, AI->getDataLayout()); Builder.setIsFPConstrained( AI->getFunction()->hasFnAttribute(Attribute::StrictFP)); // FIXME: If FP exceptions are observable, we should force them off for the // loop for the FP atomics. 
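// ==== [Editor's example: illustrative sketch, not part of this commit] ====
// The loop that insertRMWCmpXchgLoop builds, modeled with std::atomic and
// simplified orderings: recompute the new value from the last observed
// one and retry until the compare-exchange succeeds. Like atomicrmw, the
// result is the value seen before the successful exchange.
#include <atomic>

static unsigned rmwAddViaCAS(std::atomic<unsigned> &Addr, unsigned Incr) {
  unsigned Loaded = Addr.load(std::memory_order_relaxed);
  unsigned NewVal;
  do {
    NewVal = Loaded + Incr; // PerformOp(Loaded)
  } while (!Addr.compare_exchange_weak(Loaded, NewVal)); // reloads Loaded
  return Loaded;
}
// ==== [end editor's example] ====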
- Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop( + Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop( Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(), AI->getOrdering(), AI->getSyncScopeID(), [&](IRBuilderBase &Builder, Value *Loaded) { @@ -1601,7 +1685,7 @@ static bool canUseSizedAtomicCall(unsigned Size, Align Alignment, Size <= LargestSize; } -void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) { +void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) { static const RTLIB::Libcall Libcalls[6] = { RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2, RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16}; @@ -1614,7 +1698,7 @@ void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) { report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load"); } -void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) { +void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) { static const RTLIB::Libcall Libcalls[6] = { RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2, RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16}; @@ -1627,7 +1711,7 @@ void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) { report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store"); } -void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) { +void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) { static const RTLIB::Libcall Libcalls[6] = { RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1, RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4, @@ -1705,7 +1789,7 @@ static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) { llvm_unreachable("Unexpected AtomicRMW operation."); } -void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) { +void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) { ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation()); unsigned Size = getAtomicOpSize(I); @@ -1744,7 +1828,7 @@ void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) { // ATOMIC libcalls to be emitted. All of the other arguments besides // 'I' are extracted from the Instruction subclass by the // caller. Depending on the particular call, some will be null. -bool AtomicExpand::expandAtomicOpToLibcall( +bool AtomicExpandImpl::expandAtomicOpToLibcall( Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand, Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering, AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockPathCloning.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockPathCloning.cpp index 901542e8507b..19f824850607 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockPathCloning.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockPathCloning.cpp @@ -119,6 +119,16 @@ bool IsValidCloning(const MachineFunction &MF, return false; } } + if (PathBB->isMachineBlockAddressTaken()) { + // Avoid cloning blocks which have their address taken since we can't + // rewire branches to those blocks as easily (e.g., branches within + // inline assembly). 
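// ==== [Editor's note: not part of this commit] ====
// A block's address is "taken" when it is reachable through a
// materialized address, e.g. at the IR level:
//   %ba = blockaddress(@f, %target)   ; makes %target address-taken
//   indirectbr ptr %ba, [label %target]
// or through callbr/inline-asm label operands. Cloning such a block would
// leave the taken address referring to only one of the copies, hence the
// warning and rejection below.
// ==== [end note] ====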
+ WithColor::warning() + << "block #" << BBID + << " has its machine block address taken in function " + << MF.getName() << "\n"; + return false; + } } if (I != ClonePath.size() - 1 && !PathBB->empty() && diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp index dbb6ebb3d7eb..09e45ea5794b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -57,10 +57,10 @@ // function into potentially several disjoint pieces, and CFI needs to be // emitted per cluster. This also bloats the object file and binary sizes. // -// Basic Block Labels +// Basic Block Address Map // ================== // -// With -fbasic-block-sections=labels, we encode the offsets of BB addresses of +// With -fbasic-block-address-map, we emit the offsets of BB addresses of // every function into the .llvm_bb_addr_map section. Along with the function // symbols, this allows for mapping of virtual addresses in PMU profiles back to // the corresponding basic blocks. This logic is implemented in AsmPrinter. This @@ -118,6 +118,10 @@ public: /// Identify basic blocks that need separate sections and prepare to emit them /// accordingly. bool runOnMachineFunction(MachineFunction &MF) override; + +private: + bool handleBBSections(MachineFunction &MF); + bool handleBBAddrMap(MachineFunction &MF); }; } // end anonymous namespace @@ -204,9 +208,14 @@ assignSections(MachineFunction &MF, if (I != FuncClusterInfo.end()) { MBB.setSectionID(I->second.ClusterID); } else { - // BB goes into the special cold section if it is not specified in the - // cluster info map. - MBB.setSectionID(MBBSectionID::ColdSectionID); + const TargetInstrInfo &TII = + *MBB.getParent()->getSubtarget().getInstrInfo(); + + if (TII.isMBBSafeToSplitToCold(MBB)) { + // BB goes into the special cold section if it is not specified in the + // cluster info map. + MBB.setSectionID(MBBSectionID::ColdSectionID); + } } } @@ -280,10 +289,12 @@ bool llvm::hasInstrProfHashMismatch(MachineFunction &MF) { return false; } -bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { +// Identify, arrange, and modify basic blocks which need separate sections +// according to the specification provided by the -fbasic-block-sections flag. +bool BasicBlockSections::handleBBSections(MachineFunction &MF) { auto BBSectionsType = MF.getTarget().getBBSectionsType(); - assert(BBSectionsType != BasicBlockSection::None && - "BB Sections not enabled!"); + if (BBSectionsType == BasicBlockSection::None) + return false; // Check for source drift. If the source has changed since the profiles // were obtained, optimizing basic blocks might be sub-optimal. @@ -300,7 +311,7 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { if (BBSectionsType == BasicBlockSection::Labels) { MF.setBBSectionsType(BBSectionsType); - return false; + return true; } DenseMap<UniqueBBID, BBClusterInfo> FuncClusterInfo; @@ -364,6 +375,27 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { return true; } +// When the BB address map needs to be generated, this renumbers basic blocks to +// make them appear in increasing order of their IDs in the function. This +// avoids the need to store basic block IDs in the BB address map section, since +// they can be determined implicitly. 
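// ==== [Editor's note: not part of this commit] ====
// With the rename above, the SHT_LLVM_BB_ADDR_MAP section is requested
// through a dedicated flag rather than a sections mode; assuming current
// clang/llvm-readobj options, usage looks like:
//   clang -fbasic-block-address-map -c foo.c
//   llvm-readobj --bb-addr-map foo.o
// ==== [end note] ====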
+bool BasicBlockSections::handleBBAddrMap(MachineFunction &MF) { + if (MF.getTarget().getBBSectionsType() == BasicBlockSection::Labels) + return false; + if (!MF.getTarget().Options.BBAddrMap) + return false; + MF.RenumberBlocks(); + return true; +} + +bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { + // First handle the basic block sections. + auto R1 = handleBBSections(MF); + // Handle basic block address map after basic block sections are finalized. + auto R2 = handleBBAddrMap(MF); + return R1 || R2; +} + void BasicBlockSections::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<BasicBlockSectionsProfileReaderWrapperPass>(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 6eef5d2c50a2..fa5464026516 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -170,7 +170,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { return false; // Return a match if debug-info-filename is not specified. Otherwise, // check for equality. - return DIFilename.empty() || It->second.equals(DIFilename); + return DIFilename.empty() || It->second == DIFilename; }); if (!FunctionFound) { // Skip the following profile by setting the profile iterator (FI) to @@ -317,7 +317,7 @@ Error BasicBlockSectionsProfileReader::ReadV0Profile() { return false; // Return a match if debug-info-filename is not specified. Otherwise, // check for equality. - return DIFilename.empty() || It->second.equals(DIFilename); + return DIFilename.empty() || It->second == DIFilename; }); if (!FunctionFound) { // Skip the following profile by setting the profile iterator (FI) to diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp index 57cefae2066a..80a4eb86cf9e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -30,5 +30,5 @@ llvm::PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0), cl::Hidden); BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, const Function &F) - : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp index ecf7bc30913f..92a03eb52e35 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp @@ -80,7 +80,6 @@ TailMergeThreshold("tail-merge-threshold", cl::init(150), cl::Hidden); // Heuristic for tail merging (and, inversely, tail duplication). -// TODO: This should be replaced with a target query. 
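// ==== [Editor's note: illustrative sketch, not part of this commit] ====
// The removed TODO above is what this commit resolves: when
// -tail-merge-size is not given on the command line, BranchFolder now
// asks the target via TII->getTailMergeSize(MF) (see below). A target can
// tune the threshold by overriding the hook; the override here is
// hypothetical:
//   unsigned MyInstrInfo::getTailMergeSize(
//       const MachineFunction &MF) const {
//     return 4; // demand slightly longer common tails than the default
//   }
// ==== [end note] ====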
static cl::opt<unsigned> TailMergeSize("tail-merge-size", cl::desc("Min number of instructions to consider tail merging"), @@ -98,8 +97,8 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<MachineBlockFrequencyInfo>(); - AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); + AU.addRequired<MachineBranchProbabilityInfoWrapperPass>(); AU.addRequired<ProfileSummaryInfoWrapperPass>(); AU.addRequired<TargetPassConfig>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -130,10 +129,11 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && PassConfig->getEnableTailMerge(); MBFIWrapper MBBFreqInfo( - getAnalysis<MachineBlockFrequencyInfo>()); - BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo, - getAnalysis<MachineBranchProbabilityInfo>(), - &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI()); + getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()); + BranchFolder Folder( + EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo, + getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI(), + &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI()); return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(), MF.getSubtarget().getRegisterInfo()); } @@ -144,8 +144,6 @@ BranchFolder::BranchFolder(bool DefaultEnableTailMerge, bool CommonHoist, ProfileSummaryInfo *PSI, unsigned MinTailLength) : EnableHoistCommonCode(CommonHoist), MinCommonTailLength(MinTailLength), MBBFreqInfo(FreqInfo), MBPI(ProbInfo), PSI(PSI) { - if (MinCommonTailLength == 0) - MinCommonTailLength = TailMergeSize; switch (FlagEnableTailMerge) { case cl::BOU_UNSET: EnableTailMerge = DefaultEnableTailMerge; @@ -194,6 +192,12 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, MLI = mli; this->MRI = &MRI; + if (MinCommonTailLength == 0) { + MinCommonTailLength = TailMergeSize.getNumOccurrences() > 0 + ? TailMergeSize + : TII->getTailMergeSize(MF); + } + UpdateLiveIns = MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF); if (!UpdateLiveIns) MRI.invalidateLiveness(); @@ -414,7 +418,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, // NewMBB belongs to the same loop as CurMBB. if (MLI) if (MachineLoop *ML = MLI->getLoopFor(&CurMBB)) - ML->addBasicBlockToLoop(NewMBB, MLI->getBase()); + ML->addBasicBlockToLoop(NewMBB, *MLI); // NewMBB inherits CurMBB's block frequency. MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB)); @@ -455,12 +459,14 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, // with a conditional branch to the next block, optimize by reversing the // test and conditionally branching to SuccMBB instead. 
static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, - const TargetInstrInfo *TII) { + const TargetInstrInfo *TII, const DebugLoc &BranchDL) { MachineFunction *MF = CurMBB->getParent(); MachineFunction::iterator I = std::next(MachineFunction::iterator(CurMBB)); MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; DebugLoc dl = CurMBB->findBranchDebugLoc(); + if (!dl) + dl = BranchDL; if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)) { MachineBasicBlock *NextBB = &*I; if (TBB == NextBB && !Cond.empty() && !FBB) { @@ -686,7 +692,8 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash, void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock *SuccBB, - MachineBasicBlock *PredBB) { + MachineBasicBlock *PredBB, + const DebugLoc &BranchDL) { MPIterator CurMPIter, B; for (CurMPIter = std::prev(MergePotentials.end()), B = MergePotentials.begin(); @@ -694,7 +701,7 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, // Put the unconditional branch back, if we need one. MachineBasicBlock *CurMBB = CurMPIter->getBlock(); if (SuccBB && CurMBB != PredBB) - FixTail(CurMBB, SuccBB, TII); + FixTail(CurMBB, SuccBB, TII, BranchDL); if (CurMPIter == B) break; } @@ -908,6 +915,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // Walk through equivalence sets looking for actual exact matches. while (MergePotentials.size() > 1) { unsigned CurHash = MergePotentials.back().getHash(); + const DebugLoc &BranchDL = MergePotentials.back().getBranchDebugLoc(); // Build SameTails, identifying the set of blocks with this hash code // and with the maximum number of instructions in common. @@ -918,7 +926,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // If we didn't find any pair that has at least MinCommonTailLength // instructions in common, remove all blocks with this hash code and retry. if (SameTails.empty()) { - RemoveBlocksWithHash(CurHash, SuccBB, PredBB); + RemoveBlocksWithHash(CurHash, SuccBB, PredBB, BranchDL); continue; } @@ -965,7 +973,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // Split a block so that one does. if (!CreateCommonTailOnlyBlock(PredBB, SuccBB, maxCommonTailLength, commonTailIndex)) { - RemoveBlocksWithHash(CurHash, SuccBB, PredBB); + RemoveBlocksWithHash(CurHash, SuccBB, PredBB, BranchDL); continue; } } @@ -1013,7 +1021,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (MergePotentials.size() == TailMergeThreshold) break; if (!TriedMerging.count(&MBB) && MBB.succ_empty()) - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB)); + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB, + MBB.findBranchDebugLoc())); } // If this is a large problem, avoid visiting the same basic blocks @@ -1115,8 +1124,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { } // Remove the unconditional branch at the end, if any. 
+ DebugLoc dl = PBB->findBranchDebugLoc(); if (TBB && (Cond.empty() || FBB)) { - DebugLoc dl = PBB->findBranchDebugLoc(); TII->removeBranch(*PBB); if (!Cond.empty()) // reinsert conditional branch only, for now @@ -1124,7 +1133,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { NewCond, dl); } - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(*PBB), PBB)); + MergePotentials.push_back( + MergePotentialsElt(HashEndOfMBB(*PBB), PBB, dl)); } } @@ -1142,7 +1152,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { PredBB = &*std::prev(I); // this may have been changed in TryTailMergeBlocks if (MergePotentials.size() == 1 && MergePotentials.begin()->getBlock() != PredBB) - FixTail(MergePotentials.begin()->getBlock(), IBB, TII); + FixTail(MergePotentials.begin()->getBlock(), IBB, TII, + MergePotentials.begin()->getBranchDebugLoc()); } return MadeChange; @@ -2047,12 +2058,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { MBB->splice(Loc, TBB, TBB->begin(), TIB); FBB->erase(FBB->begin(), FIB); - if (UpdateLiveIns) { - bool anyChange = false; - do { - anyChange = recomputeLiveIns(*TBB) || recomputeLiveIns(*FBB); - } while (anyChange); - } + if (UpdateLiveIns) + fullyRecomputeLiveIns({TBB, FBB}); ++NumHoist; return true; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h index 63b2ef04b21b..ff2bbe06c048 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h @@ -50,10 +50,11 @@ class TargetRegisterInfo; class MergePotentialsElt { unsigned Hash; MachineBasicBlock *Block; + DebugLoc BranchDebugLoc; public: - MergePotentialsElt(unsigned h, MachineBasicBlock *b) - : Hash(h), Block(b) {} + MergePotentialsElt(unsigned h, MachineBasicBlock *b, DebugLoc bdl) + : Hash(h), Block(b), BranchDebugLoc(std::move(bdl)) {} unsigned getHash() const { return Hash; } MachineBasicBlock *getBlock() const { return Block; } @@ -62,6 +63,8 @@ class TargetRegisterInfo; Block = MBB; } + const DebugLoc &getBranchDebugLoc() { return BranchDebugLoc; } + bool operator<(const MergePotentialsElt &) const; }; @@ -162,8 +165,9 @@ class TargetRegisterInfo; /// Remove all blocks with hash CurHash from MergePotentials, restoring /// branches at ends of blocks as appropriate. - void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB, - MachineBasicBlock* PredBB); + void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB, + const DebugLoc &BranchDL); /// None of the blocks to be tail-merged consist only of the common tail. /// Create a block that does by splitting one. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CFGuardLongjmp.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CFGuardLongjmp.cpp index c3bf93855111..04de01140056 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CFGuardLongjmp.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CFGuardLongjmp.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" using namespace llvm; @@ -61,7 +62,7 @@ FunctionPass *llvm::createCFGuardLongjmpPass() { return new CFGuardLongjmp(); } bool CFGuardLongjmp::runOnMachineFunction(MachineFunction &MF) { // Skip modules for which the cfguard flag is not set. 
- if (!MF.getMMI().getModule()->getModuleFlag("cfguard")) + if (!MF.getFunction().getParent()->getModuleFlag("cfguard")) return false; // Skip functions that do not have calls to _setjmp. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp index 87b062a16df1..1ff01ad34b30 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp @@ -248,6 +248,7 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { case MCCFIInstruction::OpWindowSave: case MCCFIInstruction::OpNegateRAState: case MCCFIInstruction::OpGnuArgsSize: + case MCCFIInstruction::OpLabel: break; } if (CSRReg || CSROffset) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp index fa7ef669ec11..9d8c9119f771 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -252,7 +252,8 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, // For terminators that produce values, ask the backend if the register is // not spillable. - if (TII.isUnspillableTerminator(MI) && MI->definesRegister(LI.reg())) { + if (TII.isUnspillableTerminator(MI) && + MI->definesRegister(LI.reg(), /*TRI=*/nullptr)) { LI.markNotSpillable(); return -1.0f; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CallBrPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CallBrPrepare.cpp index fddc4d74b2da..b6fe0fa00f2b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CallBrPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CallBrPrepare.cpp @@ -52,7 +52,7 @@ using namespace llvm; -#define DEBUG_TYPE "callbrprepare" +#define DEBUG_TYPE "callbr-prepare" static bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT); static bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs, @@ -94,9 +94,11 @@ PreservedAnalyses CallBrPreparePass::run(Function &Fn, } char CallBrPrepare::ID = 0; -INITIALIZE_PASS_BEGIN(CallBrPrepare, DEBUG_TYPE, "Prepare callbr", false, false) +INITIALIZE_PASS_BEGIN(CallBrPrepare, "callbrprepare", "Prepare callbr", false, + false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(CallBrPrepare, DEBUG_TYPE, "Prepare callbr", false, false) +INITIALIZE_PASS_END(CallBrPrepare, "callbrprepare", "Prepare callbr", false, + false) FunctionPass *llvm::createCallBrPass() { return new CallBrPrepare(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp index 418066452c17..31fa4c105cef 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp @@ -19,7 +19,7 @@ using namespace llvm; /// initializeCodeGen - Initialize all passes linked into the CodeGen library. 
void llvm::initializeCodeGen(PassRegistry &Registry) { initializeAssignmentTrackingAnalysisPass(Registry); - initializeAtomicExpandPass(Registry); + initializeAtomicExpandLegacyPass(Registry); initializeBasicBlockPathCloningPass(Registry); initializeBasicBlockSectionsPass(Registry); initializeBranchFolderPassPass(Registry); @@ -54,15 +54,16 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeIfConverterPass(Registry); initializeImplicitNullChecksPass(Registry); initializeIndirectBrExpandLegacyPassPass(Registry); + initializeInitUndefPass(Registry); initializeInterleavedLoadCombinePass(Registry); initializeInterleavedAccessPass(Registry); initializeJMCInstrumenterPass(Registry); initializeLiveDebugValuesPass(Registry); initializeLiveDebugVariablesPass(Registry); - initializeLiveIntervalsPass(Registry); + initializeLiveIntervalsWrapperPassPass(Registry); initializeLiveRangeShrinkPass(Registry); initializeLiveStacksPass(Registry); - initializeLiveVariablesPass(Registry); + initializeLiveVariablesWrapperPassPass(Registry); initializeLocalStackSlotPassPass(Registry); initializeLowerGlobalDtorsLegacyPassPass(Registry); initializeLowerIntrinsicsPass(Registry); @@ -70,7 +71,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeMIRCanonicalizerPass(Registry); initializeMIRNamerPass(Registry); initializeMIRProfileLoaderPassPass(Registry); - initializeMachineBlockFrequencyInfoPass(Registry); + initializeMachineBlockFrequencyInfoWrapperPassPass(Registry); initializeMachineBlockPlacementPass(Registry); initializeMachineBlockPlacementStatsPass(Registry); initializeMachineCFGPrinterPass(Registry); @@ -79,24 +80,24 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeMachineCopyPropagationPass(Registry); initializeMachineCycleInfoPrinterPassPass(Registry); initializeMachineCycleInfoWrapperPassPass(Registry); - initializeMachineDominatorTreePass(Registry); + initializeMachineDominatorTreeWrapperPassPass(Registry); initializeMachineFunctionPrinterPassPass(Registry); initializeMachineLateInstrsCleanupPass(Registry); initializeMachineLICMPass(Registry); - initializeMachineLoopInfoPass(Registry); + initializeMachineLoopInfoWrapperPassPass(Registry); initializeMachineModuleInfoWrapperPassPass(Registry); initializeMachineOptimizationRemarkEmitterPassPass(Registry); initializeMachineOutlinerPass(Registry); initializeMachinePipelinerPass(Registry); initializeMachineSanitizerBinaryMetadataPass(Registry); initializeModuloScheduleTestPass(Registry); - initializeMachinePostDominatorTreePass(Registry); + initializeMachinePostDominatorTreeWrapperPassPass(Registry); initializeMachineRegionInfoPassPass(Registry); initializeMachineSchedulerPass(Registry); initializeMachineSinkingPass(Registry); initializeMachineUniformityAnalysisPassPass(Registry); initializeMachineUniformityInfoPrinterPassPass(Registry); - initializeMachineVerifierPassPass(Registry); + initializeMachineVerifierLegacyPassPass(Registry); initializeObjCARCContractLegacyPassPass(Registry); initializeOptimizePHIsPass(Registry); initializePEIPass(Registry); @@ -122,7 +123,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeShadowStackGCLoweringPass(Registry); initializeShrinkWrapPass(Registry); initializeSjLjEHPreparePass(Registry); - initializeSlotIndexesPass(Registry); + initializeSlotIndexesWrapperPassPass(Registry); initializeStackColoringPass(Registry); initializeStackFrameLayoutAnalysisPassPass(Registry); initializeStackMapLivenessPass(Registry); @@ -131,7 +132,7 @@ void 
llvm::initializeCodeGen(PassRegistry &Registry) { initializeStripDebugMachineModulePass(Registry); initializeTailDuplicatePass(Registry); initializeTargetPassConfigPass(Registry); - initializeTwoAddressInstructionPassPass(Registry); + initializeTwoAddressInstructionLegacyPassPass(Registry); initializeTypePromotionLegacyPass(Registry); initializeUnpackMachineBundlesPass(Registry); initializeUnreachableBlockElimLegacyPassPass(Registry); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp index 577c5dbc8e2d..fe144d3c1820 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp @@ -260,7 +260,8 @@ void llvm::salvageDebugInfoForDbgValue(const MachineRegisterInfo &MRI, continue; } - int UseMOIdx = DbgMI->findRegisterUseOperandIdx(DefMO->getReg()); + int UseMOIdx = + DbgMI->findRegisterUseOperandIdx(DefMO->getReg(), /*TRI=*/nullptr); assert(UseMOIdx != -1 && DbgMI->hasDebugOperandForReg(DefMO->getReg()) && "Must use salvaged instruction as its location"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp deleted file mode 100644 index 82945528e768..000000000000 --- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp +++ /dev/null @@ -1,27 +0,0 @@ -//===--- CodeGenPassBuilder.cpp --------------------------------------- ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines interfaces to access the target independent code -// generation passes provided by the LLVM backend. 
-// -//===---------------------------------------------------------------------===// - -#include "llvm/CodeGen/CodeGenPassBuilder.h" - -using namespace llvm; - -namespace llvm { -#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ - MachinePassKey PASS_NAME::Key; -#include "llvm/CodeGen/MachinePassRegistry.def" -#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ - MachinePassKey PASS_NAME::Key; -#define DUMMY_MACHINE_FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) \ - AnalysisKey PASS_NAME::Key; -#include "llvm/CodeGen/MachinePassRegistry.def" -} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp index 1cca56fc19cf..22d0708f5478 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -34,12 +34,12 @@ #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" @@ -445,8 +445,8 @@ private: bool optimizeExtractElementInst(Instruction *Inst); bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT); bool fixupDbgValue(Instruction *I); - bool fixupDPValue(DPValue &I); - bool fixupDPValuesOnInst(Instruction &I); + bool fixupDbgVariableRecord(DbgVariableRecord &I); + bool fixupDbgVariableRecordsOnInst(Instruction &I); bool placeDbgValues(Function &F); bool placePseudoProbes(Function &F); bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts, @@ -509,7 +509,7 @@ bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) { return false; auto TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); CodeGenPrepare CGP(TM); - CGP.DL = &F.getParent()->getDataLayout(); + CGP.DL = &F.getDataLayout(); CGP.SubtargetInfo = TM->getSubtargetImpl(F); CGP.TLI = CGP.SubtargetInfo->getTargetLowering(); CGP.TRI = CGP.SubtargetInfo->getRegisterInfo(); @@ -557,7 +557,7 @@ PreservedAnalyses CodeGenPreparePass::run(Function &F, } bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) { - DL = &F.getParent()->getDataLayout(); + DL = &F.getDataLayout(); SubtargetInfo = TM->getSubtargetImpl(F); TLI = SubtargetInfo->getTargetLowering(); TRI = SubtargetInfo->getRegisterInfo(); @@ -972,10 +972,9 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, // that leads to this block. // FIXME: Is this really needed? Is this a correctness issue? 
   for (BasicBlock *Pred : predecessors(BB)) {
-    if (auto *CBI = dyn_cast<CallBrInst>((Pred)->getTerminator()))
-      for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i)
-        if (DestBB == CBI->getSuccessor(i))
-          return false;
+    if (isa<CallBrInst>(Pred->getTerminator()) &&
+        llvm::is_contained(successors(Pred), DestBB))
+      return false;
   }
 
   // Try to skip merging if the unique predecessor of BB is terminated by a
@@ -1195,12 +1194,12 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
 // derived pointer relocation instructions given a vector of all relocate calls
 static void computeBaseDerivedRelocateMap(
     const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
-    DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>>
+    MapVector<GCRelocateInst *, SmallVector<GCRelocateInst *, 0>>
         &RelocateInstMap) {
   // Collect information in two maps: one primarily for locating the base object
   // while filling the second map; the second map is the final structure holding
   // a mapping between Base and corresponding Derived relocate calls
-  DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
+  MapVector<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
   for (auto *ThisRelocate : AllRelocateCalls) {
     auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
                             ThisRelocate->getDerivedPtrIndex());
@@ -1376,7 +1375,7 @@ bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
 
   // RelocateInstMap is a mapping from the base relocate instruction to the
   // corresponding derived relocate instructions
-  DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap;
+  MapVector<GCRelocateInst *, SmallVector<GCRelocateInst *, 0>> RelocateInstMap;
   computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
   if (RelocateInstMap.empty())
     return false;
@@ -1432,10 +1431,8 @@ static bool SinkCast(CastInst *CI) {
     if (!InsertedCast) {
       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
       assert(InsertPt != UserBB->end());
-      InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),
-                                      CI->getType(), "");
+      InsertedCast = cast<CastInst>(CI->clone());
       InsertedCast->insertBefore(*UserBB, InsertPt);
-      InsertedCast->setDebugLoc(CI->getDebugLoc());
     }
 
     // Replace a use of the cast with a use of the new cast.
@@ -1502,8 +1499,8 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
 // Match a simple increment by constant operation. Note that if a sub is
 // matched, the step is negated (as if the step had been canonicalized to
 // an add, even though we leave the instruction alone.)
-bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
-                    Constant *&Step) {
+static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
+                           Constant *&Step) {
   if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
       match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
                        m_Instruction(LHS), m_Constant(Step)))))
@@ -1944,6 +1941,39 @@ static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
   return false;
 }
 
+static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
+                                  const DataLayout &DL) {
+  FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
+  if (!FCmp)
+    return false;
+
+  // Don't fold if the target offers free fabs and the predicate is legal.
+  EVT VT = TLI.getValueType(DL, Cmp->getOperand(0)->getType());
+  if (TLI.isFAbsFree(VT) &&
+      TLI.isCondCodeLegal(getFCmpCondCode(FCmp->getPredicate()),
+                          VT.getSimpleVT()))
+    return false;
+
+  // Reverse the canonicalization if it is a FP class test
+  auto ShouldReverseTransform = [](FPClassTest ClassTest) {
+    return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
+  };
+  auto [ClassVal, ClassTest] =
+      fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
+                      FCmp->getOperand(0), FCmp->getOperand(1));
+  if (!ClassVal)
+    return false;
+
+  if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
+    return false;
+
+  IRBuilder<> Builder(Cmp);
+  Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
+  Cmp->replaceAllUsesWith(IsFPClass);
+  RecursivelyDeleteTriviallyDeadInstructions(Cmp);
+  return true;
+}
+
 bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
   if (sinkCmpExpression(Cmp, *TLI))
     return true;
@@ -1960,6 +1990,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
   if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
     return true;
 
+  if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
+    return true;
+
   return false;
 }
 
@@ -2022,9 +2055,9 @@ static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI,
   // Keep the 'and' in the same place if the use is already in the same block.
   Instruction *InsertPt = User->getParent() == AndI->getParent() ? AndI : User;
-  Instruction *InsertedAnd =
-      BinaryOperator::Create(Instruction::And, AndI->getOperand(0),
-                             AndI->getOperand(1), "", InsertPt);
+  Instruction *InsertedAnd = BinaryOperator::Create(
+      Instruction::And, AndI->getOperand(0), AndI->getOperand(1), "",
+      InsertPt->getIterator());
   // Propagate the debug info.
   InsertedAnd->setDebugLoc(AndI->getDebugLoc());
 
@@ -2427,8 +2460,10 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
       break;
     case Intrinsic::assume:
       llvm_unreachable("llvm.assume should have been removed already");
+    case Intrinsic::allow_runtime_check:
+    case Intrinsic::allow_ubsan_check:
     case Intrinsic::experimental_widenable_condition: {
-      // Give up on future widening oppurtunties so that we can fold away dead
+      // Give up on future widening opportunities so that we can fold away dead
      // paths and merge blocks before going into block-local instruction
      // selection.
      if (II->use_empty()) {
@@ -2523,8 +2558,40 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
   return false;
 }
 
+static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo,
+                                          const CallInst *CI) {
+  assert(CI && CI->use_empty());
+
+  if (const auto *II = dyn_cast<IntrinsicInst>(CI))
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::memset:
+    case Intrinsic::memcpy:
+    case Intrinsic::memmove:
+      return true;
+    default:
+      return false;
+    }
+
+  LibFunc LF;
+  Function *Callee = CI->getCalledFunction();
+  if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
+    switch (LF) {
+    case LibFunc_strcpy:
+    case LibFunc_strncpy:
+    case LibFunc_strcat:
+    case LibFunc_strncat:
+      return true;
+    default:
+      return false;
+    }
+
+  return false;
+}
+
 /// Look for opportunities to duplicate return instructions to the predecessor
-/// to enable tail call optimizations. The case it is currently looking for is:
+/// to enable tail call optimizations. The case it is currently looking for is
+/// the following one. Known intrinsics or library function that may be tail
+/// called are taken into account as well.
 /// @code
 /// bb0:
 ///   %tmp0 = tail call i32 @f0()
 ///   br label %return
@@ -2581,8 +2648,6 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
     }
 
     PN = dyn_cast<PHINode>(V);
-    if (!PN)
-      return false;
   }
 
   if (PN && PN->getParent() != BB)
@@ -2621,8 +2686,30 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
       // Make sure the phi value is indeed produced by the tail call.
       if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
           TLI->mayBeEmittedAsTailCall(CI) &&
-          attributesPermitTailCall(F, CI, RetI, *TLI))
+          attributesPermitTailCall(F, CI, RetI, *TLI)) {
         TailCallBBs.push_back(PredBB);
+      } else {
+        // Consider the cases in which the phi value is indirectly produced by
+        // the tail call, for example when encountering memset(), memmove(),
+        // strcpy(), whose return value may have been optimized out. In such
+        // cases, the value needs to be the first function argument.
+        //
+        // bb0:
+        //   tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
+        //   br label %return
+        // return:
+        //   %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
+        if (PredBB && PredBB->getSingleSuccessor() == BB)
+          CI = dyn_cast_or_null<CallInst>(
+              PredBB->getTerminator()->getPrevNonDebugInstruction(true));
+
+        if (CI && CI->use_empty() &&
+            isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
+            IncomingVal == CI->getArgOperand(0) &&
+            TLI->mayBeEmittedAsTailCall(CI) &&
+            attributesPermitTailCall(F, CI, RetI, *TLI))
+          TailCallBBs.push_back(PredBB);
+      }
     }
   } else {
     SmallPtrSet<BasicBlock *, 4> VisitedBBs;
@@ -2632,8 +2719,15 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
       if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) {
         CallInst *CI = dyn_cast<CallInst>(I);
         if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
-            attributesPermitTailCall(F, CI, RetI, *TLI))
-          TailCallBBs.push_back(Pred);
+            attributesPermitTailCall(F, CI, RetI, *TLI)) {
+          // Either we return void or the return value must be the first
+          // argument of a known intrinsic or library function.
+          if (!V || isa<UndefValue>(V) ||
+              (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
+               V == CI->getArgOperand(0))) {
+            TailCallBBs.push_back(Pred);
+          }
+        }
       }
     }
   }
@@ -2888,7 +2982,7 @@ class TypePromotionTransaction {
       Instruction *PrevInst;
       BasicBlock *BB;
     } Point;
-    std::optional<DPValue::self_iterator> BeforeDPValue = std::nullopt;
+    std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
 
     /// Remember whether or not the instruction had a previous instruction.
     bool HasPrevInstruction;
@@ -2900,9 +2994,9 @@ class TypePromotionTransaction {
      BasicBlock *BB = Inst->getParent();
 
      // Record where we would have to re-insert the instruction in the sequence
-      // of DPValues, if we ended up reinserting.
+      // of DbgRecords, if we ended up reinserting.
      if (BB->IsNewDbgInfoFormat)
-        BeforeDPValue = Inst->getDbgReinsertionPosition();
+        BeforeDbgRecord = Inst->getDbgReinsertionPosition();
 
      if (HasPrevInstruction) {
        Point.PrevInst = &*std::prev(Inst->getIterator());
@@ -2925,7 +3019,7 @@ class TypePromotionTransaction {
        Inst->insertBefore(*Point.BB, Position);
      }
 
-      Inst->getParent()->reinsertInstInDPValues(Inst, BeforeDPValue);
+      Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
    }
  };
 
@@ -3129,7 +3223,7 @@ class TypePromotionTransaction {
     /// Keep track of the debug users.
     SmallVector<DbgValueInst *, 1> DbgValues;
     /// And non-instruction debug-users too.
-    SmallVector<DPValue *, 1> DPValues;
+    SmallVector<DbgVariableRecord *, 1> DbgVariableRecords;
 
     /// Keep track of the new value so that we can undo it by replacing
     /// instances of the new value with the original value.
@@ -3150,7 +3244,7 @@ class TypePromotionTransaction {
       }
       // Record the debug uses separately. They are not in the instruction's
       // use list, but they are replaced by RAUW.
-      findDbgValues(DbgValues, Inst, &DPValues);
+      findDbgValues(DbgValues, Inst, &DbgVariableRecords);
 
       // Now, we can replace the uses.
       Inst->replaceAllUsesWith(New);
@@ -3167,10 +3261,10 @@ class TypePromotionTransaction {
       // correctness and utility of debug value instructions.
       for (auto *DVI : DbgValues)
         DVI->replaceVariableLocationOp(New, Inst);
-      // Similar story with DPValues, the non-instruction representation of
-      // dbg.values.
-      for (DPValue *DPV : DPValues) // tested by transaction-test I'm adding
-        DPV->replaceVariableLocationOp(New, Inst);
+      // Similar story with DbgVariableRecords, the non-instruction
+      // representation of dbg.values.
+      for (DbgVariableRecord *DVR : DbgVariableRecords)
+        DVR->replaceVariableLocationOp(New, Inst);
     }
   };
 
@@ -3402,7 +3496,7 @@ class AddressingModeMatcher {
       std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
       bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
       : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
-        DL(MI->getModule()->getDataLayout()), LI(LI), getDTFn(getDTFn),
+        DL(MI->getDataLayout()), LI(LI), getDTFn(getDTFn),
         AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
         InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
         LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
@@ -4059,9 +4153,10 @@ private:
     if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
       // Is it OK to get metadata from OrigSelect?!
       // Create a Select placeholder with dummy value.
-      SelectInst *Select = SelectInst::Create(
-          CurrentSelect->getCondition(), Dummy, Dummy,
-          CurrentSelect->getName(), CurrentSelect, CurrentSelect);
+      SelectInst *Select =
+          SelectInst::Create(CurrentSelect->getCondition(), Dummy, Dummy,
+                             CurrentSelect->getName(),
+                             CurrentSelect->getIterator(), CurrentSelect);
       Map[Current] = Select;
       ST.insertNewSelect(Select);
       // We are interested in True and False values.
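An illustrative aside, not part of the patch: the foldFCmpToFPClassTest() hunk earlier in this file's diff reverses the usual canonicalization and emits the llvm.is.fpclass intrinsic for infinity-style fcmp patterns, so the backend can select a cheap class-test instruction instead of fabs+fcmp. The minimal standalone C++ sketch below uses the same IRBuilder call the hunk uses; the module name, the "is_inf" function, and the choice to test the fcInf mask (0x204 = fcNegInf | fcPosInf, per llvm/ADT/FloatingPointMode.h) are illustrative assumptions, not code from this commit.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("fpclass_demo", Ctx); // hypothetical module name

  // Hypothetical demo function: i1 is_inf(double).
  auto *FnTy =
      FunctionType::get(Type::getInt1Ty(Ctx), {Type::getDoubleTy(Ctx)}, false);
  Function *F = Function::Create(FnTy, Function::ExternalLinkage, "is_inf", M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> B(BB);

  // Same builder call the pass uses: emit llvm.is.fpclass with the fcInf
  // mask rather than the fabs+fcmp sequence it replaces.
  Value *IsInf = B.createIsFPClass(F->getArg(0), /*fcInf=*/0x204);
  B.CreateRet(IsInf);

  M.print(outs(), nullptr);
  return 0;
}

Built against a matching LLVM, the printed IR should contain a single call to @llvm.is.fpclass.f64(double %x, i32 516), which is the form the new fold produces.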
@@ -4072,7 +4167,7 @@ private:
       PHINode *CurrentPhi = cast<PHINode>(Current);
       unsigned PredCount = CurrentPhi->getNumIncomingValues();
       PHINode *PHI =
-          PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi);
+          PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi->getIterator());
       Map[Current] = PHI;
       ST.insertNewPhi(PHI);
       append_range(Worklist, CurrentPhi->incoming_values());
@@ -4985,6 +5080,15 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
     }
     return true;
   }
+  case Instruction::Call:
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(AddrInst)) {
+      if (II->getIntrinsicID() == Intrinsic::threadlocal_address) {
+        GlobalValue &GV = cast<GlobalValue>(*II->getArgOperand(0));
+        if (TLI.addressingModeSupportsTLS(GV))
+          return matchAddr(AddrInst->getOperand(0), Depth);
+      }
+    }
+    break;
   }
   return false;
 }
@@ -5081,7 +5185,7 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
                                     const TargetRegisterInfo &TRI) {
   const Function *F = CI->getFunction();
   TargetLowering::AsmOperandInfoVector TargetConstraints =
-      TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI, *CI);
+      TLI.ParseConstraints(F->getDataLayout(), &TRI, *CI);
 
   for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
     // Compute the constraint code and ConstraintType to use.
@@ -5523,11 +5627,16 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
         return Modified;
       }
 
-      if (AddrMode.BaseGV) {
+      GlobalValue *BaseGV = AddrMode.BaseGV;
+      if (BaseGV != nullptr) {
         if (ResultPtr)
           return Modified;
 
-        ResultPtr = AddrMode.BaseGV;
+        if (BaseGV->isThreadLocal()) {
+          ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
+        } else {
+          ResultPtr = BaseGV;
+        }
       }
 
       // If the real base value actually came from an inttoptr, then the matcher
@@ -5692,8 +5801,15 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
     }
 
     // Add in the BaseGV if present.
-    if (AddrMode.BaseGV) {
-      Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
+    GlobalValue *BaseGV = AddrMode.BaseGV;
+    if (BaseGV != nullptr) {
+      Value *BaseGVPtr;
+      if (BaseGV->isThreadLocal()) {
+        BaseGVPtr = Builder.CreateThreadLocalAddress(BaseGV);
+      } else {
+        BaseGVPtr = BaseGV;
+      }
+      Value *V = Builder.CreatePtrToInt(BaseGVPtr, IntPtrTy, "sunkaddr");
       if (Result)
         Result = Builder.CreateAdd(Result, V, "sunkaddr");
       else
@@ -6155,9 +6271,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
     };
     // Sorting all the GEPs of the same data structures based on the offsets.
     llvm::sort(LargeOffsetGEPs, compareGEPOffset);
-    LargeOffsetGEPs.erase(
-        std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()),
-        LargeOffsetGEPs.end());
+    LargeOffsetGEPs.erase(llvm::unique(LargeOffsetGEPs), LargeOffsetGEPs.end());
     // Skip if all the GEPs have the same offsets.
     if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
       continue;
@@ -6372,13 +6486,13 @@ bool CodeGenPrepare::optimizePhiType(
       ValMap[D] = D->getOperand(0);
       DeletedInstrs.insert(D);
     } else {
-      ValMap[D] =
-          new BitCastInst(D, ConvertTy, D->getName() + ".bc", D->getNextNode());
+      BasicBlock::iterator insertPt = std::next(D->getIterator());
+      ValMap[D] = new BitCastInst(D, ConvertTy, D->getName() + ".bc", insertPt);
     }
   }
   for (PHINode *Phi : PhiNodes)
     ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
-                                  Phi->getName() + ".tc", Phi);
+                                  Phi->getName() + ".tc", Phi->getIterator());
   // Pipe together all the PhiNodes.
   for (PHINode *Phi : PhiNodes) {
     PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
@@ -6393,8 +6507,8 @@ bool CodeGenPrepare::optimizePhiType(
       DeletedInstrs.insert(U);
       replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc);
     } else {
-      U->setOperand(0,
-                    new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc", U));
+      U->setOperand(0, new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc",
+                                       U->getIterator()));
     }
   }
 
@@ -7022,9 +7136,9 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
   CurInstIterator = std::next(LastSI->getIterator());
   // Examine debug-info attached to the consecutive select instructions. They
   // won't be individually optimised by optimizeInst, so we need to perform
-  // DPValue maintenence here instead.
+  // DbgVariableRecord maintenence here instead.
   for (SelectInst *SI : ArrayRef(ASI).drop_front())
-    fixupDPValuesOnInst(*SI);
+    fixupDbgVariableRecordsOnInst(*SI);
 
   bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
 
@@ -7918,7 +8032,7 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
   if (HBC && HBC->getParent() != SI.getParent())
     HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
 
-  bool IsLE = SI.getModule()->getDataLayout().isLittleEndian();
+  bool IsLE = SI.getDataLayout().isLittleEndian();
   auto CreateSplitStore = [&](Value *V, bool Upper) {
     V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
     Value *Addr = SI.getPointerOperand();
@@ -8182,7 +8296,7 @@ static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
 bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
   bool AnyChange = false;
-  AnyChange = fixupDPValuesOnInst(*I);
+  AnyChange = fixupDbgVariableRecordsOnInst(*I);
 
   // Bail out if we inserted the instruction to prevent optimizations from
   // stepping on each other's toes.
@@ -8217,7 +8331,8 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
     if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
       return true;
 
-    if ((isa<UIToFPInst>(I) || isa<FPToUIInst>(I) || isa<TruncInst>(I)) &&
+    if ((isa<UIToFPInst>(I) || isa<SIToFPInst>(I) || isa<FPToUIInst>(I) ||
+         isa<TruncInst>(I)) &&
        TLI->optimizeExtendOrTruncateConversion(
            I, LI->getLoopFor(I->getParent()), *TTI))
      return true;
@@ -8292,7 +8407,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
     if (GEPI->hasAllZeroIndices()) {
       /// The GEP operand must be a pointer, so must its result -> BitCast
       Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
-                                        GEPI->getName(), GEPI);
+                                        GEPI->getName(), GEPI->getIterator());
       NC->setDebugLoc(GEPI->getDebugLoc());
       replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
       RecursivelyDeleteTriviallyDeadInstructions(
@@ -8324,7 +8439,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
                     isa<ConstantPointerNull>(Op1);
     if (Const0 || Const1) {
       if (!Const0 || !Const1) {
-        auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI);
+        auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI->getIterator());
         F->takeName(FI);
         CmpI->setOperand(Const0 ? 1 : 0, F);
       }
@@ -8448,23 +8563,24 @@ bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
   return AnyChange;
 }
 
-bool CodeGenPrepare::fixupDPValuesOnInst(Instruction &I) {
+bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(Instruction &I) {
   bool AnyChange = false;
-  for (DPValue &DPV : I.getDbgValueRange())
-    AnyChange |= fixupDPValue(DPV);
+  for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
+    AnyChange |= fixupDbgVariableRecord(DVR);
   return AnyChange;
 }
 
 // FIXME: should updating debug-info really cause the "changed" flag to fire,
 // which can cause a function to be reprocessed?
-bool CodeGenPrepare::fixupDPValue(DPValue &DPV) {
-  if (DPV.Type != DPValue::LocationType::Value)
+bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) {
+  if (DVR.Type != DbgVariableRecord::LocationType::Value &&
+      DVR.Type != DbgVariableRecord::LocationType::Assign)
     return false;
 
-  // Does this DPValue refer to a sunk address calculation?
+  // Does this DbgVariableRecord refer to a sunk address calculation?
   bool AnyChange = false;
-  SmallDenseSet<Value *> LocationOps(DPV.location_ops().begin(),
-                                     DPV.location_ops().end());
+  SmallDenseSet<Value *> LocationOps(DVR.location_ops().begin(),
+                                     DVR.location_ops().end());
   for (Value *Location : LocationOps) {
     WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
     Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
@@ -8474,7 +8590,7 @@ bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) {
       // of pointer being referred to; however this makes no difference to
      // debugging information, and we can't generate bitcasts that may affect
      // codegen.
-      DPV.replaceVariableLocationOp(Location, SunkAddr);
+      DVR.replaceVariableLocationOp(Location, SunkAddr);
       AnyChange = true;
    }
  }
@@ -8489,13 +8605,13 @@ static void DbgInserterHelper(DbgValueInst *DVI, Instruction *VI) {
     DVI->insertAfter(VI);
 }
 
-static void DbgInserterHelper(DPValue *DPV, Instruction *VI) {
-  DPV->removeFromParent();
+static void DbgInserterHelper(DbgVariableRecord *DVR, Instruction *VI) {
+  DVR->removeFromParent();
   BasicBlock *VIBB = VI->getParent();
   if (isa<PHINode>(VI))
-    VIBB->insertDPValueBefore(DPV, VIBB->getFirstInsertionPt());
+    VIBB->insertDbgRecordBefore(DVR, VIBB->getFirstInsertionPt());
   else
-    VIBB->insertDPValueAfter(DPV, VI);
+    VIBB->insertDbgRecordAfter(DVR, VI);
 }
 
 // A llvm.dbg.value may be using a value before its definition, due to
@@ -8560,12 +8676,13 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
       continue;
     }
 
-    // If this isn't a dbg.value, process any attached DPValue records
-    // attached to this instruction.
-    for (DPValue &DPV : llvm::make_early_inc_range(Insn.getDbgValueRange())) {
-      if (DPV.Type != DPValue::LocationType::Value)
+    // If this isn't a dbg.value, process any attached DbgVariableRecord
+    // records attached to this instruction.
+    for (DbgVariableRecord &DVR : llvm::make_early_inc_range(
+             filterDbgVars(Insn.getDbgRecordRange()))) {
+      if (DVR.Type != DbgVariableRecord::LocationType::Value)
         continue;
-      DbgProcessor(&DPV, &Insn);
+      DbgProcessor(&DVR, &Insn);
     }
   }
 }
@@ -8748,7 +8865,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
       scaleWeights(NewTrueWeight, NewFalseWeight);
       Br1->setMetadata(LLVMContext::MD_prof,
                        MDBuilder(Br1->getContext())
-                           .createBranchWeights(TrueWeight, FalseWeight));
+                           .createBranchWeights(TrueWeight, FalseWeight,
+                                                hasBranchWeightOrigin(*Br1)));
 
       NewTrueWeight = TrueWeight;
       NewFalseWeight = 2 * FalseWeight;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
index 51406fb287e6..9e42deb94903 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
@@ -85,17 +85,18 @@ CGOPT(bool, StackRealign)
 CGOPT(std::string, TrapFuncName)
 CGOPT(bool, UseCtors)
 CGOPT(bool, DisableIntegratedAS)
-CGOPT(bool, RelaxELFRelocations)
 CGOPT_EXP(bool, DataSections)
 CGOPT_EXP(bool, FunctionSections)
 CGOPT(bool, IgnoreXCOFFVisibility)
 CGOPT(bool, XCOFFTracebackTable)
+CGOPT(bool, EnableBBAddrMap)
 CGOPT(std::string, BBSections)
 CGOPT(unsigned, TLSSize)
 CGOPT_EXP(bool, EmulatedTLS)
 CGOPT_EXP(bool, EnableTLSDESC)
 CGOPT(bool, UniqueSectionNames)
 CGOPT(bool, UniqueBasicBlockSectionNames)
+CGOPT(bool, SeparateNamedSections)
 CGOPT(EABI, EABIVersion)
 CGOPT(DebuggerKind, DebuggerTuningOpt)
 CGOPT(bool, EnableStackSizeSection)
@@ -210,6 +211,9 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
                     "Disable frame pointer elimination"),
          clEnumValN(FramePointerKind::NonLeaf, "non-leaf",
                     "Disable frame pointer elimination for non-leaf frame"),
+         clEnumValN(FramePointerKind::Reserved, "reserved",
+                    "Enable frame pointer elimination, but reserve the frame "
+                    "pointer register"),
          clEnumValN(FramePointerKind::None, "none",
                     "Enable frame pointer elimination")));
   CGBINDOPT(FramePointerUsage);
@@ -361,13 +365,6 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
       cl::init(false));
   CGBINDOPT(UseCtors);
 
-  static cl::opt<bool> RelaxELFRelocations(
-      "relax-elf-relocations",
-      cl::desc(
-          "Emit GOTPCRELX/REX_GOTPCRELX instead of GOTPCREL on x86-64 ELF"),
-      cl::init(true));
-  CGBINDOPT(RelaxELFRelocations);
-
   static cl::opt<bool> DataSections(
       "data-sections", cl::desc("Emit data into separate sections"),
       cl::init(false));
@@ -390,6 +387,11 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
       cl::init(true));
   CGBINDOPT(XCOFFTracebackTable);
 
+  static cl::opt<bool> EnableBBAddrMap(
+      "basic-block-address-map",
+      cl::desc("Emit the basic block address map section"), cl::init(false));
+  CGBINDOPT(EnableBBAddrMap);
+
   static cl::opt<std::string> BBSections(
       "basic-block-sections",
       cl::desc("Emit basic blocks into separate sections"),
@@ -421,6 +423,12 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
       cl::init(false));
   CGBINDOPT(UniqueBasicBlockSectionNames);
 
+  static cl::opt<bool> SeparateNamedSections(
+      "separate-named-sections",
+      cl::desc("Use separate unique sections for named sections"),
+      cl::init(false));
+  CGBINDOPT(SeparateNamedSections);
+
   static cl::opt<EABI> EABIVersion(
       "meabi", cl::desc("Set EABI type (default depends on triple):"),
       cl::init(EABI::Default),
@@ -562,15 +570,16 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
   Options.StackSymbolOrdering = getStackSymbolOrdering();
   Options.UseInitArray = !getUseCtors();
   Options.DisableIntegratedAS = getDisableIntegratedAS();
-  Options.RelaxELFRelocations = getRelaxELFRelocations();
   Options.DataSections =
       getExplicitDataSections().value_or(TheTriple.hasDefaultDataSections());
   Options.FunctionSections = getFunctionSections();
   Options.IgnoreXCOFFVisibility = getIgnoreXCOFFVisibility();
   Options.XCOFFTracebackTable = getXCOFFTracebackTable();
+  Options.BBAddrMap = getEnableBBAddrMap();
   Options.BBSections = getBBSectionsMode(Options);
   Options.UniqueSectionNames = getUniqueSectionNames();
   Options.UniqueBasicBlockSectionNames = getUniqueBasicBlockSectionNames();
+  Options.SeparateNamedSections = getSeparateNamedSections();
   Options.TLSSize = getTLSSize();
   Options.EmulatedTLS =
       getExplicitEmulatedTLS().value_or(TheTriple.hasDefaultEmulatedTLS());
@@ -615,12 +624,9 @@ std::string codegen::getFeaturesStr() {
   // This is necessary for x86 where the CPU might not support all the
   // features the autodetected CPU name lists in the target. For example,
   // not all Sandybridge processors support AVX.
-  if (getMCPU() == "native") {
-    StringMap<bool> HostFeatures;
-    if (sys::getHostCPUFeatures(HostFeatures))
-      for (const auto &[Feature, IsEnabled] : HostFeatures)
-        Features.AddFeature(Feature, IsEnabled);
-  }
+  if (getMCPU() == "native")
+    for (const auto &[Feature, IsEnabled] : sys::getHostCPUFeatures())
+      Features.AddFeature(Feature, IsEnabled);
 
   for (auto const &MAttr : getMAttrs())
     Features.AddFeature(MAttr);
@@ -635,12 +641,9 @@ std::vector<std::string> codegen::getFeatureList() {
   // This is necessary for x86 where the CPU might not support all the
   // features the autodetected CPU name lists in the target. For example,
   // not all Sandybridge processors support AVX.
-  if (getMCPU() == "native") {
-    StringMap<bool> HostFeatures;
-    if (sys::getHostCPUFeatures(HostFeatures))
-      for (const auto &[Feature, IsEnabled] : HostFeatures)
-        Features.AddFeature(Feature, IsEnabled);
-  }
+  if (getMCPU() == "native")
+    for (const auto &[Feature, IsEnabled] : sys::getHostCPUFeatures())
+      Features.AddFeature(Feature, IsEnabled);
 
   for (auto const &MAttr : getMAttrs())
     Features.AddFeature(MAttr);
@@ -687,6 +690,8 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
       NewAttrs.addAttribute("frame-pointer", "all");
     else if (getFramePointerUsage() == FramePointerKind::NonLeaf)
       NewAttrs.addAttribute("frame-pointer", "non-leaf");
+    else if (getFramePointerUsage() == FramePointerKind::Reserved)
+      NewAttrs.addAttribute("frame-pointer", "reserved");
     else if (getFramePointerUsage() == FramePointerKind::None)
       NewAttrs.addAttribute("frame-pointer", "none");
   }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
index a6cacf874bdc..8573b016d1e5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
@@ -1639,8 +1639,7 @@ bool ComplexDeinterleavingGraph::checkNodes() {
 ComplexDeinterleavingGraph::NodePtr
 ComplexDeinterleavingGraph::identifyRoot(Instruction *RootI) {
   if (auto *Intrinsic = dyn_cast<IntrinsicInst>(RootI)) {
-    if (Intrinsic->getIntrinsicID() !=
-        Intrinsic::experimental_vector_interleave2)
+    if (Intrinsic->getIntrinsicID() != Intrinsic::vector_interleave2)
       return nullptr;
 
     auto *Real = dyn_cast<Instruction>(Intrinsic->getOperand(0));
@@ -1675,7 +1674,7 @@ ComplexDeinterleavingGraph::identifyDeinterleave(Instruction *Real,
   Value *FinalValue = nullptr;
   if (match(Real, m_ExtractValue<0>(m_Instruction(I))) &&
       match(Imag, m_ExtractValue<1>(m_Specific(I))) &&
-      match(I, m_Intrinsic<Intrinsic::experimental_vector_deinterleave2>(
+      match(I, m_Intrinsic<Intrinsic::vector_deinterleave2>(
                    m_Value(FinalValue)))) {
     NodePtr PlaceholderNode = prepareCompositeNode(
         llvm::ComplexDeinterleavingOperation::Deinterleave, Real, Imag);
@@ -1960,13 +1959,11 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder,
       // Splats that are not constant are interleaved where they are located
       Instruction *InsertPoint = (I->comesBefore(R) ? R : I)->getNextNode();
       IRBuilder<> IRB(InsertPoint);
-      ReplacementNode =
-          IRB.CreateIntrinsic(Intrinsic::experimental_vector_interleave2, NewTy,
-                              {Node->Real, Node->Imag});
+      ReplacementNode = IRB.CreateIntrinsic(Intrinsic::vector_interleave2,
+                                            NewTy, {Node->Real, Node->Imag});
     } else {
-      ReplacementNode =
-          Builder.CreateIntrinsic(Intrinsic::experimental_vector_interleave2,
-                                  NewTy, {Node->Real, Node->Imag});
+      ReplacementNode = Builder.CreateIntrinsic(
+          Intrinsic::vector_interleave2, NewTy, {Node->Real, Node->Imag});
     }
     break;
   }
@@ -1975,7 +1972,7 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder,
     // It is filled later when the ReductionOperation is processed.
     auto *VTy = cast<VectorType>(Node->Real->getType());
     auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy);
-    auto *NewPHI = PHINode::Create(NewVTy, 0, "", BackEdge->getFirstNonPHI());
+    auto *NewPHI = PHINode::Create(NewVTy, 0, "", BackEdge->getFirstNonPHIIt());
     OldToNewPHI[dyn_cast<PHINode>(Node->Real)] = NewPHI;
     ReplacementNode = NewPHI;
     break;
@@ -1991,9 +1988,8 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder,
     auto *B = replaceNode(Builder, Node->Operands[1]);
     auto *NewMaskTy = VectorType::getDoubleElementsVectorType(
         cast<VectorType>(MaskReal->getType()));
-    auto *NewMask =
-        Builder.CreateIntrinsic(Intrinsic::experimental_vector_interleave2,
-                                NewMaskTy, {MaskReal, MaskImag});
+    auto *NewMask = Builder.CreateIntrinsic(Intrinsic::vector_interleave2,
+                                            NewMaskTy, {MaskReal, MaskImag});
     ReplacementNode = Builder.CreateSelect(NewMask, A, B);
     break;
   }
@@ -2021,8 +2017,8 @@ void ComplexDeinterleavingGraph::processReductionOperation(
   Value *InitImag = OldPHIImag->getIncomingValueForBlock(Incoming);
 
   IRBuilder<> Builder(Incoming->getTerminator());
-  auto *NewInit = Builder.CreateIntrinsic(
-      Intrinsic::experimental_vector_interleave2, NewVTy, {InitReal, InitImag});
+  auto *NewInit = Builder.CreateIntrinsic(Intrinsic::vector_interleave2, NewVTy,
+                                          {InitReal, InitImag});
 
   NewPHI->addIncoming(NewInit, Incoming);
   NewPHI->addIncoming(OperationReplacement, BackEdge);
@@ -2034,9 +2030,9 @@ void ComplexDeinterleavingGraph::processReductionOperation(
   Builder.SetInsertPoint(
       &*FinalReductionReal->getParent()->getFirstInsertionPt());
 
-  auto *Deinterleave = Builder.CreateIntrinsic(
-      Intrinsic::experimental_vector_deinterleave2,
-      OperationReplacement->getType(), OperationReplacement);
+  auto *Deinterleave = Builder.CreateIntrinsic(Intrinsic::vector_deinterleave2,
+                                               OperationReplacement->getType(),
+                                               OperationReplacement);
   auto *NewReal = Builder.CreateExtractValue(Deinterleave, (uint64_t)0);
   FinalReductionReal->replaceUsesOfWith(Real, NewReal);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp
index 48bb4a07662e..c16166a1d5e1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -252,12 +252,13 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
 bool VLIWPacketizerList::alias(const MachineMemOperand &Op1,
                                const MachineMemOperand &Op2,
                                bool UseTBAA) const {
-  if (!Op1.getValue() || !Op2.getValue())
+  if (!Op1.getValue() || !Op2.getValue() || !Op1.getSize().hasValue() ||
+      !Op2.getSize().hasValue())
     return true;
 
   int64_t MinOffset = std::min(Op1.getOffset(), Op2.getOffset());
-  int64_t Overlapa = Op1.getSize() + Op1.getOffset() - MinOffset;
-  int64_t Overlapb = Op2.getSize() + Op2.getOffset() - MinOffset;
+  int64_t Overlapa = Op1.getSize().getValue() + Op1.getOffset() - MinOffset;
+  int64_t Overlapb = Op2.getSize().getValue() + Op2.getOffset() - MinOffset;
 
   AliasResult AAResult =
       AA->alias(MemoryLocation(Op1.getValue(), Overlapa,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index 6a7de3b241fe..578854cdb4a5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -10,6 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/DeadMachineInstructionElim.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveRegUnits.h"
@@ -28,37 +29,57 @@ using namespace llvm;
 STATISTIC(NumDeletes, "Number of dead instructions deleted");
 
 namespace {
-  class DeadMachineInstructionElim : public MachineFunctionPass {
-    bool runOnMachineFunction(MachineFunction &MF) override;
+class DeadMachineInstructionElimImpl {
+  const MachineRegisterInfo *MRI = nullptr;
+  const TargetInstrInfo *TII = nullptr;
+  LiveRegUnits LivePhysRegs;
 
-    const MachineRegisterInfo *MRI = nullptr;
-    const TargetInstrInfo *TII = nullptr;
-    LiveRegUnits LivePhysRegs;
+public:
+  bool runImpl(MachineFunction &MF);
 
-  public:
-    static char ID; // Pass identification, replacement for typeid
-    DeadMachineInstructionElim() : MachineFunctionPass(ID) {
-      initializeDeadMachineInstructionElimPass(*PassRegistry::getPassRegistry());
-    }
+private:
+  bool isDead(const MachineInstr *MI) const;
+  bool eliminateDeadMI(MachineFunction &MF);
+};
 
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.setPreservesCFG();
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
+class DeadMachineInstructionElim : public MachineFunctionPass {
+public:
+  static char ID; // Pass identification, replacement for typeid
 
-  private:
-    bool isDead(const MachineInstr *MI) const;
+  DeadMachineInstructionElim() : MachineFunctionPass(ID) {
+    initializeDeadMachineInstructionElimPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    if (skipFunction(MF.getFunction()))
+      return false;
+    return DeadMachineInstructionElimImpl().runImpl(MF);
+  }
 
-    bool eliminateDeadMI(MachineFunction &MF);
-  };
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+} // namespace
+
+PreservedAnalyses
+DeadMachineInstructionElimPass::run(MachineFunction &MF,
+                                    MachineFunctionAnalysisManager &) {
+  if (!DeadMachineInstructionElimImpl().runImpl(MF))
+    return PreservedAnalyses::all();
+  PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
 }
+
 char DeadMachineInstructionElim::ID = 0;
 char &llvm::DeadMachineInstructionElimID = DeadMachineInstructionElim::ID;
 INITIALIZE_PASS(DeadMachineInstructionElim, DEBUG_TYPE,
                 "Remove dead machine instructions", false, false)
 
-bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
+bool DeadMachineInstructionElimImpl::isDead(const MachineInstr *MI) const {
   // Technically speaking inline asm without side effects and no defs can still
   // be deleted. But there is so much bad inline asm code out there, we should
   // let them be.
@@ -102,10 +123,7 @@ bool DeadMachineInstructionElimImpl::isDead(const MachineInstr *MI) const {
   return true;
 }
 
-bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(MF.getFunction()))
-    return false;
-
+bool DeadMachineInstructionElimImpl::runImpl(MachineFunction &MF) {
   MRI = &MF.getRegInfo();
 
   const TargetSubtargetInfo &ST = MF.getSubtarget();
@@ -118,7 +136,7 @@ bool DeadMachineInstructionElimImpl::runImpl(MachineFunction &MF) {
   return AnyChanges;
 }
 
-bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {
+bool DeadMachineInstructionElimImpl::eliminateDeadMI(MachineFunction &MF) {
   bool AnyChanges = false;
 
   // Loop over all instructions in all blocks, from bottom to top, so that it's
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index e7eb34d8e651..324329ce989e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -18,7 +18,7 @@
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/RuntimeLibcallUtil.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -111,7 +111,8 @@ Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
   }
 
   if (!ExnObj)
-    ExnObj = ExtractValueInst::Create(RI->getOperand(0), 0, "exn.obj", RI);
+    ExnObj = ExtractValueInst::Create(RI->getOperand(0), 0, "exn.obj",
+                                      RI->getIterator());
 
   RI->eraseFromParent();
 
@@ -158,7 +159,7 @@ size_t DwarfEHPrepare::pruneUnreachableResumes(
       Resumes[ResumesLeft++] = RI;
     } else {
       BasicBlock *BB = RI->getParent();
-      new UnreachableInst(Ctx, RI);
+      new UnreachableInst(Ctx, RI->getIterator());
       RI->eraseFromParent();
       simplifyCFG(BB, *TTI, DTU);
     }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EHContGuardCatchret.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EHContGuardCatchret.cpp
index b26aa792bb93..cd1cdb065361 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/EHContGuardCatchret.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/EHContGuardCatchret.cpp
@@ -19,6 +19,7 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
 
 using namespace llvm;
@@ -61,7 +62,7 @@ FunctionPass *llvm::createEHContGuardCatchretPass() {
 
 bool EHContGuardCatchret::runOnMachineFunction(MachineFunction &MF) {
   // Skip modules for which the ehcontguard flag is not set.
-  if (!MF.getMMI().getModule()->getModuleFlag("ehcontguard"))
+  if (!MF.getFunction().getParent()->getModuleFlag("ehcontguard"))
     return false;
 
   // Skip functions that do not have catchret
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 31e107ade1cc..a5c99498921d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -599,8 +599,8 @@ static bool hasSameValue(const MachineRegisterInfo &MRI,
     return false;
 
   // Further, check that the two defs come from corresponding operands.
-  int TIdx = TDef->findRegisterDefOperandIdx(TReg);
-  int FIdx = FDef->findRegisterDefOperandIdx(FReg);
+  int TIdx = TDef->findRegisterDefOperandIdx(TReg, /*TRI=*/nullptr);
+  int FIdx = FDef->findRegisterDefOperandIdx(FReg, /*TRI=*/nullptr);
   if (TIdx == -1 || FIdx == -1)
     return false;
 
@@ -617,8 +617,7 @@ void SSAIfConv::replacePHIInstrs() {
   DebugLoc HeadDL = FirstTerm->getDebugLoc();
 
   // Convert all PHIs to select instructions inserted before FirstTerm.
-  for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
-    PHIInfo &PI = PHIs[i];
+  for (PHIInfo &PI : PHIs) {
     LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);
     Register DstReg = PI.PHI->getOperand(0).getReg();
     if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) {
@@ -645,8 +644,7 @@ void SSAIfConv::rewritePHIOperands() {
   DebugLoc HeadDL = FirstTerm->getDebugLoc();
 
   // Convert all PHIs to select instructions inserted before FirstTerm.
-  for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
-    PHIInfo &PI = PHIs[i];
+  for (PHIInfo &PI : PHIs) {
     unsigned DstReg = 0;
 
     LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);
@@ -789,18 +787,18 @@ char &llvm::EarlyIfConverterID = EarlyIfConverter::ID;
 
 INITIALIZE_PASS_BEGIN(EarlyIfConverter, DEBUG_TYPE,
                       "Early If Converter", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
 INITIALIZE_PASS_END(EarlyIfConverter, DEBUG_TYPE,
                     "Early If Converter", false, false)
 
 void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<MachineBranchProbabilityInfo>();
-  AU.addRequired<MachineDominatorTree>();
-  AU.addPreserved<MachineDominatorTree>();
-  AU.addRequired<MachineLoopInfo>();
-  AU.addPreserved<MachineLoopInfo>();
+  AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+  AU.addRequired<MachineDominatorTreeWrapperPass>();
+  AU.addPreserved<MachineDominatorTreeWrapperPass>();
+  AU.addRequired<MachineLoopInfoWrapperPass>();
+  AU.addPreserved<MachineLoopInfoWrapperPass>();
   AU.addRequired<MachineTraceMetrics>();
   AU.addPreserved<MachineTraceMetrics>();
   MachineFunctionPass::getAnalysisUsage(AU);
@@ -962,8 +960,7 @@ bool EarlyIfConverter::shouldConvertIf() {
   CriticalPathInfo TBlock{};
   CriticalPathInfo FBlock{};
   bool ShouldConvert = true;
-  for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) {
-    SSAIfConv::PHIInfo &PI = IfConv.PHIs[i];
+  for (SSAIfConv::PHIInfo &PI : IfConv.PHIs) {
     unsigned Slack = TailTrace.getInstrSlack(*PI.PHI);
     unsigned MaxDepth = Slack + TailTrace.getInstrCycles(*PI.PHI).Depth;
     LLVM_DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI);
@@ -1089,8 +1086,8 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
   TRI = STI.getRegisterInfo();
   SchedModel = STI.getSchedModel();
   MRI = &MF.getRegInfo();
-  DomTree = &getAnalysis<MachineDominatorTree>();
-  Loops = &getAnalysis<MachineLoopInfo>();
+  DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+  Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
   Traces = &getAnalysis<MachineTraceMetrics>();
   MinInstr = nullptr;
 
@@ -1144,17 +1141,17 @@ char &llvm::EarlyIfPredicatorID = EarlyIfPredicator::ID;
 
 INITIALIZE_PASS_BEGIN(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator",
                       false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
 INITIALIZE_PASS_END(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator",
                     false, false)
 
 void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<MachineBranchProbabilityInfo>();
-  AU.addRequired<MachineDominatorTree>();
-  AU.addPreserved<MachineDominatorTree>();
-  AU.addRequired<MachineLoopInfo>();
-  AU.addPreserved<MachineLoopInfo>();
+  AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+  AU.addRequired<MachineDominatorTreeWrapperPass>();
+  AU.addPreserved<MachineDominatorTreeWrapperPass>();
+  AU.addRequired<MachineLoopInfoWrapperPass>();
+  AU.addPreserved<MachineLoopInfoWrapperPass>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
@@ -1223,9 +1220,9 @@ bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) {
   TRI = STI.getRegisterInfo();
   MRI = &MF.getRegInfo();
   SchedModel.init(&STI);
-  DomTree = &getAnalysis<MachineDominatorTree>();
-  Loops = &getAnalysis<MachineLoopInfo>();
-  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+  DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+  Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
+  MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
 
   bool Changed = false;
   IfConv.runOnMachineFunction(MF);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp
index 973c814604b3..ab893410fabc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp
@@ -54,8 +54,32 @@ static bool isSigned(unsigned int Opcode) {
   return Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
 }
 
+static void scalarize(BinaryOperator *BO,
+                      SmallVectorImpl<BinaryOperator *> &Replace) {
+  VectorType *VTy = cast<FixedVectorType>(BO->getType());
+
+  IRBuilder<> Builder(BO);
+
+  unsigned NumElements = VTy->getElementCount().getFixedValue();
+  Value *Result = PoisonValue::get(VTy);
+  for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
+    Value *LHS = Builder.CreateExtractElement(BO->getOperand(0), Idx);
+    Value *RHS = Builder.CreateExtractElement(BO->getOperand(1), Idx);
+    Value *Op = Builder.CreateBinOp(BO->getOpcode(), LHS, RHS);
+    Result = Builder.CreateInsertElement(Result, Op, Idx);
+    if (auto *NewBO = dyn_cast<BinaryOperator>(Op)) {
+      NewBO->copyIRFlags(Op, true);
+      Replace.push_back(NewBO);
+    }
+  }
+  BO->replaceAllUsesWith(Result);
+  BO->dropAllReferences();
+  BO->eraseFromParent();
+}
+
 static bool runImpl(Function &F, const TargetLowering &TLI) {
   SmallVector<BinaryOperator *, 4> Replace;
+  SmallVector<BinaryOperator *, 4> ReplaceVector;
   bool Modified = false;
 
   unsigned MaxLegalDivRemBitWidth = TLI.getMaxDivRemBitWidthSupported();
@@ -71,16 +95,23 @@ static bool runImpl(Function &F, const TargetLowering &TLI) {
     case Instruction::SDiv:
     case Instruction::URem:
     case Instruction::SRem: {
-      // TODO: This doesn't handle vectors.
-      auto *IntTy = dyn_cast<IntegerType>(I.getType());
+      // TODO: This pass doesn't handle scalable vectors.
+      if (I.getOperand(0)->getType()->isScalableTy())
+        continue;
+
+      auto *IntTy = dyn_cast<IntegerType>(I.getType()->getScalarType());
       if (!IntTy || IntTy->getIntegerBitWidth() <= MaxLegalDivRemBitWidth)
         continue;
 
       // The backend has peephole optimizations for powers of two.
+      // TODO: We don't consider vectors here.
       if (isConstantPowerOfTwo(I.getOperand(1), isSigned(I.getOpcode())))
         continue;
 
-      Replace.push_back(&cast<BinaryOperator>(I));
+      if (I.getOperand(0)->getType()->isVectorTy())
+        ReplaceVector.push_back(&cast<BinaryOperator>(I));
+      else
+        Replace.push_back(&cast<BinaryOperator>(I));
       Modified = true;
       break;
     }
@@ -89,6 +120,11 @@ static bool runImpl(Function &F, const TargetLowering &TLI) {
     }
   }
 
+  while (!ReplaceVector.empty()) {
+    BinaryOperator *BO = ReplaceVector.pop_back_val();
+    scalarize(BO, Replace);
+  }
+
   if (Replace.empty())
     return false;
 
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp
index 78ad2a25d0e4..11f123aa5bed 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp
@@ -116,7 +116,8 @@ static void expandFPToI(Instruction *FPToI) {
   // fp80 conversion is implemented by fpext to fp128 first then do the
   // conversion.
   FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
-  unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
+  unsigned FloatWidth =
+      PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());
   unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
   unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
   Value *ImplicitBit = Builder.CreateShl(
@@ -175,9 +176,10 @@ static void expandFPToI(Instruction *FPToI) {
   // if.end:
   Builder.SetInsertPoint(IfEnd);
   Value *Add1 = Builder.CreateAdd(
-      And2, ConstantInt::getSigned(IntTy, -int64_t(ExponentBias + BitWidth)));
-  Value *Cmp3 =
-      Builder.CreateICmpULT(Add1, ConstantInt::getSigned(IntTy, -BitWidth));
+      And2, ConstantInt::getSigned(
+                IntTy, -static_cast<int64_t>(ExponentBias + BitWidth)));
+  Value *Cmp3 = Builder.CreateICmpULT(
+      Add1, ConstantInt::getSigned(IntTy, -static_cast<int64_t>(BitWidth)));
   Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);
 
   // if.then5:
@@ -203,8 +205,8 @@ static void expandFPToI(Instruction *FPToI) {
   // if.else:
   Builder.SetInsertPoint(IfElse);
   Value *Sub15 = Builder.CreateAdd(
-      And2,
-      ConstantInt::getSigned(IntTy, -(ExponentBias + FPMantissaWidth)));
+      And2, ConstantInt::getSigned(
+                IntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
   Value *Shl = Builder.CreateShl(Or, Sub15);
   Value *Mul16 = Builder.CreateMul(Shl, Sign);
   Builder.CreateBr(End);
@@ -318,6 +320,7 @@ static void expandIToFP(Instruction *IToFP) {
   // FIXME: As there is no related builtins added in compliler-rt,
   // here currently utilized the fp32 <-> fp16 lib calls to implement.
   FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
+  FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
   unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
   bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;
 
@@ -375,7 +378,7 @@ static void expandIToFP(Instruction *IToFP) {
   Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),
                                   FloatWidth == 128 ? Call : Cast);
   Value *Cmp3 = Builder.CreateICmpSGT(
-      Sub2, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
+      Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
   Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
 
   // if.then4:
@@ -546,7 +549,7 @@ static void expandIToFP(Instruction *IToFP) {
     Value *A40 =
         Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));
     A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
-  } else if (IToFP->getType()->isHalfTy()) {
+  } else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) {
     // Deal with "half" situation. This is a workaround since we don't have
     // floattihf.c currently as referring.
     Value *A40 =
@@ -567,8 +570,29 @@ static void expandIToFP(Instruction *IToFP) {
   IToFP->eraseFromParent();
 }
 
+static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) {
+  VectorType *VTy = cast<FixedVectorType>(I->getType());
+
+  IRBuilder<> Builder(I);
+
+  unsigned NumElements = VTy->getElementCount().getFixedValue();
+  Value *Result = PoisonValue::get(VTy);
+  for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
+    Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx);
+    Value *Cast = Builder.CreateCast(cast<CastInst>(I)->getOpcode(), Ext,
+                                     I->getType()->getScalarType());
+    Result = Builder.CreateInsertElement(Result, Cast, Idx);
+    if (isa<Instruction>(Cast))
+      Replace.push_back(cast<Instruction>(Cast));
+  }
+  I->replaceAllUsesWith(Result);
+  I->dropAllReferences();
+  I->eraseFromParent();
+}
+
 static bool runImpl(Function &F, const TargetLowering &TLI) {
   SmallVector<Instruction *, 4> Replace;
+  SmallVector<Instruction *, 4> ReplaceVector;
   bool Modified = false;
 
   unsigned MaxLegalFpConvertBitWidth =
@@ -583,29 +607,36 @@ static bool runImpl(Function &F, const TargetLowering &TLI) {
     switch (I.getOpcode()) {
     case Instruction::FPToUI:
     case Instruction::FPToSI: {
-      // TODO: This pass doesn't handle vectors.
-      if (I.getOperand(0)->getType()->isVectorTy())
+      // TODO: This pass doesn't handle scalable vectors.
+      if (I.getOperand(0)->getType()->isScalableTy())
         continue;
 
-      auto *IntTy = dyn_cast<IntegerType>(I.getType());
+      auto *IntTy = cast<IntegerType>(I.getType()->getScalarType());
       if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
         continue;
 
-      Replace.push_back(&I);
+      if (I.getOperand(0)->getType()->isVectorTy())
+        ReplaceVector.push_back(&I);
+      else
+        Replace.push_back(&I);
       Modified = true;
       break;
     }
     case Instruction::UIToFP:
     case Instruction::SIToFP: {
-      // TODO: This pass doesn't handle vectors.
-      if (I.getOperand(0)->getType()->isVectorTy())
+      // TODO: This pass doesn't handle scalable vectors.
+ if (I.getOperand(0)->getType()->isScalableTy()) continue; - auto *IntTy = dyn_cast<IntegerType>(I.getOperand(0)->getType()); + auto *IntTy = + cast<IntegerType>(I.getOperand(0)->getType()->getScalarType()); if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth) continue; - Replace.push_back(&I); + if (I.getOperand(0)->getType()->isVectorTy()) + ReplaceVector.push_back(&I); + else + Replace.push_back(&I); Modified = true; break; } @@ -614,6 +645,11 @@ static bool runImpl(Function &F, const TargetLowering &TLI) { } } + while (!ReplaceVector.empty()) { + Instruction *I = ReplaceVector.pop_back_val(); + scalarize(I, Replace); + } + if (Replace.empty()) return false; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp index bb84813569f4..2758f7be4d50 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -970,7 +970,7 @@ PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, if (DT) DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy); - const DataLayout& DL = F.getParent()->getDataLayout(); + const DataLayout& DL = F.getDataLayout(); bool MadeChanges = false; for (auto BBIt = F.begin(); BBIt != F.end();) { if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI, DTU ? &*DTU : nullptr)) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp index 79b6dc9154b3..d6778ec666cb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp @@ -26,54 +26,6 @@ using namespace llvm; namespace { -unsigned getOpcode(Intrinsic::ID ID) { - switch (ID) { - case Intrinsic::vector_reduce_fadd: - return Instruction::FAdd; - case Intrinsic::vector_reduce_fmul: - return Instruction::FMul; - case Intrinsic::vector_reduce_add: - return Instruction::Add; - case Intrinsic::vector_reduce_mul: - return Instruction::Mul; - case Intrinsic::vector_reduce_and: - return Instruction::And; - case Intrinsic::vector_reduce_or: - return Instruction::Or; - case Intrinsic::vector_reduce_xor: - return Instruction::Xor; - case Intrinsic::vector_reduce_smax: - case Intrinsic::vector_reduce_smin: - case Intrinsic::vector_reduce_umax: - case Intrinsic::vector_reduce_umin: - return Instruction::ICmp; - case Intrinsic::vector_reduce_fmax: - case Intrinsic::vector_reduce_fmin: - return Instruction::FCmp; - default: - llvm_unreachable("Unexpected ID"); - } -} - -RecurKind getRK(Intrinsic::ID ID) { - switch (ID) { - case Intrinsic::vector_reduce_smax: - return RecurKind::SMax; - case Intrinsic::vector_reduce_smin: - return RecurKind::SMin; - case Intrinsic::vector_reduce_umax: - return RecurKind::UMax; - case Intrinsic::vector_reduce_umin: - return RecurKind::UMin; - case Intrinsic::vector_reduce_fmax: - return RecurKind::FMax; - case Intrinsic::vector_reduce_fmin: - return RecurKind::FMin; - default: - return RecurKind::None; - } -} - bool expandReductions(Function &F, const TargetTransformInfo *TTI) { bool Changed = false; SmallVector<IntrinsicInst *, 4> Worklist; @@ -106,7 +58,9 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { FastMathFlags FMF = isa<FPMathOperator>(II) ? 
II->getFastMathFlags() : FastMathFlags{}; Intrinsic::ID ID = II->getIntrinsicID(); - RecurKind RK = getRK(ID); + RecurKind RK = getMinMaxReductionRecurKind(ID); + TargetTransformInfo::ReductionShuffle RS = + TTI->getPreferredExpandedReductionShuffle(II); Value *Rdx = nullptr; IRBuilder<> Builder(II); @@ -120,16 +74,16 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { // and it can't be handled by generating a shuffle sequence. Value *Acc = II->getArgOperand(0); Value *Vec = II->getArgOperand(1); + unsigned RdxOpcode = getArithmeticReductionInstruction(ID); if (!FMF.allowReassoc()) - Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), RK); + Rdx = getOrderedReduction(Builder, Acc, Vec, RdxOpcode, RK); else { if (!isPowerOf2_32( cast<FixedVectorType>(Vec->getType())->getNumElements())) continue; - - Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK); - Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID), - Acc, Rdx, "bin.rdx"); + Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK); + Rdx = Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, Acc, Rdx, + "bin.rdx"); } break; } @@ -159,8 +113,8 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { } break; } - - Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK); + unsigned RdxOpcode = getArithmeticReductionInstruction(ID); + Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK); break; } case Intrinsic::vector_reduce_add: @@ -174,8 +128,8 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { if (!isPowerOf2_32( cast<FixedVectorType>(Vec->getType())->getNumElements())) continue; - - Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK); + unsigned RdxOpcode = getArithmeticReductionInstruction(ID); + Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK); break; } case Intrinsic::vector_reduce_fmax: @@ -187,8 +141,8 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { cast<FixedVectorType>(Vec->getType())->getNumElements()) || !FMF.noNaNs()) continue; - - Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK); + unsigned RdxOpcode = getArithmeticReductionInstruction(ID); + Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK); break; } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp index 0fe4cfefdb16..97c6ee4773f2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp @@ -340,6 +340,8 @@ Value *CachingVPExpander::expandPredicationToFPCall( replaceOperation(*NewOp, VPI); return NewOp; } + case Intrinsic::fma: + case Intrinsic::fmuladd: case Intrinsic::experimental_constrained_fma: case Intrinsic::experimental_constrained_fmuladd: { Value *Op0 = VPI.getOperand(0); @@ -347,8 +349,12 @@ Value *CachingVPExpander::expandPredicationToFPCall( Value *Op2 = VPI.getOperand(2); Function *Fn = Intrinsic::getDeclaration( VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()}); - Value *NewOp = - Builder.CreateConstrainedFPCall(Fn, {Op0, Op1, Op2}, VPI.getName()); + Value *NewOp; + if (Intrinsic::isConstrainedFPIntrinsic(UnpredicatedIntrinsicID)) + NewOp = + Builder.CreateConstrainedFPCall(Fn, {Op0, Op1, Op2}, VPI.getName()); + else + NewOp = Builder.CreateCall(Fn, {Op0, Op1, Op2}, VPI.getName()); replaceOperation(*NewOp, VPI); return NewOp; } @@ -361,7 +367,8 @@ static Value *getNeutralReductionElement(const 
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index 0fe4cfefdb16..97c6ee4773f2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -340,6 +340,8 @@ Value *CachingVPExpander::expandPredicationToFPCall(
     replaceOperation(*NewOp, VPI);
     return NewOp;
   }
+  case Intrinsic::fma:
+  case Intrinsic::fmuladd:
   case Intrinsic::experimental_constrained_fma:
   case Intrinsic::experimental_constrained_fmuladd: {
     Value *Op0 = VPI.getOperand(0);
@@ -347,8 +349,12 @@ Value *CachingVPExpander::expandPredicationToFPCall(
     Value *Op2 = VPI.getOperand(2);
     Function *Fn = Intrinsic::getDeclaration(
         VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
-    Value *NewOp =
-        Builder.CreateConstrainedFPCall(Fn, {Op0, Op1, Op2}, VPI.getName());
+    Value *NewOp;
+    if (Intrinsic::isConstrainedFPIntrinsic(UnpredicatedIntrinsicID))
+      NewOp =
+          Builder.CreateConstrainedFPCall(Fn, {Op0, Op1, Op2}, VPI.getName());
+    else
+      NewOp = Builder.CreateCall(Fn, {Op0, Op1, Op2}, VPI.getName());
     replaceOperation(*NewOp, VPI);
     return NewOp;
   }
@@ -361,7 +367,8 @@ static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
                                          Type *EltTy) {
   bool Negative = false;
   unsigned EltBits = EltTy->getScalarSizeInBits();
-  switch (VPI.getIntrinsicID()) {
+  Intrinsic::ID VID = VPI.getIntrinsicID();
+  switch (VID) {
   default:
     llvm_unreachable("Expecting a VP reduction intrinsic");
   case Intrinsic::vp_reduce_add:
@@ -381,12 +388,17 @@ static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
     return ConstantInt::get(EltTy->getContext(),
                             APInt::getSignedMinValue(EltBits));
   case Intrinsic::vp_reduce_fmax:
+  case Intrinsic::vp_reduce_fmaximum:
     Negative = true;
     [[fallthrough]];
-  case Intrinsic::vp_reduce_fmin: {
+  case Intrinsic::vp_reduce_fmin:
+  case Intrinsic::vp_reduce_fminimum: {
+    bool PropagatesNaN = VID == Intrinsic::vp_reduce_fminimum ||
+                         VID == Intrinsic::vp_reduce_fmaximum;
     FastMathFlags Flags = VPI.getFastMathFlags();
     const fltSemantics &Semantics = EltTy->getFltSemantics();
-    return !Flags.noNaNs() ? ConstantFP::getQNaN(EltTy, Negative)
+    return (!Flags.noNaNs() && !PropagatesNaN)
+               ? ConstantFP::getQNaN(EltTy, Negative)
            : !Flags.noInfs() ? ConstantFP::getInfinity(EltTy, Negative)
                              : ConstantFP::get(EltTy,
@@ -474,6 +486,18 @@ CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
     Reduction =
         Builder.CreateBinaryIntrinsic(Intrinsic::minnum, Reduction, Start);
     break;
+  case Intrinsic::vp_reduce_fmaximum:
+    Reduction = Builder.CreateFPMaximumReduce(RedOp);
+    transferDecorations(*Reduction, VPI);
+    Reduction =
+        Builder.CreateBinaryIntrinsic(Intrinsic::maximum, Reduction, Start);
+    break;
+  case Intrinsic::vp_reduce_fminimum:
+    Reduction = Builder.CreateFPMinimumReduce(RedOp);
+    transferDecorations(*Reduction, VPI);
+    Reduction =
+        Builder.CreateBinaryIntrinsic(Intrinsic::minimum, Reduction, Start);
+    break;
   case Intrinsic::vp_reduce_fadd:
     Reduction = Builder.CreateFAddReduce(Start, RedOp);
     break;
@@ -547,7 +571,7 @@ CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
                                                       VPIntrinsic &VPI) {
   assert(VPI.canIgnoreVectorLengthParam());

-  const auto &DL = F.getParent()->getDataLayout();
+  const auto &DL = F.getDataLayout();

   Value *MaskParam = VPI.getMaskParam();
   Value *PtrParam = VPI.getMemoryPointerParam();
@@ -731,6 +755,8 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
   case Intrinsic::vp_minnum:
   case Intrinsic::vp_maximum:
   case Intrinsic::vp_minimum:
+  case Intrinsic::vp_fma:
+  case Intrinsic::vp_fmuladd:
    return expandPredicationToFPCall(Builder, VPI,
                                     VPI.getFunctionalIntrinsicID().value());
  case Intrinsic::vp_load:
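The neutral-element rules above are easier to check with concrete values: masked-off lanes get a start value that can never win the reduction. A hedged, standalone restatement in plain C++ (mirroring the hunk's logic, not library code):

    #include <limits>

    // Neutral element for an fmin-style VP reduction, per the rules above.
    double neutralForFMinLike(bool IsFMinimumFamily, bool NoNaNs, bool NoInfs) {
      // fminimum/fmaximum propagate NaN, so qNaN would poison the result.
      if (!NoNaNs && !IsFMinimumFamily)
        return std::numeric_limits<double>::quiet_NaN(); // loses to any element
      if (!NoInfs)
        return std::numeric_limits<double>::infinity();  // +inf is min-neutral
      return std::numeric_limits<double>::max();         // largest finite value
    }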
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp
index 329c9587e321..477512dc6b03 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp
@@ -14,8 +14,11 @@
 //
 //===----------------------------------------------------------------------===//

+#include "llvm/CodeGen/FinalizeISel.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/InitializePasses.h"
@@ -38,13 +41,10 @@ namespace {
   };
 } // end anonymous namespace

-char FinalizeISel::ID = 0;
-char &llvm::FinalizeISelID = FinalizeISel::ID;
-INITIALIZE_PASS(FinalizeISel, DEBUG_TYPE,
-                "Finalize ISel and expand pseudo-instructions", false, false)
-
-bool FinalizeISel::runOnMachineFunction(MachineFunction &MF) {
+static std::pair<bool, bool> runImpl(MachineFunction &MF) {
   bool Changed = false;
+  bool PreserveCFG = true;
+  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
   const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();

   // Iterate through each instruction in the function, looking for pseudos.
@@ -54,12 +54,18 @@ bool FinalizeISel::runOnMachineFunction(MachineFunction &MF) {
          MBBI != MBBE; ) {
       MachineInstr &MI = *MBBI++;

+      // Set AdjustsStack to true if the instruction selector emits a stack
+      // frame setup instruction or a stack aligning inlineasm.
+      if (TII->isFrameInstr(MI) || MI.isStackAligningInlineAsm())
+        MF.getFrameInfo().setAdjustsStack(true);
+
       // If MI is a pseudo, expand it.
       if (MI.usesCustomInsertionHook()) {
         Changed = true;
         MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB);
         // The expansion may involve new basic blocks.
         if (NewMBB != MBB) {
+          PreserveCFG = false;
           MBB = NewMBB;
           I = NewMBB->getIterator();
           MBBI = NewMBB->begin();
@@ -71,5 +77,25 @@ bool FinalizeISel::runOnMachineFunction(MachineFunction &MF) {

   TLI->finalizeLowering(MF);

-  return Changed;
+  return {Changed, PreserveCFG};
+}
+
+char FinalizeISel::ID = 0;
+char &llvm::FinalizeISelID = FinalizeISel::ID;
+INITIALIZE_PASS(FinalizeISel, DEBUG_TYPE,
+                "Finalize ISel and expand pseudo-instructions", false, false)
+
+bool FinalizeISel::runOnMachineFunction(MachineFunction &MF) {
+  return runImpl(MF).first;
+}
+
+PreservedAnalyses FinalizeISelPass::run(MachineFunction &MF,
+                                        MachineFunctionAnalysisManager &) {
+  auto [Changed, PreserveCFG] = runImpl(MF);
+  if (!Changed)
+    return PreservedAnalyses::all();
+  auto PA = getMachineFunctionPassPreservedAnalyses();
+  if (PreserveCFG)
+    PA.preserveSet<CFGAnalyses>();
+  return PA;
 }
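The new FinalizeISelPass::run above follows the standard new-pass-manager preservation idiom: report everything preserved when nothing changed, otherwise preserve CFG analyses only if no custom inserter split a block. A generic, hedged sketch of that idiom for any machine function pass (ExamplePass and doWork are invented names):

    // Illustrative shape only, assuming the usual new-PM headers.
    struct ExamplePass : PassInfoMixin<ExamplePass> {
      PreservedAnalyses run(MachineFunction &MF,
                            MachineFunctionAnalysisManager &) {
        auto [Changed, CFGIntact] = doWork(MF); // hypothetical worker
        if (!Changed)
          return PreservedAnalyses::all();      // untouched: keep every analysis
        auto PA = getMachineFunctionPassPreservedAnalyses();
        if (CFGIntact)
          PA.preserveSet<CFGAnalyses>();        // blocks/edges were not changed
        return PA;
      }
    };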
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
index 4d668c53f715..3bb9da5f1a37 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -112,7 +112,7 @@ static Register performCopyPropagation(Register Reg, bool &IsKill,
                                        const TargetInstrInfo &TII,
                                        const TargetRegisterInfo &TRI) {
   // First check if statepoint itself uses Reg in non-meta operands.
-  int Idx = RI->findRegisterUseOperandIdx(Reg, false, &TRI);
+  int Idx = RI->findRegisterUseOperandIdx(Reg, &TRI, false);
   if (Idx >= 0 && (unsigned)Idx < StatepointOpers(&*RI).getNumDeoptArgsIdx()) {
     IsKill = false;
     return Reg;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
index cad7d1f1137b..e1af457c9b9d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
@@ -14,6 +14,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Pass.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
index 894ab9a0486a..700714d53984 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -81,6 +81,9 @@ public:

 PreservedAnalyses GCLoweringPass::run(Function &F,
                                       FunctionAnalysisManager &FAM) {
+  if (!F.hasGC())
+    return PreservedAnalyses::all();
+
   auto &Info = FAM.getResult<GCFunctionAnalysis>(F);

   bool Changed = DoLowering(F, Info.getStrategy());
@@ -178,7 +181,7 @@ static bool InsertRootInitializers(Function &F, ArrayRef<AllocaInst *> Roots) {
     if (!InitedRoots.count(Root)) {
       new StoreInst(
           ConstantPointerNull::get(cast<PointerType>(Root->getAllocatedType())),
-          Root, Root->getNextNode());
+          Root, std::next(Root->getIterator()));
       MadeChange = true;
     }

@@ -213,8 +216,8 @@ bool DoLowering(Function &F, GCStrategy &S) {
       default: break;
       case Intrinsic::gcwrite: {
         // Replace a write barrier with a simple store.
-        Value *St = new StoreInst(CI->getArgOperand(0),
-                                  CI->getArgOperand(2), CI);
+        Value *St = new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2),
+                                  CI->getIterator());
         CI->replaceAllUsesWith(St);
         CI->eraseFromParent();
         MadeChange = true;
@@ -222,7 +225,8 @@ bool DoLowering(Function &F, GCStrategy &S) {
       }
       case Intrinsic::gcread: {
         // Replace a read barrier with a simple load.
-        Value *Ld = new LoadInst(CI->getType(), CI->getArgOperand(1), "", CI);
+        Value *Ld = new LoadInst(CI->getType(), CI->getArgOperand(1), "",
+                                 CI->getIterator());
         Ld->takeName(CI);
         CI->replaceAllUsesWith(Ld);
         CI->eraseFromParent();
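The GCRootLowering hunks above also migrate from Instruction*-based insertion positions to the iterator form. A hedged before/after on a hypothetical write-barrier call CI, following the shape the patch adopts:

    // Before (position given by instruction pointer):
    //   Value *St = new StoreInst(Val, Ptr, CI);
    // After (iterator form), shown as a small sketch:
    void lowerWriteBarrierSketch(CallInst *CI) {
      Value *Val = CI->getArgOperand(0);
      Value *Ptr = CI->getArgOperand(2);
      Value *St = new StoreInst(Val, Ptr, CI->getIterator()); // insert before CI
      CI->replaceAllUsesWith(St);
      CI->eraseFromParent();
    }

The iterator form keeps debug-value ordering stable, which is why this merge converts these call sites wholesale.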
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 64e2d517e3b9..547529bbe699 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -51,6 +51,11 @@ CSEMIRBuilder::getDominatingInstrForID(FoldingSetNodeID &ID,
       // this builder will have the def ready.
       setInsertPt(*CurMBB, std::next(MII));
     } else if (!dominates(MI, CurrPos)) {
+      // Update the spliced machineinstr's debug location by merging it with
+      // the debug location of the instruction at the insertion point.
+      auto *Loc = DILocation::getMergedLocation(getDebugLoc().get(),
+                                                MI->getDebugLoc().get());
+      MI->setDebugLoc(Loc);
       CurMBB->splice(CurrPos, CurMBB, MI);
     }
     return MachineInstrBuilder(getMF(), MI);
@@ -174,6 +179,20 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
   switch (Opc) {
   default:
     break;
+  case TargetOpcode::G_ICMP: {
+    assert(SrcOps.size() == 3 && "Invalid sources");
+    assert(DstOps.size() == 1 && "Invalid dsts");
+    LLT SrcTy = SrcOps[1].getLLTTy(*getMRI());
+
+    if (std::optional<SmallVector<APInt>> Cst =
+            ConstantFoldICmp(SrcOps[0].getPredicate(), SrcOps[1].getReg(),
+                             SrcOps[2].getReg(), *getMRI())) {
+      if (SrcTy.isVector())
+        return buildBuildVectorConstant(DstOps[0], *Cst);
+      return buildConstant(DstOps[0], Cst->front());
+    }
+    break;
+  }
   case TargetOpcode::G_ADD:
   case TargetOpcode::G_PTR_ADD:
   case TargetOpcode::G_AND:
@@ -256,10 +275,16 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
       return buildFConstant(DstOps[0], *Cst);
     break;
   }
-  case TargetOpcode::G_CTLZ: {
+  case TargetOpcode::G_CTLZ:
+  case TargetOpcode::G_CTTZ: {
     assert(SrcOps.size() == 1 && "Expected one source");
     assert(DstOps.size() == 1 && "Expected one dest");
-    auto MaybeCsts = ConstantFoldCTLZ(SrcOps[0].getReg(), *getMRI());
+    std::function<unsigned(APInt)> CB;
+    if (Opc == TargetOpcode::G_CTLZ)
+      CB = [](APInt V) -> unsigned { return V.countl_zero(); };
+    else
+      CB = [](APInt V) -> unsigned { return V.countTrailingZeros(); };
+    auto MaybeCsts = ConstantFoldCountZeros(SrcOps[0].getReg(), *getMRI(), CB);
     if (!MaybeCsts)
       break;
     if (MaybeCsts->size() == 1)
@@ -309,7 +334,7 @@ MachineInstrBuilder CSEMIRBuilder::buildConstant(const DstOp &Res,
   // For vectors, CSE the element only for now.
   LLT Ty = Res.getLLTTy(*getMRI());
   if (Ty.isVector())
-    return buildSplatVector(Res, buildConstant(Ty.getElementType(), Val));
+    return buildSplatBuildVector(Res, buildConstant(Ty.getElementType(), Val));

   FoldingSetNodeID ID;
   GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
@@ -336,7 +361,7 @@ MachineInstrBuilder CSEMIRBuilder::buildFConstant(const DstOp &Res,
   // For vectors, CSE the element only for now.
   LLT Ty = Res.getLLTTy(*getMRI());
   if (Ty.isVector())
-    return buildSplatVector(Res, buildFConstant(Ty.getElementType(), Val));
+    return buildSplatBuildVector(Res, buildFConstant(Ty.getElementType(), Val));

   FoldingSetNodeID ID;
   GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
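The G_CTLZ/G_CTTZ unification above threads a per-element callback through one shared folding helper (ConstantFoldCountZeros, per the hunk). The two callbacks are easy to sanity-check on APInt directly; the driver below is a hypothetical standalone illustration:

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    unsigned foldCountZeros(const APInt &V, bool CountLeading) {
      // Same callbacks the patch installs for G_CTLZ and G_CTTZ.
      if (CountLeading)
        return V.countl_zero(); // zeros above the highest set bit
      return V.countr_zero();   // zeros below the lowest set bit
    }
    // e.g. foldCountZeros(APInt(8, 0x10), true)  == 3
    //      foldCountZeros(APInt(8, 0x10), false) == 4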
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index ccd9b13d730b..d16585b5650a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Target/TargetMachine.h"
@@ -35,6 +36,7 @@ void CallLowering::anchor() {}
 static void
 addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags,
                     const std::function<bool(Attribute::AttrKind)> &AttrFn) {
+  // TODO: There are missing flags. Add them here.
   if (AttrFn(Attribute::SExt))
     Flags.setSExt();
   if (AttrFn(Attribute::ZExt))
     Flags.setZExt();
@@ -47,6 +49,8 @@ addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags,
     Flags.setNest();
   if (AttrFn(Attribute::ByVal))
     Flags.setByVal();
+  if (AttrFn(Attribute::ByRef))
+    Flags.setByRef();
   if (AttrFn(Attribute::Preallocated))
     Flags.setPreallocated();
   if (AttrFn(Attribute::InAlloca))
@@ -91,6 +95,8 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
                              ArrayRef<Register> ResRegs,
                              ArrayRef<ArrayRef<Register>> ArgRegs,
                              Register SwiftErrorVReg,
+                             std::optional<PtrAuthInfo> PAI,
+                             Register ConvergenceCtrlToken,
                              std::function<unsigned()> GetCalleeReg) const {
   CallLoweringInfo Info;
   const DataLayout &DL = MIRBuilder.getDataLayout();
@@ -121,7 +127,6 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
       CanBeTailCalled = false;
   }

-
   // First step is to marshall all the function's parameters into the correct
   // physregs and memory locations. Gather the sequence of argument types that
   // we'll pass to the assigner function.
@@ -144,9 +149,23 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
   // Try looking through a bitcast from one function type to another.
   // Commonly happens with calls to objc_msgSend().
   const Value *CalleeV = CB.getCalledOperand()->stripPointerCasts();
-  if (const Function *F = dyn_cast<Function>(CalleeV))
-    Info.Callee = MachineOperand::CreateGA(F, 0);
-  else if (isa<GlobalIFunc>(CalleeV) || isa<GlobalAlias>(CalleeV)) {
+
+  // If IRTranslator chose to drop the ptrauth info, we can turn this into
+  // a direct call.
+  if (!PAI && CB.countOperandBundlesOfType(LLVMContext::OB_ptrauth)) {
+    CalleeV = cast<ConstantPtrAuth>(CalleeV)->getPointer();
+    assert(isa<Function>(CalleeV));
+  }
+
+  if (const Function *F = dyn_cast<Function>(CalleeV)) {
+    if (F->hasFnAttribute(Attribute::NonLazyBind)) {
+      LLT Ty = getLLTForType(*F->getType(), DL);
+      Register Reg = MIRBuilder.buildGlobalValue(Ty, F).getReg(0);
+      Info.Callee = MachineOperand::CreateReg(Reg, false);
+    } else {
+      Info.Callee = MachineOperand::CreateGA(F, 0);
+    }
+  } else if (isa<GlobalIFunc>(CalleeV) || isa<GlobalAlias>(CalleeV)) {
     // IR IFuncs and Aliases can't be forward declared (only defined), so the
     // callee must be in the same TU and therefore we can direct-call it without
     // worrying about it being out of range.
@@ -181,13 +200,15 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
   Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees);
   Info.CallConv = CallConv;
   Info.SwiftErrorVReg = SwiftErrorVReg;
+  Info.PAI = PAI;
+  Info.ConvergenceCtrlToken = ConvergenceCtrlToken;
   Info.IsMustTailCall = CB.isMustTailCall();
   Info.IsTailCall = CanBeTailCalled;
   Info.IsVarArg = IsVarArg;
   if (!lowerCall(MIRBuilder, Info))
     return false;

-  if (ReturnHintAlignReg && !Info.IsTailCall) {
+  if (ReturnHintAlignReg && !Info.LoweredTailCall) {
     MIRBuilder.buildAssertAlign(ResRegs[0], ReturnHintAlignReg,
                                 ReturnHintAlign);
   }
@@ -210,17 +231,26 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
   }

   Align MemAlign = DL.getABITypeAlign(Arg.Ty);
-  if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
+  if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() ||
+      Flags.isByRef()) {
     assert(OpIdx >= AttributeList::FirstArgIndex);
     unsigned ParamIdx = OpIdx - AttributeList::FirstArgIndex;

     Type *ElementTy = FuncInfo.getParamByValType(ParamIdx);
     if (!ElementTy)
+      ElementTy = FuncInfo.getParamByRefType(ParamIdx);
+    if (!ElementTy)
       ElementTy = FuncInfo.getParamInAllocaType(ParamIdx);
     if (!ElementTy)
       ElementTy = FuncInfo.getParamPreallocatedType(ParamIdx);
+
     assert(ElementTy && "Must have byval, inalloca or preallocated type");
-    Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
+
+    uint64_t MemSize = DL.getTypeAllocSize(ElementTy);
+    if (Flags.isByRef())
+      Flags.setByRefSize(MemSize);
+    else
+      Flags.setByValSize(MemSize);

     // For ByVal, alignment should be passed from FE. BE will guess if
     // this info is not there but there are cases it cannot get right.
@@ -412,7 +442,7 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
   // size, e.g. PartLLT == v2s64 and LLTy is v3s32, then first coerce it to
   // have the same elt type, i.e. v4s32.
   // TODO: Extend this coersion to element multiples other than just 2.
-  if (PartLLT.getSizeInBits() > LLTy.getSizeInBits() &&
+  if (TypeSize::isKnownGT(PartLLT.getSizeInBits(), LLTy.getSizeInBits()) &&
       PartLLT.getScalarSizeInBits() == LLTy.getScalarSizeInBits() * 2 &&
       Regs.size() == 1) {
     LLT NewTy = PartLLT.changeElementType(LLTy.getElementType())
@@ -461,13 +491,15 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
     // Deal with vector with 64-bit elements decomposed to 32-bit
     // registers. Need to create intermediate 64-bit elements.
     SmallVector<Register, 8> EltMerges;
-    int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();
-
-    assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);
+    int PartsPerElt =
+        divideCeil(DstEltTy.getSizeInBits(), PartLLT.getSizeInBits());
+    LLT ExtendedPartTy = LLT::scalar(PartLLT.getSizeInBits() * PartsPerElt);

     for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
       auto Merge =
-          B.buildMergeLikeInstr(RealDstEltTy, Regs.take_front(PartsPerElt));
+          B.buildMergeLikeInstr(ExtendedPartTy, Regs.take_front(PartsPerElt));
+      if (ExtendedPartTy.getSizeInBits() > RealDstEltTy.getSizeInBits())
+        Merge = B.buildTrunc(RealDstEltTy, Merge);
       // Fix the type in case this is really a vector of pointers.
       MRI.setType(Merge.getReg(0), RealDstEltTy);
       EltMerges.push_back(Merge.getReg(0));
@@ -529,7 +561,7 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
   // We could just insert a regular copy, but this is unreachable at the moment.
   assert(SrcTy != PartTy && "identical part types shouldn't reach here");

-  const unsigned PartSize = PartTy.getSizeInBits();
+  const TypeSize PartSize = PartTy.getSizeInBits();

   if (PartTy.isVector() == SrcTy.isVector() &&
       PartTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits()) {
@@ -539,7 +571,7 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
   }

   if (SrcTy.isVector() && !PartTy.isVector() &&
-      PartSize > SrcTy.getElementType().getSizeInBits()) {
+      TypeSize::isKnownGT(PartSize, SrcTy.getElementType().getSizeInBits())) {
     // Vector was scalarized, and the elements extended.
     auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(), SrcReg);
     for (int i = 0, e = DstRegs.size(); i != e; ++i)
@@ -548,9 +580,10 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
   }

   if (SrcTy.isVector() && PartTy.isVector() &&
-      PartTy.getScalarSizeInBits() == SrcTy.getScalarSizeInBits() &&
-      SrcTy.getNumElements() < PartTy.getNumElements()) {
-    // A coercion like: v2f32 -> v4f32.
+      PartTy.getSizeInBits() == SrcTy.getSizeInBits() &&
+      ElementCount::isKnownLT(SrcTy.getElementCount(),
+                              PartTy.getElementCount())) {
+    // A coercion like: v2f32 -> v4f32 or nxv2f32 -> nxv4f32
     Register DstReg = DstRegs.front();
     B.buildPadVectorWithUndefElements(DstReg, SrcReg);
     return;
@@ -563,6 +596,17 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
     return;
   }

+  if (SrcTy.isVector() && !PartTy.isVector() &&
+      SrcTy.getScalarSizeInBits() > PartTy.getSizeInBits()) {
+    LLT ExtTy =
+        LLT::vector(SrcTy.getElementCount(),
+                    LLT::scalar(PartTy.getScalarSizeInBits() * DstRegs.size() /
+                                SrcTy.getNumElements()));
+    auto Ext = B.buildAnyExt(ExtTy, SrcReg);
+    B.buildUnmerge(DstRegs, Ext);
+    return;
+  }
+
   MachineRegisterInfo &MRI = *B.getMRI();
   LLT DstTy = MRI.getType(DstRegs[0]);
   LLT LCMTy = getCoverTy(SrcTy, PartTy);
@@ -697,7 +741,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
   MachineFunction &MF = MIRBuilder.getMF();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const Function &F = MF.getFunction();
-  const DataLayout &DL = F.getParent()->getDataLayout();
+  const DataLayout &DL = F.getDataLayout();

   const unsigned NumArgs = Args.size();

@@ -732,6 +776,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
       continue;
     }

+    auto AllocaAddressSpace = MF.getDataLayout().getAllocaAddrSpace();
+
     const MVT ValVT = VA.getValVT();
     const MVT LocVT = VA.getLocVT();

@@ -740,6 +786,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
     const LLT NewLLT = Handler.isIncomingArgumentHandler() ? LocTy : ValTy;
     const EVT OrigVT = EVT::getEVT(Args[i].Ty);
     const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
+    const LLT PointerTy = LLT::pointer(
+        AllocaAddressSpace, DL.getPointerSizeInBits(AllocaAddressSpace));

     // Expected to be multiple regs for a single incoming arg.
     // There should be Regs.size() ArgLocs per argument.
@@ -754,31 +802,76 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
       // intermediate values.
       Args[i].Regs.resize(NumParts);

-      // For each split register, create and assign a vreg that will store
-      // the incoming component of the larger value. These will later be
-      // merged to form the final vreg.
-      for (unsigned Part = 0; Part < NumParts; ++Part)
-        Args[i].Regs[Part] = MRI.createGenericVirtualRegister(NewLLT);
+      // When we have indirect parameter passing we are receiving a pointer,
+      // that points to the actual value, so we need one "temporary" pointer.
+      if (VA.getLocInfo() == CCValAssign::Indirect) {
+        if (Handler.isIncomingArgumentHandler())
+          Args[i].Regs[0] = MRI.createGenericVirtualRegister(PointerTy);
+      } else {
+        // For each split register, create and assign a vreg that will store
+        // the incoming component of the larger value. These will later be
+        // merged to form the final vreg.
+        for (unsigned Part = 0; Part < NumParts; ++Part)
+          Args[i].Regs[Part] = MRI.createGenericVirtualRegister(NewLLT);
+      }
     }

     assert((j + (NumParts - 1)) < ArgLocs.size() &&
            "Too many regs for number of args");

     // Coerce into outgoing value types before register assignment.
-    if (!Handler.isIncomingArgumentHandler() && OrigTy != ValTy) {
+    if (!Handler.isIncomingArgumentHandler() && OrigTy != ValTy &&
+        VA.getLocInfo() != CCValAssign::Indirect) {
       assert(Args[i].OrigRegs.size() == 1);
       buildCopyToRegs(MIRBuilder, Args[i].Regs, Args[i].OrigRegs[0], OrigTy,
                       ValTy, extendOpFromFlags(Args[i].Flags[0]));
     }

+    bool IndirectParameterPassingHandled = false;
     bool BigEndianPartOrdering = TLI->hasBigEndianPartOrdering(OrigVT, DL);
     for (unsigned Part = 0; Part < NumParts; ++Part) {
+      assert((VA.getLocInfo() != CCValAssign::Indirect || Part == 0) &&
+             "Only the first parameter should be processed when "
+             "handling indirect passing!");
       Register ArgReg = Args[i].Regs[Part];
       // There should be Regs.size() ArgLocs per argument.
       unsigned Idx = BigEndianPartOrdering ? NumParts - 1 - Part : Part;
       CCValAssign &VA = ArgLocs[j + Idx];
       const ISD::ArgFlagsTy Flags = Args[i].Flags[Part];

+      // We found an indirect parameter passing, and we have an
+      // OutgoingValueHandler as our handler (so we are at the call site or the
+      // return value). In this case, start the construction of the following
+      // GMIR, that is responsible for the preparation of indirect parameter
+      // passing:
+      //
+      // %1(indirectly passed type) = The value to pass
+      // %3(pointer) = G_FRAME_INDEX %stack.0
+      // G_STORE %1, %3 :: (store (s128), align 8)
+      //
+      // After this GMIR, the remaining part of the loop body will decide how
+      // to get the value to the caller and we break out of the loop.
+      if (VA.getLocInfo() == CCValAssign::Indirect &&
+          !Handler.isIncomingArgumentHandler()) {
+        Align AlignmentForStored = DL.getPrefTypeAlign(Args[i].Ty);
+        MachineFrameInfo &MFI = MF.getFrameInfo();
+        // Get some space on the stack for the value, so later we can pass it
+        // as a reference.
+        int FrameIdx = MFI.CreateStackObject(OrigTy.getScalarSizeInBits(),
+                                             AlignmentForStored, false);
+
+        Register PointerToStackReg =
+            MIRBuilder.buildFrameIndex(PointerTy, FrameIdx).getReg(0);
+        MachinePointerInfo StackPointerMPO =
+            MachinePointerInfo::getFixedStack(MF, FrameIdx);
+        // Store the value in the previously created stack space.
+        MIRBuilder.buildStore(Args[i].OrigRegs[Part], PointerToStackReg,
+                              StackPointerMPO,
+                              inferAlignFromPtrInfo(MF, StackPointerMPO));
+
+        ArgReg = PointerToStackReg;
+        IndirectParameterPassingHandled = true;
+      }
+
       if (VA.isMemLoc() && !Flags.isByVal()) {
         // Individual pieces may have been spilled to the stack and others
         // passed in registers.
@@ -788,16 +881,23 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
         LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags);

         MachinePointerInfo MPO;
-        Register StackAddr = Handler.getStackAddress(
-            MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
-
-        Handler.assignValueToAddress(Args[i], Part, StackAddr, MemTy, MPO, VA);
-        continue;
-      }
-
-      if (VA.isMemLoc() && Flags.isByVal()) {
-        assert(Args[i].Regs.size() == 1 &&
-               "didn't expect split byval pointer");
+        Register StackAddr =
+            Handler.getStackAddress(VA.getLocInfo() == CCValAssign::Indirect
+                                        ? PointerTy.getSizeInBytes()
+                                        : MemTy.getSizeInBytes(),
+                                    VA.getLocMemOffset(), MPO, Flags);
+
+        // Finish the handling of indirect passing from the passers
+        // (OutgoingParameterHandler) side.
+        // This branch is needed, so the pointer to the value is loaded onto the
+        // stack.
+        if (VA.getLocInfo() == CCValAssign::Indirect)
+          Handler.assignValueToAddress(ArgReg, StackAddr, PointerTy, MPO, VA);
+        else
+          Handler.assignValueToAddress(Args[i], Part, StackAddr, MemTy, MPO,
+                                       VA);
+      } else if (VA.isMemLoc() && Flags.isByVal()) {
+        assert(Args[i].Regs.size() == 1 && "didn't expect split byval pointer");

         if (Handler.isIncomingArgumentHandler()) {
           // We just need to copy the frame index value to the pointer.
@@ -834,30 +934,45 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
                                     DstMPO, DstAlign, SrcMPO, SrcAlign,
                                     MemSize, VA);
         }
-        continue;
-      }
-
-      assert(!VA.needsCustom() && "custom loc should have been handled already");
-
-      if (i == 0 && !ThisReturnRegs.empty() &&
-          Handler.isIncomingArgumentHandler() &&
-          isTypeIsValidForThisReturn(ValVT)) {
+      } else if (i == 0 && !ThisReturnRegs.empty() &&
+                 Handler.isIncomingArgumentHandler() &&
+                 isTypeIsValidForThisReturn(ValVT)) {
         Handler.assignValueToReg(ArgReg, ThisReturnRegs[Part], VA);
-        continue;
-      }
-
-      if (Handler.isIncomingArgumentHandler())
+      } else if (Handler.isIncomingArgumentHandler()) {
         Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
-      else {
+      } else {
         DelayedOutgoingRegAssignments.emplace_back([=, &Handler]() {
           Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
         });
       }
+
+      // Finish the handling of indirect parameter passing when receiving
+      // the value (we are in the called function or the caller when receiving
+      // the return value).
+      if (VA.getLocInfo() == CCValAssign::Indirect &&
+          Handler.isIncomingArgumentHandler()) {
+        Align Alignment = DL.getABITypeAlign(Args[i].Ty);
+        MachinePointerInfo MPO = MachinePointerInfo::getUnknownStack(MF);
+
+        // Since we are doing indirect parameter passing, we know that the value
+        // in the temporary register is not the value passed to the function,
+        // but rather a pointer to that value. Let's load that value into the
+        // virtual register where the parameter should go.
+        MIRBuilder.buildLoad(Args[i].OrigRegs[0], Args[i].Regs[0], MPO,
+                             Alignment);
+
+        IndirectParameterPassingHandled = true;
+      }
+
+      if (IndirectParameterPassingHandled)
+        break;
     }

     // Now that all pieces have been assigned, re-pack the register typed values
-    // into the original value typed registers.
-    if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) {
+    // into the original value typed registers. This is only necessary, when
+    // the value was passed in multiple registers, not indirectly.
+    if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT &&
+        !IndirectParameterPassingHandled) {
       // Merge the split registers into the expected larger result vregs of
       // the original call.
       buildCopyFromRegs(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
@@ -1198,7 +1313,8 @@ Register CallLowering::ValueHandler::extendRegister(Register ValReg,
   }

   switch (VA.getLocInfo()) {
-  default: break;
+  default:
+    break;
   case CCValAssign::Full:
   case CCValAssign::BCvt:
     // FIXME: bitconverting between vector types may or may not be a
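Taken together, the two CCValAssign::Indirect paths above form a store/load pair across the call: the caller spills the value to a fresh stack object and hands over its address, and the callee loads through the received pointer. A hedged sketch of the caller-side half, with invented names; the committed logic lives inline in handleAssignments:

    // Spill Val to a new stack slot and return the address to pass instead.
    Register passIndirectSketch(MachineIRBuilder &MIRBuilder, Register Val,
                                LLT ValTy, LLT PtrTy, Align A) {
      MachineFunction &MF = MIRBuilder.getMF();
      int FI = MF.getFrameInfo().CreateStackObject(ValTy.getSizeInBytes(), A,
                                                   /*isSpillSlot=*/false);
      auto Addr = MIRBuilder.buildFrameIndex(PtrTy, FI);
      auto MPO = MachinePointerInfo::getFixedStack(MF, FI);
      MIRBuilder.buildStore(Val, Addr, MPO, A);
      return Addr.getReg(0); // callee loads the real value through this pointer
    }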
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index d18e65a83484..3310ce5455c9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -13,6 +13,7 @@
 #include "llvm/CodeGen/GlobalISel/Combiner.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
 #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
@@ -27,6 +28,11 @@

 using namespace llvm;

+STATISTIC(NumOneIteration, "Number of functions with one iteration");
+STATISTIC(NumTwoIterations, "Number of functions with two iterations");
+STATISTIC(NumThreeOrMoreIterations,
+          "Number of functions with three or more iterations");
+
 namespace llvm {
 cl::OptionCategory GICombinerOptionCategory(
     "GlobalISel Combiner",
@@ -135,7 +141,11 @@ bool Combiner::combineMachineInstrs() {
   bool MFChanged = false;
   bool Changed;

-  do {
+  unsigned Iteration = 0;
+  while (true) {
+    ++Iteration;
+    LLVM_DEBUG(dbgs() << "\n\nCombiner iteration #" << Iteration << '\n');
+
     WorkList.clear();

     // Collect all instructions. Do a post order traversal for basic blocks and
@@ -166,7 +176,28 @@ bool Combiner::combineMachineInstrs() {
       WLObserver->reportFullyCreatedInstrs();
     }
     MFChanged |= Changed;
-  } while (Changed);
+
+    if (!Changed) {
+      LLVM_DEBUG(dbgs() << "\nCombiner reached fixed-point after iteration #"
+                        << Iteration << '\n');
+      break;
+    }
+    // Iterate until a fixed-point is reached if MaxIterations == 0,
+    // otherwise limit the number of iterations.
+    if (CInfo.MaxIterations && Iteration >= CInfo.MaxIterations) {
+      LLVM_DEBUG(
+          dbgs() << "\nCombiner reached iteration limit after iteration #"
+                 << Iteration << '\n');
+      break;
+    }
+  }
+
+  if (Iteration == 1)
+    ++NumOneIteration;
+  else if (Iteration == 2)
+    ++NumTwoIterations;
+  else
+    ++NumThreeOrMoreIterations;

 #ifndef NDEBUG
   if (CSEInfo) {
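The reworked driver above is the classic bounded fixed-point loop; the statistics only record how many sweeps each function needed. A minimal generic sketch of the control flow, hypothetical and outside any LLVM class:

    // Run `sweep` until nothing changes or an optional iteration cap is hit.
    // MaxIterations == 0 means "iterate to a fixed point".
    template <typename Fn>
    bool runToFixedPoint(Fn sweep, unsigned MaxIterations) {
      bool AnyChange = false;
      for (unsigned Iteration = 1;; ++Iteration) {
        bool Changed = sweep();
        AnyChange |= Changed;
        if (!Changed)
          break; // fixed point reached
        if (MaxIterations && Iteration >= MaxIterations)
          break; // give up early, mirroring CInfo.MaxIterations
      }
      return AnyChange;
    }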
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 61ddc858ba44..e77ea3e76ad7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -10,6 +10,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallBitVector.h"
+#include "llvm/Analysis/CmpInstAnalysis.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
@@ -28,10 +29,12 @@
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/DivisionByConstantInfo.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetMachine.h"
 #include <cmath>
@@ -220,21 +223,81 @@ void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
   replaceRegWith(MRI, DstReg, SrcReg);
 }

-bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
-  bool IsUndef = false;
-  SmallVector<Register, 4> Ops;
-  if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
-    applyCombineConcatVectors(MI, IsUndef, Ops);
+bool CombinerHelper::matchFreezeOfSingleMaybePoisonOperand(
+    MachineInstr &MI, BuildFnTy &MatchInfo) {
+  // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
+  Register DstOp = MI.getOperand(0).getReg();
+  Register OrigOp = MI.getOperand(1).getReg();
+
+  if (!MRI.hasOneNonDBGUse(OrigOp))
+    return false;
+
+  MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
+  // Even if only a single operand of the PHI is not guaranteed non-poison,
+  // moving freeze() backwards across a PHI can cause optimization issues for
+  // other users of that operand.
+  //
+  // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
+  // the source register is unprofitable because it makes the freeze() more
+  // strict than is necessary (it would affect the whole register instead of
+  // just the subreg being frozen).
+  if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
+    return false;
+
+  if (canCreateUndefOrPoison(OrigOp, MRI,
+                             /*ConsiderFlagsAndMetadata=*/false))
+    return false;
+
+  std::optional<MachineOperand> MaybePoisonOperand;
+  for (MachineOperand &Operand : OrigDef->uses()) {
+    if (!Operand.isReg())
+      return false;
+
+    if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
+      continue;
+
+    if (!MaybePoisonOperand)
+      MaybePoisonOperand = Operand;
+    else {
+      // We have more than one maybe-poison operand. Moving the freeze is
+      // unsafe.
+      return false;
+    }
+  }
+
+  // Eliminate freeze if all operands are guaranteed non-poison.
+  if (!MaybePoisonOperand) {
+    MatchInfo = [=](MachineIRBuilder &B) {
+      Observer.changingInstr(*OrigDef);
+      cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
+      Observer.changedInstr(*OrigDef);
+      B.buildCopy(DstOp, OrigOp);
+    };
     return true;
   }
-  return false;
+
+  Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
+  LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);
+
+  MatchInfo = [=](MachineIRBuilder &B) mutable {
+    Observer.changingInstr(*OrigDef);
+    cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
+    Observer.changedInstr(*OrigDef);
+    B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
+    auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
+    replaceRegOpWith(
+        MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
+        Freeze.getReg(0));
+    replaceRegWith(MRI, DstOp, OrigOp);
+  };
+  return true;
 }
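This new combine mirrors InstCombine's freeze sinking: when exactly one operand of the frozen def may be poison, freeze that operand instead and strip the def's poison-generating flags. A hedged before/after picture in generic MIR syntax, written as comments since register numbers and flags here are invented for illustration:

    // Before:                          After:
    //   %2 = nsw G_ADD %0, %1            %f = G_FREEZE %1    ; the only maybe-poison op
    //   %3 = G_FREEZE %2                 %2 = G_ADD %0, %f   ; nsw dropped
    //                                    %3 = COPY %2
    //
    // The add's nsw must be dropped: otherwise the pushed-back freeze would
    // not block the poison that the flag itself can introduce.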
-bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
-                                               SmallVectorImpl<Register> &Ops) {
+bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI,
+                                               SmallVector<Register> &Ops) {
   assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
          "Invalid instruction");
-  IsUndef = true;
+  bool IsUndef = true;
   MachineInstr *Undef = nullptr;

   // Walk over all the operands of concat vectors and check if they are
@@ -244,6 +307,8 @@ bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI,
     Register Reg = MO.getReg();
     MachineInstr *Def = MRI.getVRegDef(Reg);
     assert(Def && "Operand not defined");
+    if (!MRI.hasOneNonDBGUse(Reg))
+      return false;
     switch (Def->getOpcode()) {
     case TargetOpcode::G_BUILD_VECTOR:
       IsUndef = false;
@@ -273,10 +338,21 @@ bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI,
         return false;
     }
   }
+
+  // Check if the combine is illegal
+  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+  if (!isLegalOrBeforeLegalizer(
+          {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
+    return false;
+  }
+
+  if (IsUndef)
+    Ops.clear();
+
   return true;
 }

-void CombinerHelper::applyCombineConcatVectors(
-    MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
+void CombinerHelper::applyCombineConcatVectors(MachineInstr &MI,
+                                               SmallVector<Register> &Ops) {
   // We determined that the concat_vectors can be flatten.
   // Generate the flattened build_vector.
   Register DstReg = MI.getOperand(0).getReg();
@@ -287,9 +363,9 @@ void CombinerHelper::applyCombineConcatVectors(
   // checking that at all Ops are undef.  Alternatively, we could have
   // generate a build_vector of undefs and rely on another combine to
   // clean that up.  For now, given we already gather this information
-  // in tryCombineConcatVectors, just save compile time and issue the
+  // in matchCombineConcatVectors, just save compile time and issue the
   // right thing.
-  if (IsUndef)
+  if (Ops.empty())
     Builder.buildUndef(NewDstReg);
   else
     Builder.buildBuildVector(NewDstReg, Ops);
@@ -297,6 +373,86 @@ void CombinerHelper::applyCombineConcatVectors(MachineInstr &MI,
   replaceRegWith(MRI, DstReg, NewDstReg);
 }

+bool CombinerHelper::matchCombineShuffleConcat(MachineInstr &MI,
+                                               SmallVector<Register> &Ops) {
+  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+  auto ConcatMI1 =
+      dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
+  auto ConcatMI2 =
+      dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
+  if (!ConcatMI1 || !ConcatMI2)
+    return false;
+
+  // Check that the sources of the Concat instructions have the same type
+  if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
+      MRI.getType(ConcatMI2->getSourceReg(0)))
+    return false;
+
+  LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
+  LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
+  unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
+  for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
+    // Check if the index takes a whole source register from G_CONCAT_VECTORS
+    // Assumes that all Sources of G_CONCAT_VECTORS are the same type
+    if (Mask[i] == -1) {
+      for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
+        if (i + j >= Mask.size())
+          return false;
+        if (Mask[i + j] != -1)
+          return false;
+      }
+      if (!isLegalOrBeforeLegalizer(
+              {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
+        return false;
+      Ops.push_back(0);
+    } else if (Mask[i] % ConcatSrcNumElt == 0) {
+      for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
+        if (i + j >= Mask.size())
+          return false;
+        if (Mask[i + j] != Mask[i] + static_cast<int>(j))
+          return false;
+      }
+      // Retrieve the source register from its respective G_CONCAT_VECTORS
+      // instruction
+      if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
+        Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
+      } else {
+        Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
+                                              ConcatMI1->getNumSources()));
+      }
+    } else {
+      return false;
+    }
+  }
+
+  if (!isLegalOrBeforeLegalizer(
+          {TargetOpcode::G_CONCAT_VECTORS,
+           {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
+    return false;
+
+  return !Ops.empty();
+}
+
+void CombinerHelper::applyCombineShuffleConcat(MachineInstr &MI,
+                                               SmallVector<Register> &Ops) {
+  LLT SrcTy = MRI.getType(Ops[0]);
+  Register UndefReg = 0;
+
+  for (Register &Reg : Ops) {
+    if (Reg == 0) {
+      if (UndefReg == 0)
+        UndefReg = Builder.buildUndef(SrcTy).getReg(0);
+      Reg = UndefReg;
+    }
+  }
+
+  if (Ops.size() > 1)
+    Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
+  else
+    Builder.buildCopy(MI.getOperand(0).getReg(), Ops[0]);
+  MI.eraseFromParent();
+}
+
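The mask walk above recognizes shuffles that move whole concat inputs: with two G_CONCAT_VECTORS of <2 x s32> sources, a mask like <2,3,-1,-1> selects the second source of the first concat plus one undef slot, so the shuffle folds to a concat of that source and an implicit_def. A standalone, hedged restatement of the per-group test (not the member function itself):

    // A group of ConcatSrcNumElt lanes folds to one concat source iff it is
    // all -1, or starts at a multiple of ConcatSrcNumElt and stays consecutive.
    bool groupSelectsWholeSource(ArrayRef<int> Mask, unsigned I,
                                 unsigned ConcatSrcNumElt) {
      if (Mask[I] == -1) {
        for (unsigned J = 1; J < ConcatSrcNumElt; ++J)
          if (I + J >= Mask.size() || Mask[I + J] != -1)
            return false;
        return true;
      }
      if (Mask[I] % int(ConcatSrcNumElt) != 0)
        return false;
      for (unsigned J = 1; J < ConcatSrcNumElt; ++J)
        if (I + J >= Mask.size() || Mask[I + J] != Mask[I] + int(J))
          return false;
      return true;
    }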
 bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
   SmallVector<Register, 4> Ops;
   if (matchCombineShuffleVector(MI, Ops)) {
@@ -764,12 +920,12 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
   LLT RegTy = MRI.getType(LoadReg);
   Register PtrReg = LoadMI->getPointerReg();
   unsigned RegSize = RegTy.getSizeInBits();
-  uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
+  LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
   unsigned MaskSizeBits = MaskVal.countr_one();

   // The mask may not be larger than the in-memory type, as it might cover sign
   // extended bits
-  if (MaskSizeBits > LoadSizeBits)
+  if (MaskSizeBits > LoadSizeBits.getValue())
     return false;

   // If the mask covers the whole destination register, there's nothing to
@@ -789,7 +945,8 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
   // still adjust the opcode to indicate the high bit behavior.
   if (LoadMI->isSimple())
     MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
-  else if (LoadSizeBits > MaskSizeBits || LoadSizeBits == RegSize)
+  else if (LoadSizeBits.getValue() > MaskSizeBits ||
+           LoadSizeBits.getValue() == RegSize)
     return false;

   // TODO: Could check if it's legal with the reduced or original memory size.
@@ -854,7 +1011,8 @@ bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
   if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
     // If truncating more than the original extended value, abort.
     auto LoadSizeBits = LoadMI->getMemSizeInBits();
-    if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits)
+    if (TruncSrc &&
+        MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
       return false;
     if (LoadSizeBits == SizeInBits)
       return true;
@@ -864,7 +1022,6 @@ bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {

 void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
-  Builder.setInstrAndDebugLoc(MI);
   Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
   MI.eraseFromParent();
 }
@@ -885,7 +1042,7 @@ bool CombinerHelper::matchSextInRegOfLoad(
   if (!LoadDef || !MRI.hasOneNonDBGUse(DstReg))
     return false;

-  uint64_t MemBits = LoadDef->getMemSizeInBits();
+  uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();

   // If the sign extend extends from a narrower width than the load's width,
   // then we can narrow the load width when we combine to a G_SEXTLOAD.
@@ -945,13 +1102,6 @@ void CombinerHelper::applySextInRegOfLoad(
   MI.eraseFromParent();
 }

-static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
-  if (Ty.isVector())
-    return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
-                                Ty.getNumElements());
-  return IntegerType::get(C, Ty.getSizeInBits());
-}
-
 /// Return true if 'MI' is a load or a store that may be fold it's address
 /// operand into the load / store addressing mode.
 static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
@@ -996,7 +1146,8 @@ bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
   LLT Ty = MRI.getType(LdSt.getReg(0));
   LLT MemTy = LdSt.getMMO().getMemoryType();
   SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
-      {{MemTy, MemTy.getSizeInBits(), AtomicOrdering::NotAtomic}});
+      {{MemTy, MemTy.getSizeInBits().getKnownMinValue(),
+        AtomicOrdering::NotAtomic}});
   unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
   SmallVector<LLT> OpTys;
   if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
@@ -1193,6 +1344,18 @@ bool CombinerHelper::matchCombineExtractedVectorLoad(MachineInstr &MI,
   if (!VecEltTy.isByteSized())
     return false;

+  // Check for load fold barriers between the extraction and the load.
+  if (MI.getParent() != LoadMI->getParent())
+    return false;
+  const unsigned MaxIter = 20;
+  unsigned Iter = 0;
+  for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
+    if (II->isLoadFoldBarrier())
+      return false;
+    if (Iter++ == MaxIter)
+      return false;
+  }
+
   // Check if the new load that we are going to create is legal
   // if we are in the post-legalization phase.
   MachineMemOperand MMO = LoadMI->getMMO();
@@ -1279,7 +1442,6 @@ bool CombinerHelper::matchCombineIndexedLoadStore(
 void CombinerHelper::applyCombineIndexedLoadStore(
     MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
   MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
-  Builder.setInstrAndDebugLoc(MI);
   unsigned Opcode = MI.getOpcode();
   bool IsStore = Opcode == TargetOpcode::G_STORE;
   unsigned NewOpcode = getIndexedOpc(Opcode);
@@ -1396,14 +1558,8 @@ void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
   // deps by "moving" the instruction incorrectly. Also keep track of which
   // instruction is first so we pick it's operands, avoiding use-before-def
   // bugs.
-  MachineInstr *FirstInst;
-  if (dominates(MI, *OtherMI)) {
-    Builder.setInstrAndDebugLoc(MI);
-    FirstInst = &MI;
-  } else {
-    Builder.setInstrAndDebugLoc(*OtherMI);
-    FirstInst = OtherMI;
-  }
+  MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
+  Builder.setInstrAndDebugLoc(*FirstInst);

   Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
                              : TargetOpcode::G_UDIVREM,
@@ -1472,7 +1628,7 @@ void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
   Observer.changedInstr(*BrCond);
 }

-
+
 bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
   MachineIRBuilder HelperBuilder(MI);
   GISelObserverWrapper DummyObserver;
@@ -1536,7 +1692,6 @@ static APFloat constantFoldFpUnary(const MachineInstr &MI,

 void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
                                                      const ConstantFP *Cst) {
-  Builder.setInstrAndDebugLoc(MI);
   APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
   const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
   Builder.buildFConstant(MI.getOperand(0), *NewCst);
@@ -1671,7 +1826,6 @@ void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
           Opcode == TargetOpcode::G_USHLSAT) &&
          "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");

-  Builder.setInstrAndDebugLoc(MI);
   LLT Ty = MRI.getType(MI.getOperand(1).getReg());
   unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
   auto Imm = MatchInfo.Imm;
@@ -1787,7 +1941,6 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
   LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
   LLT DestType = MRI.getType(MI.getOperand(0).getReg());
-  Builder.setInstrAndDebugLoc(MI);

   Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
@@ -1923,7 +2076,6 @@ void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
   int64_t ShiftAmtVal = MatchData.Imm;

   LLT ExtSrcTy = MRI.getType(ExtSrcReg);
-  Builder.setInstrAndDebugLoc(MI);
   auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
   auto NarrowShift =
       Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
@@ -1993,7 +2145,6 @@ void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
   LLT SrcTy = MRI.getType(Operands[0]);
   LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
   bool CanReuseInputDirectly = DstTy == SrcTy;
-  Builder.setInstrAndDebugLoc(MI);
   for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
     Register DstReg = MI.getOperand(Idx).getReg();
     Register SrcReg = Operands[Idx];
@@ -2046,7 +2197,6 @@ void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
   assert((MI.getNumOperands() - 1 == Csts.size()) &&
          "Not enough operands to replace all defs");
   unsigned NumElems = MI.getNumOperands() - 1;
-  Builder.setInstrAndDebugLoc(MI);
   for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
     Register DstReg = MI.getOperand(Idx).getReg();
     Builder.buildConstant(DstReg, Csts[Idx]);
@@ -2072,6 +2222,9 @@ bool CombinerHelper::matchCombineUnmergeUndef(
 bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
          "Expected an unmerge");
+  if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
+      MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
+    return false;
   // Check that all the lanes are dead except the first one.
   for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
     if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
@@ -2081,23 +2234,9 @@ bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
 }

 void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
-  Builder.setInstrAndDebugLoc(MI);
   Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
-  // Truncating a vector is going to truncate every single lane,
-  // whereas we want the full lowbits.
-  // Do the operation on a scalar instead.
-  LLT SrcTy = MRI.getType(SrcReg);
-  if (SrcTy.isVector())
-    SrcReg =
-        Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);
   Register Dst0Reg = MI.getOperand(0).getReg();
-  LLT Dst0Ty = MRI.getType(Dst0Reg);
-  if (Dst0Ty.isVector()) {
-    auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
-    Builder.buildCast(Dst0Reg, MIB);
-  } else
-    Builder.buildTrunc(Dst0Reg, SrcReg);
+  Builder.buildTrunc(Dst0Reg, SrcReg);
   MI.eraseFromParent();
 }

@@ -2142,8 +2281,6 @@ void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
   LLT Dst0Ty = MRI.getType(Dst0Reg);
   LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);

-  Builder.setInstrAndDebugLoc(MI);
-
   if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
     Builder.buildZExt(Dst0Reg, ZExtSrcReg);
   } else {
@@ -2197,7 +2334,6 @@ void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,

   LLT HalfTy = LLT::scalar(HalfSize);

-  Builder.setInstr(MI);
   auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
   unsigned NarrowShiftAmt = ShiftVal - HalfSize;

@@ -2282,7 +2418,6 @@ bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
 void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
   assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
   Register DstReg = MI.getOperand(0).getReg();
-  Builder.setInstr(MI);
   Builder.buildCopy(DstReg, Reg);
   MI.eraseFromParent();
 }
@@ -2290,7 +2425,6 @@ void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
 void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
   assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
   Register DstReg = MI.getOperand(0).getReg();
-  Builder.setInstr(MI);
   Builder.buildZExtOrTrunc(DstReg, Reg);
   MI.eraseFromParent();
 }
@@ -2333,7 +2467,6 @@ void CombinerHelper::applyCombineAddP2IToPtrAdd(

   LLT PtrTy = MRI.getType(LHS);

-  Builder.setInstrAndDebugLoc(MI);
   auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
   Builder.buildPtrToInt(Dst, PtrAdd);
   MI.eraseFromParent();
@@ -2365,7 +2498,6 @@ void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
   auto &PtrAdd = cast<GPtrAdd>(MI);
   Register Dst = PtrAdd.getReg(0);

-  Builder.setInstrAndDebugLoc(MI);
   Builder.buildConstant(Dst, NewCst);
   PtrAdd.eraseFromParent();
 }
@@ -2445,7 +2577,6 @@ void CombinerHelper::applyCombineExtOfExt(
       (MI.getOpcode() == TargetOpcode::G_SEXT &&
        SrcExtOp == TargetOpcode::G_ZEXT)) {
     Register DstReg = MI.getOperand(0).getReg();
-    Builder.setInstrAndDebugLoc(MI);
     Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
     MI.eraseFromParent();
   }
 }
@@ -2478,7 +2609,6 @@ void CombinerHelper::applyCombineTruncOfExt(
     replaceRegWith(MRI, DstReg, SrcReg);
     return;
   }
-  Builder.setInstrAndDebugLoc(MI);
   if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
     Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
   else
@@ -2566,8 +2696,6 @@ bool CombinerHelper::matchCombineTruncOfShift(

 void CombinerHelper::applyCombineTruncOfShift(
     MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
-  Builder.setInstrAndDebugLoc(MI);
-
   MachineInstr *ShiftMI = MatchInfo.first;
   LLT NewShiftTy = MatchInfo.second;

@@ -2739,8 +2867,8 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
     // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
     // I1 and I2 are different instructions but produce same values,
     // %1 and %6 are same, %1 and %7 are not the same value.
-    return I1->findRegisterDefOperandIdx(InstAndDef1->Reg) ==
-           I2->findRegisterDefOperandIdx(InstAndDef2->Reg);
+    return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
+           I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
   }
   return false;
 }
@@ -2813,7 +2941,6 @@ void CombinerHelper::applyFunnelShiftConstantModulo(MachineInstr &MI) {
   APInt NewConst = VRegAndVal->Value.urem(
       APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));

-  Builder.setInstrAndDebugLoc(MI);
   auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
   Builder.buildInstr(
       MI.getOpcode(), {MI.getOperand(0)},
@@ -2856,35 +2983,31 @@ bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,

 void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
   assert(MI.getNumDefs() == 1 && "Expected only one def?");
-  Builder.setInstr(MI);
   Builder.buildFConstant(MI.getOperand(0), C);
   MI.eraseFromParent();
 }

 void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
   assert(MI.getNumDefs() == 1 && "Expected only one def?");
-  Builder.setInstr(MI);
   Builder.buildConstant(MI.getOperand(0), C);
   MI.eraseFromParent();
 }

 void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
   assert(MI.getNumDefs() == 1 && "Expected only one def?");
-  Builder.setInstr(MI);
   Builder.buildConstant(MI.getOperand(0), C);
   MI.eraseFromParent();
 }

-void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, ConstantFP *CFP) {
+void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI,
+                                              ConstantFP *CFP) {
   assert(MI.getNumDefs() == 1 && "Expected only one def?");
-  Builder.setInstr(MI);
   Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
   MI.eraseFromParent();
 }

 void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
   assert(MI.getNumDefs() == 1 && "Expected only one def?");
-  Builder.setInstr(MI);
   Builder.buildUndef(MI.getOperand(0));
   MI.eraseFromParent();
 }
@@ -2946,13 +3069,14 @@ bool CombinerHelper::matchCombineInsertVecElts(
     }
     return true;
   }
-  // If we didn't end in a G_IMPLICIT_DEF, bail out.
-  return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
+  // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
+  // overwritten, bail out.
+  return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
+         all_of(MatchInfo, [](Register Reg) { return !!Reg; });
 }

 void CombinerHelper::applyCombineInsertVecElts(
     MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
-  Builder.setInstr(MI);
   Register UndefReg;
   auto GetUndef = [&]() {
     if (UndefReg)
@@ -2961,9 +3085,9 @@ void CombinerHelper::applyCombineInsertVecElts(
     UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
     return UndefReg;
   };
-  for (unsigned I = 0; I < MatchInfo.size(); ++I) {
-    if (!MatchInfo[I])
-      MatchInfo[I] = GetUndef();
+  for (Register &Reg : MatchInfo) {
+    if (!Reg)
+      Reg = GetUndef();
   }
   Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
   MI.eraseFromParent();
@@ -2971,7 +3095,6 @@ void CombinerHelper::applyCombineInsertVecElts(

 void CombinerHelper::applySimplifyAddToSub(
     MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
-  Builder.setInstr(MI);
   Register SubLHS, SubRHS;
   std::tie(SubLHS, SubRHS) = MatchInfo;
   Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
@@ -3031,6 +3154,22 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
     // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
     break;
   }
+  case TargetOpcode::G_TRUNC: {
+    // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
+    const MachineFunction *MF = MI.getMF();
+    const DataLayout &DL = MF->getDataLayout();
+    LLVMContext &Ctx = MF->getFunction().getContext();
+
+    LLT DstTy = MRI.getType(Dst);
+    const TargetLowering &TLI = getTargetLowering();
+
+    // Be extra careful sinking truncate. If it's free, there's no benefit in
+    // widening a binop.
+    if (TLI.isZExtFree(DstTy, XTy, DL, Ctx) &&
+        TLI.isTruncateFree(XTy, DstTy, DL, Ctx))
+      return false;
+    break;
+  }
   case TargetOpcode::G_AND:
   case TargetOpcode::G_ASHR:
   case TargetOpcode::G_LSHR:
@@ -3074,7 +3213,6 @@ void CombinerHelper::applyBuildInstructionSteps(
     MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
   assert(MatchInfo.InstrsToBuild.size() &&
          "Expected at least one instr to build?");
-  Builder.setInstr(MI);
   for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
     assert(InstrToBuild.Opcode && "Expected a valid opcode?");
     assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
@@ -3110,7 +3248,6 @@ void CombinerHelper::applyAshShlToSextInreg(
   int64_t ShiftAmt;
   std::tie(Src, ShiftAmt) = MatchInfo;
   unsigned Size = MRI.getType(Src).getScalarSizeInBits();
-  Builder.setInstrAndDebugLoc(MI);
   Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
   MI.eraseFromParent();
 }
@@ -3166,8 +3303,15 @@ bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
   Register AndDst = MI.getOperand(0).getReg();
   Register LHS = MI.getOperand(1).getReg();
   Register RHS = MI.getOperand(2).getReg();
-  KnownBits LHSBits = KB->getKnownBits(LHS);
+
+  // Check the RHS (maybe a constant) first, and if we have no KnownBits there,
+  // we can't do anything. If we do, then it depends on whether we have
+  // KnownBits on the LHS.
   KnownBits RHSBits = KB->getKnownBits(RHS);
+  if (RHSBits.isUnknown())
+    return false;
+
+  KnownBits LHSBits = KB->getKnownBits(LHS);

   // Check that x & Mask == x.
   // x & 1 == x, always
@@ -3206,6 +3350,7 @@ bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
   Register OrDst = MI.getOperand(0).getReg();
   Register LHS = MI.getOperand(1).getReg();
   Register RHS = MI.getOperand(2).getReg();
+
   KnownBits LHSBits = KB->getKnownBits(LHS);
   KnownBits RHSBits = KB->getKnownBits(RHS);

@@ -3389,7 +3534,6 @@ bool CombinerHelper::matchXorOfAndWithSameReg(
 void CombinerHelper::applyXorOfAndWithSameReg(
     MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
   // Fold (xor (and x, y), y) -> (and (not x), y)
-  Builder.setInstrAndDebugLoc(MI);
   Register X, Y;
   std::tie(X, Y) = MatchInfo;
   auto Not = Builder.buildNot(MRI.getType(X), X);
@@ -3421,7 +3565,6 @@ bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {

 void CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
   auto &PtrAdd = cast<GPtrAdd>(MI);
-  Builder.setInstrAndDebugLoc(PtrAdd);
   Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
   PtrAdd.eraseFromParent();
 }
@@ -3432,7 +3575,6 @@ void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
   Register Src0 = MI.getOperand(1).getReg();
   Register Pow2Src1 = MI.getOperand(2).getReg();
   LLT Ty = MRI.getType(DstReg);
-  Builder.setInstrAndDebugLoc(MI);

   // Fold (urem x, pow2) -> (and x, pow2-1)
   auto NegOne = Builder.buildConstant(Ty, -1);
@@ -3497,8 +3639,6 @@ bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
 /// to fold.
 void CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
                                               const unsigned &SelectOperand) {
-  Builder.setInstrAndDebugLoc(MI);
-
   Register Dst = MI.getOperand(0).getReg();
   Register LHS = MI.getOperand(1).getReg();
   Register RHS = MI.getOperand(2).getReg();
@@ -4019,7 +4159,6 @@ void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
   Register DstReg = MI.getOperand(0).getReg();
   LLT DstTy = MRI.getType(DstReg);

-  Builder.setInstrAndDebugLoc(MI);
   if (ScalarTy != DstTy) {
     assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
     Builder.buildTrunc(DstReg, Reg);
@@ -4085,14 +4224,12 @@ void CombinerHelper::applyExtractAllEltsFromBuildVector(

 void CombinerHelper::applyBuildFn(
     MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
-  Builder.setInstrAndDebugLoc(MI);
-  MatchInfo(Builder);
+  applyBuildFnNoErase(MI, MatchInfo);
   MI.eraseFromParent();
 }

 void CombinerHelper::applyBuildFnNoErase(
     MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
-  Builder.setInstrAndDebugLoc(MI);
   MatchInfo(Builder);
 }

@@ -4194,7 +4331,6 @@ void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) {
          MI.getOpcode() == TargetOpcode::G_ROTR);
   unsigned Bitsize =
       MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
-  Builder.setInstrAndDebugLoc(MI);
   Register Amt = MI.getOperand(2).getReg();
   LLT AmtTy = MRI.getType(Amt);
   auto Bits = Builder.buildConstant(AmtTy, Bitsize);
@@ -4208,43 +4344,67 @@ bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
                                                    int64_t &MatchInfo) {
   assert(MI.getOpcode() == TargetOpcode::G_ICMP);
   auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
-  auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
+
+  // We want to avoid calling KnownBits on the LHS if possible, as this combine
+  // has no filter and runs on every G_ICMP instruction. We can avoid calling
+  // KnownBits on the LHS in two cases:
+  //
+  // - The RHS is unknown: Constants are always on RHS. If the RHS is unknown
+  //   we cannot do any transforms so we can safely bail out early.
+ // - The RHS is zero: we don't need to know the LHS to do unsigned <0 and + // >=0. auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg()); + if (KnownRHS.isUnknown()) + return false; + std::optional<bool> KnownVal; - switch (Pred) { - default: - llvm_unreachable("Unexpected G_ICMP predicate?"); - case CmpInst::ICMP_EQ: - KnownVal = KnownBits::eq(KnownLHS, KnownRHS); - break; - case CmpInst::ICMP_NE: - KnownVal = KnownBits::ne(KnownLHS, KnownRHS); - break; - case CmpInst::ICMP_SGE: - KnownVal = KnownBits::sge(KnownLHS, KnownRHS); - break; - case CmpInst::ICMP_SGT: - KnownVal = KnownBits::sgt(KnownLHS, KnownRHS); - break; - case CmpInst::ICMP_SLE: - KnownVal = KnownBits::sle(KnownLHS, KnownRHS); - break; - case CmpInst::ICMP_SLT: - KnownVal = KnownBits::slt(KnownLHS, KnownRHS); - break; - case CmpInst::ICMP_UGE: - KnownVal = KnownBits::uge(KnownLHS, KnownRHS); - break; - case CmpInst::ICMP_UGT: - KnownVal = KnownBits::ugt(KnownLHS, KnownRHS); - break; - case CmpInst::ICMP_ULE: - KnownVal = KnownBits::ule(KnownLHS, KnownRHS); - break; - case CmpInst::ICMP_ULT: - KnownVal = KnownBits::ult(KnownLHS, KnownRHS); - break; + if (KnownRHS.isZero()) { + // ? uge 0 -> always true + // ? ult 0 -> always false + if (Pred == CmpInst::ICMP_UGE) + KnownVal = true; + else if (Pred == CmpInst::ICMP_ULT) + KnownVal = false; } + + if (!KnownVal) { + auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg()); + switch (Pred) { + default: + llvm_unreachable("Unexpected G_ICMP predicate?"); + case CmpInst::ICMP_EQ: + KnownVal = KnownBits::eq(KnownLHS, KnownRHS); + break; + case CmpInst::ICMP_NE: + KnownVal = KnownBits::ne(KnownLHS, KnownRHS); + break; + case CmpInst::ICMP_SGE: + KnownVal = KnownBits::sge(KnownLHS, KnownRHS); + break; + case CmpInst::ICMP_SGT: + KnownVal = KnownBits::sgt(KnownLHS, KnownRHS); + break; + case CmpInst::ICMP_SLE: + KnownVal = KnownBits::sle(KnownLHS, KnownRHS); + break; + case CmpInst::ICMP_SLT: + KnownVal = KnownBits::slt(KnownLHS, KnownRHS); + break; + case CmpInst::ICMP_UGE: + KnownVal = KnownBits::uge(KnownLHS, KnownRHS); + break; + case CmpInst::ICMP_UGT: + KnownVal = KnownBits::ugt(KnownLHS, KnownRHS); + break; + case CmpInst::ICMP_ULE: + KnownVal = KnownBits::ule(KnownLHS, KnownRHS); + break; + case CmpInst::ICMP_ULT: + KnownVal = KnownBits::ult(KnownLHS, KnownRHS); + break; + } + } + if (!KnownVal) return false; MatchInfo = @@ -4364,19 +4524,21 @@ bool CombinerHelper::matchBitfieldExtractFromSExtInReg( } /// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants. -bool CombinerHelper::matchBitfieldExtractFromAnd( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_AND); - Register Dst = MI.getOperand(0).getReg(); +bool CombinerHelper::matchBitfieldExtractFromAnd(MachineInstr &MI, + BuildFnTy &MatchInfo) { + GAnd *And = cast<GAnd>(&MI); + Register Dst = And->getReg(0); LLT Ty = MRI.getType(Dst); LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty); + // Note that isLegalOrBeforeLegalizer is stricter and does not take custom + // into account. 
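The rewritten G_ICMP fold above first checks the RHS (where constants are canonicalized to) and the cheap compare-with-zero special cases before paying for a KnownBits query on the LHS. A minimal standalone sketch of the underlying queries, assuming it is compiled against LLVM's APInt/KnownBits headers:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
#include <cassert>
#include <optional>
using namespace llvm;

int main() {
  // Model an 8-bit LHS produced by "x & 0xF": high nibble known zero.
  KnownBits LHS(8);
  LHS.Zero.setBitsFrom(4);

  // ult against the constant 16 is decidable: umax(LHS) == 15 < 16, so
  // the combine could fold this G_ICMP to "always true".
  KnownBits RHS16 = KnownBits::makeConstant(APInt(8, 16));
  assert(KnownBits::ult(LHS, RHS16) == std::optional<bool>(true));

  // eq against 7 is undecidable (the low nibble is unknown), so the
  // combine bails out without rewriting anything.
  KnownBits RHS7 = KnownBits::makeConstant(APInt(8, 7));
  assert(!KnownBits::eq(LHS, RHS7).has_value());
}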
if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}})) return false; int64_t AndImm, LSBImm; Register ShiftSrc; const unsigned Size = Ty.getScalarSizeInBits(); - if (!mi_match(MI.getOperand(0).getReg(), MRI, + if (!mi_match(And->getReg(0), MRI, m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))), m_ICst(AndImm)))) return false; @@ -4928,24 +5090,6 @@ bool CombinerHelper::matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) { return true; } -bool CombinerHelper::matchAddOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) { - // (G_*ADDO x, 0) -> x + no carry out - assert(MI.getOpcode() == TargetOpcode::G_UADDO || - MI.getOpcode() == TargetOpcode::G_SADDO); - if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0))) - return false; - Register Carry = MI.getOperand(1).getReg(); - if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Carry))) - return false; - Register Dst = MI.getOperand(0).getReg(); - Register LHS = MI.getOperand(2).getReg(); - MatchInfo = [=](MachineIRBuilder &B) { - B.buildCopy(Dst, LHS); - B.buildConstant(Carry, 0); - }; - return true; -} - bool CombinerHelper::matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) { // (G_*ADDE x, y, 0) -> (G_*ADDO x, y) // (G_*SUBE x, y, 0) -> (G_*SUBO x, y) @@ -5034,12 +5178,64 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) { const unsigned EltBits = ScalarTy.getScalarSizeInBits(); LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty); LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType(); + auto &MIB = Builder; - MIB.setInstrAndDebugLoc(MI); + + bool UseSRL = false; + SmallVector<Register, 16> Shifts, Factors; + auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI)); + bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value(); + + auto BuildExactUDIVPattern = [&](const Constant *C) { + // Don't recompute inverses for each splat element. + if (IsSplat && !Factors.empty()) { + Shifts.push_back(Shifts[0]); + Factors.push_back(Factors[0]); + return true; + } + + auto *CI = cast<ConstantInt>(C); + APInt Divisor = CI->getValue(); + unsigned Shift = Divisor.countr_zero(); + if (Shift) { + Divisor.lshrInPlace(Shift); + UseSRL = true; + } + + // Calculate the multiplicative inverse modulo BW. + APInt Factor = Divisor.multiplicativeInverse(); + Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0)); + Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0)); + return true; + }; + + if (MI.getFlag(MachineInstr::MIFlag::IsExact)) { + // Collect all magic values from the build vector. + if (!matchUnaryPredicate(MRI, RHS, BuildExactUDIVPattern)) + llvm_unreachable("Expected unary predicate match to succeed"); + + Register Shift, Factor; + if (Ty.isVector()) { + Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0); + Factor = MIB.buildBuildVector(Ty, Factors).getReg(0); + } else { + Shift = Shifts[0]; + Factor = Factors[0]; + } + + Register Res = LHS; + + if (UseSRL) + Res = MIB.buildLShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0); + + return MIB.buildMul(Ty, Res, Factor); + } + + unsigned KnownLeadingZeros = + KB ? KB->getKnownBits(LHS).countMinLeadingZeros() : 0; bool UseNPQ = false; SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors; - auto BuildUDIVPattern = [&](const Constant *C) { auto *CI = cast<ConstantInt>(C); const APInt &Divisor = CI->getValue(); @@ -5052,8 +5248,12 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) { // at the end. 
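The exact-division path above shifts out the divisor's trailing zeros and multiplies by the multiplicative inverse of the remaining odd factor modulo 2^BW; no fixup is needed because an exact quotient leaves no remainder. A plain-integer sketch of that identity (inverseMod32 is an illustrative helper; in the combine, APInt::multiplicativeInverse plays this role):

#include <cassert>
#include <cstdint>

// Inverse of an odd 32-bit value modulo 2^32 by Newton's iteration; each
// step doubles the number of correct low bits (3 -> 6 -> 12 -> 24 -> 48).
uint32_t inverseMod32(uint32_t Odd) {
  uint32_t Inv = Odd; // correct to 3 bits for any odd input
  for (int I = 0; I < 4; ++I)
    Inv *= 2 - Odd * Inv;
  return Inv;
}

int main() {
  // d = 24 = 8 * 3: logical shift by cttz(24) == 3, then multiply by the
  // inverse of 3 (0xAAAAAAAB).
  const uint32_t D = 24, Inv = inverseMod32(D >> 3);
  for (uint32_t Q = 0; Q < 100000; ++Q) {
    uint32_t X = Q * D; // "exact" means X is a multiple of D
    assert((X >> 3) * Inv == Q);
  }
}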
// TODO: Use undef values for divisor of 1. if (!Divisor.isOne()) { + + // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros + // in the dividend exceeds the leading zeros for the divisor. UnsignedDivisionByConstantInfo magics = - UnsignedDivisionByConstantInfo::get(Divisor); + UnsignedDivisionByConstantInfo::get( + Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero())); Magic = std::move(magics.Magic); @@ -5133,9 +5333,6 @@ bool CombinerHelper::matchUDivByConst(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register RHS = MI.getOperand(2).getReg(); LLT DstTy = MRI.getType(Dst); - auto *RHSDef = MRI.getVRegDef(RHS); - if (!isConstantOrConstantVector(*RHSDef, MRI)) - return false; auto &MF = *MI.getMF(); AttributeList Attr = MF.getFunction().getAttributes(); @@ -5150,6 +5347,15 @@ bool CombinerHelper::matchUDivByConst(MachineInstr &MI) { if (MF.getFunction().hasMinSize()) return false; + if (MI.getFlag(MachineInstr::MIFlag::IsExact)) { + return matchUnaryPredicate( + MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); }); + } + + auto *RHSDef = MRI.getVRegDef(RHS); + if (!isConstantOrConstantVector(*RHSDef, MRI)) + return false; + // Don't do this if the types are not going to be legal. if (LI) { if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}})) @@ -5163,12 +5369,8 @@ bool CombinerHelper::matchUDivByConst(MachineInstr &MI) { return false; } - auto CheckEltValue = [&](const Constant *C) { - if (auto *CI = dyn_cast_or_null<ConstantInt>(C)) - return !CI->isZero(); - return false; - }; - return matchUnaryPredicate(MRI, RHS, CheckEltValue); + return matchUnaryPredicate( + MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); }); } void CombinerHelper::applyUDivByConst(MachineInstr &MI) { @@ -5198,7 +5400,7 @@ bool CombinerHelper::matchSDivByConst(MachineInstr &MI) { // If the sdiv has an 'exact' flag we can use a simpler lowering. if (MI.getFlag(MachineInstr::MIFlag::IsExact)) { return matchUnaryPredicate( - MRI, RHS, [](const Constant *C) { return C && !C->isZeroValue(); }); + MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); }); } // Don't support the general case for now. @@ -5221,7 +5423,6 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) { LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty); LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType(); auto &MIB = Builder; - MIB.setInstrAndDebugLoc(MI); bool UseSRA = false; SmallVector<Register, 16> Shifts, Factors; @@ -5247,10 +5448,7 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) { // Calculate the multiplicative inverse modulo BW. // 2^W requires W + 1 bits, so we have to extend and then truncate. 
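For divisors that are not exact, the code above falls back to the magic-number multiply-high sequence. A scalar model for d = 7, where the magic constant does not fit in 32 bits and the NPQ fixup is required; the constants here are the classic Hacker's Delight values, which UnsignedDivisionByConstantInfo should reproduce (udiv7 is an illustrative name):

#include <cassert>
#include <cstdint>

uint32_t udiv7(uint32_t N) {
  // G_UMULH by the magic constant for d = 7.
  uint32_t T = (uint32_t)(((uint64_t)N * 0x24924925u) >> 32);
  // NPQ fixup: q = (((n - q) >> 1) + q) >> 2, staying within 32 bits.
  return (((N - T) >> 1) + T) >> 2;
}

int main() {
  for (uint64_t N = 0; N <= 0xFFFFFFFFull; N += 65537)
    assert(udiv7((uint32_t)N) == (uint32_t)N / 7);
}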
- unsigned W = Divisor.getBitWidth(); - APInt Factor = Divisor.zext(W + 1) - .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) - .trunc(W); + APInt Factor = Divisor.multiplicativeInverse(); Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0)); Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0)); return true; @@ -5278,6 +5476,93 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) { return MIB.buildMul(Ty, Res, Factor); } +bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) { + assert((MI.getOpcode() == TargetOpcode::G_SDIV || + MI.getOpcode() == TargetOpcode::G_UDIV) && + "Expected SDIV or UDIV"); + auto &Div = cast<GenericMachineInstr>(MI); + Register RHS = Div.getReg(2); + auto MatchPow2 = [&](const Constant *C) { + auto *CI = dyn_cast<ConstantInt>(C); + return CI && (CI->getValue().isPowerOf2() || + (IsSigned && CI->getValue().isNegatedPowerOf2())); + }; + return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false); +} + +void CombinerHelper::applySDivByPow2(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV"); + auto &SDiv = cast<GenericMachineInstr>(MI); + Register Dst = SDiv.getReg(0); + Register LHS = SDiv.getReg(1); + Register RHS = SDiv.getReg(2); + LLT Ty = MRI.getType(Dst); + LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty); + LLT CCVT = + Ty.isVector() ? LLT::vector(Ty.getElementCount(), 1) : LLT::scalar(1); + + // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2, + // to the following version: + // + // %c1 = G_CTTZ %rhs + // %inexact = G_SUB $bitwidth, %c1 + // %sign = %G_ASHR %lhs, $(bitwidth - 1) + // %lshr = G_LSHR %sign, %inexact + // %add = G_ADD %lhs, %lshr + // %ashr = G_ASHR %add, %c1 + // %ashr = G_SELECT, %isoneorallones, %lhs, %ashr + // %zero = G_CONSTANT $0 + // %neg = G_NEG %ashr + // %isneg = G_ICMP SLT %rhs, %zero + // %res = G_SELECT %isneg, %neg, %ashr + + unsigned BitWidth = Ty.getScalarSizeInBits(); + auto Zero = Builder.buildConstant(Ty, 0); + + auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth); + auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS); + auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1); + // Splat the sign bit into the register + auto Sign = Builder.buildAShr( + Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1)); + + // Add (LHS < 0) ? abs2 - 1 : 0; + auto LSrl = Builder.buildLShr(Ty, Sign, Inexact); + auto Add = Builder.buildAdd(Ty, LHS, LSrl); + auto AShr = Builder.buildAShr(Ty, Add, C1); + + // Special case: (sdiv X, 1) -> X + // Special Case: (sdiv X, -1) -> 0-X + auto One = Builder.buildConstant(Ty, 1); + auto MinusOne = Builder.buildConstant(Ty, -1); + auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One); + auto IsMinusOne = + Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne); + auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne); + AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr); + + // If divided by a positive value, we're done. Otherwise, the result must be + // negated. 
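The G_SDIV-by-power-of-two expansion above reads naturally as scalar code. A sketch for 32-bit operands (sdivPow2 is an illustrative name; d = ±1 is excluded here for brevity, which is exactly what the G_SELECT on %isoneorallones handles):

#include <cassert>
#include <cstdint>

int32_t sdivPow2(int32_t X, int32_t D) { // D == +/-2^k, D != +/-1
  uint32_t AbsD = D < 0 ? 0u - (uint32_t)D : (uint32_t)D;
  unsigned K = __builtin_ctz(AbsD);               // G_CTTZ %rhs
  int32_t Sign = X >> 31;                         // G_ASHR %lhs, 31
  uint32_t Bias = (uint32_t)Sign >> (32 - K);     // G_LSHR %sign, %inexact
  int32_t Q = (int32_t)((uint32_t)X + Bias) >> K; // G_ADD, then G_ASHR
  return D < 0 ? -Q : Q;                          // negate for negative D
}

int main() {
  assert(sdivPow2(-7, 8) == 0 && sdivPow2(-8, 8) == -1);
  assert(sdivPow2(7, -8) == 0 && sdivPow2(-9, -8) == 1);
  for (int32_t X = -1000; X <= 1000; ++X)
    for (int32_t D : {2, 4, 8, -2, -16})
      assert(sdivPow2(X, D) == X / D);
}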
+ auto Neg = Builder.buildNeg(Ty, AShr); + auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero); + Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr); + MI.eraseFromParent(); +} + +void CombinerHelper::applyUDivByPow2(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV"); + auto &UDiv = cast<GenericMachineInstr>(MI); + Register Dst = UDiv.getReg(0); + Register LHS = UDiv.getReg(1); + Register RHS = UDiv.getReg(2); + LLT Ty = MRI.getType(Dst); + LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty); + + auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS); + Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1); + MI.eraseFromParent(); +} + bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_UMULH); Register RHS = MI.getOperand(2).getReg(); @@ -5302,7 +5587,6 @@ void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) { LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty); unsigned NumEltBits = Ty.getScalarSizeInBits(); - Builder.setInstrAndDebugLoc(MI); auto LogBase2 = buildLogBase2(RHS, Builder); auto ShiftAmt = Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2); @@ -5382,7 +5666,6 @@ bool CombinerHelper::matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) { } void CombinerHelper::applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) { - Builder.setInstrAndDebugLoc(MI); Register Dst = MI.getOperand(0).getReg(); Builder.buildFNeg( Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0)); @@ -6235,16 +6518,30 @@ bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) { } bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) { - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - auto *LHSDef = MRI.getVRegDef(LHS); - if (getIConstantVRegVal(LHS, MRI).has_value()) - return true; - - // LHS may be a G_CONSTANT_FOLD_BARRIER. If so we commute - // as long as we don't already have a constant on the RHS. - if (LHSDef->getOpcode() != TargetOpcode::G_CONSTANT_FOLD_BARRIER) - return false; + unsigned LHSOpndIdx = 1; + unsigned RHSOpndIdx = 2; + switch (MI.getOpcode()) { + case TargetOpcode::G_UADDO: + case TargetOpcode::G_SADDO: + case TargetOpcode::G_UMULO: + case TargetOpcode::G_SMULO: + LHSOpndIdx = 2; + RHSOpndIdx = 3; + break; + default: + break; + } + Register LHS = MI.getOperand(LHSOpndIdx).getReg(); + Register RHS = MI.getOperand(RHSOpndIdx).getReg(); + if (!getIConstantVRegVal(LHS, MRI)) { + // Skip commuting if LHS is not a constant. But, LHS may be a + // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already + // have a constant on the RHS. + if (MRI.getVRegDef(LHS)->getOpcode() != + TargetOpcode::G_CONSTANT_FOLD_BARRIER) + return false; + } + // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER. 
return MRI.getVRegDef(RHS)->getOpcode() != TargetOpcode::G_CONSTANT_FOLD_BARRIER && !getIConstantVRegVal(RHS, MRI); @@ -6261,10 +6558,23 @@ bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) { void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) { Observer.changingInstr(MI); - Register LHSReg = MI.getOperand(1).getReg(); - Register RHSReg = MI.getOperand(2).getReg(); - MI.getOperand(1).setReg(RHSReg); - MI.getOperand(2).setReg(LHSReg); + unsigned LHSOpndIdx = 1; + unsigned RHSOpndIdx = 2; + switch (MI.getOpcode()) { + case TargetOpcode::G_UADDO: + case TargetOpcode::G_SADDO: + case TargetOpcode::G_UMULO: + case TargetOpcode::G_SMULO: + LHSOpndIdx = 2; + RHSOpndIdx = 3; + break; + default: + break; + } + Register LHSReg = MI.getOperand(LHSOpndIdx).getReg(); + Register RHSReg = MI.getOperand(RHSOpndIdx).getReg(); + MI.getOperand(LHSOpndIdx).setReg(RHSReg); + MI.getOperand(RHSOpndIdx).setReg(LHSReg); Observer.changedInstr(MI); } @@ -6346,6 +6656,26 @@ CombinerHelper::getConstantOrConstantSplatVector(Register Src) { return Value; } +// FIXME G_SPLAT_VECTOR +bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const { + auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI); + if (IConstant) + return true; + + GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI); + if (!BuildVector) + return false; + + unsigned NumSources = BuildVector->getNumSources(); + for (unsigned I = 0; I < NumSources; ++I) { + std::optional<ValueAndVReg> IConstant = + getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI); + if (!IConstant) + return false; + } + return true; +} + // TODO: use knownbits to determine zeros bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select, BuildFnTy &MatchInfo) { @@ -6361,6 +6691,9 @@ bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select, if (CondTy != LLT::scalar(1)) return false; + if (TrueTy.isPointer()) + return false; + // Both are scalars. std::optional<ValueAndVReg> TrueOpt = getIConstantVRegValWithLookThrough(True, MRI); @@ -6503,7 +6836,8 @@ bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select, B.setInstrAndDebugLoc(*Select); Register Ext = MRI.createGenericVirtualRegister(TrueTy); B.buildZExtOrTrunc(Ext, Cond); - B.buildOr(DstReg, Ext, False, Flags); + auto FreezeFalse = B.buildFreeze(TrueTy, False); + B.buildOr(DstReg, Ext, FreezeFalse, Flags); }; return true; } @@ -6515,7 +6849,8 @@ bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select, B.setInstrAndDebugLoc(*Select); Register Ext = MRI.createGenericVirtualRegister(TrueTy); B.buildZExtOrTrunc(Ext, Cond); - B.buildAnd(DstReg, Ext, True); + auto FreezeTrue = B.buildFreeze(TrueTy, True); + B.buildAnd(DstReg, Ext, FreezeTrue); }; return true; } @@ -6530,7 +6865,8 @@ bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select, // Then an ext to match the destination register. Register Ext = MRI.createGenericVirtualRegister(TrueTy); B.buildZExtOrTrunc(Ext, Inner); - B.buildOr(DstReg, Ext, True, Flags); + auto FreezeTrue = B.buildFreeze(TrueTy, True); + B.buildOr(DstReg, Ext, FreezeTrue, Flags); }; return true; } @@ -6545,7 +6881,8 @@ bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select, // Then an ext to match the destination register. 
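The G_FREEZE instructions added throughout this select-to-logic fold are poison protection: a select propagates poison only from the arm it actually picks, while G_OR and G_AND observe both inputs, so the formerly unselected operand must be frozen. In the document's MIR notation, for the constant-true arm (register names hypothetical):

%dst:_(s1) = G_SELECT %c(s1), %one, %f(s1)   ; %one = G_CONSTANT i1 1
-->
%fr:_(s1)  = G_FREEZE %f                     ; stop poison in %f leaking
%dst:_(s1) = G_OR %c, %fr                    ; select %c, 1, %f == or %c, %f

Without the freeze, %c = 1 with %f poison would turn a well-defined true into poison.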
Register Ext = MRI.createGenericVirtualRegister(TrueTy); B.buildZExtOrTrunc(Ext, Inner); - B.buildAnd(DstReg, Ext, False); + auto FreezeFalse = B.buildFreeze(TrueTy, False); + B.buildAnd(DstReg, Ext, FreezeFalse); }; return true; } @@ -6553,10 +6890,12 @@ bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select, return false; } -bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select, - BuildFnTy &MatchInfo) { +bool CombinerHelper::matchSelectIMinMax(const MachineOperand &MO, + BuildFnTy &MatchInfo) { + GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg())); + GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg())); + Register DstReg = Select->getReg(0); - Register Cond = Select->getCondReg(); Register True = Select->getTrueReg(); Register False = Select->getFalseReg(); LLT DstTy = MRI.getType(DstReg); @@ -6564,11 +6903,6 @@ bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select, if (DstTy.isPointer()) return false; - // We need an G_ICMP on the condition register. - GICmp *Cmp = getOpcodeDef<GICmp>(Cond, MRI); - if (!Cmp) - return false; - // We want to fold the icmp and replace the select. if (!MRI.hasOneNonDBGUse(Cmp->getReg(0))) return false; @@ -6591,63 +6925,624 @@ bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select, // (icmp X, Y) ? X : Y -> integer minmax. // see matchSelectPattern in ValueTracking. // Legality between G_SELECT and integer minmax can differ. - if (True == CmpLHS && False == CmpRHS) { - switch (Pred) { - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_UGE: { - if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy})) - return false; - MatchInfo = [=](MachineIRBuilder &B) { - B.buildUMax(DstReg, True, False); - }; - return true; + if (True != CmpLHS || False != CmpRHS) + return false; + + switch (Pred) { + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: { + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy})) + return false; + MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); }; + return true; + } + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: { + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy})) + return false; + MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); }; + return true; + } + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: { + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy})) + return false; + MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); }; + return true; + } + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: { + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy})) + return false; + MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); }; + return true; + } + default: + return false; + } +} + +bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) { + GSelect *Select = cast<GSelect>(&MI); + + if (tryFoldSelectOfConstants(Select, MatchInfo)) + return true; + + if (tryFoldBoolSelectToLogic(Select, MatchInfo)) + return true; + + return false; +} + +/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2) +/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2) +/// into a single comparison using range-based reasoning. +/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges. 
+bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic, + BuildFnTy &MatchInfo) { + assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor"); + bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND; + Register DstReg = Logic->getReg(0); + Register LHS = Logic->getLHSReg(); + Register RHS = Logic->getRHSReg(); + unsigned Flags = Logic->getFlags(); + + // We need an G_ICMP on the LHS register. + GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI); + if (!Cmp1) + return false; + + // We need an G_ICMP on the RHS register. + GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI); + if (!Cmp2) + return false; + + // We want to fold the icmps. + if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) || + !MRI.hasOneNonDBGUse(Cmp2->getReg(0))) + return false; + + APInt C1; + APInt C2; + std::optional<ValueAndVReg> MaybeC1 = + getIConstantVRegValWithLookThrough(Cmp1->getRHSReg(), MRI); + if (!MaybeC1) + return false; + C1 = MaybeC1->Value; + + std::optional<ValueAndVReg> MaybeC2 = + getIConstantVRegValWithLookThrough(Cmp2->getRHSReg(), MRI); + if (!MaybeC2) + return false; + C2 = MaybeC2->Value; + + Register R1 = Cmp1->getLHSReg(); + Register R2 = Cmp2->getLHSReg(); + CmpInst::Predicate Pred1 = Cmp1->getCond(); + CmpInst::Predicate Pred2 = Cmp2->getCond(); + LLT CmpTy = MRI.getType(Cmp1->getReg(0)); + LLT CmpOperandTy = MRI.getType(R1); + + if (CmpOperandTy.isPointer()) + return false; + + // We build ands, adds, and constants of type CmpOperandTy. + // They must be legal to build. + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) || + !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) || + !isConstantLegalOrBeforeLegalizer(CmpOperandTy)) + return false; + + // Look through add of a constant offset on R1, R2, or both operands. This + // allows us to interpret the R + C' < C'' range idiom into a proper range. + std::optional<APInt> Offset1; + std::optional<APInt> Offset2; + if (R1 != R2) { + if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) { + std::optional<ValueAndVReg> MaybeOffset1 = + getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI); + if (MaybeOffset1) { + R1 = Add->getLHSReg(); + Offset1 = MaybeOffset1->Value; + } } - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: { - if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy})) - return false; + if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) { + std::optional<ValueAndVReg> MaybeOffset2 = + getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI); + if (MaybeOffset2) { + R2 = Add->getLHSReg(); + Offset2 = MaybeOffset2->Value; + } + } + } + + if (R1 != R2) + return false; + + // We calculate the icmp ranges including maybe offsets. + ConstantRange CR1 = ConstantRange::makeExactICmpRegion( + IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1); + if (Offset1) + CR1 = CR1.subtract(*Offset1); + + ConstantRange CR2 = ConstantRange::makeExactICmpRegion( + IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2); + if (Offset2) + CR2 = CR2.subtract(*Offset2); + + bool CreateMask = false; + APInt LowerDiff; + std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2); + if (!CR) { + // We need non-wrapping ranges. + if (CR1.isWrappedSet() || CR2.isWrappedSet()) + return false; + + // Check whether we have equal-size ranges that only differ by one bit. + // In that case we can apply a mask to map one range onto the other. 
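A concrete instance of the mask trick checked below: the ranges [0,4) and [8,12) are equal-sized and their bounds differ only in bit 3 (LowerDiff == UpperDiff == 8), so clearing that bit maps both onto [0,4). A quick exhaustive sanity check of the folded form (plain C++ sketch; the combine emits the same G_AND/G_ICMP shape):

#include <cassert>
#include <cstdint>

int main() {
  // (x u< 4) || (x u>= 8 && x u< 12)  ==>  (x & ~8) u< 4
  for (uint32_t X = 0; X < 256; ++X) {
    bool TwoCmps = (X < 4) || (X >= 8 && X < 12);
    bool Folded = (X & ~8u) < 4;
    assert(TwoCmps == Folded);
  }
}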
+ LowerDiff = CR1.getLower() ^ CR2.getLower(); + APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1); + APInt CR1Size = CR1.getUpper() - CR1.getLower(); + if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff || + CR1Size != CR2.getUpper() - CR2.getLower()) + return false; + + CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2; + CreateMask = true; + } + + if (IsAnd) + CR = CR->inverse(); + + CmpInst::Predicate NewPred; + APInt NewC, Offset; + CR->getEquivalentICmp(NewPred, NewC, Offset); + + // We take the result type of one of the original icmps, CmpTy, for + // the icmp to be built. The operand type, CmpOperandTy, is used for + // the other instructions and constants to be built. The types of + // the parameters and output are the same for add and and. CmpTy + // and the type of DstReg might differ. That is why we zext or trunc + // the icmp into the destination register. + + MatchInfo = [=](MachineIRBuilder &B) { + if (CreateMask && Offset != 0) { + auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff); + auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask. + auto OffsetC = B.buildConstant(CmpOperandTy, Offset); + auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags); + auto NewCon = B.buildConstant(CmpOperandTy, NewC); + auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon); + B.buildZExtOrTrunc(DstReg, ICmp); + } else if (CreateMask && Offset == 0) { + auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff); + auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask. + auto NewCon = B.buildConstant(CmpOperandTy, NewC); + auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon); + B.buildZExtOrTrunc(DstReg, ICmp); + } else if (!CreateMask && Offset != 0) { + auto OffsetC = B.buildConstant(CmpOperandTy, Offset); + auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags); + auto NewCon = B.buildConstant(CmpOperandTy, NewC); + auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon); + B.buildZExtOrTrunc(DstReg, ICmp); + } else if (!CreateMask && Offset == 0) { + auto NewCon = B.buildConstant(CmpOperandTy, NewC); + auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon); + B.buildZExtOrTrunc(DstReg, ICmp); + } else { + llvm_unreachable("unexpected configuration of CreateMask and Offset"); + } + }; + return true; +} + +bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic, + BuildFnTy &MatchInfo) { + assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor"); + Register DestReg = Logic->getReg(0); + Register LHS = Logic->getLHSReg(); + Register RHS = Logic->getRHSReg(); + bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND; + + // We need a compare on the LHS register. + GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI); + if (!Cmp1) + return false; + + // We need a compare on the RHS register. + GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI); + if (!Cmp2) + return false; + + LLT CmpTy = MRI.getType(Cmp1->getReg(0)); + LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg()); + + // We build one fcmp, want to fold the fcmps, replace the logic op, + // and the fcmps must have the same shape.
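The predicate arithmetic in this fcmp fold works because the FCmp predicates form a 4-bit code (unordered, lt, gt, eq bits), so AND/OR of two compares of the same operands is bitwise AND/OR of their codes. A sketch assuming LLVM's CmpInst enums:

#include "llvm/IR/InstrTypes.h"
#include <cassert>
using namespace llvm;

int main() {
  // (a olt b) || (a ogt b) == (a one b): OLT|OGT == ONE in the enum.
  assert((CmpInst::FCMP_OLT | CmpInst::FCMP_OGT) == CmpInst::FCMP_ONE);
  // (a oeq b) || (a olt b) == (a ole b).
  assert((CmpInst::FCMP_OEQ | CmpInst::FCMP_OLT) == CmpInst::FCMP_OLE);
  // Disjoint codes AND to FCMP_FALSE, which folds to the constant 0.
  assert((CmpInst::FCMP_OLT & CmpInst::FCMP_OGT) == CmpInst::FCMP_FALSE);
}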
+ if (!isLegalOrBeforeLegalizer( + {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) || + !MRI.hasOneNonDBGUse(Logic->getReg(0)) || + !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) || + !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) || + MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg())) + return false; + + CmpInst::Predicate PredL = Cmp1->getCond(); + CmpInst::Predicate PredR = Cmp2->getCond(); + Register LHS0 = Cmp1->getLHSReg(); + Register LHS1 = Cmp1->getRHSReg(); + Register RHS0 = Cmp2->getLHSReg(); + Register RHS1 = Cmp2->getRHSReg(); + + if (LHS0 == RHS1 && LHS1 == RHS0) { + // Swap RHS operands to match LHS. + PredR = CmpInst::getSwappedPredicate(PredR); + std::swap(RHS0, RHS1); + } + + if (LHS0 == RHS0 && LHS1 == RHS1) { + // We determine the new predicate. + unsigned CmpCodeL = getFCmpCode(PredL); + unsigned CmpCodeR = getFCmpCode(PredR); + unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR; + unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags(); + MatchInfo = [=](MachineIRBuilder &B) { + // The fcmp predicates fill the lower part of the enum. + FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred); + if (Pred == FCmpInst::FCMP_FALSE && + isConstantLegalOrBeforeLegalizer(CmpTy)) { + auto False = B.buildConstant(CmpTy, 0); + B.buildZExtOrTrunc(DestReg, False); + } else if (Pred == FCmpInst::FCMP_TRUE && + isConstantLegalOrBeforeLegalizer(CmpTy)) { + auto True = + B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(), + CmpTy.isVector() /*isVector*/, + true /*isFP*/)); + B.buildZExtOrTrunc(DestReg, True); + } else { // We take the predicate without predicate optimizations. + auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags); + B.buildZExtOrTrunc(DestReg, Cmp); + } + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) { + GAnd *And = cast<GAnd>(&MI); + + if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo)) + return true; + + if (tryFoldLogicOfFCmps(And, MatchInfo)) + return true; + + return false; +} + +bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) { + GOr *Or = cast<GOr>(&MI); + + if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo)) + return true; + + if (tryFoldLogicOfFCmps(Or, MatchInfo)) + return true; + + return false; +} + +bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) { + GAddCarryOut *Add = cast<GAddCarryOut>(&MI); + + // Addo has no flags + Register Dst = Add->getReg(0); + Register Carry = Add->getReg(1); + Register LHS = Add->getLHSReg(); + Register RHS = Add->getRHSReg(); + bool IsSigned = Add->isSigned(); + LLT DstTy = MRI.getType(Dst); + LLT CarryTy = MRI.getType(Carry); + + // Fold addo, if the carry is dead -> add, undef. + if (MRI.use_nodbg_empty(Carry) && + isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) { + MatchInfo = [=](MachineIRBuilder &B) { + B.buildAdd(Dst, LHS, RHS); + B.buildUndef(Carry); + }; + return true; + } + + // Canonicalize constant to RHS. 
+ if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) { + if (IsSigned) { MatchInfo = [=](MachineIRBuilder &B) { - B.buildSMax(DstReg, True, False); + B.buildSAddo(Dst, Carry, RHS, LHS); }; return true; } - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_ULE: { - if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy})) - return false; + // !IsSigned + MatchInfo = [=](MachineIRBuilder &B) { + B.buildUAddo(Dst, Carry, RHS, LHS); + }; + return true; + } + + std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS); + std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS); + + // Fold addo(c1, c2) -> c3, carry. + if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) && + isConstantLegalOrBeforeLegalizer(CarryTy)) { + bool Overflow; + APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow) + : MaybeLHS->uadd_ov(*MaybeRHS, Overflow); + MatchInfo = [=](MachineIRBuilder &B) { + B.buildConstant(Dst, Result); + B.buildConstant(Carry, Overflow); + }; + return true; + } + + // Fold (addo x, 0) -> x, no carry + if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) { + MatchInfo = [=](MachineIRBuilder &B) { + B.buildCopy(Dst, LHS); + B.buildConstant(Carry, 0); + }; + return true; + } + + // Given 2 constant operands whose sum does not overflow: + // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1 + // saddo (X +nsw C0), C1 -> saddo X, C0 + C1 + GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI); + if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) && + ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) || + (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) { + std::optional<APInt> MaybeAddRHS = + getConstantOrConstantSplatVector(AddLHS->getRHSReg()); + if (MaybeAddRHS) { + bool Overflow; + APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow) + : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow); + if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) { + if (IsSigned) { + MatchInfo = [=](MachineIRBuilder &B) { + auto ConstRHS = B.buildConstant(DstTy, NewC); + B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS); + }; + return true; + } + // !IsSigned + MatchInfo = [=](MachineIRBuilder &B) { + auto ConstRHS = B.buildConstant(DstTy, NewC); + B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS); + }; + return true; + } + } + }; + + // We try to combine addo to non-overflowing add. + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) || + !isConstantLegalOrBeforeLegalizer(CarryTy)) + return false; + + // We try to combine uaddo to non-overflowing add. 
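The range reasoning used below can be exercised in isolation. A standalone sketch, assuming LLVM's KnownBits and ConstantRange APIs:

#include "llvm/IR/ConstantRange.h"
#include "llvm/Support/KnownBits.h"
#include <cassert>
using namespace llvm;

int main() {
  // Both addends have a known-zero sign bit (e.g. each was zero-extended
  // from 31 bits), so a 32-bit unsigned add can never wrap; G_UADDO then
  // becomes a plain nuw G_ADD with a constant-0 carry.
  KnownBits K(32);
  K.Zero.setBit(31);
  ConstantRange CR = ConstantRange::fromKnownBits(K, /*IsSigned=*/false);
  assert(CR.unsignedAddMayOverflow(CR) ==
         ConstantRange::OverflowResult::NeverOverflows);
}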
+ if (!IsSigned) { + ConstantRange CRLHS = + ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/false); + ConstantRange CRRHS = + ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/false); + + switch (CRLHS.unsignedAddMayOverflow(CRRHS)) { + case ConstantRange::OverflowResult::MayOverflow: + return false; + case ConstantRange::OverflowResult::NeverOverflows: { MatchInfo = [=](MachineIRBuilder &B) { - B.buildUMin(DstReg, True, False); + B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap); + B.buildConstant(Carry, 0); }; return true; } - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: { - if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy})) - return false; + case ConstantRange::OverflowResult::AlwaysOverflowsLow: + case ConstantRange::OverflowResult::AlwaysOverflowsHigh: { MatchInfo = [=](MachineIRBuilder &B) { - B.buildSMin(DstReg, True, False); + B.buildAdd(Dst, LHS, RHS); + B.buildConstant(Carry, 1); }; return true; } - default: - return false; } + return false; + } + + // We try to combine saddo to non-overflowing add. + + // If LHS and RHS each have at least two sign bits, then there is no signed + // overflow. + if (KB->computeNumSignBits(RHS) > 1 && KB->computeNumSignBits(LHS) > 1) { + MatchInfo = [=](MachineIRBuilder &B) { + B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap); + B.buildConstant(Carry, 0); + }; + return true; + } + + ConstantRange CRLHS = + ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/true); + ConstantRange CRRHS = + ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/true); + + switch (CRLHS.signedAddMayOverflow(CRRHS)) { + case ConstantRange::OverflowResult::MayOverflow: + return false; + case ConstantRange::OverflowResult::NeverOverflows: { + MatchInfo = [=](MachineIRBuilder &B) { + B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap); + B.buildConstant(Carry, 0); + }; + return true; + } + case ConstantRange::OverflowResult::AlwaysOverflowsLow: + case ConstantRange::OverflowResult::AlwaysOverflowsHigh: { + MatchInfo = [=](MachineIRBuilder &B) { + B.buildAdd(Dst, LHS, RHS); + B.buildConstant(Carry, 1); + }; + return true; + } + } + + return false; +} + +void CombinerHelper::applyBuildFnMO(const MachineOperand &MO, + BuildFnTy &MatchInfo) { + MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI); + MatchInfo(Builder); + Root->eraseFromParent(); +} + +bool CombinerHelper::matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) { + bool OptForSize = MI.getMF()->getFunction().hasOptSize(); + return getTargetLowering().isBeneficialToExpandPowI(Exponent, OptForSize); +} + +void CombinerHelper::applyExpandFPowI(MachineInstr &MI, int64_t Exponent) { + auto [Dst, Base] = MI.getFirst2Regs(); + LLT Ty = MRI.getType(Dst); + int64_t ExpVal = Exponent; + + if (ExpVal == 0) { + Builder.buildFConstant(Dst, 1.0); + MI.removeFromParent(); + return; + } + + if (ExpVal < 0) + ExpVal = -ExpVal; + + // We use the simple binary decomposition method from SelectionDAG ExpandPowI + // to generate the multiply sequence. There are more optimal ways to do this + // (for example, powi(x,15) generates one more multiply than it should), but + // this has the benefit of being both really simple and much better than a + // libcall. 
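The same binary decomposition in plain C++ (powi here is an illustrative model; the combine keeps its accumulator in a std::optional to avoid the initial multiply by 1.0, which this sketch performs for brevity):

#include <cassert>
#include <cstdint>

double powi(double Base, int64_t Exp) {
  bool Neg = Exp < 0;
  uint64_t E = Neg ? -(uint64_t)Exp : (uint64_t)Exp;
  double Res = 1.0, Square = Base;
  while (E > 0) {
    if (E & 1)
      Res *= Square;  // one fmul per set bit of the exponent
    Square *= Square; // one fmul per bit position
    E >>= 1;
  }
  return Neg ? 1.0 / Res : Res; // fdiv when the exponent was negative
}

int main() {
  assert(powi(2.0, 10) == 1024.0);
  assert(powi(2.0, -2) == 0.25);
  assert(powi(3.0, 0) == 1.0);
}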
+ std::optional<SrcOp> Res; + SrcOp CurSquare = Base; + while (ExpVal > 0) { + if (ExpVal & 1) { + if (!Res) + Res = CurSquare; + else + Res = Builder.buildFMul(Ty, *Res, CurSquare); + } + + CurSquare = Builder.buildFMul(Ty, CurSquare, CurSquare); + ExpVal >>= 1; + } + + // If the original exponent was negative, invert the result, producing + // 1/(x*x*x). + if (Exponent < 0) + Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), *Res, + MI.getFlags()); + + Builder.buildCopy(Dst, *Res); + MI.eraseFromParent(); +} + +bool CombinerHelper::matchSextOfTrunc(const MachineOperand &MO, + BuildFnTy &MatchInfo) { + GSext *Sext = cast<GSext>(getDefIgnoringCopies(MO.getReg(), MRI)); + GTrunc *Trunc = cast<GTrunc>(getDefIgnoringCopies(Sext->getSrcReg(), MRI)); + + Register Dst = Sext->getReg(0); + Register Src = Trunc->getSrcReg(); + + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); + + if (DstTy == SrcTy) { + MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, Src); }; + return true; + } + + if (DstTy.getScalarSizeInBits() < SrcTy.getScalarSizeInBits() && + isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {DstTy, SrcTy}})) { + MatchInfo = [=](MachineIRBuilder &B) { + B.buildTrunc(Dst, Src, MachineInstr::MIFlag::NoSWrap); + }; + return true; + } + + if (DstTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits() && + isLegalOrBeforeLegalizer({TargetOpcode::G_SEXT, {DstTy, SrcTy}})) { + MatchInfo = [=](MachineIRBuilder &B) { B.buildSExt(Dst, Src); }; + return true; } return false; } -bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) { - GSelect *Select = cast<GSelect>(&MI); +bool CombinerHelper::matchZextOfTrunc(const MachineOperand &MO, + BuildFnTy &MatchInfo) { + GZext *Zext = cast<GZext>(getDefIgnoringCopies(MO.getReg(), MRI)); + GTrunc *Trunc = cast<GTrunc>(getDefIgnoringCopies(Zext->getSrcReg(), MRI)); - if (tryFoldSelectOfConstants(Select, MatchInfo)) + Register Dst = Zext->getReg(0); + Register Src = Trunc->getSrcReg(); + + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); + + if (DstTy == SrcTy) { + MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, Src); }; return true; + } - if (tryFoldBoolSelectToLogic(Select, MatchInfo)) + if (DstTy.getScalarSizeInBits() < SrcTy.getScalarSizeInBits() && + isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {DstTy, SrcTy}})) { + MatchInfo = [=](MachineIRBuilder &B) { + B.buildTrunc(Dst, Src, MachineInstr::MIFlag::NoUWrap); + }; + return true; + } + + if (DstTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits() && + isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {DstTy, SrcTy}})) { + MatchInfo = [=](MachineIRBuilder &B) { + B.buildZExt(Dst, Src, MachineInstr::MIFlag::NonNeg); + }; return true; + } + + return false; +} - if (tryFoldSelectToIntMinMax(Select, MatchInfo)) +bool CombinerHelper::matchNonNegZext(const MachineOperand &MO, + BuildFnTy &MatchInfo) { + GZext *Zext = cast<GZext>(MRI.getVRegDef(MO.getReg())); + + Register Dst = Zext->getReg(0); + Register Src = Zext->getSrcReg(); + + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); + const auto &TLI = getTargetLowering(); + + // Convert zext nneg to sext if sext is the preferred form for the target. 
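Why the nneg flag licenses the sext rewrite below: with the sign bit clear, zero- and sign-extension produce identical bits, so the form the target considers cheaper can be chosen. A quick check:

#include <cassert>
#include <cstdint>

int main() {
  // zext nneg == sext whenever the source is non-negative...
  for (int32_t X = 0; X < 1000000; X += 997)
    assert((uint64_t)(uint32_t)X == (uint64_t)(int64_t)X);
  // ...and the flag is essential: the two differ for negative values.
  int32_t Neg = -1;
  assert((uint64_t)(uint32_t)Neg != (uint64_t)(int64_t)Neg);
}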
+ if (isLegalOrBeforeLegalizer({TargetOpcode::G_SEXT, {DstTy, SrcTy}}) && + TLI.isSExtCheaperThanZExt(getMVTForLLT(SrcTy), getMVTForLLT(DstTy))) { + MatchInfo = [=](MachineIRBuilder &B) { B.buildSExt(Dst, Src); }; return true; + } return false; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp new file mode 100644 index 000000000000..66b1c5f8ca82 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp @@ -0,0 +1,486 @@ +//===- CombinerHelperVectorOps.cpp-----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements CombinerHelper for G_EXTRACT_VECTOR_ELT, +// G_INSERT_VECTOR_ELT, and G_VSCALE +// +//===----------------------------------------------------------------------===// +#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/LowLevelTypeUtils.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/Support/Casting.h" +#include <optional> + +#define DEBUG_TYPE "gi-combiner" + +using namespace llvm; +using namespace MIPatternMatch; + +bool CombinerHelper::matchExtractVectorElement(MachineInstr &MI, + BuildFnTy &MatchInfo) { + GExtractVectorElement *Extract = cast<GExtractVectorElement>(&MI); + + Register Dst = Extract->getReg(0); + Register Vector = Extract->getVectorReg(); + Register Index = Extract->getIndexReg(); + LLT DstTy = MRI.getType(Dst); + LLT VectorTy = MRI.getType(Vector); + + // The vector register can be def'd by various ops that have vector as its + // type. They can all be used for constant folding, scalarizing, + // canonicalization, or combining based on symmetry. + // + // vector like ops + // * build vector + // * build vector trunc + // * shuffle vector + // * splat vector + // * concat vectors + // * insert/extract vector element + // * insert/extract subvector + // * vector loads + // * scalable vector loads + // + // compute like ops + // * binary ops + // * unary ops + // * exts and truncs + // * casts + // * fneg + // * select + // * phis + // * cmps + // * freeze + // * bitcast + // * undef + + // We try to get the value of the Index register. + std::optional<ValueAndVReg> MaybeIndex = + getIConstantVRegValWithLookThrough(Index, MRI); + std::optional<APInt> IndexC = std::nullopt; + + if (MaybeIndex) + IndexC = MaybeIndex->Value; + + // Fold extractVectorElement(Vector, TOOLARGE) -> undef + if (IndexC && VectorTy.isFixedVector() && + IndexC->uge(VectorTy.getNumElements()) && + isLegalOrBeforeLegalizer({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) { + // For fixed-length vectors, it's invalid to extract out-of-range elements. 
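In the document's MIR notation, the fixed-vector out-of-range fold above is simply (hypothetical registers):

%idx:_(s64) = G_CONSTANT i64 7
%x:_(s32)   = G_EXTRACT_VECTOR_ELT %v(<4 x s32>), %idx(s64)
-->
%x:_(s32)   = G_IMPLICIT_DEF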
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildUndef(Dst); }; + return true; + } + + return false; +} + +bool CombinerHelper::matchExtractVectorElementWithDifferentIndices( + const MachineOperand &MO, BuildFnTy &MatchInfo) { + MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI); + GExtractVectorElement *Extract = cast<GExtractVectorElement>(Root); + + // + // %idx1:_(s64) = G_CONSTANT i64 1 + // %idx2:_(s64) = G_CONSTANT i64 2 + // %insert:_(<2 x s32>) = G_INSERT_VECTOR_ELT %bv(<2 x s32>), %value(s32), %idx2(s64) + // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %insert(<2 x s32>), %idx1(s64) + // + // --> + // + // %insert:_(<2 x s32>) = G_INSERT_VECTOR_ELT %bv(<2 x s32>), %value(s32), %idx2(s64) + // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %idx1(s64) + // + // + + Register Index = Extract->getIndexReg(); + + // We try to get the value of the Index register. + std::optional<ValueAndVReg> MaybeIndex = + getIConstantVRegValWithLookThrough(Index, MRI); + std::optional<APInt> IndexC = std::nullopt; + + if (!MaybeIndex) + return false; + else + IndexC = MaybeIndex->Value; + + Register Vector = Extract->getVectorReg(); + + GInsertVectorElement *Insert = + getOpcodeDef<GInsertVectorElement>(Vector, MRI); + if (!Insert) + return false; + + Register Dst = Extract->getReg(0); + + std::optional<ValueAndVReg> MaybeInsertIndex = + getIConstantVRegValWithLookThrough(Insert->getIndexReg(), MRI); + + if (MaybeInsertIndex && MaybeInsertIndex->Value != *IndexC) { + // There is no one-use check. We have to keep the insert. When both Index + // registers are constants and not equal, we can look into the Vector + // register of the insert. + MatchInfo = [=](MachineIRBuilder &B) { + B.buildExtractVectorElement(Dst, Insert->getVectorReg(), Index); + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchExtractVectorElementWithBuildVector( + const MachineOperand &MO, BuildFnTy &MatchInfo) { + MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI); + GExtractVectorElement *Extract = cast<GExtractVectorElement>(Root); + + // + // %zero:_(s64) = G_CONSTANT i64 0 + // %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32) + // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %zero(s64) + // + // --> + // + // %extract:_(s32) = COPY %arg1(s32) + // + // + // + // %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32) + // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64) + // + // --> + // + // %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32) + // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64) + // + + Register Vector = Extract->getVectorReg(); + + // We expect a buildVector on the Vector register. + GBuildVector *Build = getOpcodeDef<GBuildVector>(Vector, MRI); + if (!Build) + return false; + + LLT VectorTy = MRI.getType(Vector); + + // There is a one-use check. There are more combines on build vectors. + EVT Ty(getMVTForLLT(VectorTy)); + if (!MRI.hasOneNonDBGUse(Build->getReg(0)) || + !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty)) + return false; + + Register Index = Extract->getIndexReg(); + + // If the Index is constant, then we can extract the element from the given + // offset. + std::optional<ValueAndVReg> MaybeIndex = + getIConstantVRegValWithLookThrough(Index, MRI); + if (!MaybeIndex) + return false; + + // We now know that there is a buildVector def'd on the Vector register and + // the index is const. The combine will succeed.
+ + Register Dst = Extract->getReg(0); + + MatchInfo = [=](MachineIRBuilder &B) { + B.buildCopy(Dst, Build->getSourceReg(MaybeIndex->Value.getZExtValue())); + }; + + return true; +} + +bool CombinerHelper::matchExtractVectorElementWithBuildVectorTrunc( + const MachineOperand &MO, BuildFnTy &MatchInfo) { + MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI); + GExtractVectorElement *Extract = cast<GExtractVectorElement>(Root); + + // + // %zero:_(s64) = G_CONSTANT i64 0 + // %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64) + // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %zero(s64) + // + // --> + // + // %extract:_(32) = G_TRUNC %arg1(s64) + // + // + // + // %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64) + // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64) + // + // --> + // + // %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64) + // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64) + // + + Register Vector = Extract->getVectorReg(); + + // We expect a buildVectorTrunc on the Vector register. + GBuildVectorTrunc *Build = getOpcodeDef<GBuildVectorTrunc>(Vector, MRI); + if (!Build) + return false; + + LLT VectorTy = MRI.getType(Vector); + + // There is a one-use check. There are more combines on build vectors. + EVT Ty(getMVTForLLT(VectorTy)); + if (!MRI.hasOneNonDBGUse(Build->getReg(0)) || + !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty)) + return false; + + Register Index = Extract->getIndexReg(); + + // If the Index is constant, then we can extract the element from the given + // offset. + std::optional<ValueAndVReg> MaybeIndex = + getIConstantVRegValWithLookThrough(Index, MRI); + if (!MaybeIndex) + return false; + + // We now know that there is a buildVectorTrunc def'd on the Vector register + // and the index is const. The combine will succeed. + + Register Dst = Extract->getReg(0); + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Build->getSourceReg(0)); + + // For buildVectorTrunc, the inputs are truncated. 
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {DstTy, SrcTy}})) + return false; + + MatchInfo = [=](MachineIRBuilder &B) { + B.buildTrunc(Dst, Build->getSourceReg(MaybeIndex->Value.getZExtValue())); + }; + + return true; +} + +bool CombinerHelper::matchExtractVectorElementWithShuffleVector( + const MachineOperand &MO, BuildFnTy &MatchInfo) { + GExtractVectorElement *Extract = + cast<GExtractVectorElement>(getDefIgnoringCopies(MO.getReg(), MRI)); + + // + // %zero:_(s64) = G_CONSTANT i64 0 + // %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2(<4 x s32>), + // shufflemask(0, 0, 0, 0) + // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %sv(<4 x s32>), %zero(s64) + // + // --> + // + // %zero1:_(s64) = G_CONSTANT i64 0 + // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %arg1(<4 x s32>), %zero1(s64) + // + // + // + // + // %three:_(s64) = G_CONSTANT i64 3 + // %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2(<4 x s32>), + // shufflemask(0, 0, 0, -1) + // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %sv(<4 x s32>), %three(s64) + // + // --> + // + // %extract:_(s32) = G_IMPLICIT_DEF + // + // + // + // + // + // %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2(<4 x s32>), + // shufflemask(0, 0, 0, -1) + // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %sv(<4 x s32>), %opaque(s64) + // + // --> + // + // %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2(<4 x s32>), + // shufflemask(0, 0, 0, -1) + // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %sv(<4 x s32>), %opaque(s64) + // + + // We try to get the value of the Index register. + std::optional<ValueAndVReg> MaybeIndex = + getIConstantVRegValWithLookThrough(Extract->getIndexReg(), MRI); + if (!MaybeIndex) + return false; + + GShuffleVector *Shuffle = + cast<GShuffleVector>(getDefIgnoringCopies(Extract->getVectorReg(), MRI)); + + ArrayRef<int> Mask = Shuffle->getMask(); + + unsigned Offset = MaybeIndex->Value.getZExtValue(); + int SrcIdx = Mask[Offset]; + + LLT Src1Type = MRI.getType(Shuffle->getSrc1Reg()); + // At the IR level a <1 x ty> shuffle vector is valid, but we want to extract + // from a vector. + assert(Src1Type.isVector() && "expected to extract from a vector"); + unsigned LHSWidth = Src1Type.isVector() ? Src1Type.getNumElements() : 1; + + // Note that there is no one use check. + Register Dst = Extract->getReg(0); + LLT DstTy = MRI.getType(Dst); + + if (SrcIdx < 0 && + isLegalOrBeforeLegalizer({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) { + MatchInfo = [=](MachineIRBuilder &B) { B.buildUndef(Dst); }; + return true; + } + + // If the legality check failed, then we still have to abort. + if (SrcIdx < 0) + return false; + + Register NewVector; + + // We check in which vector and at what offset to look through. + if (SrcIdx < (int)LHSWidth) { + NewVector = Shuffle->getSrc1Reg(); + // SrcIdx unchanged + } else { // SrcIdx >= LHSWidth + NewVector = Shuffle->getSrc2Reg(); + SrcIdx -= LHSWidth; + } + + LLT IdxTy = MRI.getType(Extract->getIndexReg()); + LLT NewVectorTy = MRI.getType(NewVector); + + // We check the legality of the look through. + if (!isLegalOrBeforeLegalizer( + {TargetOpcode::G_EXTRACT_VECTOR_ELT, {DstTy, NewVectorTy, IdxTy}}) || + !isConstantLegalOrBeforeLegalizer({IdxTy})) + return false; + + // We look through the shuffle vector.
+ MatchInfo = [=](MachineIRBuilder &B) { + auto Idx = B.buildConstant(IdxTy, SrcIdx); + B.buildExtractVectorElement(Dst, NewVector, Idx); + }; + + return true; +} + +bool CombinerHelper::matchInsertVectorElementOOB(MachineInstr &MI, + BuildFnTy &MatchInfo) { + GInsertVectorElement *Insert = cast<GInsertVectorElement>(&MI); + + Register Dst = Insert->getReg(0); + LLT DstTy = MRI.getType(Dst); + Register Index = Insert->getIndexReg(); + + if (!DstTy.isFixedVector()) + return false; + + std::optional<ValueAndVReg> MaybeIndex = + getIConstantVRegValWithLookThrough(Index, MRI); + + if (MaybeIndex && MaybeIndex->Value.uge(DstTy.getNumElements()) && + isLegalOrBeforeLegalizer({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) { + MatchInfo = [=](MachineIRBuilder &B) { B.buildUndef(Dst); }; + return true; + } + + return false; +} + +bool CombinerHelper::matchAddOfVScale(const MachineOperand &MO, + BuildFnTy &MatchInfo) { + GAdd *Add = cast<GAdd>(MRI.getVRegDef(MO.getReg())); + GVScale *LHSVScale = cast<GVScale>(MRI.getVRegDef(Add->getLHSReg())); + GVScale *RHSVScale = cast<GVScale>(MRI.getVRegDef(Add->getRHSReg())); + + Register Dst = Add->getReg(0); + + if (!MRI.hasOneNonDBGUse(LHSVScale->getReg(0)) || + !MRI.hasOneNonDBGUse(RHSVScale->getReg(0))) + return false; + + MatchInfo = [=](MachineIRBuilder &B) { + B.buildVScale(Dst, LHSVScale->getSrc() + RHSVScale->getSrc()); + }; + + return true; +} + +bool CombinerHelper::matchMulOfVScale(const MachineOperand &MO, + BuildFnTy &MatchInfo) { + GMul *Mul = cast<GMul>(MRI.getVRegDef(MO.getReg())); + GVScale *LHSVScale = cast<GVScale>(MRI.getVRegDef(Mul->getLHSReg())); + + std::optional<APInt> MaybeRHS = getIConstantVRegVal(Mul->getRHSReg(), MRI); + if (!MaybeRHS) + return false; + + Register Dst = MO.getReg(); + + if (!MRI.hasOneNonDBGUse(LHSVScale->getReg(0))) + return false; + + MatchInfo = [=](MachineIRBuilder &B) { + B.buildVScale(Dst, LHSVScale->getSrc() * *MaybeRHS); + }; + + return true; +} + +bool CombinerHelper::matchSubOfVScale(const MachineOperand &MO, + BuildFnTy &MatchInfo) { + GSub *Sub = cast<GSub>(MRI.getVRegDef(MO.getReg())); + GVScale *RHSVScale = cast<GVScale>(MRI.getVRegDef(Sub->getRHSReg())); + + Register Dst = MO.getReg(); + LLT DstTy = MRI.getType(Dst); + + if (!MRI.hasOneNonDBGUse(RHSVScale->getReg(0)) || + !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, DstTy})) + return false; + + MatchInfo = [=](MachineIRBuilder &B) { + auto VScale = B.buildVScale(DstTy, -RHSVScale->getSrc()); + B.buildAdd(Dst, Sub->getLHSReg(), VScale, Sub->getFlags()); + }; + + return true; +} + +bool CombinerHelper::matchShlOfVScale(const MachineOperand &MO, + BuildFnTy &MatchInfo) { + GShl *Shl = cast<GShl>(MRI.getVRegDef(MO.getReg())); + GVScale *LHSVScale = cast<GVScale>(MRI.getVRegDef(Shl->getSrcReg())); + + std::optional<APInt> MaybeRHS = getIConstantVRegVal(Shl->getShiftReg(), MRI); + if (!MaybeRHS) + return false; + + Register Dst = MO.getReg(); + LLT DstTy = MRI.getType(Dst); + + if (!MRI.hasOneNonDBGUse(LHSVScale->getReg(0)) || + !isLegalOrBeforeLegalizer({TargetOpcode::G_VSCALE, DstTy})) + return false; + + MatchInfo = [=](MachineIRBuilder &B) { + B.buildVScale(Dst, LHSVScale->getSrc().shl(*MaybeRHS)); + }; + + return true; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index ea8c20cdcd45..9558247db3c4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ 
b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -13,11 +13,13 @@ #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Module.h" #include "llvm/Target/TargetMachine.h" @@ -32,7 +34,7 @@ INITIALIZE_PASS(GISelKnownBitsAnalysis, DEBUG_TYPE, GISelKnownBits::GISelKnownBits(MachineFunction &MF, unsigned MaxDepth) : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()), - DL(MF.getFunction().getParent()->getDataLayout()), MaxDepth(MaxDepth) {} + DL(MF.getFunction().getDataLayout()), MaxDepth(MaxDepth) {} Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) { const MachineInstr *MI = MRI.getVRegDef(R); @@ -64,8 +66,11 @@ KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) { KnownBits GISelKnownBits::getKnownBits(Register R) { const LLT Ty = MRI.getType(R); + // Since the number of lanes in a scalable vector is unknown at compile time, + // we track one bit which is implicitly broadcast to all lanes. This means + // that all lanes in a scalable vector are considered demanded. APInt DemandedElts = - Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1); + Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1); return getKnownBits(R, DemandedElts); } @@ -253,10 +258,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, break; } case TargetOpcode::G_CONSTANT: { - auto CstVal = getIConstantVRegVal(R, MRI); - if (!CstVal) - break; - Known = KnownBits::makeConstant(*CstVal); + Known = KnownBits::makeConstant(MI.getOperand(1).getCImm()->getValue()); break; } case TargetOpcode::G_FRAME_INDEX: { @@ -269,8 +271,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, Depth + 1); computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts, Depth + 1); - Known = KnownBits::computeForAddSub(/*Add*/ false, /*NSW*/ false, Known, - Known2); + Known = KnownBits::computeForAddSub(/*Add=*/false, /*NSW=*/false, + /* NUW=*/false, Known, Known2); break; } case TargetOpcode::G_XOR: { @@ -296,8 +298,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, Depth + 1); computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts, Depth + 1); - Known = - KnownBits::computeForAddSub(/*Add*/ true, /*NSW*/ false, Known, Known2); + Known = KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false, + /* NUW=*/false, Known, Known2); break; } case TargetOpcode::G_AND: { @@ -405,17 +407,23 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, } case TargetOpcode::G_LOAD: { const MachineMemOperand *MMO = *MI.memoperands_begin(); - if (const MDNode *Ranges = MMO->getRanges()) { - computeKnownBitsFromRangeMetadata(*Ranges, Known); - } - + KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits()); + if (const MDNode *Ranges = MMO->getRanges()) + computeKnownBitsFromRangeMetadata(*Ranges, KnownRange); + Known = KnownRange.anyext(Known.getBitWidth()); break; } + case TargetOpcode::G_SEXTLOAD: case TargetOpcode::G_ZEXTLOAD: { if (DstTy.isVector()) break; - // Everything above the retrieved bits is zero - 
Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits()); + const MachineMemOperand *MMO = *MI.memoperands_begin(); + KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits()); + if (const MDNode *Ranges = MMO->getRanges()) + computeKnownBitsFromRangeMetadata(*Ranges, KnownRange); + Known = Opcode == TargetOpcode::G_SEXTLOAD + ? KnownRange.sext(Known.getBitWidth()) + : KnownRange.zext(Known.getBitWidth()); break; } case TargetOpcode::G_ASHR: { @@ -564,7 +572,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, // right. KnownBits ExtKnown = KnownBits::makeConstant(APInt(BitWidth, BitWidth)); KnownBits ShiftKnown = KnownBits::computeForAddSub( - /*Add*/ false, /*NSW*/ false, ExtKnown, WidthKnown); + /*Add=*/false, /*NSW=*/false, /* NUW=*/false, ExtKnown, WidthKnown); Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown); break; } @@ -588,9 +596,19 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, } break; } + case TargetOpcode::G_CTLZ: + case TargetOpcode::G_CTLZ_ZERO_UNDEF: { + KnownBits SrcOpKnown; + computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts, + Depth + 1); + // If we have a known 1, its position is our upper bound. + unsigned PossibleLZ = SrcOpKnown.countMaxLeadingZeros(); + unsigned LowBits = llvm::bit_width(PossibleLZ); + Known.Zero.setBitsFrom(LowBits); + break; + } } - assert(!Known.hasConflict() && "Bits known to be one AND zero?"); LLVM_DEBUG(dumpResult(MI, Known, Depth)); // Update the cache. @@ -608,6 +626,33 @@ unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1, return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits); } +/// Compute the known number of sign bits with attached range metadata in the +/// memory operand. If this is an extending load, accounts for the behavior of +/// the high bits. +static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld, + unsigned TyBits) { + const MDNode *Ranges = Ld->getRanges(); + if (!Ranges) + return 1; + + ConstantRange CR = getConstantRangeFromMetadata(*Ranges); + if (TyBits > CR.getBitWidth()) { + switch (Ld->getOpcode()) { + case TargetOpcode::G_SEXTLOAD: + CR = CR.signExtend(TyBits); + break; + case TargetOpcode::G_ZEXTLOAD: + CR = CR.zeroExtend(TyBits); + break; + default: + break; + } + } + + return std::min(CR.getSignedMin().getNumSignBits(), + CR.getSignedMax().getNumSignBits()); +} + unsigned GISelKnownBits::computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth) { @@ -659,23 +704,56 @@ unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned InRegBits = TyBits - SrcBits + 1; return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1), InRegBits); } + case TargetOpcode::G_LOAD: { + GLoad *Ld = cast<GLoad>(&MI); + if (DemandedElts != 1 || !getDataLayout().isLittleEndian()) + break; + + return computeNumSignBitsFromRangeMetadata(Ld, TyBits); + } case TargetOpcode::G_SEXTLOAD: { + GSExtLoad *Ld = cast<GSExtLoad>(&MI); + // FIXME: We need an in-memory type representation. if (DstTy.isVector()) return 1; + unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits); + if (NumBits != 1) + return NumBits; + // e.g. i16->i32 = '17' bits known. const MachineMemOperand *MMO = *MI.memoperands_begin(); - return TyBits - MMO->getSizeInBits() + 1; + return TyBits - MMO->getSizeInBits().getValue() + 1; } case TargetOpcode::G_ZEXTLOAD: { + GZExtLoad *Ld = cast<GZExtLoad>(&MI); + // FIXME: We need an in-memory type representation. 
if (DstTy.isVector()) return 1; + unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits); + if (NumBits != 1) + return NumBits; + // e.g. i16->i32 = '16' bits known. const MachineMemOperand *MMO = *MI.memoperands_begin(); - return TyBits - MMO->getSizeInBits(); + return TyBits - MMO->getSizeInBits().getValue(); + } + case TargetOpcode::G_AND: + case TargetOpcode::G_OR: + case TargetOpcode::G_XOR: { + Register Src1 = MI.getOperand(1).getReg(); + unsigned Src1NumSignBits = + computeNumSignBits(Src1, DemandedElts, Depth + 1); + if (Src1NumSignBits != 1) { + Register Src2 = MI.getOperand(2).getReg(); + unsigned Src2NumSignBits = + computeNumSignBits(Src2, DemandedElts, Depth + 1); + FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits); + } + break; } case TargetOpcode::G_TRUNC: { Register Src = MI.getOperand(1).getReg(); @@ -781,5 +859,5 @@ GISelKnownBits &GISelKnownBitsAnalysis::get(MachineFunction &MF) { MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6; Info = std::make_unique<GISelKnownBits>(MF, MaxDepth); } - return *Info.get(); + return *Info; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 1a71c1232c70..68a8a273a1b4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" @@ -28,7 +29,6 @@ #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/LowLevelTypeUtils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -38,7 +38,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/RuntimeLibcallUtil.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/SwitchLoweringUtils.h" #include "llvm/CodeGen/TargetFrameLowering.h" @@ -48,6 +48,7 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGenTypes/LowLevelType.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" @@ -213,8 +214,9 @@ ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) { auto *VRegs = VMap.getVRegs(Val); auto *Offsets = VMap.getOffsets(Val); - assert(Val.getType()->isSized() && - "Don't know how to create an empty vreg"); + if (!Val.getType()->isTokenTy()) + assert(Val.getType()->isSized() && + "Don't know how to create an empty vreg"); SmallVector<LLT, 4> SplitTys; computeValueLLTs(*DL, *Val.getType(), SplitTys, @@ -333,13 +335,11 @@ bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) { bool IRTranslator::translateCompare(const User &U, MachineIRBuilder &MIRBuilder) { - auto *CI = dyn_cast<CmpInst>(&U); + auto *CI = cast<CmpInst>(&U); Register Op0 = getOrCreateVReg(*U.getOperand(0)); Register Op1 = getOrCreateVReg(*U.getOperand(1)); Register Res 
= getOrCreateVReg(U); - CmpInst::Predicate Pred = - CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>( - cast<ConstantExpr>(U).getPredicate()); + CmpInst::Predicate Pred = CI->getPredicate(); if (CmpInst::isIntPredicate(Pred)) MIRBuilder.buildICmp(Pred, Res, Op0, Op1); else if (Pred == CmpInst::FCMP_FALSE) @@ -596,8 +596,6 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) { const Value *CondVal = BrInst.getCondition(); MachineBasicBlock *Succ1MBB = &getMBB(*BrInst.getSuccessor(1)); - const auto &TLI = *MF->getSubtarget().getTargetLowering(); - // If this is a series of conditions that are or'd or and'd together, emit // this as a sequence of branches instead of setcc's with and/or operations. // As long as jumps are not expensive (exceptions for multi-use logic ops, @@ -617,7 +615,7 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) { // jle foo using namespace PatternMatch; const Instruction *CondI = dyn_cast<Instruction>(CondVal); - if (!TLI.isJumpExpensive() && CondI && CondI->hasOneUse() && + if (!TLI->isJumpExpensive() && CondI && CondI->hasOneUse() && !BrInst.hasMetadata(LLVMContext::MD_unpredictable)) { Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0; Value *Vec; @@ -1363,9 +1361,8 @@ static bool isSwiftError(const Value *V) { bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { const LoadInst &LI = cast<LoadInst>(U); - - unsigned StoreSize = DL->getTypeStoreSize(LI.getType()); - if (StoreSize == 0) + TypeSize StoreSize = DL->getTypeStoreSize(LI.getType()); + if (StoreSize.isZero()) return true; ArrayRef<Register> Regs = getOrCreateVRegs(LI); @@ -1385,9 +1382,8 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { return true; } - auto &TLI = *MF->getSubtarget().getTargetLowering(); MachineMemOperand::Flags Flags = - TLI.getLoadMemOperandFlags(LI, *DL, AC, LibInfo); + TLI->getLoadMemOperandFlags(LI, *DL, AC, LibInfo); if (AA && !(Flags & MachineMemOperand::MOInvariant)) { if (AA->pointsToConstantMemory( MemoryLocation(Ptr, LocationSize::precise(StoreSize), AAInfo))) { @@ -1415,7 +1411,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { const StoreInst &SI = cast<StoreInst>(U); - if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0) + if (DL->getTypeStoreSize(SI.getValueOperand()->getType()).isZero()) return true; ArrayRef<Register> Vals = getOrCreateVRegs(*SI.getValueOperand()); @@ -1434,8 +1430,7 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { return true; } - auto &TLI = *MF->getSubtarget().getTargetLowering(); - MachineMemOperand::Flags Flags = TLI.getStoreMemOperandFlags(SI, *DL); + MachineMemOperand::Flags Flags = TLI->getStoreMemOperandFlags(SI, *DL); for (unsigned i = 0; i < Vals.size(); ++i) { Register Addr; @@ -1565,9 +1560,14 @@ bool IRTranslator::translateCast(unsigned Opcode, const User &U, if (U.getType()->getScalarType()->isBFloatTy() || U.getOperand(0)->getType()->getScalarType()->isBFloatTy()) return false; + + uint32_t Flags = 0; + if (const Instruction *I = dyn_cast<Instruction>(&U)) + Flags = MachineInstr::copyFlagsFromInstruction(*I); + Register Op = getOrCreateVReg(*U.getOperand(0)); Register Res = getOrCreateVReg(U); - MIRBuilder.buildInstr(Opcode, {Res}, {Op}); + MIRBuilder.buildInstr(Opcode, {Res}, {Op}, Flags); return true; } @@ -1581,10 +1581,8 @@ bool 
IRTranslator::translateGetElementPtr(const User &U, LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL); uint32_t Flags = 0; - if (isa<Instruction>(U)) { - const Instruction &I = cast<Instruction>(U); - Flags = MachineInstr::copyFlagsFromInstruction(I); - } + if (const Instruction *I = dyn_cast<Instruction>(&U)) + Flags = MachineInstr::copyFlagsFromInstruction(*I); // Normalize Vector GEP - all scalar operands should be converted to the // splat vector. @@ -1602,10 +1600,10 @@ bool IRTranslator::translateGetElementPtr(const User &U, // We might need to splat the base pointer into a vector if the offsets // are vectors. if (WantSplatVector && !PtrTy.isVector()) { - BaseReg = - MIRBuilder - .buildSplatVector(LLT::fixed_vector(VectorWidth, PtrTy), BaseReg) - .getReg(0); + BaseReg = MIRBuilder + .buildSplatBuildVector(LLT::fixed_vector(VectorWidth, PtrTy), + BaseReg) + .getReg(0); PtrIRTy = FixedVectorType::get(PtrIRTy, VectorWidth); PtrTy = getLLTForType(*PtrIRTy, *DL); OffsetIRTy = DL->getIndexType(PtrIRTy); @@ -1643,8 +1641,10 @@ bool IRTranslator::translateGetElementPtr(const User &U, LLT IdxTy = MRI->getType(IdxReg); if (IdxTy != OffsetTy) { if (!IdxTy.isVector() && WantSplatVector) { - IdxReg = MIRBuilder.buildSplatVector( - OffsetTy.changeElementType(IdxTy), IdxReg).getReg(0); + IdxReg = MIRBuilder + .buildSplatBuildVector(OffsetTy.changeElementType(IdxTy), + IdxReg) + .getReg(0); } IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0); @@ -1772,6 +1772,67 @@ bool IRTranslator::translateMemFunc(const CallInst &CI, return true; } +bool IRTranslator::translateTrap(const CallInst &CI, + MachineIRBuilder &MIRBuilder, + unsigned Opcode) { + StringRef TrapFuncName = + CI.getAttributes().getFnAttr("trap-func-name").getValueAsString(); + if (TrapFuncName.empty()) { + if (Opcode == TargetOpcode::G_UBSANTRAP) { + uint64_t Code = cast<ConstantInt>(CI.getOperand(0))->getZExtValue(); + MIRBuilder.buildInstr(Opcode, {}, ArrayRef<llvm::SrcOp>{Code}); + } else { + MIRBuilder.buildInstr(Opcode); + } + return true; + } + + CallLowering::CallLoweringInfo Info; + if (Opcode == TargetOpcode::G_UBSANTRAP) + Info.OrigArgs.push_back({getOrCreateVRegs(*CI.getArgOperand(0)), + CI.getArgOperand(0)->getType(), 0}); + + Info.Callee = MachineOperand::CreateES(TrapFuncName.data()); + Info.CB = &CI; + Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0}; + return CLI->lowerCall(MIRBuilder, Info); +} + +bool IRTranslator::translateVectorInterleave2Intrinsic( + const CallInst &CI, MachineIRBuilder &MIRBuilder) { + assert(CI.getIntrinsicID() == Intrinsic::vector_interleave2 && + "This function can only be called on the interleave2 intrinsic!"); + // Canonicalize interleave2 to G_SHUFFLE_VECTOR (similar to SelectionDAG). + Register Op0 = getOrCreateVReg(*CI.getOperand(0)); + Register Op1 = getOrCreateVReg(*CI.getOperand(1)); + Register Res = getOrCreateVReg(CI); + + LLT OpTy = MRI->getType(Op0); + MIRBuilder.buildShuffleVector(Res, Op0, Op1, + createInterleaveMask(OpTy.getNumElements(), 2)); + + return true; +} + +bool IRTranslator::translateVectorDeinterleave2Intrinsic( + const CallInst &CI, MachineIRBuilder &MIRBuilder) { + assert(CI.getIntrinsicID() == Intrinsic::vector_deinterleave2 && + "This function can only be called on the deinterleave2 intrinsic!"); + // Canonicalize deinterleave2 to shuffles that extract sub-vectors (similar to + // SelectionDAG). 
+ Register Op = getOrCreateVReg(*CI.getOperand(0)); + auto Undef = MIRBuilder.buildUndef(MRI->getType(Op)); + ArrayRef<Register> Res = getOrCreateVRegs(CI); + + LLT ResTy = MRI->getType(Res[0]); + MIRBuilder.buildShuffleVector(Res[0], Op, Undef, + createStrideMask(0, 2, ResTy.getNumElements())); + MIRBuilder.buildShuffleVector(Res[1], Op, Undef, + createStrideMask(1, 2, ResTy.getNumElements())); + + return true; +} + void IRTranslator::getStackGuard(Register DstReg, MachineIRBuilder &MIRBuilder) { const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); @@ -1779,8 +1840,7 @@ void IRTranslator::getStackGuard(Register DstReg, auto MIB = MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD, {DstReg}, {}); - auto &TLI = *MF->getSubtarget().getTargetLowering(); - Value *Global = TLI.getSDagStackGuard(*MF->getFunction().getParent()); + Value *Global = TLI->getSDagStackGuard(*MF->getFunction().getParent()); if (!Global) return; @@ -1819,6 +1879,12 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { switch (ID) { default: break; + case Intrinsic::acos: + return TargetOpcode::G_FACOS; + case Intrinsic::asin: + return TargetOpcode::G_FASIN; + case Intrinsic::atan: + return TargetOpcode::G_FATAN; case Intrinsic::bswap: return TargetOpcode::G_BSWAP; case Intrinsic::bitreverse: @@ -1831,6 +1897,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { return TargetOpcode::G_FCEIL; case Intrinsic::cos: return TargetOpcode::G_FCOS; + case Intrinsic::cosh: + return TargetOpcode::G_FCOSH; case Intrinsic::ctpop: return TargetOpcode::G_CTPOP; case Intrinsic::exp: @@ -1879,16 +1947,26 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { return TargetOpcode::G_INTRINSIC_ROUNDEVEN; case Intrinsic::sin: return TargetOpcode::G_FSIN; + case Intrinsic::sinh: + return TargetOpcode::G_FSINH; case Intrinsic::sqrt: return TargetOpcode::G_FSQRT; + case Intrinsic::tan: + return TargetOpcode::G_FTAN; + case Intrinsic::tanh: + return TargetOpcode::G_FTANH; case Intrinsic::trunc: return TargetOpcode::G_INTRINSIC_TRUNC; case Intrinsic::readcyclecounter: return TargetOpcode::G_READCYCLECOUNTER; + case Intrinsic::readsteadycounter: + return TargetOpcode::G_READSTEADYCOUNTER; case Intrinsic::ptrmask: return TargetOpcode::G_PTRMASK; case Intrinsic::lrint: return TargetOpcode::G_INTRINSIC_LRINT; + case Intrinsic::llrint: + return TargetOpcode::G_INTRINSIC_LLRINT; // FADD/FMUL require checking the FMF, so are handled elsewhere. 
case Intrinsic::vector_reduce_fmin: return TargetOpcode::G_VECREDUCE_FMIN; @@ -1916,6 +1994,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { return TargetOpcode::G_VECREDUCE_UMAX; case Intrinsic::vector_reduce_umin: return TargetOpcode::G_VECREDUCE_UMIN; + case Intrinsic::experimental_vector_compress: + return TargetOpcode::G_VECTOR_COMPRESS; case Intrinsic::lround: return TargetOpcode::G_LROUND; case Intrinsic::llround: @@ -1985,11 +2065,8 @@ bool IRTranslator::translateConstrainedFPIntrinsic( Flags |= MachineInstr::NoFPExcept; SmallVector<llvm::SrcOp, 4> VRegs; - VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(0))); - if (!FPI.isUnaryOp()) - VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(1))); - if (FPI.isTernaryOp()) - VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(2))); + for (unsigned I = 0, E = FPI.getNonMetadataArgCount(); I != E; ++I) + VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(I))); MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(FPI)}, VRegs, Flags); return true; @@ -2039,6 +2116,36 @@ bool IRTranslator::translateIfEntryValueArgument(bool isDeclare, Value *Val, return true; } +static unsigned getConvOpcode(Intrinsic::ID ID) { + switch (ID) { + default: + llvm_unreachable("Unexpected intrinsic"); + case Intrinsic::experimental_convergence_anchor: + return TargetOpcode::CONVERGENCECTRL_ANCHOR; + case Intrinsic::experimental_convergence_entry: + return TargetOpcode::CONVERGENCECTRL_ENTRY; + case Intrinsic::experimental_convergence_loop: + return TargetOpcode::CONVERGENCECTRL_LOOP; + } +} + +bool IRTranslator::translateConvergenceControlIntrinsic( + const CallInst &CI, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder) { + MachineInstrBuilder MIB = MIRBuilder.buildInstr(getConvOpcode(ID)); + Register OutputReg = getOrCreateConvergenceTokenVReg(CI); + MIB.addDef(OutputReg); + + if (ID == Intrinsic::experimental_convergence_loop) { + auto Bundle = CI.getOperandBundle(LLVMContext::OB_convergencectrl); + assert(Bundle && "Expected a convergence control token."); + Register InputReg = + getOrCreateConvergenceTokenVReg(*Bundle->Inputs[0].get()); + MIB.addUse(InputReg); + } + + return true; +} + bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder) { if (auto *MI = dyn_cast<AnyMemIntrinsic>(&CI)) { @@ -2109,9 +2216,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, // does. Simplest intrinsic ever! return true; case Intrinsic::vastart: { - auto &TLI = *MF->getSubtarget().getTargetLowering(); Value *Ptr = CI.getArgOperand(0); - unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8; + unsigned ListSize = TLI->getVaListSizeInBits(*DL) / 8; Align Alignment = getKnownAlignment(Ptr, *DL); MIRBuilder.buildInstr(TargetOpcode::G_VASTART, {}, {getOrCreateVReg(*Ptr)}) @@ -2120,6 +2226,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, ListSize, Alignment)); return true; } + case Intrinsic::dbg_assign: + // A dbg.assign is a dbg.value with more information about stack locations, + // typically produced during optimisation of variables with leaked + // addresses. We can treat it like a normal dbg_value intrinsic here; to + // benefit from the full analysis of stack/SSA locations, GlobalISel would + // need to register for and use the AssignmentTrackingAnalysis pass. + [[fallthrough]]; case Intrinsic::dbg_value: { // This form of DBG_VALUE is target-independent. 
const DbgValueInst &DI = cast<DbgValueInst>(CI); @@ -2180,14 +2293,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder); case Intrinsic::fmuladd: { const TargetMachine &TM = MF->getTarget(); - const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); Register Dst = getOrCreateVReg(CI); Register Op0 = getOrCreateVReg(*CI.getArgOperand(0)); Register Op1 = getOrCreateVReg(*CI.getArgOperand(1)); Register Op2 = getOrCreateVReg(*CI.getArgOperand(2)); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && - TLI.isFMAFasterThanFMulAndFAdd(*MF, - TLI.getValueType(*DL, CI.getType()))) { + TLI->isFMAFasterThanFMulAndFAdd(*MF, + TLI->getValueType(*DL, CI.getType()))) { // TODO: Revisit this to see if we should move this part of the // lowering to the combiner. MIRBuilder.buildFMA(Dst, Op0, Op1, Op2, @@ -2245,10 +2357,9 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, getStackGuard(getOrCreateVReg(CI), MIRBuilder); return true; case Intrinsic::stackprotector: { - const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL); Register GuardVal; - if (TLI.useLoadStackGuardNode()) { + if (TLI->useLoadStackGuardNode()) { GuardVal = MRI->createGenericVirtualRegister(PtrTy); getStackGuard(GuardVal, MIRBuilder); } else @@ -2340,8 +2451,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, int FI = getOrCreateFrameIndex(*cast<AllocaInst>(Arg)); MCSymbol *FrameAllocSym = - MF->getMMI().getContext().getOrCreateFrameAllocSymbol(EscapedName, - Idx); + MF->getContext().getOrCreateFrameAllocSymbol(EscapedName, Idx); // This should be inserted at the start of the entry block. auto LocalEscape = @@ -2390,22 +2500,16 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, return true; } case Intrinsic::trap: + return translateTrap(CI, MIRBuilder, TargetOpcode::G_TRAP); case Intrinsic::debugtrap: - case Intrinsic::ubsantrap: { - StringRef TrapFuncName = - CI.getAttributes().getFnAttr("trap-func-name").getValueAsString(); - if (TrapFuncName.empty()) - break; // Use the default handling. 
- CallLowering::CallLoweringInfo Info; - if (ID == Intrinsic::ubsantrap) { - Info.OrigArgs.push_back({getOrCreateVRegs(*CI.getArgOperand(0)), - CI.getArgOperand(0)->getType(), 0}); - } - Info.Callee = MachineOperand::CreateES(TrapFuncName.data()); - Info.CB = &CI; - Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0}; - return CLI->lowerCall(MIRBuilder, Info); - } + return translateTrap(CI, MIRBuilder, TargetOpcode::G_DEBUGTRAP); + case Intrinsic::ubsantrap: + return translateTrap(CI, MIRBuilder, TargetOpcode::G_UBSANTRAP); + case Intrinsic::allow_runtime_check: + case Intrinsic::allow_ubsan_check: + MIRBuilder.buildCopy(getOrCreateVReg(CI), + getOrCreateVReg(*ConstantInt::getTrue(CI.getType()))); + return true; case Intrinsic::amdgcn_cs_chain: return translateCallBase(CI, MIRBuilder); case Intrinsic::fptrunc_round: { @@ -2438,24 +2542,34 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, } case Intrinsic::set_fpenv: { Value *FPEnv = CI.getOperand(0); - MIRBuilder.buildInstr(TargetOpcode::G_SET_FPENV, {}, - {getOrCreateVReg(*FPEnv)}); + MIRBuilder.buildSetFPEnv(getOrCreateVReg(*FPEnv)); return true; } - case Intrinsic::reset_fpenv: { - MIRBuilder.buildInstr(TargetOpcode::G_RESET_FPENV, {}, {}); + case Intrinsic::reset_fpenv: + MIRBuilder.buildResetFPEnv(); return true; - } case Intrinsic::set_fpmode: { Value *FPState = CI.getOperand(0); - MIRBuilder.buildInstr(TargetOpcode::G_SET_FPMODE, {}, - { getOrCreateVReg(*FPState) }); + MIRBuilder.buildSetFPMode(getOrCreateVReg(*FPState)); return true; } - case Intrinsic::reset_fpmode: { - MIRBuilder.buildInstr(TargetOpcode::G_RESET_FPMODE, {}, {}); + case Intrinsic::reset_fpmode: + MIRBuilder.buildResetFPMode(); + return true; + case Intrinsic::vscale: { + MIRBuilder.buildVScale(getOrCreateVReg(CI), 1); return true; } + case Intrinsic::scmp: + MIRBuilder.buildSCmp(getOrCreateVReg(CI), + getOrCreateVReg(*CI.getOperand(0)), + getOrCreateVReg(*CI.getOperand(1))); + return true; + case Intrinsic::ucmp: + MIRBuilder.buildUCmp(getOrCreateVReg(CI), + getOrCreateVReg(*CI.getOperand(0)), + getOrCreateVReg(*CI.getOperand(1))); + return true; case Intrinsic::prefetch: { Value *Addr = CI.getOperand(0); unsigned RW = cast<ConstantInt>(CI.getOperand(1))->getZExtValue(); @@ -2471,12 +2585,30 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, return true; } + + case Intrinsic::vector_interleave2: + case Intrinsic::vector_deinterleave2: { + // Both intrinsics have at least one operand. 
+ Value *Op0 = CI.getOperand(0); + LLT ResTy = getLLTForType(*Op0->getType(), MIRBuilder.getDataLayout()); + if (!ResTy.isFixedVector()) + return false; + + if (CI.getIntrinsicID() == Intrinsic::vector_interleave2) + return translateVectorInterleave2Intrinsic(CI, MIRBuilder); + + return translateVectorDeinterleave2Intrinsic(CI, MIRBuilder); + } + #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ case Intrinsic::INTRINSIC: #include "llvm/IR/ConstrainedOps.def" return translateConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(CI), MIRBuilder); - + case Intrinsic::experimental_convergence_anchor: + case Intrinsic::experimental_convergence_entry: + case Intrinsic::experimental_convergence_loop: + return translateConvergenceControlIntrinsic(CI, ID, MIRBuilder); } return false; } @@ -2527,12 +2659,39 @@ bool IRTranslator::translateCallBase(const CallBase &CB, } } + std::optional<CallLowering::PtrAuthInfo> PAI; + if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_ptrauth)) { + // Functions should never be ptrauth-called directly. + assert(!CB.getCalledFunction() && "invalid direct ptrauth call"); + + const Value *Key = Bundle->Inputs[0]; + const Value *Discriminator = Bundle->Inputs[1]; + + // Look through ptrauth constants to try to eliminate the matching bundle + // and turn this into a direct call with no ptrauth. + // CallLowering will use the raw pointer if it doesn't find the PAI. + const auto *CalleeCPA = dyn_cast<ConstantPtrAuth>(CB.getCalledOperand()); + if (!CalleeCPA || !isa<Function>(CalleeCPA->getPointer()) || + !CalleeCPA->isKnownCompatibleWith(Key, Discriminator, *DL)) { + // If we can't make it direct, package the bundle into PAI. + Register DiscReg = getOrCreateVReg(*Discriminator); + PAI = CallLowering::PtrAuthInfo{cast<ConstantInt>(Key)->getZExtValue(), + DiscReg}; + } + } + + Register ConvergenceCtrlToken = 0; + if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_convergencectrl)) { + const auto &Token = *Bundle->Inputs[0].get(); + ConvergenceCtrlToken = getOrCreateConvergenceTokenVReg(Token); + } + // We don't set HasCalls on MFI here yet because call lowering may decide to // optimize into tail calls. Instead, we defer that to selection where a final // scan is done to check if any instructions are calls. - bool Success = - CLI->lowerCall(MIRBuilder, CB, Res, Args, SwiftErrorVReg, - [&]() { return getOrCreateVReg(*CB.getCalledOperand()); }); + bool Success = CLI->lowerCall( + MIRBuilder, CB, Res, Args, SwiftErrorVReg, PAI, ConvergenceCtrlToken, + [&]() { return getOrCreateVReg(*CB.getCalledOperand()); }); // Check if we just inserted a tail call. if (Success) { @@ -2626,10 +2785,9 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { } // Add a MachineMemOperand if it is a target mem intrinsic. - const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); TargetLowering::IntrinsicInfo Info; // TODO: Add a GlobalISel version of getTgtMemIntrinsic. 
- if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) { + if (TLI->getTgtMemIntrinsic(Info, CI, *MF, ID)) { Align Alignment = Info.align.value_or( DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext()))); LLT MemTy = Info.memVT.isSimple() @@ -2647,6 +2805,14 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { MF->getMachineMemOperand(MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata())); } + if (CI.isConvergent()) { + if (auto Bundle = CI.getOperandBundle(LLVMContext::OB_convergencectrl)) { + auto *Token = Bundle->Inputs[0].get(); + Register TokenReg = getOrCreateVReg(*Token); + MIB.addUse(TokenReg, RegState::Implicit); + } + } + return true; } @@ -2721,7 +2887,7 @@ bool IRTranslator::translateInvoke(const User &U, return false; // FIXME: support whatever these are. - if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) + if (I.hasDeoptState()) return false; // FIXME: support control flow guard targets. @@ -2809,10 +2975,9 @@ bool IRTranslator::translateLandingPad(const User &U, // If there aren't registers to copy the values into (e.g., during SjLj // exceptions), then don't bother. - auto &TLI = *MF->getSubtarget().getTargetLowering(); const Constant *PersonalityFn = MF->getFunction().getPersonalityFn(); - if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 && - TLI.getExceptionSelectorRegister(PersonalityFn) == 0) + if (TLI->getExceptionPointerRegister(PersonalityFn) == 0 && + TLI->getExceptionSelectorRegister(PersonalityFn) == 0) return true; // If landingpad's return type is token type, we don't create DAG nodes @@ -2843,7 +3008,7 @@ bool IRTranslator::translateLandingPad(const User &U, assert(Tys.size() == 2 && "Only two-valued landingpads are supported"); // Mark exception register as live in. - Register ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn); + Register ExceptionReg = TLI->getExceptionPointerRegister(PersonalityFn); if (!ExceptionReg) return false; @@ -2851,7 +3016,7 @@ bool IRTranslator::translateLandingPad(const User &U, ArrayRef<Register> ResRegs = getOrCreateVRegs(LP); MIRBuilder.buildCopy(ResRegs[0], ExceptionReg); - Register SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn); + Register SelectorReg = TLI->getExceptionSelectorRegister(PersonalityFn); if (!SelectorReg) return false; @@ -2935,20 +3100,18 @@ bool IRTranslator::translateUnreachable(const User &U, MachineIRBuilder &MIRBuil return true; auto &UI = cast<UnreachableInst>(U); + // We may be able to ignore unreachable behind a noreturn call. - if (MF->getTarget().Options.NoTrapAfterNoreturn) { - const BasicBlock &BB = *UI.getParent(); - if (&UI != &BB.front()) { - BasicBlock::const_iterator PredI = - std::prev(BasicBlock::const_iterator(UI)); - if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) { - if (Call->doesNotReturn()) - return true; - } - } + if (const CallInst *Call = dyn_cast_or_null<CallInst>(UI.getPrevNode()); + Call && Call->doesNotReturn()) { + if (MF->getTarget().Options.NoTrapAfterNoreturn) + return true; + // Do not emit an additional trap instruction. + if (Call->isNonContinuableTrap()) + return true; } - MIRBuilder.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>()); + MIRBuilder.buildTrap(); return true; } @@ -2956,13 +3119,28 @@ bool IRTranslator::translateInsertElement(const User &U, MachineIRBuilder &MIRBuilder) { // If it is a <1 x Ty> vector, use the scalar as it is // not a legal vector type in LLT. 
- if (cast<FixedVectorType>(U.getType())->getNumElements() == 1) + if (auto *FVT = dyn_cast<FixedVectorType>(U.getType()); + FVT && FVT->getNumElements() == 1) return translateCopy(U, *U.getOperand(1), MIRBuilder); Register Res = getOrCreateVReg(U); Register Val = getOrCreateVReg(*U.getOperand(0)); Register Elt = getOrCreateVReg(*U.getOperand(1)); - Register Idx = getOrCreateVReg(*U.getOperand(2)); + unsigned PreferredVecIdxWidth = TLI->getVectorIdxTy(*DL).getSizeInBits(); + Register Idx; + if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(2))) { + if (CI->getBitWidth() != PreferredVecIdxWidth) { + APInt NewIdx = CI->getValue().zextOrTrunc(PreferredVecIdxWidth); + auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx); + Idx = getOrCreateVReg(*NewIdxCI); + } + } + if (!Idx) + Idx = getOrCreateVReg(*U.getOperand(2)); + if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) { + const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth); + Idx = MIRBuilder.buildZExtOrTrunc(VecIdxTy, Idx).getReg(0); + } MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx); return true; } @@ -2976,8 +3154,7 @@ bool IRTranslator::translateExtractElement(const User &U, Register Res = getOrCreateVReg(U); Register Val = getOrCreateVReg(*U.getOperand(0)); - const auto &TLI = *MF->getSubtarget().getTargetLowering(); - unsigned PreferredVecIdxWidth = TLI.getVectorIdxTy(*DL).getSizeInBits(); + unsigned PreferredVecIdxWidth = TLI->getVectorIdxTy(*DL).getSizeInBits(); Register Idx; if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) { if (CI->getBitWidth() != PreferredVecIdxWidth) { @@ -2998,6 +3175,19 @@ bool IRTranslator::translateExtractElement(const User &U, bool IRTranslator::translateShuffleVector(const User &U, MachineIRBuilder &MIRBuilder) { + // A ShuffleVector that operates on scalable vectors is a splat vector + // where the value of the splat vector is the 0th element of the first + // operand, since the index mask operand is the zeroinitializer (undef and + // poison are treated as zeroinitializer here). 
+ if (U.getOperand(0)->getType()->isScalableTy()) { + Value *Op0 = U.getOperand(0); + auto SplatVal = MIRBuilder.buildExtractVectorElementConstant( + LLT::scalar(Op0->getType()->getScalarSizeInBits()), + getOrCreateVReg(*Op0), 0); + MIRBuilder.buildSplatVector(getOrCreateVReg(U), SplatVal); + return true; + } + ArrayRef<int> Mask; if (auto *SVI = dyn_cast<ShuffleVectorInst>(&U)) Mask = SVI->getShuffleMask(); @@ -3029,8 +3219,7 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U, MachineIRBuilder &MIRBuilder) { const AtomicCmpXchgInst &I = cast<AtomicCmpXchgInst>(U); - auto &TLI = *MF->getSubtarget().getTargetLowering(); - auto Flags = TLI.getAtomicMemOperandFlags(I, *DL); + auto Flags = TLI->getAtomicMemOperandFlags(I, *DL); auto Res = getOrCreateVRegs(I); Register OldValRes = Res[0]; @@ -3051,8 +3240,7 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U, bool IRTranslator::translateAtomicRMW(const User &U, MachineIRBuilder &MIRBuilder) { const AtomicRMWInst &I = cast<AtomicRMWInst>(U); - auto &TLI = *MF->getSubtarget().getTargetLowering(); - auto Flags = TLI.getAtomicMemOperandFlags(I, *DL); + auto Flags = TLI->getAtomicMemOperandFlags(I, *DL); Register Res = getOrCreateVReg(I); Register Addr = getOrCreateVReg(*I.getPointerOperand()); @@ -3265,25 +3453,35 @@ void IRTranslator::translateDbgDeclareRecord(Value *Address, bool HasArgList, void IRTranslator::translateDbgInfo(const Instruction &Inst, MachineIRBuilder &MIRBuilder) { - for (DPValue &DPV : Inst.getDbgValueRange()) { - const DILocalVariable *Variable = DPV.getVariable(); - const DIExpression *Expression = DPV.getExpression(); - Value *V = DPV.getVariableLocationOp(0); - if (DPV.isDbgDeclare()) - translateDbgDeclareRecord(V, DPV.hasArgList(), Variable, - Expression, DPV.getDebugLoc(), MIRBuilder); + for (DbgRecord &DR : Inst.getDbgRecordRange()) { + if (DbgLabelRecord *DLR = dyn_cast<DbgLabelRecord>(&DR)) { + MIRBuilder.setDebugLoc(DLR->getDebugLoc()); + assert(DLR->getLabel() && "Missing label"); + assert(DLR->getLabel()->isValidLocationForIntrinsic( + MIRBuilder.getDebugLoc()) && + "Expected inlined-at fields to agree"); + MIRBuilder.buildDbgLabel(DLR->getLabel()); + continue; + } + DbgVariableRecord &DVR = cast<DbgVariableRecord>(DR); + const DILocalVariable *Variable = DVR.getVariable(); + const DIExpression *Expression = DVR.getExpression(); + Value *V = DVR.getVariableLocationOp(0); + if (DVR.isDbgDeclare()) + translateDbgDeclareRecord(V, DVR.hasArgList(), Variable, Expression, + DVR.getDebugLoc(), MIRBuilder); else - translateDbgValueRecord(V, DPV.hasArgList(), Variable, - Expression, DPV.getDebugLoc(), MIRBuilder); + translateDbgValueRecord(V, DVR.hasArgList(), Variable, Expression, + DVR.getDebugLoc(), MIRBuilder); } } bool IRTranslator::translate(const Instruction &Inst) { CurBuilder->setDebugLoc(Inst.getDebugLoc()); CurBuilder->setPCSections(Inst.getMetadata(LLVMContext::MD_pcsections)); + CurBuilder->setMMRAMetadata(Inst.getMetadata(LLVMContext::MD_mmra)); - auto &TLI = *MF->getSubtarget().getTargetLowering(); - if (TLI.fallBackToDAGISel(Inst)) + if (TLI->fallBackToDAGISel(Inst)) return false; switch (Inst.getOpcode()) { @@ -3312,7 +3510,11 @@ bool IRTranslator::translate(const Constant &C, Register Reg) { EntryBuilder->buildConstant(Reg, 0); else if (auto GV = dyn_cast<GlobalValue>(&C)) EntryBuilder->buildGlobalValue(Reg, GV); - else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) { + else if (auto CPA = dyn_cast<ConstantPtrAuth>(&C)) { + Register Addr = getOrCreateVReg(*CPA->getPointer()); + Register 
AddrDisc = getOrCreateVReg(*CPA->getAddrDiscriminator()); + EntryBuilder->buildConstantPtrAuth(Reg, CPA, Addr, AddrDisc); + } else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) { if (!isa<FixedVectorType>(CAZ->getType())) return false; // Return the scalar if it is a <1 x Ty> vector. @@ -3434,9 +3636,8 @@ bool IRTranslator::finalizeBasicBlock(const BasicBlock &BB, // Check if we need to generate stack-protector guard checks. StackProtector &SP = getAnalysis<StackProtector>(); if (SP.shouldEmitSDCheck(BB)) { - const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); bool FunctionBasedInstrumentation = - TLI.getSSPStackGuardCheck(*MF->getFunction().getParent()); + TLI->getSSPStackGuardCheck(*MF->getFunction().getParent()); SPDescriptor.initialize(&BB, &MBB, FunctionBasedInstrumentation); } // Handle stack protector. @@ -3481,10 +3682,9 @@ bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD, MachineBasicBlock *ParentBB) { CurBuilder->setInsertPt(*ParentBB, ParentBB->end()); // First create the loads to the guard/stack slot for the comparison. - const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); Type *PtrIRTy = PointerType::getUnqual(MF->getFunction().getContext()); const LLT PtrTy = getLLTForType(*PtrIRTy, *DL); - LLT PtrMemTy = getLLTForMVT(TLI.getPointerMemTy(*DL)); + LLT PtrMemTy = getLLTForMVT(TLI->getPointerMemTy(*DL)); MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo(); int FI = MFI.getStackProtectorIndex(); @@ -3502,13 +3702,13 @@ bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD, MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile) .getReg(0); - if (TLI.useStackGuardXorFP()) { + if (TLI->useStackGuardXorFP()) { LLVM_DEBUG(dbgs() << "Stack protector xor'ing with FP not yet implemented"); return false; } // Retrieve guard check function, nullptr if instrumentation is inlined. - if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) { + if (const Function *GuardCheckFn = TLI->getSSPStackGuardCheck(M)) { // This path is currently untestable on GlobalISel, since the only platform // that needs this seems to be Windows, and we fall back on that currently. // The code still lives here in case that changes. @@ -3543,13 +3743,13 @@ bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD, // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD. // Otherwise, emit a volatile load to retrieve the stack guard value. - if (TLI.useLoadStackGuardNode()) { + if (TLI->useLoadStackGuardNode()) { Guard = MRI->createGenericVirtualRegister(LLT::scalar(PtrTy.getSizeInBits())); getStackGuard(Guard, *CurBuilder); } else { // TODO: test using android subtarget when we support @llvm.thread.pointer. 
- const Value *IRGuard = TLI.getSDagStackGuard(M); + const Value *IRGuard = TLI->getSDagStackGuard(M); Register GuardPtr = getOrCreateVReg(*IRGuard); Guard = CurBuilder @@ -3573,13 +3773,12 @@ bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD, bool IRTranslator::emitSPDescriptorFailure(StackProtectorDescriptor &SPD, MachineBasicBlock *FailureBB) { CurBuilder->setInsertPt(*FailureBB, FailureBB->end()); - const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); const RTLIB::Libcall Libcall = RTLIB::STACKPROTECTOR_CHECK_FAIL; - const char *Name = TLI.getLibcallName(Libcall); + const char *Name = TLI->getLibcallName(Libcall); CallLowering::CallLoweringInfo Info; - Info.CallConv = TLI.getLibcallCallingConv(Libcall); + Info.CallConv = TLI->getLibcallCallingConv(Libcall); Info.Callee = MachineOperand::CreateES(Name); Info.OrigRet = {Register(), Type::getVoidTy(MF->getFunction().getContext()), 0}; @@ -3642,6 +3841,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { bool EnableCSE = EnableCSEInIRTranslator.getNumOccurrences() ? EnableCSEInIRTranslator : TPC->isGISelCSEEnabled(); + TLI = MF->getSubtarget().getTargetLowering(); if (EnableCSE) { EntryBuilder = std::make_unique<CSEMIRBuilder>(CurMF); @@ -3657,7 +3857,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { CurBuilder->setMF(*MF); EntryBuilder->setMF(*MF); MRI = &MF->getRegInfo(); - DL = &F.getParent()->getDataLayout(); + DL = &F.getDataLayout(); ORE = std::make_unique<OptimizationRemarkEmitter>(&F); const TargetMachine &TM = MF->getTarget(); TM.resetTargetOptions(F); @@ -3676,12 +3876,8 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF); - const auto &TLI = *MF->getSubtarget().getTargetLowering(); - SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo); - SL->init(TLI, TM, *DL); - - + SL->init(*TLI, TM, *DL); assert(PendingPHIs.empty() && "stale PHIs"); @@ -3790,7 +3986,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { #endif // ifndef NDEBUG // Translate any debug-info attached to the instruction. - translateDbgInfo(Inst, *CurBuilder.get()); + translateDbgInfo(Inst, *CurBuilder); if (translate(Inst)) continue; @@ -3804,7 +4000,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { raw_string_ostream InstStr(InstStrStorage); InstStr << Inst; - R << ": '" << InstStr.str() << "'"; + R << ": '" << InstStrStorage << "'"; } reportTranslationError(*MF, *TPC, *ORE, R); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp index 4089a5e941b0..81f25b21a040 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp @@ -223,7 +223,7 @@ bool InlineAsmLowering::lowerInlineAsm( MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); - const DataLayout &DL = F.getParent()->getDataLayout(); + const DataLayout &DL = F.getDataLayout(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); MachineRegisterInfo *MRI = MIRBuilder.getMRI(); @@ -538,6 +538,21 @@ bool InlineAsmLowering::lowerInlineAsm( } } + // Add rounding control registers as implicit def for inline asm. 
+ if (MF.getFunction().hasFnAttribute(Attribute::StrictFP)) { + ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters(); + for (MCPhysReg Reg : RCRegs) + Inst.addReg(Reg, RegState::ImplicitDefine); + } + + if (auto Bundle = Call.getOperandBundle(LLVMContext::OB_convergencectrl)) { + auto *Token = Bundle->Inputs[0].get(); + ArrayRef<Register> SourceRegs = GetOrCreateVRegs(*Token); + assert(SourceRegs.size() == 1 && + "Expected the control token to fit into a single virtual register"); + Inst.addUse(SourceRegs[0], RegState::Implicit); + } + if (const MDNode *SrcLoc = Call.getMetadata("srcloc")) Inst.addMetadata(SrcLoc); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 30b2430249d2..9a27728dcb4d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -62,14 +62,8 @@ INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE, "Select target instructions out of generic instructions", false, false) -InstructionSelect::InstructionSelect(CodeGenOptLevel OL) - : MachineFunctionPass(ID), OptLevel(OL) {} - -// In order not to crash when calling getAnalysis during testing with -run-pass -// we use the default opt level here instead of None, so that the addRequired() -// calls are made in getAnalysisUsage(). -InstructionSelect::InstructionSelect() - : MachineFunctionPass(ID), OptLevel(CodeGenOptLevel::Default) {} +InstructionSelect::InstructionSelect(CodeGenOptLevel OL, char &PassID) + : MachineFunctionPass(PassID), OptLevel(OL) {} void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetPassConfig>(); @@ -281,7 +275,8 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { } const LLT Ty = MRI.getType(VReg); - if (Ty.isValid() && Ty.getSizeInBits() > TRI.getRegSizeInBits(*RC)) { + if (Ty.isValid() && + TypeSize::isKnownGT(Ty.getSizeInBits(), TRI.getRegSizeInBits(*RC))) { reportGISelFailure( MF, TPC, MORE, "gisel-select", "VReg's low-level type and register class have different sizes", *MI); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 47d045ac4817..644dbae8f93a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -25,7 +25,7 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/RuntimeLibcallUtil.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -69,8 +69,9 @@ getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) { unsigned EltSize = OrigTy.getScalarSizeInBits(); if (LeftoverSize % EltSize != 0) return {-1, -1}; - LeftoverTy = LLT::scalarOrVector( - ElementCount::getFixed(LeftoverSize / EltSize), EltSize); + LeftoverTy = + LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize), + OrigTy.getElementType()); } else { LeftoverTy = LLT::scalar(LeftoverSize); } @@ -212,7 +213,7 @@ void LegalizerHelper::mergeMixedSubvectors(Register DstReg, appendVectorElts(AllElts, PartRegs[i]); Register Leftover = PartRegs[PartRegs.size() - 1]; - if (MRI.getType(Leftover).isScalar()) + if 
(!MRI.getType(Leftover).isVector()) AllElts.push_back(Leftover); else appendVectorElts(AllElts, Leftover); @@ -448,6 +449,20 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { RTLIBCASE(SIN_F); case TargetOpcode::G_FCOS: RTLIBCASE(COS_F); + case TargetOpcode::G_FTAN: + RTLIBCASE(TAN_F); + case TargetOpcode::G_FASIN: + RTLIBCASE(ASIN_F); + case TargetOpcode::G_FACOS: + RTLIBCASE(ACOS_F); + case TargetOpcode::G_FATAN: + RTLIBCASE(ATAN_F); + case TargetOpcode::G_FSINH: + RTLIBCASE(SINH_F); + case TargetOpcode::G_FCOSH: + RTLIBCASE(COSH_F); + case TargetOpcode::G_FTANH: + RTLIBCASE(TANH_F); case TargetOpcode::G_FLOG10: RTLIBCASE(LOG10_F); case TargetOpcode::G_FLOG: @@ -472,6 +487,10 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { RTLIBCASE(NEARBYINT_F); case TargetOpcode::G_INTRINSIC_ROUNDEVEN: RTLIBCASE(ROUNDEVEN_F); + case TargetOpcode::G_INTRINSIC_LRINT: + RTLIBCASE(LRINT_F); + case TargetOpcode::G_INTRINSIC_LLRINT: + RTLIBCASE(LLRINT_F); } llvm_unreachable("Unknown libcall function"); } @@ -1032,6 +1051,13 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { case TargetOpcode::G_FREM: case TargetOpcode::G_FCOS: case TargetOpcode::G_FSIN: + case TargetOpcode::G_FTAN: + case TargetOpcode::G_FACOS: + case TargetOpcode::G_FASIN: + case TargetOpcode::G_FATAN: + case TargetOpcode::G_FCOSH: + case TargetOpcode::G_FSINH: + case TargetOpcode::G_FTANH: case TargetOpcode::G_FLOG10: case TargetOpcode::G_FLOG: case TargetOpcode::G_FLOG2: @@ -1059,6 +1085,26 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { return Status; break; } + case TargetOpcode::G_INTRINSIC_LRINT: + case TargetOpcode::G_INTRINSIC_LLRINT: { + LLT LLTy = MRI.getType(MI.getOperand(1).getReg()); + unsigned Size = LLTy.getSizeInBits(); + Type *HLTy = getFloatTypeForLLT(Ctx, LLTy); + Type *ITy = IntegerType::get( + Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits()); + if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) { + LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n"); + return UnableToLegalize; + } + auto Libcall = getRTLibDesc(MI.getOpcode(), Size); + LegalizeResult Status = + createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0}, + {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI); + if (Status != Legalized) + return Status; + MI.eraseFromParent(); + return Legalized; + } case TargetOpcode::G_FPOWI: { LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); unsigned Size = LLTy.getSizeInBits(); @@ -1095,31 +1141,26 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { case TargetOpcode::G_FPTOSI: case TargetOpcode::G_FPTOUI: { // FIXME: Support other types - unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + Type *FromTy = + getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg())); unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64)) + if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy) return UnableToLegalize; LegalizeResult Status = conversionLibcall( - MI, MIRBuilder, - ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx), - FromSize == 64 ? 
Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx), - LocObserver); + MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver); if (Status != Legalized) return Status; break; } case TargetOpcode::G_SITOFP: case TargetOpcode::G_UITOFP: { - // FIXME: Support other types unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); - unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64)) + Type *ToTy = + getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg())); + if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy) return UnableToLegalize; LegalizeResult Status = conversionLibcall( - MI, MIRBuilder, - ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx), - FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx), - LocObserver); + MI, MIRBuilder, ToTy, Type::getIntNTy(Ctx, FromSize), LocObserver); if (Status != Legalized) return Status; break; @@ -1271,7 +1312,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } - + case TargetOpcode::G_CONSTANT_FOLD_BARRIER: case TargetOpcode::G_FREEZE: { if (TypeIdx != 0) return UnableToLegalize; @@ -1285,7 +1326,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, SmallVector<Register, 8> Parts; for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) { Parts.push_back( - MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0)); + MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)}) + .getReg(0)); } MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts); @@ -1317,7 +1359,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, if (DstTy.isVector()) return UnableToLegalize; - if (8 * LoadMI.getMemSize() != DstTy.getSizeInBits()) { + if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) { Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO()); MIRBuilder.buildAnyExt(DstReg, TmpReg); @@ -1335,7 +1377,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); auto &MMO = LoadMI.getMMO(); - unsigned MemSize = MMO.getSizeInBits(); + unsigned MemSize = MMO.getSizeInBits().getValue(); if (MemSize == NarrowSize) { MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); @@ -1368,7 +1410,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, if (SrcTy.isVector() && LeftoverBits != 0) return UnableToLegalize; - if (8 * StoreMI.getMemSize() != SrcTy.getSizeInBits()) { + if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) { Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); MIRBuilder.buildTrunc(TmpReg, SrcReg); MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO()); @@ -1558,6 +1600,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_FCMP: + if (TypeIdx != 0) + return UnableToLegalize; + + Observer.changingInstr(MI); + narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_SEXT_INREG: { if (TypeIdx != 0) return UnableToLegalize; @@ -1690,6 +1741,20 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, case TargetOpcode::G_FLDEXP: 
case TargetOpcode::G_STRICT_FLDEXP: return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy); + case TargetOpcode::G_VSCALE: { + Register Dst = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Dst); + + // Assume VSCALE(1) fits into a legal integer + const APInt One(NarrowTy.getSizeInBits(), 1); + auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One); + auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase); + auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm()); + MIRBuilder.buildMul(Dst, ZExt, C); + + MI.eraseFromParent(); + return Legalized; + } } } @@ -1709,8 +1774,7 @@ Register LegalizerHelper::coerceToScalar(Register Val) { Register NewVal = Val; assert(Ty.isVector()); - LLT EltTy = Ty.getElementType(); - if (EltTy.isPointer()) + if (Ty.isPointerVector()) NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0); return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0); } @@ -2412,13 +2476,22 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF; } + unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits(); + + if (MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) { + // An optimization where the result is the CTLZ after the left shift by + // (Difference in widety and current ty), that is, + // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy)) + // Result = ctlz MIBSrc + MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc, + MIRBuilder.buildConstant(WideTy, SizeDiff)); + } + // Perform the operation at the larger size. auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc}); // This is already the correct result for CTPOP and CTTZs - if (MI.getOpcode() == TargetOpcode::G_CTLZ || - MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) { + if (MI.getOpcode() == TargetOpcode::G_CTLZ) { // The correct result is NewOp - (Difference in widety and current ty). - unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits(); MIBNewOp = MIRBuilder.buildSub( WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)); } @@ -2468,6 +2541,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return Legalized; } case TargetOpcode::G_FREEZE: + case TargetOpcode::G_CONSTANT_FOLD_BARRIER: Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); widenScalarDst(MI, WideTy); @@ -2487,6 +2561,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_OR: case TargetOpcode::G_XOR: case TargetOpcode::G_SUB: + case TargetOpcode::G_SHUFFLE_VECTOR: // Perform operation at larger width (any extension is fine here, high bits // don't affect the result) and then truncate the result back to the // original type. 
@@ -2616,6 +2691,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FPTOSI: case TargetOpcode::G_FPTOUI: + case TargetOpcode::G_INTRINSIC_LRINT: + case TargetOpcode::G_INTRINSIC_LLRINT: case TargetOpcode::G_IS_FPCLASS: Observer.changingInstr(MI); @@ -2841,6 +2918,13 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FFLOOR: case TargetOpcode::G_FCOS: case TargetOpcode::G_FSIN: + case TargetOpcode::G_FTAN: + case TargetOpcode::G_FACOS: + case TargetOpcode::G_FASIN: + case TargetOpcode::G_FATAN: + case TargetOpcode::G_FCOSH: + case TargetOpcode::G_FSINH: + case TargetOpcode::G_FTANH: case TargetOpcode::G_FLOG10: case TargetOpcode::G_FLOG: case TargetOpcode::G_FLOG2: @@ -2957,7 +3041,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_VECREDUCE_FMIN: case TargetOpcode::G_VECREDUCE_FMAX: case TargetOpcode::G_VECREDUCE_FMINIMUM: - case TargetOpcode::G_VECREDUCE_FMAXIMUM: + case TargetOpcode::G_VECREDUCE_FMAXIMUM: { if (TypeIdx != 0) return UnableToLegalize; Observer.changingInstr(MI); @@ -2971,6 +3055,28 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_VSCALE: { + MachineOperand &SrcMO = MI.getOperand(1); + LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); + const APInt &SrcVal = SrcMO.getCImm()->getValue(); + // The CImm is always a signed value + const APInt Val = SrcVal.sext(WideTy.getSizeInBits()); + Observer.changingInstr(MI); + SrcMO.setCImm(ConstantInt::get(Ctx, Val)); + widenScalarDst(MI, WideTy); + Observer.changedInstr(MI); + return Legalized; + } + case TargetOpcode::G_SPLAT_VECTOR: { + if (TypeIdx != 1) + return UnableToLegalize; + + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + Observer.changedInstr(MI); + return Legalized; + } + } } static void getUnmergePieces(SmallVectorImpl<Register> &Pieces, @@ -2980,27 +3086,45 @@ static void getUnmergePieces(SmallVectorImpl<Register> &Pieces, Pieces.push_back(Unmerge.getReg(I)); } -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerFConstant(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - +static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, + MachineIRBuilder &MIRBuilder) { + MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); MachineFunction &MF = MIRBuilder.getMF(); const DataLayout &DL = MIRBuilder.getDataLayout(); - unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace(); LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace)); - Align Alignment = Align(DL.getABITypeAlign( - getFloatTypeForLLT(MF.getFunction().getContext(), MRI.getType(Dst)))); + LLT DstLLT = MRI.getType(DstReg); + + Align Alignment(DL.getABITypeAlign(ConstVal->getType())); auto Addr = MIRBuilder.buildConstantPool( - AddrPtrTy, MF.getConstantPool()->getConstantPoolIndex( - MI.getOperand(1).getFPImm(), Alignment)); + AddrPtrTy, + MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment)); - MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad, - MRI.getType(Dst), Alignment); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), + MachineMemOperand::MOLoad, DstLLT, Alignment); + + MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO); +} + 
+LegalizerHelper::LegalizeResult +LegalizerHelper::lowerConstant(MachineInstr &MI) { + const MachineOperand &ConstOperand = MI.getOperand(1); + const Constant *ConstantVal = ConstOperand.getCImm(); + + emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder); + MI.eraseFromParent(); + + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerFConstant(MachineInstr &MI) { + const MachineOperand &ConstOperand = MI.getOperand(1); + const Constant *ConstantVal = ConstOperand.getFPImm(); - MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Addr, *MMO); + emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder); MI.eraseFromParent(); return Legalized; @@ -3294,6 +3418,54 @@ LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, return UnableToLegalize; } +// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly +// those that have smaller than legal operands. +// +// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8> +// +// ===> +// +// s32 = G_BITCAST <4 x s8> +// s32 = G_BITCAST <4 x s8> +// s32 = G_BITCAST <4 x s8> +// s32 = G_BITCAST <4 x s8> +// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32 +// <16 x s8> = G_BITCAST <4 x s32> +LegalizerHelper::LegalizeResult +LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, + LLT CastTy) { + // Convert it to CONCAT instruction + auto ConcatMI = dyn_cast<GConcatVectors>(&MI); + if (!ConcatMI) { + return UnableToLegalize; + } + + // Check if bitcast is Legal + auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); + LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits()); + + // Check if the build vector is Legal + if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) { + return UnableToLegalize; + } + + // Bitcast the sources + SmallVector<Register> BitcastRegs; + for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) { + BitcastRegs.push_back( + MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i)) + .getReg(0)); + } + + // Build the scalar values into a vector + Register BuildReg = + MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0); + MIRBuilder.buildBitcast(DstReg, BuildReg); + + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) { // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT Register DstReg = LoadMI.getDstReg(); @@ -3545,6 +3717,9 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { Observer.changingInstr(MI); bitcastDst(MI, CastTy, 0); MMO.setType(CastTy); + // The range metadata is no longer valid when reinterpreted as a different + // type. 
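A quick byte-level check of the bitcastConcatVector rewrite above (the commented <16 x s8> example): concatenation commutes with the bitcast-to-scalar / build-vector / bitcast-back sequence, because every step is only a reinterpretation or relocation of the same bytes. Standalone C++, not LLVM API; the 4-byte chunk size mirrors the <4 x s8> to s32 casts:

// Standalone sketch (not LLVM API): G_CONCAT_VECTORS of four 4 x s8
// chunks equals bitcasting each chunk to s32, building a 4 x s32
// vector, and bitcasting the result back to 16 x s8.
#include <array>
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  std::array<std::array<uint8_t, 4>, 4> Srcs = {{
      {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}, {13, 14, 15, 16}}};

  // Direct concatenation: <16 x s8>.
  std::array<uint8_t, 16> Concat;
  for (int I = 0; I < 4; ++I)
    std::memcpy(&Concat[I * 4], Srcs[I].data(), 4);

  // Rewritten form: per-source bitcast to s32, G_BUILD_VECTOR, then a
  // final bitcast of <4 x s32> back to <16 x s8>.
  std::array<uint32_t, 4> BuildVec;
  for (int I = 0; I < 4; ++I)
    std::memcpy(&BuildVec[I], Srcs[I].data(), 4);
  std::array<uint8_t, 16> ViaBitcast;
  std::memcpy(ViaBitcast.data(), BuildVec.data(), 16);

  assert(Concat == ViaBitcast);
  return 0;
}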
+ MMO.clearRanges(); Observer.changedInstr(MI); return Legalized; } @@ -3595,6 +3770,8 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { return bitcastExtractVectorElt(MI, TypeIdx, CastTy); case TargetOpcode::G_INSERT_VECTOR_ELT: return bitcastInsertVectorElt(MI, TypeIdx, CastTy); + case TargetOpcode::G_CONCAT_VECTORS: + return bitcastConcatVector(MI, TypeIdx, CastTy); default: return UnableToLegalize; } @@ -3714,9 +3891,11 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { } case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs(); - MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal, + Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes); + MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal, **MI.memoperands_begin()); - MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal); + MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal); + MIRBuilder.buildCopy(OldValRes, NewOldValRes); MI.eraseFromParent(); return Legalized; } @@ -3735,8 +3914,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { case G_UADDO: { auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs(); - MIRBuilder.buildAdd(Res, LHS, RHS); - MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS); + Register NewRes = MRI.cloneVirtualRegister(Res); + + MIRBuilder.buildAdd(NewRes, LHS, RHS); + MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS); + + MIRBuilder.buildCopy(Res, NewRes); MI.eraseFromParent(); return Legalized; @@ -3746,6 +3929,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { const LLT CondTy = MRI.getType(CarryOut); const LLT Ty = MRI.getType(Res); + Register NewRes = MRI.cloneVirtualRegister(Res); + // Initial add of the two operands. auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS); @@ -3754,15 +3939,18 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { // Add the sum and the carry. auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn); - MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn); + MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn); // Second check for carry. We can only carry if the initial sum is all 1s // and the carry is set, resulting in a new sum of 0. 
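The G_UADDO/G_UADDE lowerings in this hunk (the UADDE second-carry code continues just below) rest on two unsigned-wraparound facts: an add overflowed iff the wrapped result compares unsigned-less-than an operand, and adding a carry-in can only produce a second carry by wrapping an all-ones intermediate sum to zero. A standalone exhaustive check at 8 bits (plain C++, not MIR):

// Standalone sketch (not LLVM API): the G_UADDO / G_UADDE lowerings,
// checked exhaustively at 8 bits.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A < 256; ++A) {
    for (unsigned B = 0; B < 256; ++B) {
      // G_UADDO: Res = LHS + RHS; CarryOut = Res <u RHS.
      uint8_t Res = uint8_t(A + B);
      bool CarryOut = Res < uint8_t(B);
      assert(CarryOut == (A + B > 0xFF));

      for (unsigned CarryIn = 0; CarryIn <= 1; ++CarryIn) {
        // G_UADDE: initial add, first carry, add the carry-in, then a
        // second carry only when the sum wrapped to zero via carry-in.
        uint8_t Tmp = uint8_t(A + B);
        bool Carry = Tmp < uint8_t(B);
        uint8_t Sum = uint8_t(Tmp + CarryIn);
        bool Carry2 = (Sum == 0) && CarryIn;
        assert((Carry || Carry2) == (A + B + CarryIn > 0xFF));
      }
    }
  }
  return 0;
}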
auto Zero = MIRBuilder.buildConstant(Ty, 0); - auto ResEqZero = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, Res, Zero); + auto ResEqZero = + MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero); auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn); MIRBuilder.buildOr(CarryOut, Carry, Carry2); + MIRBuilder.buildCopy(Res, NewRes); + MI.eraseFromParent(); return Legalized; } @@ -3818,6 +4006,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { case G_UMIN: case G_UMAX: return lowerMinMax(MI); + case G_SCMP: + case G_UCMP: + return lowerThreewayCompare(MI); case G_FCOPYSIGN: return lowerFCopySign(MI); case G_FMINNUM: @@ -3846,6 +4037,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return lowerExtractInsertVectorElt(MI); case G_SHUFFLE_VECTOR: return lowerShuffleVector(MI); + case G_VECTOR_COMPRESS: + return lowerVECTOR_COMPRESS(MI); case G_DYN_STACKALLOC: return lowerDynStackAlloc(MI); case G_STACKSAVE: @@ -3945,14 +4138,18 @@ LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment, return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx); } -static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg, - LLT VecTy) { - int64_t IdxVal; - if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) - return IdxReg; - +static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, + LLT VecTy) { LLT IdxTy = B.getMRI()->getType(IdxReg); unsigned NElts = VecTy.getNumElements(); + + int64_t IdxVal; + if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) { + if (IdxVal < VecTy.getNumElements()) + return IdxReg; + // If a constant index would be out of bounds, clamp it as well. + } + if (isPowerOf2_32(NElts)) { APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts)); return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0); @@ -3971,9 +4168,16 @@ Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy, assert(EltSize * 8 == EltTy.getSizeInBits() && "Converting bits to bytes lost precision"); - Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy); + Index = clampVectorIndex(MIRBuilder, Index, VecTy); + + // Convert index to the correct size for the address space. + const DataLayout &DL = MIRBuilder.getDataLayout(); + unsigned AS = MRI.getType(VecPtr).getAddressSpace(); + unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8; + LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits); + if (IdxTy != MRI.getType(Index)) + Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0); - LLT IdxTy = MRI.getType(Index); auto Mul = MIRBuilder.buildMul(IdxTy, Index, MIRBuilder.buildConstant(IdxTy, EltSize)); @@ -4422,7 +4626,7 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx, LLT ValTy = MRI.getType(ValReg); // FIXME: Do we need a distinct NarrowMemory legalize action? 
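On the clampVectorIndex change above: out-of-bounds constant indices are now clamped as well, and for a power-of-two element count the clamp is a single AND with NElts-1 before the index is scaled into a byte offset. A standalone sketch of that arithmetic (plain C++; clampIndexPow2 is an illustrative stand-in, not the LLVM helper):

// Standalone sketch (not LLVM API): power-of-two index clamping as
// used when forming a vector element pointer into a stack temporary.
#include <cassert>
#include <cstdint>

uint64_t clampIndexPow2(uint64_t Idx, uint64_t NElts) {
  return Idx & (NElts - 1); // mirrors the getLowBitsSet(Log2(NElts)) mask
}

int main() {
  const uint64_t NElts = 8, EltSize = 4; // e.g. <8 x s32>
  assert(clampIndexPow2(3, NElts) == 3); // in-bounds index unchanged
  assert(clampIndexPow2(9, NElts) == 1); // constant OOB index clamped too
  for (uint64_t Idx = 0; Idx < 64; ++Idx) {
    uint64_t Off = clampIndexPow2(Idx, NElts) * EltSize; // byte offset
    assert(Off <= (NElts - 1) * EltSize); // never past the stack slot
  }
  return 0;
}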
- if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize()) { + if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) { LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n"); return UnableToLegalize; } @@ -4545,6 +4749,13 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_INTRINSIC_TRUNC: case G_FCOS: case G_FSIN: + case G_FTAN: + case G_FACOS: + case G_FASIN: + case G_FATAN: + case G_FCOSH: + case G_FSINH: + case G_FTANH: case G_FSQRT: case G_BSWAP: case G_BITREVERSE: @@ -4651,11 +4862,46 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy); case G_FPOWI: return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/}); + case G_BITCAST: + return fewerElementsBitcast(MI, TypeIdx, NarrowTy); + case G_INTRINSIC_FPTRUNC_ROUND: + return fewerElementsVectorMultiEltType(GMI, NumElts, {2}); default: return UnableToLegalize; } } +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned int TypeIdx, + LLT NarrowTy) { + assert(MI.getOpcode() == TargetOpcode::G_BITCAST && + "Not a bitcast operation"); + + if (TypeIdx != 0) + return UnableToLegalize; + + auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); + + unsigned SrcScalSize = SrcTy.getScalarSizeInBits(); + LLT SrcNarrowTy = + LLT::fixed_vector(NarrowTy.getSizeInBits() / SrcScalSize, SrcScalSize); + + // Split the Src and Dst Reg into smaller registers + SmallVector<Register> SrcVRegs, BitcastVRegs; + if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy) + return UnableToLegalize; + + // Build new smaller bitcast instructions + // Not supporting Leftover types for now but will have to + for (unsigned i = 0; i < SrcVRegs.size(); i++) + BitcastVRegs.push_back( + MIRBuilder.buildBitcast(NarrowTy, SrcVRegs[i]).getReg(0)); + + MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs); + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle( MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) { assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); @@ -5157,6 +5403,43 @@ LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, return Legalized; } +MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce( + unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) { + assert(Ty.isScalar() && "Expected scalar type to make neutral element for"); + + switch (Opcode) { + default: + llvm_unreachable( + "getNeutralElementForVecReduce called with invalid opcode!"); + case TargetOpcode::G_VECREDUCE_ADD: + case TargetOpcode::G_VECREDUCE_OR: + case TargetOpcode::G_VECREDUCE_XOR: + case TargetOpcode::G_VECREDUCE_UMAX: + return MIRBuilder.buildConstant(Ty, 0); + case TargetOpcode::G_VECREDUCE_MUL: + return MIRBuilder.buildConstant(Ty, 1); + case TargetOpcode::G_VECREDUCE_AND: + case TargetOpcode::G_VECREDUCE_UMIN: + return MIRBuilder.buildConstant( + Ty, APInt::getAllOnes(Ty.getScalarSizeInBits())); + case TargetOpcode::G_VECREDUCE_SMAX: + return MIRBuilder.buildConstant( + Ty, APInt::getSignedMinValue(Ty.getSizeInBits())); + case TargetOpcode::G_VECREDUCE_SMIN: + return MIRBuilder.buildConstant( + Ty, APInt::getSignedMaxValue(Ty.getSizeInBits())); + case TargetOpcode::G_VECREDUCE_FADD: + return MIRBuilder.buildFConstant(Ty, -0.0); + case TargetOpcode::G_VECREDUCE_FMUL: + return MIRBuilder.buildFConstant(Ty, 1.0); + case TargetOpcode::G_VECREDUCE_FMINIMUM: + case 
TargetOpcode::G_VECREDUCE_FMAXIMUM: + assert(false && "getNeutralElementForVecReduce unimplemented for " + "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!"); + } + llvm_unreachable("switch expected to return!"); +} + LegalizerHelper::LegalizeResult LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy) { @@ -5188,6 +5471,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, case TargetOpcode::G_FSUB: case TargetOpcode::G_FMUL: case TargetOpcode::G_FDIV: + case TargetOpcode::G_FCOPYSIGN: case TargetOpcode::G_UADDSAT: case TargetOpcode::G_USUBSAT: case TargetOpcode::G_SADDSAT: @@ -5251,6 +5535,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, case TargetOpcode::G_BSWAP: case TargetOpcode::G_FCANONICALIZE: case TargetOpcode::G_SEXT_INREG: + case TargetOpcode::G_ABS: if (TypeIdx != 0) return UnableToLegalize; Observer.changingInstr(MI); @@ -5305,6 +5590,9 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_SEXT: + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_ANYEXT: case TargetOpcode::G_TRUNC: case TargetOpcode::G_FPTRUNC: case TargetOpcode::G_FPEXT: @@ -5312,28 +5600,92 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, case TargetOpcode::G_FPTOUI: case TargetOpcode::G_SITOFP: case TargetOpcode::G_UITOFP: { - if (TypeIdx != 0) - return UnableToLegalize; Observer.changingInstr(MI); - LLT SrcTy = LLT::fixed_vector( - MoreTy.getNumElements(), - MRI.getType(MI.getOperand(1).getReg()).getElementType()); - moreElementsVectorSrc(MI, SrcTy, 1); - moreElementsVectorDst(MI, MoreTy, 0); + LLT SrcExtTy; + LLT DstExtTy; + if (TypeIdx == 0) { + DstExtTy = MoreTy; + SrcExtTy = LLT::fixed_vector( + MoreTy.getNumElements(), + MRI.getType(MI.getOperand(1).getReg()).getElementType()); + } else { + DstExtTy = LLT::fixed_vector( + MoreTy.getNumElements(), + MRI.getType(MI.getOperand(0).getReg()).getElementType()); + SrcExtTy = MoreTy; + } + moreElementsVectorSrc(MI, SrcExtTy, 1); + moreElementsVectorDst(MI, DstExtTy, 0); Observer.changedInstr(MI); return Legalized; } - case TargetOpcode::G_ICMP: { - // TODO: the symmetric MoreTy works for targets like, e.g. NEON. - // For targets, like e.g. MVE, the result is a predicated vector (i1). - // This will need some refactoring. 
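Backing up to the fewerElementsBitcast hunk a little above: splitting a wide G_BITCAST into narrow bitcasts plus a merge is sound because a bitcast is purely a reinterpretation of bytes, so it commutes with splitting. A standalone byte-level check (plain C++, not LLVM API; the <8 x s16> to <4 x s32> shapes are illustrative):

// Standalone sketch (not LLVM API): one wide bitcast produces the same
// bytes as two narrow bitcasts followed by a merge of the pieces.
#include <array>
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // Source <8 x s16>; destination <4 x s32>; narrow dst <2 x s32>.
  std::array<uint16_t, 8> Src = {1, 2, 3, 4, 5, 6, 7, 8};

  std::array<uint32_t, 4> Whole;
  std::memcpy(Whole.data(), Src.data(), 16); // one wide G_BITCAST

  std::array<uint32_t, 4> Pieces;
  for (int I = 0; I < 2; ++I) // two narrow bitcasts, then a merge
    std::memcpy(&Pieces[I * 2], &Src[I * 4], 8);

  assert(Whole == Pieces);
  return 0;
}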
+ case TargetOpcode::G_ICMP: + case TargetOpcode::G_FCMP: { + if (TypeIdx != 1) + return UnableToLegalize; + Observer.changingInstr(MI); moreElementsVectorSrc(MI, MoreTy, 2); moreElementsVectorSrc(MI, MoreTy, 3); + LLT CondTy = LLT::fixed_vector( + MoreTy.getNumElements(), + MRI.getType(MI.getOperand(0).getReg()).getElementType()); + moreElementsVectorDst(MI, CondTy, 0); + Observer.changedInstr(MI); + return Legalized; + } + case TargetOpcode::G_BITCAST: { + if (TypeIdx != 0) + return UnableToLegalize; + + LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements(); + if (coefficient % DstTy.getNumElements() != 0) + return UnableToLegalize; + + coefficient = coefficient / DstTy.getNumElements(); + + LLT NewTy = SrcTy.changeElementCount( + ElementCount::get(coefficient, MoreTy.isScalable())); + Observer.changingInstr(MI); + moreElementsVectorSrc(MI, NewTy, 1); moreElementsVectorDst(MI, MoreTy, 0); Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_VECREDUCE_FADD: + case TargetOpcode::G_VECREDUCE_FMUL: + case TargetOpcode::G_VECREDUCE_ADD: + case TargetOpcode::G_VECREDUCE_MUL: + case TargetOpcode::G_VECREDUCE_AND: + case TargetOpcode::G_VECREDUCE_OR: + case TargetOpcode::G_VECREDUCE_XOR: + case TargetOpcode::G_VECREDUCE_SMAX: + case TargetOpcode::G_VECREDUCE_SMIN: + case TargetOpcode::G_VECREDUCE_UMAX: + case TargetOpcode::G_VECREDUCE_UMIN: { + LLT OrigTy = MRI.getType(MI.getOperand(1).getReg()); + MachineOperand &MO = MI.getOperand(1); + auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO); + auto NeutralElement = getNeutralElementForVecReduce( + MI.getOpcode(), MIRBuilder, MoreTy.getElementType()); + + LLT IdxTy(TLI.getVectorIdxTy(MIRBuilder.getDataLayout())); + for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements(); + i != e; i++) { + auto Idx = MIRBuilder.buildConstant(IdxTy, i); + NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec, + NeutralElement, Idx); + } + + Observer.changingInstr(MI); + MO.setReg(NewVec.getReg(0)); + Observer.changedInstr(MI); + return Legalized; + } + default: return UnableToLegalize; } @@ -6189,12 +6541,26 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks. auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01))); - auto ResTmp = B.buildMul(Ty, B8Count, MulMask); // Shift count result from 8 high bits to low bits. 
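The G_VECREDUCE_* padding above works because each inserted lane holds the reduction's neutral element from getNeutralElementForVecReduce, so widening the input vector cannot change the reduced value. A standalone spot-check for three of the opcodes (plain C++, not MIR):

// Standalone sketch (not LLVM API): padding a reduction input with the
// opcode's neutral element preserves the result (3 lanes -> 4 lanes).
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <limits>

int main() {
  int32_t V[3] = {7, -2, 5};

  // G_VECREDUCE_ADD: neutral element 0.
  assert(V[0] + V[1] + V[2] == V[0] + V[1] + V[2] + 0);

  // G_VECREDUCE_UMIN: neutral element all-ones (unsigned maximum).
  uint32_t U[3] = {7u, 2u, 5u};
  assert(std::min({U[0], U[1], U[2]}) ==
         std::min({U[0], U[1], U[2],
                   std::numeric_limits<uint32_t>::max()}));

  // G_VECREDUCE_SMAX: neutral element the signed minimum.
  assert(std::max({V[0], V[1], V[2]}) ==
         std::max({V[0], V[1], V[2],
                   std::numeric_limits<int32_t>::min()}));
  return 0;
}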
auto C_SizeM8 = B.buildConstant(Ty, Size - 8); - B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8); + auto IsMulSupported = [this](const LLT Ty) { + auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action; + return Action == Legal || Action == WidenScalar || Action == Custom; + }; + if (IsMulSupported(Ty)) { + auto ResTmp = B.buildMul(Ty, B8Count, MulMask); + B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8); + } else { + auto ResTmp = B8Count; + for (unsigned Shift = 8; Shift < Size; Shift *= 2) { + auto ShiftC = B.buildConstant(Ty, Shift); + auto Shl = B.buildShl(Ty, ResTmp, ShiftC); + ResTmp = B.buildAdd(Ty, ResTmp, Shl); + } + B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8); + } MI.eraseFromParent(); return Legalized; } @@ -6868,8 +7234,6 @@ LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) { return UnableToLegalize; } -// TODO: If RHS is a constant SelectionDAGBuilder expands this into a -// multiplication tree. LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) { auto [Dst, Src0, Src1] = MI.getFirst3Regs(); LLT Ty = MRI.getType(Dst); @@ -6909,6 +7273,36 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) { } LegalizerHelper::LegalizeResult +LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) { + GSUCmp *Cmp = cast<GSUCmp>(&MI); + + Register Dst = Cmp->getReg(0); + LLT DstTy = MRI.getType(Dst); + LLT CmpTy = DstTy.changeElementSize(1); + + CmpInst::Predicate LTPredicate = Cmp->isSigned() + ? CmpInst::Predicate::ICMP_SLT + : CmpInst::Predicate::ICMP_ULT; + CmpInst::Predicate GTPredicate = Cmp->isSigned() + ? CmpInst::Predicate::ICMP_SGT + : CmpInst::Predicate::ICMP_UGT; + + auto One = MIRBuilder.buildConstant(DstTy, 1); + auto Zero = MIRBuilder.buildConstant(DstTy, 0); + auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(), + Cmp->getRHSReg()); + auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero); + + auto MinusOne = MIRBuilder.buildConstant(DstTy, -1); + auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(), + Cmp->getRHSReg()); + MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerFCopySign(MachineInstr &MI) { auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs(); const int Src0Size = Src0Ty.getScalarSizeInBits(); @@ -6940,6 +7334,10 @@ LegalizerHelper::lowerFCopySign(MachineInstr &MI) { // constants are a nan and -0.0, but the final result should preserve // everything. unsigned Flags = MI.getFlags(); + + // We masked the sign bit and the not-sign bit, so these are disjoint. 
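The lowerThreewayCompare above turns G_SCMP/G_UCMP into two compares and two selects producing -1/0/1. Its scalar shape, as a standalone sketch (plain C++; lowerSCmp is an illustrative name, not an LLVM function):

// Standalone sketch (not LLVM API): the select-based three-way compare.
#include <cassert>

int lowerSCmp(int LHS, int RHS) {
  int SelectZeroOrOne = (LHS > RHS) ? 1 : 0; // the IsGT select
  return (LHS < RHS) ? -1 : SelectZeroOrOne; // the IsLT select
}

int main() {
  assert(lowerSCmp(1, 2) == -1);
  assert(lowerSCmp(2, 2) == 0);
  assert(lowerSCmp(3, 2) == 1);
  return 0;
}

The unsigned variant only swaps the predicates (ULT/UGT for SLT/SGT); the select structure is identical.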
+ Flags |= MachineInstr::Disjoint; + MIRBuilder.buildOr(Dst, And0, And1, Flags); MI.eraseFromParent(); @@ -7236,6 +7634,93 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerVECTOR_COMPRESS(llvm::MachineInstr &MI) { + auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] = + MI.getFirst4RegLLTs(); + + if (VecTy.isScalableVector()) + report_fatal_error("Cannot expand masked_compress for scalable vectors."); + + Align VecAlign = getStackTemporaryAlignment(VecTy); + MachinePointerInfo PtrInfo; + Register StackPtr = + createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, + PtrInfo) + .getReg(0); + MachinePointerInfo ValPtrInfo = + MachinePointerInfo::getUnknownStack(*MI.getMF()); + + LLT IdxTy = LLT::scalar(32); + LLT ValTy = VecTy.getElementType(); + Align ValAlign = getStackTemporaryAlignment(ValTy); + + auto OutPos = MIRBuilder.buildConstant(IdxTy, 0); + + bool HasPassthru = + MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF; + + if (HasPassthru) + MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign); + + Register LastWriteVal; + std::optional<APInt> PassthruSplatVal = + isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI); + + if (PassthruSplatVal.has_value()) { + LastWriteVal = + MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0); + } else if (HasPassthru) { + auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask); + Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD, + {LLT::scalar(32)}, {Popcount}); + + Register LastElmtPtr = + getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0)); + LastWriteVal = + MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign) + .getReg(0); + } + + unsigned NumElmts = VecTy.getNumElements(); + for (unsigned I = 0; I < NumElmts; ++I) { + auto Idx = MIRBuilder.buildConstant(IdxTy, I); + auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx); + Register ElmtPtr = + getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0)); + MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign); + + LLT MaskITy = MaskTy.getElementType(); + auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx); + if (MaskITy.getSizeInBits() > 1) + MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI); + + MaskI = MIRBuilder.buildZExt(IdxTy, MaskI); + OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI); + + if (HasPassthru && I == NumElmts - 1) { + auto EndOfVector = + MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1); + auto AllLanesSelected = MIRBuilder.buildICmp( + CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector); + OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy}, + {OutPos, EndOfVector}); + ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0)); + + LastWriteVal = + MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal) + .getReg(0); + MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign); + } + } + + // TODO: Use StackPtr's FrameIndex alignment. 
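The heart of lowerVECTOR_COMPRESS above is a store-and-bump loop over a stack temporary: every element is stored to the current cursor, but the cursor only advances on set mask lanes. A standalone scalar rendering of that core loop (plain C++, not MIR; this sketch omits the passthru/LastWriteVal tail fix-up the real lowering performs):

// Standalone sketch (not LLVM API): the compress loop. Selected lanes
// pack to the front; the cursor's final slot keeps whatever was
// stored last, and later lanes keep the passthru image.
#include <array>
#include <cassert>
#include <cstdint>

int main() {
  std::array<int32_t, 4> Vec = {10, 20, 30, 40};
  std::array<bool, 4> Mask = {true, false, true, false};
  std::array<int32_t, 4> Out = {-1, -1, -1, -1}; // passthru image

  unsigned OutPos = 0;
  for (unsigned I = 0; I < 4; ++I) {
    Out[OutPos] = Vec[I];      // unconditional store to the cursor
    OutPos += Mask[I] ? 1 : 0; // cursor advances only on set lanes
  }
  assert(Out[0] == 10 && Out[1] == 30); // lanes 0 and 2 packed
  assert(Out[2] == 40 && Out[3] == -1); // slack lane, passthru lane
  return 0;
}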
+ MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign); + + MI.eraseFromParent(); + return Legalized; +} + Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, @@ -7457,10 +7942,12 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) { LLT Ty = Dst0Ty; LLT BoolTy = Dst1Ty; + Register NewDst0 = MRI.cloneVirtualRegister(Dst0); + if (IsAdd) - MIRBuilder.buildAdd(Dst0, LHS, RHS); + MIRBuilder.buildAdd(NewDst0, LHS, RHS); else - MIRBuilder.buildSub(Dst0, LHS, RHS); + MIRBuilder.buildSub(NewDst0, LHS, RHS); // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow. @@ -7473,12 +7960,15 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) { // (LHS) if and only if the other operand (RHS) is (non-zero) positive, // otherwise there will be overflow. auto ResultLowerThanLHS = - MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS); + MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS); auto ConditionRHS = MIRBuilder.buildICmp( IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero); MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS); + + MIRBuilder.buildCopy(Dst0, NewDst0); MI.eraseFromParent(); + return Legalized; } @@ -7690,7 +8180,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) { //{ (Src & Mask) >> N } | { (Src << N) & Mask } static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, - MachineInstrBuilder Src, APInt Mask) { + MachineInstrBuilder Src, const APInt &Mask) { const LLT Ty = Dst.getLLTTy(*B.getMRI()); MachineInstrBuilder C_N = B.buildConstant(Ty, N); MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask); @@ -7703,27 +8193,51 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerBitreverse(MachineInstr &MI) { auto [Dst, Src] = MI.getFirst2Regs(); const LLT Ty = MRI.getType(Src); - unsigned Size = Ty.getSizeInBits(); - - MachineInstrBuilder BSWAP = - MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src}); - - // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654 - // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4] - // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0] - MachineInstrBuilder Swap4 = - SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0))); - - // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76 - // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2] - // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC] - MachineInstrBuilder Swap2 = - SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC))); + unsigned Size = Ty.getScalarSizeInBits(); + + if (Size >= 8) { + MachineInstrBuilder BSWAP = + MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src}); + + // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654 + // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4] + // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0] + MachineInstrBuilder Swap4 = + SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0))); + + // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76 + // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2] + // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC] + MachineInstrBuilder Swap2 = + SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC))); + + // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 + // 6|7 + // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1] + // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 
0xAAAAAAAA] + SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA))); + } else { + // Expand bitreverse for types smaller than 8 bits. + MachineInstrBuilder Tmp; + for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) { + MachineInstrBuilder Tmp2; + if (I < J) { + auto ShAmt = MIRBuilder.buildConstant(Ty, J - I); + Tmp2 = MIRBuilder.buildShl(Ty, Src, ShAmt); + } else { + auto ShAmt = MIRBuilder.buildConstant(Ty, I - J); + Tmp2 = MIRBuilder.buildLShr(Ty, Src, ShAmt); + } - // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7 - // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1] - // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA] - SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA))); + auto Mask = MIRBuilder.buildConstant(Ty, 1ULL << J); + Tmp2 = MIRBuilder.buildAnd(Ty, Tmp2, Mask); + if (I == 0) + Tmp = Tmp2; + else + Tmp = MIRBuilder.buildOr(Ty, Tmp, Tmp2); + } + MIRBuilder.buildCopy(Dst, Tmp); + } MI.eraseFromParent(); return Legalized; @@ -7952,13 +8466,11 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) { } LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) { - // Implement vector G_SELECT in terms of XOR, AND, OR. + // Implement G_SELECT in terms of XOR, AND, OR. auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] = MI.getFirst4RegLLTs(); - if (!DstTy.isVector()) - return UnableToLegalize; - bool IsEltPtr = DstTy.getElementType().isPointer(); + bool IsEltPtr = DstTy.isPointerOrPointerVector(); if (IsEltPtr) { LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits()); LLT NewTy = DstTy.changeElementType(ScalarPtrTy); @@ -7968,7 +8480,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) { } if (MaskTy.isScalar()) { - // Turn the scalar condition into a vector condition mask. + // Turn the scalar condition into a vector condition mask if needed. Register MaskElt = MaskReg; @@ -7978,13 +8490,20 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) { MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0); // Continue the sign extension (or truncate) to match the data type. - MaskElt = MIRBuilder.buildSExtOrTrunc(DstTy.getElementType(), - MaskElt).getReg(0); + MaskElt = + MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0); - // Generate a vector splat idiom. - auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt); - MaskReg = ShufSplat.getReg(0); + if (DstTy.isVector()) { + // Generate a vector splat idiom. + auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt); + MaskReg = ShufSplat.getReg(0); + } else { + MaskReg = MaskElt; + } MaskTy = DstTy; + } else if (!DstTy.isVector()) { + // Cannot handle the case that mask is a vector and dst is a scalar. 
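On the new sub-8-bit G_BITREVERSE path above: each bit I is relocated to position Size-1-I with one shift and one mask, and the partial results are OR'd together, which avoids materializing a G_BSWAP for types narrower than a byte. A standalone check at 4 bits (plain C++; reverse4 is an illustrative reference, not LLVM code):

// Standalone sketch (not LLVM API): per-bit reversal for narrow types,
// checked against a straightforward reference at 4 bits.
#include <cassert>
#include <cstdint>

uint8_t reverse4(uint8_t Src) {
  const unsigned Size = 4;
  uint8_t Result = 0;
  for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
    uint8_t Tmp2 = (I < J) ? uint8_t(Src << (J - I)) // bit I -> bit J
                           : uint8_t(Src >> (I - J));
    Result |= Tmp2 & uint8_t(1u << J); // keep only the landed bit
  }
  return Result;
}

int main() {
  for (unsigned V = 0; V < 16; ++V) {
    unsigned Ref = 0;
    for (unsigned B = 0; B < 4; ++B)
      if (V & (1u << B))
        Ref |= 1u << (3 - B);
    assert(reverse4(uint8_t(V)) == uint8_t(Ref));
  }
  return 0;
}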
+ return UnableToLegalize; } if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) { @@ -8045,9 +8564,22 @@ LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) { // %res = G_SMAX %a, %v2 Register SrcReg = MI.getOperand(1).getReg(); LLT Ty = MRI.getType(SrcReg); + auto Zero = MIRBuilder.buildConstant(Ty, 0); + auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg); + MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) { + Register SrcReg = MI.getOperand(1).getReg(); + Register DestReg = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1); auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0); auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0); - MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub); + auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero); + MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub); MI.eraseFromParent(); return Legalized; } @@ -8071,8 +8603,6 @@ LegalizerHelper::lowerVectorReduction(MachineInstr &MI) { return UnableToLegalize; } -static Type *getTypeForLLT(LLT Ty, LLVMContext &C); - LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) { MachineFunction &MF = *MI.getMF(); const DataLayout &DL = MIRBuilder.getDataLayout(); @@ -8196,13 +8726,6 @@ static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps, return true; } -static Type *getTypeForLLT(LLT Ty, LLVMContext &C) { - if (Ty.isVector()) - return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()), - Ty.getNumElements()); - return IntegerType::get(C, Ty.getSizeInBits()); -} - // Get a vectorized representation of the memset value operand, GISel edition. static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { MachineRegisterInfo &MRI = *MIB.getMRI(); @@ -8231,7 +8754,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { // For vector types create a G_BUILD_VECTOR. if (Ty.isVector()) - Val = MIB.buildSplatVector(Ty, Val).getReg(0); + Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0); return Val; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index de9931d1c240..c9ee35373cd4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -13,11 +13,11 @@ #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/ADT/SmallBitVector.h" -#include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGenTypes/LowLevelType.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/Debug.h" @@ -154,7 +154,8 @@ static bool mutationIsSane(const LegalizeRule &Rule, case WidenScalar: { if (OldTy.isVector()) { // Number of elements should not change. 
- if (!NewTy.isVector() || OldTy.getNumElements() != NewTy.getNumElements()) + if (!NewTy.isVector() || + OldTy.getElementCount() != NewTy.getElementCount()) return false; } else { // Both types must be vectors diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp index ee499c41c558..0d0c093648eb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp @@ -116,8 +116,8 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1, if (!BasePtr0.getBase().isValid() || !BasePtr1.getBase().isValid()) return false; - int64_t Size1 = LdSt1->getMemSize(); - int64_t Size2 = LdSt2->getMemSize(); + LocationSize Size1 = LdSt1->getMemSize(); + LocationSize Size2 = LdSt2->getMemSize(); int64_t PtrDiff; if (BasePtr0.getBase() == BasePtr1.getBase() && BasePtr0.hasValidOffset() && @@ -128,20 +128,18 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1, // vector objects on the stack. // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the // following situations arise: - if (PtrDiff >= 0 && - Size1 != static_cast<int64_t>(MemoryLocation::UnknownSize)) { + if (PtrDiff >= 0 && Size1.hasValue() && !Size1.isScalable()) { // [----BasePtr0----] // [---BasePtr1--] // ========PtrDiff========> - IsAlias = !(Size1 <= PtrDiff); + IsAlias = !((int64_t)Size1.getValue() <= PtrDiff); return true; } - if (PtrDiff < 0 && - Size2 != static_cast<int64_t>(MemoryLocation::UnknownSize)) { + if (PtrDiff < 0 && Size2.hasValue() && !Size2.isScalable()) { // [----BasePtr0----] // [---BasePtr1--] // =====(-PtrDiff)====> - IsAlias = !((PtrDiff + Size2) <= 0); + IsAlias = !((PtrDiff + (int64_t)Size2.getValue()) <= 0); return true; } return false; @@ -196,7 +194,7 @@ bool GISelAddressing::instMayAlias(const MachineInstr &MI, bool IsAtomic; Register BasePtr; int64_t Offset; - uint64_t NumBytes; + LocationSize NumBytes; MachineMemOperand *MMO; }; @@ -212,16 +210,17 @@ bool GISelAddressing::instMayAlias(const MachineInstr &MI, Offset = 0; } - uint64_t Size = MemoryLocation::getSizeOrUnknown( - LS->getMMO().getMemoryType().getSizeInBytes()); - return {LS->isVolatile(), LS->isAtomic(), BaseReg, - Offset /*base offset*/, Size, &LS->getMMO()}; + LocationSize Size = LS->getMMO().getSize(); + return {LS->isVolatile(), LS->isAtomic(), BaseReg, + Offset /*base offset*/, Size, &LS->getMMO()}; } // FIXME: support recognizing lifetime instructions. // Default. return {false /*isvolatile*/, - /*isAtomic*/ false, Register(), - (int64_t)0 /*offset*/, 0 /*size*/, + /*isAtomic*/ false, + Register(), + (int64_t)0 /*offset*/, + LocationSize::beforeOrAfterPointer() /*size*/, (MachineMemOperand *)nullptr}; }; MemUseCharacteristics MUC0 = getCharacteristics(&MI), @@ -249,10 +248,20 @@ bool GISelAddressing::instMayAlias(const MachineInstr &MI, return false; } + // If NumBytes is scalable and offset is not 0, conservatively return may + // alias + if ((MUC0.NumBytes.isScalable() && MUC0.Offset != 0) || + (MUC1.NumBytes.isScalable() && MUC1.Offset != 0)) + return true; + + const bool BothNotScalable = + !MUC0.NumBytes.isScalable() && !MUC1.NumBytes.isScalable(); + // Try to prove that there is aliasing, or that there is no aliasing. Either // way, we can return now. If nothing can be proved, proceed with more tests. 
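The aliasIsKnownForLoadStore call just below proves disjointness for two fixed-size accesses off the same base by comparing sizes against the pointer difference; the LocationSize changes above simply guard it so scalable sizes never take this path. The underlying interval test, as a standalone sketch (plain C++; knownNoAlias is an illustrative stand-in for the LLVM helper):

// Standalone sketch (not LLVM API): disjointness of [0, Size1) and
// [PtrDiff, PtrDiff + Size2) for two accesses off the same base.
#include <cassert>
#include <cstdint>

bool knownNoAlias(int64_t PtrDiff, int64_t Size1, int64_t Size2) {
  if (PtrDiff >= 0)
    return Size1 <= PtrDiff;   // access 0 ends before access 1 starts
  return PtrDiff + Size2 <= 0; // access 1 ends before access 0 starts
}

int main() {
  assert(knownNoAlias(/*PtrDiff=*/8, /*Size1=*/8, /*Size2=*/4));   // adjacent
  assert(!knownNoAlias(/*PtrDiff=*/4, /*Size1=*/8, /*Size2=*/4));  // overlap
  assert(knownNoAlias(/*PtrDiff=*/-4, /*Size1=*/8, /*Size2=*/4));  // before
  assert(!knownNoAlias(/*PtrDiff=*/-2, /*Size1=*/8, /*Size2=*/4)); // overlap
  return 0;
}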
bool IsAlias; - if (GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI)) + if (BothNotScalable && + GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI)) return IsAlias; // The following all rely on MMO0 and MMO1 being valid. @@ -262,19 +271,24 @@ bool GISelAddressing::instMayAlias(const MachineInstr &MI, // FIXME: port the alignment based alias analysis from SDAG's isAlias(). int64_t SrcValOffset0 = MUC0.MMO->getOffset(); int64_t SrcValOffset1 = MUC1.MMO->getOffset(); - uint64_t Size0 = MUC0.NumBytes; - uint64_t Size1 = MUC1.NumBytes; - if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && - Size0 != MemoryLocation::UnknownSize && - Size1 != MemoryLocation::UnknownSize) { + LocationSize Size0 = MUC0.NumBytes; + LocationSize Size1 = MUC1.NumBytes; + if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && Size0.hasValue() && + Size1.hasValue()) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); - int64_t Overlap0 = Size0 + SrcValOffset0 - MinOffset; - int64_t Overlap1 = Size1 + SrcValOffset1 - MinOffset; - if (AA->isNoAlias(MemoryLocation(MUC0.MMO->getValue(), Overlap0, - MUC0.MMO->getAAInfo()), - MemoryLocation(MUC1.MMO->getValue(), Overlap1, - MUC1.MMO->getAAInfo()))) + int64_t Overlap0 = + Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset; + int64_t Overlap1 = + Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset; + LocationSize Loc0 = + Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0); + LocationSize Loc1 = + Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1); + + if (AA->isNoAlias( + MemoryLocation(MUC0.MMO->getValue(), Loc0, MUC0.MMO->getAAInfo()), + MemoryLocation(MUC1.MMO->getValue(), Loc1, MUC1.MMO->getAAInfo()))) return false; } @@ -304,7 +318,7 @@ bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) { assert(MRI->getType(StoreMI->getValueReg()) == OrigTy); #endif - const auto &DL = MF->getFunction().getParent()->getDataLayout(); + const auto &DL = MF->getFunction().getDataLayout(); bool AnyMerged = false; do { unsigned NumPow2 = llvm::bit_floor(StoresToMerge.size()); @@ -941,7 +955,7 @@ void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) { // Need to reserve at least MaxStoreSizeToForm + 1 bits. 
BitVector LegalSizes(MaxStoreSizeToForm * 2); const auto &LI = *MF->getSubtarget().getLegalizerInfo(); - const auto &DL = MF->getFunction().getParent()->getDataLayout(); + const auto &DL = MF->getFunction().getDataLayout(); Type *IRPtrTy = PointerType::get(MF->getFunction().getContext(), AddrSpace); LLT PtrTy = getLLTForType(*IRPtrTy, DL); // We assume that we're not going to be generating any stores wider than diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index a5827c26c04f..7eb6cd4e0d79 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -28,6 +28,7 @@ void MachineIRBuilder::setMF(MachineFunction &MF) { State.TII = MF.getSubtarget().getInstrInfo(); State.DL = DebugLoc(); State.PCSections = nullptr; + State.MMRA = nullptr; State.II = MachineBasicBlock::iterator(); State.Observer = nullptr; } @@ -37,7 +38,8 @@ void MachineIRBuilder::setMF(MachineFunction &MF) { //------------------------------------------------------------------------------ MachineInstrBuilder MachineIRBuilder::buildInstrNoInsert(unsigned Opcode) { - return BuildMI(getMF(), {getDL(), getPCSections()}, getTII().get(Opcode)); + return BuildMI(getMF(), {getDL(), getPCSections(), getMMRAMetadata()}, + getTII().get(Opcode)); } MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) { @@ -199,7 +201,7 @@ void MachineIRBuilder::validateShiftOp(const LLT Res, const LLT Op0, MachineInstrBuilder MachineIRBuilder::buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional<unsigned> Flags) { - assert(Res.getLLTTy(*getMRI()).getScalarType().isPointer() && + assert(Res.getLLTTy(*getMRI()).isPointerOrPointerVector() && Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch"); assert(Op1.getLLTTy(*getMRI()).getScalarType().isScalar() && "invalid offset type"); @@ -269,14 +271,19 @@ MachineIRBuilder::buildDeleteTrailingVectorElements(const DstOp &Res, LLT ResTy = Res.getLLTTy(*getMRI()); LLT Op0Ty = Op0.getLLTTy(*getMRI()); - assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type"); - assert((ResTy.getElementType() == Op0Ty.getElementType()) && + assert(Op0Ty.isVector() && "Non vector type"); + assert(((ResTy.isScalar() && (ResTy == Op0Ty.getElementType())) || + (ResTy.isVector() && + (ResTy.getElementType() == Op0Ty.getElementType()))) && "Different vector element types"); - assert((ResTy.getNumElements() < Op0Ty.getNumElements()) && - "Op0 has fewer elements"); + assert( + (ResTy.isScalar() || (ResTy.getNumElements() < Op0Ty.getNumElements())) && + "Op0 has fewer elements"); - SmallVector<Register, 8> Regs; auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0); + if (ResTy.isScalar()) + return buildCopy(Res, Unmerge.getReg(0)); + SmallVector<Register, 8> Regs; for (unsigned i = 0; i < ResTy.getNumElements(); ++i) Regs.push_back(Unmerge.getReg(i)); return buildMergeLikeInstr(Res, Regs); @@ -321,7 +328,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res, auto Const = buildInstr(TargetOpcode::G_CONSTANT) .addDef(getMRI()->createGenericVirtualRegister(EltTy)) .addCImm(&Val); - return buildSplatVector(Res, Const); + return buildSplatBuildVector(Res, Const); } auto Const = buildInstr(TargetOpcode::G_CONSTANT); @@ -358,7 +365,7 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res, 
.addDef(getMRI()->createGenericVirtualRegister(EltTy)) .addFPImm(&Val); - return buildSplatVector(Res, Const); + return buildSplatBuildVector(Res, Const); } auto Const = buildInstr(TargetOpcode::G_FCONSTANT); @@ -390,6 +397,19 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res, return buildFConstant(Res, *CFP); } +MachineInstrBuilder +MachineIRBuilder::buildConstantPtrAuth(const DstOp &Res, + const ConstantPtrAuth *CPA, + Register Addr, Register AddrDisc) { + auto MIB = buildInstr(TargetOpcode::G_PTRAUTH_GLOBAL_VALUE); + Res.addDefToMIB(*getMRI(), MIB); + MIB.addUse(Addr); + MIB.addImm(CPA->getKey()->getZExtValue()); + MIB.addUse(AddrDisc); + MIB.addImm(CPA->getDiscriminator()->getZExtValue()); + return MIB; +} + MachineInstrBuilder MachineIRBuilder::buildBrCond(const SrcOp &Tst, MachineBasicBlock &Dest) { assert(Tst.getLLTTy(*getMRI()).isScalar() && "invalid operand type"); @@ -483,8 +503,9 @@ MachineInstrBuilder MachineIRBuilder::buildSExt(const DstOp &Res, } MachineInstrBuilder MachineIRBuilder::buildZExt(const DstOp &Res, - const SrcOp &Op) { - return buildInstr(TargetOpcode::G_ZEXT, Res, Op); + const SrcOp &Op, + std::optional<unsigned> Flags) { + return buildInstr(TargetOpcode::G_ZEXT, Res, Op, Flags); } unsigned MachineIRBuilder::getBoolExtOp(bool IsVec, bool IsFP) const { @@ -706,8 +727,8 @@ MachineIRBuilder::buildBuildVectorConstant(const DstOp &Res, return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); } -MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res, - const SrcOp &Src) { +MachineInstrBuilder MachineIRBuilder::buildSplatBuildVector(const DstOp &Res, + const SrcOp &Src) { SmallVector<SrcOp, 8> TmpVec(Res.getLLTTy(*getMRI()).getNumElements(), Src); return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); } @@ -737,6 +758,13 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleSplat(const DstOp &Res, return buildShuffleVector(DstTy, InsElt, UndefVec, ZeroMask); } +MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res, + const SrcOp &Src) { + assert(Src.getLLTTy(*getMRI()) == Res.getLLTTy(*getMRI()).getElementType() && + "Expected Src to match Dst elt ty"); + return buildInstr(TargetOpcode::G_SPLAT_VECTOR, Res, Src); +} + MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, @@ -781,6 +809,31 @@ MachineInstrBuilder MachineIRBuilder::buildInsert(const DstOp &Res, return buildInstr(TargetOpcode::G_INSERT, Res, {Src, Op, uint64_t(Index)}); } +MachineInstrBuilder MachineIRBuilder::buildVScale(const DstOp &Res, + unsigned MinElts) { + + auto IntN = IntegerType::get(getMF().getFunction().getContext(), + Res.getLLTTy(*getMRI()).getScalarSizeInBits()); + ConstantInt *CI = ConstantInt::get(IntN, MinElts); + return buildVScale(Res, *CI); +} + +MachineInstrBuilder MachineIRBuilder::buildVScale(const DstOp &Res, + const ConstantInt &MinElts) { + auto VScale = buildInstr(TargetOpcode::G_VSCALE); + VScale->setDebugLoc(DebugLoc()); + Res.addDefToMIB(*getMRI(), VScale); + VScale.addCImm(&MinElts); + return VScale; +} + +MachineInstrBuilder MachineIRBuilder::buildVScale(const DstOp &Res, + const APInt &MinElts) { + ConstantInt *CI = + ConstantInt::get(getMF().getFunction().getContext(), MinElts); + return buildVScale(Res, *CI); +} + static unsigned getIntrinsicOpcode(bool HasSideEffects, bool IsConvergent) { if (HasSideEffects && IsConvergent) return TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS; @@ -830,9 +883,10 @@ MachineInstrBuilder 
MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID, return buildIntrinsic(ID, Results, HasSideEffects, isConvergent); } -MachineInstrBuilder MachineIRBuilder::buildTrunc(const DstOp &Res, - const SrcOp &Op) { - return buildInstr(TargetOpcode::G_TRUNC, Res, Op); +MachineInstrBuilder +MachineIRBuilder::buildTrunc(const DstOp &Res, const SrcOp &Op, + std::optional<unsigned> Flags) { + return buildInstr(TargetOpcode::G_TRUNC, Res, Op, Flags); } MachineInstrBuilder @@ -857,6 +911,18 @@ MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred, return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1}, Flags); } +MachineInstrBuilder MachineIRBuilder::buildSCmp(const DstOp &Res, + const SrcOp &Op0, + const SrcOp &Op1) { + return buildInstr(TargetOpcode::G_SCMP, Res, {Op0, Op1}); +} + +MachineInstrBuilder MachineIRBuilder::buildUCmp(const DstOp &Res, + const SrcOp &Op0, + const SrcOp &Op1) { + return buildInstr(TargetOpcode::G_UCMP, Res, {Op0, Op1}); +} + MachineInstrBuilder MachineIRBuilder::buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, @@ -865,6 +931,21 @@ MachineIRBuilder::buildSelect(const DstOp &Res, const SrcOp &Tst, return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1}, Flags); } +MachineInstrBuilder MachineIRBuilder::buildInsertSubvector(const DstOp &Res, + const SrcOp &Src0, + const SrcOp &Src1, + unsigned Idx) { + return buildInstr(TargetOpcode::G_INSERT_SUBVECTOR, Res, + {Src0, Src1, uint64_t(Idx)}); +} + +MachineInstrBuilder MachineIRBuilder::buildExtractSubvector(const DstOp &Res, + const SrcOp &Src, + unsigned Idx) { + return buildInstr(TargetOpcode::G_INSERT_SUBVECTOR, Res, + {Src, uint64_t(Idx)}); +} + MachineInstrBuilder MachineIRBuilder::buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx) { @@ -878,14 +959,14 @@ MachineIRBuilder::buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, } MachineInstrBuilder MachineIRBuilder::buildAtomicCmpXchgWithSuccess( - Register OldValRes, Register SuccessRes, Register Addr, Register CmpVal, - Register NewVal, MachineMemOperand &MMO) { + const DstOp &OldValRes, const DstOp &SuccessRes, const SrcOp &Addr, + const SrcOp &CmpVal, const SrcOp &NewVal, MachineMemOperand &MMO) { #ifndef NDEBUG - LLT OldValResTy = getMRI()->getType(OldValRes); - LLT SuccessResTy = getMRI()->getType(SuccessRes); - LLT AddrTy = getMRI()->getType(Addr); - LLT CmpValTy = getMRI()->getType(CmpVal); - LLT NewValTy = getMRI()->getType(NewVal); + LLT OldValResTy = OldValRes.getLLTTy(*getMRI()); + LLT SuccessResTy = SuccessRes.getLLTTy(*getMRI()); + LLT AddrTy = Addr.getLLTTy(*getMRI()); + LLT CmpValTy = CmpVal.getLLTTy(*getMRI()); + LLT NewValTy = NewVal.getLLTTy(*getMRI()); assert(OldValResTy.isScalar() && "invalid operand type"); assert(SuccessResTy.isScalar() && "invalid operand type"); assert(AddrTy.isPointer() && "invalid operand type"); @@ -895,24 +976,25 @@ MachineInstrBuilder MachineIRBuilder::buildAtomicCmpXchgWithSuccess( assert(OldValResTy == NewValTy && "type mismatch"); #endif - return buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) - .addDef(OldValRes) - .addDef(SuccessRes) - .addUse(Addr) - .addUse(CmpVal) - .addUse(NewVal) - .addMemOperand(&MMO); + auto MIB = buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS); + OldValRes.addDefToMIB(*getMRI(), MIB); + SuccessRes.addDefToMIB(*getMRI(), MIB); + Addr.addSrcToMIB(MIB); + CmpVal.addSrcToMIB(MIB); + NewVal.addSrcToMIB(MIB); + MIB.addMemOperand(&MMO); + return MIB; } MachineInstrBuilder 
-MachineIRBuilder::buildAtomicCmpXchg(Register OldValRes, Register Addr, - Register CmpVal, Register NewVal, +MachineIRBuilder::buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr, + const SrcOp &CmpVal, const SrcOp &NewVal, MachineMemOperand &MMO) { #ifndef NDEBUG - LLT OldValResTy = getMRI()->getType(OldValRes); - LLT AddrTy = getMRI()->getType(Addr); - LLT CmpValTy = getMRI()->getType(CmpVal); - LLT NewValTy = getMRI()->getType(NewVal); + LLT OldValResTy = OldValRes.getLLTTy(*getMRI()); + LLT AddrTy = Addr.getLLTTy(*getMRI()); + LLT CmpValTy = CmpVal.getLLTTy(*getMRI()); + LLT NewValTy = NewVal.getLLTTy(*getMRI()); assert(OldValResTy.isScalar() && "invalid operand type"); assert(AddrTy.isPointer() && "invalid operand type"); assert(CmpValTy.isValid() && "invalid operand type"); @@ -921,12 +1003,13 @@ MachineIRBuilder::buildAtomicCmpXchg(Register OldValRes, Register Addr, assert(OldValResTy == NewValTy && "type mismatch"); #endif - return buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG) - .addDef(OldValRes) - .addUse(Addr) - .addUse(CmpVal) - .addUse(NewVal) - .addMemOperand(&MMO); + auto MIB = buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG); + OldValRes.addDefToMIB(*getMRI(), MIB); + Addr.addSrcToMIB(MIB); + CmpVal.addSrcToMIB(MIB); + NewVal.addSrcToMIB(MIB); + MIB.addMemOperand(&MMO); + return MIB; } MachineInstrBuilder MachineIRBuilder::buildAtomicRMW( @@ -938,7 +1021,6 @@ MachineInstrBuilder MachineIRBuilder::buildAtomicRMW( LLT OldValResTy = OldValRes.getLLTTy(*getMRI()); LLT AddrTy = Addr.getLLTTy(*getMRI()); LLT ValTy = Val.getLLTTy(*getMRI()); - assert(OldValResTy.isScalar() && "invalid operand type"); assert(AddrTy.isPointer() && "invalid operand type"); assert(ValTy.isValid() && "invalid operand type"); assert(OldValResTy == ValTy && "type mismatch"); @@ -1108,7 +1190,7 @@ void MachineIRBuilder::validateSelectOp(const LLT ResTy, const LLT TstTy, else assert((TstTy.isScalar() || (TstTy.isVector() && - TstTy.getNumElements() == Op0Ty.getNumElements())) && + TstTy.getElementCount() == Op0Ty.getElementCount())) && "type mismatch"); #endif } @@ -1224,7 +1306,7 @@ MachineIRBuilder::buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps, return DstTy.isScalar(); else return DstTy.isVector() && - DstTy.getNumElements() == Op0Ty.getNumElements(); + DstTy.getElementCount() == Op0Ty.getElementCount(); }() && "Type Mismatch"); break; } @@ -1282,8 +1364,8 @@ MachineIRBuilder::buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps, SrcOps[1].getLLTTy(*getMRI()) && "Type mismatch"); assert(SrcOps[2].getLLTTy(*getMRI()).isScalar() && "Invalid index"); - assert(DstOps[0].getLLTTy(*getMRI()).getNumElements() == - SrcOps[0].getLLTTy(*getMRI()).getNumElements() && + assert(DstOps[0].getLLTTy(*getMRI()).getElementCount() == + SrcOps[0].getLLTTy(*getMRI()).getElementCount() && "Type mismatch"); break; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp index bb5363fb2527..e386647daa65 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -62,8 +62,8 @@ char RegBankSelect::ID = 0; INITIALIZE_PASS_BEGIN(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false); -INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) -INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass) 
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, @@ -85,8 +85,8 @@ void RegBankSelect::init(MachineFunction &MF) { TRI = MF.getSubtarget().getRegisterInfo(); TPC = &getAnalysis<TargetPassConfig>(); if (OptMode != Mode::Fast) { - MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); - MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(); + MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI(); } else { MBFI = nullptr; MBPI = nullptr; @@ -99,8 +99,8 @@ void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const { if (OptMode != Mode::Fast) { // We could preserve the information from these two analysis but // the APIs do not allow to do so yet. - AU.addRequired<MachineBlockFrequencyInfo>(); - AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); + AU.addRequired<MachineBranchProbabilityInfoWrapperPass>(); } AU.addRequired<TargetPassConfig>(); getSelectionDAGFallbackAnalysisUsage(AU); @@ -420,7 +420,8 @@ void RegBankSelect::tryAvoidingSplit( // If the next terminator uses Reg, this means we have // to split right after MI and thus we need a way to ask // which outgoing edges are affected. - assert(!Next->readsRegister(Reg) && "Need to split between terminators"); + assert(!Next->readsRegister(Reg, /*TRI=*/nullptr) && + "Need to split between terminators"); // We will split all the edges and repair there. } else { // This is a virtual register defined by a terminator. @@ -918,19 +919,19 @@ bool RegBankSelect::InstrInsertPoint::isSplit() const { uint64_t RegBankSelect::InstrInsertPoint::frequency(const Pass &P) const { // Even if we need to split, because we insert between terminators, // this split has actually the same frequency as the instruction. 
- const MachineBlockFrequencyInfo *MBFI = - P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>(); - if (!MBFI) + const auto *MBFIWrapper = + P.getAnalysisIfAvailable<MachineBlockFrequencyInfoWrapperPass>(); + if (!MBFIWrapper) return 1; - return MBFI->getBlockFreq(Instr.getParent()).getFrequency(); + return MBFIWrapper->getMBFI().getBlockFreq(Instr.getParent()).getFrequency(); } uint64_t RegBankSelect::MBBInsertPoint::frequency(const Pass &P) const { - const MachineBlockFrequencyInfo *MBFI = - P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>(); - if (!MBFI) + const auto *MBFIWrapper = + P.getAnalysisIfAvailable<MachineBlockFrequencyInfoWrapperPass>(); + if (!MBFIWrapper) return 1; - return MBFI->getBlockFreq(&MBB).getFrequency(); + return MBFIWrapper->getMBFI().getBlockFreq(&MBB).getFrequency(); } void RegBankSelect::EdgeInsertPoint::materialize() { @@ -947,15 +948,18 @@ void RegBankSelect::EdgeInsertPoint::materialize() { } uint64_t RegBankSelect::EdgeInsertPoint::frequency(const Pass &P) const { - const MachineBlockFrequencyInfo *MBFI = - P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>(); - if (!MBFI) + const auto *MBFIWrapper = + P.getAnalysisIfAvailable<MachineBlockFrequencyInfoWrapperPass>(); + if (!MBFIWrapper) return 1; + const auto *MBFI = &MBFIWrapper->getMBFI(); if (WasMaterialized) return MBFI->getBlockFreq(DstOrSplit).getFrequency(); + auto *MBPIWrapper = + P.getAnalysisIfAvailable<MachineBranchProbabilityInfoWrapperPass>(); const MachineBranchProbabilityInfo *MBPI = - P.getAnalysisIfAvailable<MachineBranchProbabilityInfo>(); + MBPIWrapper ? &MBPIWrapper->getMBPI() : nullptr; if (!MBPI) return 1; // The basic block will be on the edge. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp index aed826a9cbc5..c906f3a7c922 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -12,6 +12,7 @@ #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" @@ -28,6 +29,7 @@ #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/Constants.h" @@ -311,13 +313,22 @@ llvm::getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI) { namespace { -typedef std::function<bool(const MachineInstr *)> IsOpcodeFn; -typedef std::function<std::optional<APInt>(const MachineInstr *MI)> GetAPCstFn; - -std::optional<ValueAndVReg> getConstantVRegValWithLookThrough( - Register VReg, const MachineRegisterInfo &MRI, IsOpcodeFn IsConstantOpcode, - GetAPCstFn getAPCstValue, bool LookThroughInstrs = true, - bool LookThroughAnyExt = false) { +// This function is used in many places, and as such, it has some +// micro-optimizations to try and make it as fast as it can be. +// +// - We use template arguments to avoid an indirect call caused by passing a +// function_ref/std::function +// - GetAPCstValue does not return std::optional<APInt> as that's expensive. +// Instead it returns true/false and places the result in a pre-constructed +// APInt. 
+// +// Please change this function carefully and benchmark your changes. +template <bool (*IsConstantOpcode)(const MachineInstr *), + bool (*GetAPCstValue)(const MachineInstr *MI, APInt &)> +std::optional<ValueAndVReg> +getConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, + bool LookThroughInstrs = true, + bool LookThroughAnyExt = false) { SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes; MachineInstr *MI; @@ -351,27 +362,25 @@ std::optional<ValueAndVReg> getConstantVRegValWithLookThrough( if (!MI || !IsConstantOpcode(MI)) return std::nullopt; - std::optional<APInt> MaybeVal = getAPCstValue(MI); - if (!MaybeVal) + APInt Val; + if (!GetAPCstValue(MI, Val)) return std::nullopt; - APInt &Val = *MaybeVal; - while (!SeenOpcodes.empty()) { - std::pair<unsigned, unsigned> OpcodeAndSize = SeenOpcodes.pop_back_val(); - switch (OpcodeAndSize.first) { + for (auto &Pair : reverse(SeenOpcodes)) { + switch (Pair.first) { case TargetOpcode::G_TRUNC: - Val = Val.trunc(OpcodeAndSize.second); + Val = Val.trunc(Pair.second); break; case TargetOpcode::G_ANYEXT: case TargetOpcode::G_SEXT: - Val = Val.sext(OpcodeAndSize.second); + Val = Val.sext(Pair.second); break; case TargetOpcode::G_ZEXT: - Val = Val.zext(OpcodeAndSize.second); + Val = Val.zext(Pair.second); break; } } - return ValueAndVReg{Val, VReg}; + return ValueAndVReg{std::move(Val), VReg}; } bool isIConstant(const MachineInstr *MI) { @@ -393,42 +402,46 @@ bool isAnyConstant(const MachineInstr *MI) { return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_FCONSTANT; } -std::optional<APInt> getCImmAsAPInt(const MachineInstr *MI) { +bool getCImmAsAPInt(const MachineInstr *MI, APInt &Result) { const MachineOperand &CstVal = MI->getOperand(1); - if (CstVal.isCImm()) - return CstVal.getCImm()->getValue(); - return std::nullopt; + if (!CstVal.isCImm()) + return false; + Result = CstVal.getCImm()->getValue(); + return true; } -std::optional<APInt> getCImmOrFPImmAsAPInt(const MachineInstr *MI) { +bool getCImmOrFPImmAsAPInt(const MachineInstr *MI, APInt &Result) { const MachineOperand &CstVal = MI->getOperand(1); if (CstVal.isCImm()) - return CstVal.getCImm()->getValue(); - if (CstVal.isFPImm()) - return CstVal.getFPImm()->getValueAPF().bitcastToAPInt(); - return std::nullopt; + Result = CstVal.getCImm()->getValue(); + else if (CstVal.isFPImm()) + Result = CstVal.getFPImm()->getValueAPF().bitcastToAPInt(); + else + return false; + return true; } } // end anonymous namespace std::optional<ValueAndVReg> llvm::getIConstantVRegValWithLookThrough( Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) { - return getConstantVRegValWithLookThrough(VReg, MRI, isIConstant, - getCImmAsAPInt, LookThroughInstrs); + return getConstantVRegValWithLookThrough<isIConstant, getCImmAsAPInt>( + VReg, MRI, LookThroughInstrs); } std::optional<ValueAndVReg> llvm::getAnyConstantVRegValWithLookThrough( Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs, bool LookThroughAnyExt) { - return getConstantVRegValWithLookThrough( - VReg, MRI, isAnyConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs, - LookThroughAnyExt); + return getConstantVRegValWithLookThrough<isAnyConstant, + getCImmOrFPImmAsAPInt>( + VReg, MRI, LookThroughInstrs, LookThroughAnyExt); } std::optional<FPValueAndVReg> llvm::getFConstantVRegValWithLookThrough( Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) { - auto Reg = getConstantVRegValWithLookThrough( - VReg, MRI, isFConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs); 
+ auto Reg = + getConstantVRegValWithLookThrough<isFConstant, getCImmOrFPImmAsAPInt>( + VReg, MRI, LookThroughInstrs); if (!Reg) return std::nullopt; return FPValueAndVReg{getConstantFPVRegVal(Reg->VReg, MRI)->getValueAPF(), @@ -660,8 +673,11 @@ std::optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, default: break; case TargetOpcode::G_ADD: - case TargetOpcode::G_PTR_ADD: return C1 + C2; + case TargetOpcode::G_PTR_ADD: + // Types can be of different width here. + // Result needs to be the same width as C1, so trunc or sext C2. + return C1 + C2.sextOrTrunc(C1.getBitWidth()); case TargetOpcode::G_AND: return C1 & C2; case TargetOpcode::G_ASHR: @@ -817,6 +833,13 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, case TargetOpcode::G_FREM: case TargetOpcode::G_FSIN: case TargetOpcode::G_FCOS: + case TargetOpcode::G_FTAN: + case TargetOpcode::G_FACOS: + case TargetOpcode::G_FASIN: + case TargetOpcode::G_FATAN: + case TargetOpcode::G_FCOSH: + case TargetOpcode::G_FSINH: + case TargetOpcode::G_FTANH: case TargetOpcode::G_FMA: case TargetOpcode::G_FMAD: if (SNaN) @@ -964,14 +987,15 @@ llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy, Register Src, } std::optional<SmallVector<unsigned>> -llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) { +llvm::ConstantFoldCountZeros(Register Src, const MachineRegisterInfo &MRI, + std::function<unsigned(APInt)> CB) { LLT Ty = MRI.getType(Src); SmallVector<unsigned> FoldedCTLZs; auto tryFoldScalar = [&](Register R) -> std::optional<unsigned> { auto MaybeCst = getIConstantVRegVal(R, MRI); if (!MaybeCst) return std::nullopt; - return MaybeCst->countl_zero(); + return CB(*MaybeCst); }; if (Ty.isVector()) { // Try to constant fold each element. @@ -994,6 +1018,74 @@ llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) { return std::nullopt; } +std::optional<SmallVector<APInt>> +llvm::ConstantFoldICmp(unsigned Pred, const Register Op1, const Register Op2, + const MachineRegisterInfo &MRI) { + LLT Ty = MRI.getType(Op1); + if (Ty != MRI.getType(Op2)) + return std::nullopt; + + auto TryFoldScalar = [&MRI, Pred](Register LHS, + Register RHS) -> std::optional<APInt> { + auto LHSCst = getIConstantVRegVal(LHS, MRI); + auto RHSCst = getIConstantVRegVal(RHS, MRI); + if (!LHSCst || !RHSCst) + return std::nullopt; + + switch (Pred) { + case CmpInst::Predicate::ICMP_EQ: + return APInt(/*numBits=*/1, LHSCst->eq(*RHSCst)); + case CmpInst::Predicate::ICMP_NE: + return APInt(/*numBits=*/1, LHSCst->ne(*RHSCst)); + case CmpInst::Predicate::ICMP_UGT: + return APInt(/*numBits=*/1, LHSCst->ugt(*RHSCst)); + case CmpInst::Predicate::ICMP_UGE: + return APInt(/*numBits=*/1, LHSCst->uge(*RHSCst)); + case CmpInst::Predicate::ICMP_ULT: + return APInt(/*numBits=*/1, LHSCst->ult(*RHSCst)); + case CmpInst::Predicate::ICMP_ULE: + return APInt(/*numBits=*/1, LHSCst->ule(*RHSCst)); + case CmpInst::Predicate::ICMP_SGT: + return APInt(/*numBits=*/1, LHSCst->sgt(*RHSCst)); + case CmpInst::Predicate::ICMP_SGE: + return APInt(/*numBits=*/1, LHSCst->sge(*RHSCst)); + case CmpInst::Predicate::ICMP_SLT: + return APInt(/*numBits=*/1, LHSCst->slt(*RHSCst)); + case CmpInst::Predicate::ICMP_SLE: + return APInt(/*numBits=*/1, LHSCst->sle(*RHSCst)); + default: + return std::nullopt; + } + }; + + SmallVector<APInt> FoldedICmps; + + if (Ty.isVector()) { + // Try to constant fold each element. 
+ auto *BV1 = getOpcodeDef<GBuildVector>(Op1, MRI); + auto *BV2 = getOpcodeDef<GBuildVector>(Op2, MRI); + if (!BV1 || !BV2) + return std::nullopt; + assert(BV1->getNumSources() == BV2->getNumSources() && "Invalid vectors"); + for (unsigned I = 0; I < BV1->getNumSources(); ++I) { + if (auto MaybeFold = + TryFoldScalar(BV1->getSourceReg(I), BV2->getSourceReg(I))) { + FoldedICmps.emplace_back(*MaybeFold); + continue; + } + return std::nullopt; + } + return FoldedICmps; + } + + if (auto MaybeCst = TryFoldScalar(Op1, Op2)) { + FoldedICmps.emplace_back(*MaybeCst); + return FoldedICmps; + } + + return std::nullopt; +} + bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI, GISelKnownBits *KB) { std::optional<DefinitionAndSourceRegister> DefSrcReg = @@ -1071,58 +1163,85 @@ void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) { } LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) { - const unsigned OrigSize = OrigTy.getSizeInBits(); - const unsigned TargetSize = TargetTy.getSizeInBits(); - - if (OrigSize == TargetSize) + if (OrigTy.getSizeInBits() == TargetTy.getSizeInBits()) return OrigTy; - if (OrigTy.isVector()) { - const LLT OrigElt = OrigTy.getElementType(); - - if (TargetTy.isVector()) { - const LLT TargetElt = TargetTy.getElementType(); + if (OrigTy.isVector() && TargetTy.isVector()) { + LLT OrigElt = OrigTy.getElementType(); + LLT TargetElt = TargetTy.getElementType(); - if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) { - int GCDElts = - std::gcd(OrigTy.getNumElements(), TargetTy.getNumElements()); - // Prefer the original element type. - ElementCount Mul = OrigTy.getElementCount() * TargetTy.getNumElements(); - return LLT::vector(Mul.divideCoefficientBy(GCDElts), - OrigTy.getElementType()); - } - } else { - if (OrigElt.getSizeInBits() == TargetSize) - return OrigTy; + // TODO: The docstring for this function says the intention is to use this + // function to build MERGE/UNMERGE instructions. It won't be the case that + // we generate a MERGE/UNMERGE between fixed and scalable vector types. We + // could implement getLCMType between the two in the future if there was a + // need, but it is not worth it now as this function should not be used in + // that way. + assert(((OrigTy.isScalableVector() && !TargetTy.isFixedVector()) || + (OrigTy.isFixedVector() && !TargetTy.isScalableVector())) && + "getLCMType not implemented between fixed and scalable vectors."); + + if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) { + int GCDMinElts = std::gcd(OrigTy.getElementCount().getKnownMinValue(), + TargetTy.getElementCount().getKnownMinValue()); + // Prefer the original element type. + ElementCount Mul = OrigTy.getElementCount().multiplyCoefficientBy( + TargetTy.getElementCount().getKnownMinValue()); + return LLT::vector(Mul.divideCoefficientBy(GCDMinElts), + OrigTy.getElementType()); } - - unsigned LCMSize = std::lcm(OrigSize, TargetSize); - return LLT::fixed_vector(LCMSize / OrigElt.getSizeInBits(), OrigElt); + unsigned LCM = std::lcm(OrigTy.getSizeInBits().getKnownMinValue(), + TargetTy.getSizeInBits().getKnownMinValue()); + return LLT::vector( + ElementCount::get(LCM / OrigElt.getSizeInBits(), OrigTy.isScalable()), + OrigElt); } - if (TargetTy.isVector()) { - unsigned LCMSize = std::lcm(OrigSize, TargetSize); - return LLT::fixed_vector(LCMSize / OrigSize, OrigTy); + // One type is scalar, one type is vector + if (OrigTy.isVector() || TargetTy.isVector()) { + LLT VecTy = OrigTy.isVector() ? OrigTy : TargetTy; + LLT ScalarTy = OrigTy.isVector() ? 
TargetTy : OrigTy; + LLT EltTy = VecTy.getElementType(); + LLT OrigEltTy = OrigTy.isVector() ? OrigTy.getElementType() : OrigTy; + + // Prefer scalar type from OrigTy. + if (EltTy.getSizeInBits() == ScalarTy.getSizeInBits()) + return LLT::vector(VecTy.getElementCount(), OrigEltTy); + + // Different size scalars. Create vector with the same total size. + // LCM will take fixed/scalable from VecTy. + unsigned LCM = std::lcm(EltTy.getSizeInBits().getFixedValue() * + VecTy.getElementCount().getKnownMinValue(), + ScalarTy.getSizeInBits().getFixedValue()); + // Prefer type from OrigTy + return LLT::vector(ElementCount::get(LCM / OrigEltTy.getSizeInBits(), + VecTy.getElementCount().isScalable()), + OrigEltTy); } - unsigned LCMSize = std::lcm(OrigSize, TargetSize); - + // At this point, both types are scalars of different size + unsigned LCM = std::lcm(OrigTy.getSizeInBits().getFixedValue(), + TargetTy.getSizeInBits().getFixedValue()); // Preserve pointer types. - if (LCMSize == OrigSize) + if (LCM == OrigTy.getSizeInBits()) return OrigTy; - if (LCMSize == TargetSize) + if (LCM == TargetTy.getSizeInBits()) return TargetTy; - - return LLT::scalar(LCMSize); + return LLT::scalar(LCM); } LLT llvm::getCoverTy(LLT OrigTy, LLT TargetTy) { + + if ((OrigTy.isScalableVector() && TargetTy.isFixedVector()) || + (OrigTy.isFixedVector() && TargetTy.isScalableVector())) + llvm_unreachable( + "getCoverTy not implemented between fixed and scalable vectors."); + if (!OrigTy.isVector() || !TargetTy.isVector() || OrigTy == TargetTy || (OrigTy.getScalarSizeInBits() != TargetTy.getScalarSizeInBits())) return getLCMType(OrigTy, TargetTy); - unsigned OrigTyNumElts = OrigTy.getNumElements(); - unsigned TargetTyNumElts = TargetTy.getNumElements(); + unsigned OrigTyNumElts = OrigTy.getElementCount().getKnownMinValue(); + unsigned TargetTyNumElts = TargetTy.getElementCount().getKnownMinValue(); if (OrigTyNumElts % TargetTyNumElts == 0) return OrigTy; @@ -1132,45 +1251,56 @@ LLT llvm::getCoverTy(LLT OrigTy, LLT TargetTy) { } LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) { - const unsigned OrigSize = OrigTy.getSizeInBits(); - const unsigned TargetSize = TargetTy.getSizeInBits(); - - if (OrigSize == TargetSize) + if (OrigTy.getSizeInBits() == TargetTy.getSizeInBits()) return OrigTy; - if (OrigTy.isVector()) { + if (OrigTy.isVector() && TargetTy.isVector()) { LLT OrigElt = OrigTy.getElementType(); - if (TargetTy.isVector()) { - LLT TargetElt = TargetTy.getElementType(); - if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) { - int GCD = std::gcd(OrigTy.getNumElements(), TargetTy.getNumElements()); - return LLT::scalarOrVector(ElementCount::getFixed(GCD), OrigElt); - } - } else { - // If the source is a vector of pointers, return a pointer element. - if (OrigElt.getSizeInBits() == TargetSize) - return OrigElt; - } - unsigned GCD = std::gcd(OrigSize, TargetSize); + // TODO: The docstring for this function says the intention is to use this + // function to build MERGE/UNMERGE instructions. It won't be the case that + // we generate a MERGE/UNMERGE between fixed and scalable vector types. We + // could implement getGCDType between the two in the future if there was a + // need, but it is not worth it now as this function should not be used in + // that way. 
+ assert(((OrigTy.isScalableVector() && !TargetTy.isFixedVector()) || + (OrigTy.isFixedVector() && !TargetTy.isScalableVector())) && + "getGCDType not implemented between fixed and scalable vectors."); + + unsigned GCD = std::gcd(OrigTy.getSizeInBits().getKnownMinValue(), + TargetTy.getSizeInBits().getKnownMinValue()); if (GCD == OrigElt.getSizeInBits()) - return OrigElt; + return LLT::scalarOrVector(ElementCount::get(1, OrigTy.isScalable()), + OrigElt); - // If we can't produce the original element type, we have to use a smaller - // scalar. + // Cannot produce original element type, but both have vscale in common. if (GCD < OrigElt.getSizeInBits()) - return LLT::scalar(GCD); - return LLT::fixed_vector(GCD / OrigElt.getSizeInBits(), OrigElt); - } + return LLT::scalarOrVector(ElementCount::get(1, OrigTy.isScalable()), + GCD); - if (TargetTy.isVector()) { - // Try to preserve the original element type. - LLT TargetElt = TargetTy.getElementType(); - if (TargetElt.getSizeInBits() == OrigSize) - return OrigTy; + return LLT::vector( + ElementCount::get(GCD / OrigElt.getSizeInBits().getFixedValue(), + OrigTy.isScalable()), + OrigElt); } - unsigned GCD = std::gcd(OrigSize, TargetSize); + // If one type is vector and the element size matches the scalar size, then + // the gcd is the scalar type. + if (OrigTy.isVector() && + OrigTy.getElementType().getSizeInBits() == TargetTy.getSizeInBits()) + return OrigTy.getElementType(); + if (TargetTy.isVector() && + TargetTy.getElementType().getSizeInBits() == OrigTy.getSizeInBits()) + return OrigTy; + + // At this point, both types are either scalars of different type or one is a + // vector and one is a scalar. If both types are scalars, the GCD type is the + // GCD between the two scalar sizes. If one is vector and one is scalar, then + // the GCD type is the GCD between the scalar and the vector element size. 
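The comments above describe getLCMType and getGCDType in terms of LCMs and GCDs of known-minimum bit sizes. A minimal standalone sketch of that arithmetic, with plain unsigned bit counts standing in for LLT (an assumed simplification of the real API):

    #include <iostream>
    #include <numeric> // std::gcd, std::lcm (C++17)

    int main() {
      // Say OrigTy is <3 x s32> (96 bits) and TargetTy is s64 (64 bits).
      unsigned OrigBits = 96, TargetBits = 64;
      // getLCMType picks a type whose total size is the LCM of the two
      // sizes: lcm(96, 64) = 192 bits, i.e. <6 x s32>, preferring the
      // element type of OrigTy.
      std::cout << "LCM size: " << std::lcm(OrigBits, TargetBits) << " bits\n";
      // With mismatched element/scalar sizes, getGCDType falls back to the
      // GCD of the scalar sizes: gcd(32, 64) = 32 bits, i.e. s32.
      std::cout << "GCD size: " << std::gcd(32u, 64u) << " bits\n";
    }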
+ LLT OrigScalar = OrigTy.getScalarType(); + LLT TargetScalar = TargetTy.getScalarType(); + unsigned GCD = std::gcd(OrigScalar.getSizeInBits().getFixedValue(), + TargetScalar.getSizeInBits().getFixedValue()); return LLT::scalar(GCD); } @@ -1556,3 +1686,294 @@ void llvm::salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI) { } } } + +bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) { + switch (Opc) { + case TargetOpcode::G_FABS: + case TargetOpcode::G_FADD: + case TargetOpcode::G_FCANONICALIZE: + case TargetOpcode::G_FCEIL: + case TargetOpcode::G_FCONSTANT: + case TargetOpcode::G_FCOPYSIGN: + case TargetOpcode::G_FCOS: + case TargetOpcode::G_FDIV: + case TargetOpcode::G_FEXP2: + case TargetOpcode::G_FEXP: + case TargetOpcode::G_FFLOOR: + case TargetOpcode::G_FLOG10: + case TargetOpcode::G_FLOG2: + case TargetOpcode::G_FLOG: + case TargetOpcode::G_FMA: + case TargetOpcode::G_FMAD: + case TargetOpcode::G_FMAXIMUM: + case TargetOpcode::G_FMAXNUM: + case TargetOpcode::G_FMAXNUM_IEEE: + case TargetOpcode::G_FMINIMUM: + case TargetOpcode::G_FMINNUM: + case TargetOpcode::G_FMINNUM_IEEE: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_FNEARBYINT: + case TargetOpcode::G_FNEG: + case TargetOpcode::G_FPEXT: + case TargetOpcode::G_FPOW: + case TargetOpcode::G_FPTRUNC: + case TargetOpcode::G_FREM: + case TargetOpcode::G_FRINT: + case TargetOpcode::G_FSIN: + case TargetOpcode::G_FTAN: + case TargetOpcode::G_FACOS: + case TargetOpcode::G_FASIN: + case TargetOpcode::G_FATAN: + case TargetOpcode::G_FCOSH: + case TargetOpcode::G_FSINH: + case TargetOpcode::G_FTANH: + case TargetOpcode::G_FSQRT: + case TargetOpcode::G_FSUB: + case TargetOpcode::G_INTRINSIC_ROUND: + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: + case TargetOpcode::G_INTRINSIC_TRUNC: + return true; + default: + return false; + } +} + +/// Shifts return poison if shiftwidth is larger than the bitwidth. +static bool shiftAmountKnownInRange(Register ShiftAmount, + const MachineRegisterInfo &MRI) { + LLT Ty = MRI.getType(ShiftAmount); + + if (Ty.isScalableVector()) + return false; // Can't tell, just return false to be safe + + if (Ty.isScalar()) { + std::optional<ValueAndVReg> Val = + getIConstantVRegValWithLookThrough(ShiftAmount, MRI); + if (!Val) + return false; + return Val->Value.ult(Ty.getScalarSizeInBits()); + } + + GBuildVector *BV = getOpcodeDef<GBuildVector>(ShiftAmount, MRI); + if (!BV) + return false; + + unsigned Sources = BV->getNumSources(); + for (unsigned I = 0; I < Sources; ++I) { + std::optional<ValueAndVReg> Val = + getIConstantVRegValWithLookThrough(BV->getSourceReg(I), MRI); + if (!Val) + return false; + if (!Val->Value.ult(Ty.getScalarSizeInBits())) + return false; + } + + return true; +} + +namespace { +enum class UndefPoisonKind { + PoisonOnly = (1 << 0), + UndefOnly = (1 << 1), + UndefOrPoison = PoisonOnly | UndefOnly, +}; +} + +static bool includesPoison(UndefPoisonKind Kind) { + return (unsigned(Kind) & unsigned(UndefPoisonKind::PoisonOnly)) != 0; +} + +static bool includesUndef(UndefPoisonKind Kind) { + return (unsigned(Kind) & unsigned(UndefPoisonKind::UndefOnly)) != 0; +} + +static bool canCreateUndefOrPoison(Register Reg, const MachineRegisterInfo &MRI, + bool ConsiderFlagsAndMetadata, + UndefPoisonKind Kind) { + MachineInstr *RegDef = MRI.getVRegDef(Reg); + + if (ConsiderFlagsAndMetadata && includesPoison(Kind)) + if (auto *GMI = dyn_cast<GenericMachineInstr>(RegDef)) + if (GMI->hasPoisonGeneratingFlags()) + return true; + + // Check whether opcode is a poison/undef-generating operation. 
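UndefPoisonKind above is a scoped enum used as a bitmask, queried through explicit integer casts. A compilable standalone reduction of just that idiom, mirroring the definitions added in this hunk:

    #include <iostream>

    enum class UndefPoisonKind {
      PoisonOnly = (1 << 0),
      UndefOnly = (1 << 1),
      UndefOrPoison = PoisonOnly | UndefOnly,
    };

    static bool includesPoison(UndefPoisonKind Kind) {
      return (unsigned(Kind) & unsigned(UndefPoisonKind::PoisonOnly)) != 0;
    }

    static bool includesUndef(UndefPoisonKind Kind) {
      return (unsigned(Kind) & unsigned(UndefPoisonKind::UndefOnly)) != 0;
    }

    int main() {
      std::cout << includesPoison(UndefPoisonKind::UndefOrPoison) << '\n'; // 1
      std::cout << includesPoison(UndefPoisonKind::UndefOnly) << '\n';     // 0
      std::cout << includesUndef(UndefPoisonKind::UndefOnly) << '\n';      // 1
    }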
+ switch (RegDef->getOpcode()) { + case TargetOpcode::G_BUILD_VECTOR: + case TargetOpcode::G_CONSTANT_FOLD_BARRIER: + return false; + case TargetOpcode::G_SHL: + case TargetOpcode::G_ASHR: + case TargetOpcode::G_LSHR: + return includesPoison(Kind) && + !shiftAmountKnownInRange(RegDef->getOperand(2).getReg(), MRI); + case TargetOpcode::G_FPTOSI: + case TargetOpcode::G_FPTOUI: + // fptosi/ui yields poison if the resulting value does not fit in the + // destination type. + return true; + case TargetOpcode::G_CTLZ: + case TargetOpcode::G_CTTZ: + case TargetOpcode::G_ABS: + case TargetOpcode::G_CTPOP: + case TargetOpcode::G_BSWAP: + case TargetOpcode::G_BITREVERSE: + case TargetOpcode::G_FSHL: + case TargetOpcode::G_FSHR: + case TargetOpcode::G_SMAX: + case TargetOpcode::G_SMIN: + case TargetOpcode::G_UMAX: + case TargetOpcode::G_UMIN: + case TargetOpcode::G_PTRMASK: + case TargetOpcode::G_SADDO: + case TargetOpcode::G_SSUBO: + case TargetOpcode::G_UADDO: + case TargetOpcode::G_USUBO: + case TargetOpcode::G_SMULO: + case TargetOpcode::G_UMULO: + case TargetOpcode::G_SADDSAT: + case TargetOpcode::G_UADDSAT: + case TargetOpcode::G_SSUBSAT: + case TargetOpcode::G_USUBSAT: + return false; + case TargetOpcode::G_SSHLSAT: + case TargetOpcode::G_USHLSAT: + return includesPoison(Kind) && + !shiftAmountKnownInRange(RegDef->getOperand(2).getReg(), MRI); + case TargetOpcode::G_INSERT_VECTOR_ELT: { + GInsertVectorElement *Insert = cast<GInsertVectorElement>(RegDef); + if (includesPoison(Kind)) { + std::optional<ValueAndVReg> Index = + getIConstantVRegValWithLookThrough(Insert->getIndexReg(), MRI); + if (!Index) + return true; + LLT VecTy = MRI.getType(Insert->getVectorReg()); + return Index->Value.uge(VecTy.getElementCount().getKnownMinValue()); + } + return false; + } + case TargetOpcode::G_EXTRACT_VECTOR_ELT: { + GExtractVectorElement *Extract = cast<GExtractVectorElement>(RegDef); + if (includesPoison(Kind)) { + std::optional<ValueAndVReg> Index = + getIConstantVRegValWithLookThrough(Extract->getIndexReg(), MRI); + if (!Index) + return true; + LLT VecTy = MRI.getType(Extract->getVectorReg()); + return Index->Value.uge(VecTy.getElementCount().getKnownMinValue()); + } + return false; + } + case TargetOpcode::G_SHUFFLE_VECTOR: { + GShuffleVector *Shuffle = cast<GShuffleVector>(RegDef); + ArrayRef<int> Mask = Shuffle->getMask(); + return includesPoison(Kind) && is_contained(Mask, -1); + } + case TargetOpcode::G_FNEG: + case TargetOpcode::G_PHI: + case TargetOpcode::G_SELECT: + case TargetOpcode::G_UREM: + case TargetOpcode::G_SREM: + case TargetOpcode::G_FREEZE: + case TargetOpcode::G_ICMP: + case TargetOpcode::G_FCMP: + case TargetOpcode::G_FADD: + case TargetOpcode::G_FSUB: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_FDIV: + case TargetOpcode::G_FREM: + case TargetOpcode::G_PTR_ADD: + return false; + default: + return !isa<GCastOp>(RegDef) && !isa<GBinOp>(RegDef); + } +} + +static bool isGuaranteedNotToBeUndefOrPoison(Register Reg, + const MachineRegisterInfo &MRI, + unsigned Depth, + UndefPoisonKind Kind) { + if (Depth >= MaxAnalysisRecursionDepth) + return false; + + MachineInstr *RegDef = MRI.getVRegDef(Reg); + + switch (RegDef->getOpcode()) { + case TargetOpcode::G_FREEZE: + return true; + case TargetOpcode::G_IMPLICIT_DEF: + return !includesUndef(Kind); + case TargetOpcode::G_CONSTANT: + case TargetOpcode::G_FCONSTANT: + return true; + case TargetOpcode::G_BUILD_VECTOR: { + GBuildVector *BV = cast<GBuildVector>(RegDef); + unsigned NumSources = BV->getNumSources(); + for (unsigned I = 0; I < 
NumSources; ++I)
+      if (!::isGuaranteedNotToBeUndefOrPoison(BV->getSourceReg(I), MRI,
+                                              Depth + 1, Kind))
+        return false;
+    return true;
+  }
+  case TargetOpcode::G_PHI: {
+    GPhi *Phi = cast<GPhi>(RegDef);
+    unsigned NumIncoming = Phi->getNumIncomingValues();
+    for (unsigned I = 0; I < NumIncoming; ++I)
+      if (!::isGuaranteedNotToBeUndefOrPoison(Phi->getIncomingValue(I), MRI,
+                                              Depth + 1, Kind))
+        return false;
+    return true;
+  }
+  default: {
+    auto MOCheck = [&](const MachineOperand &MO) {
+      if (!MO.isReg())
+        return true;
+      return ::isGuaranteedNotToBeUndefOrPoison(MO.getReg(), MRI, Depth + 1,
+                                                Kind);
+    };
+    return !::canCreateUndefOrPoison(Reg, MRI,
+                                     /*ConsiderFlagsAndMetadata=*/true, Kind) &&
+           all_of(RegDef->uses(), MOCheck);
+  }
+  }
+}
+
+bool llvm::canCreateUndefOrPoison(Register Reg, const MachineRegisterInfo &MRI,
+                                  bool ConsiderFlagsAndMetadata) {
+  return ::canCreateUndefOrPoison(Reg, MRI, ConsiderFlagsAndMetadata,
+                                  UndefPoisonKind::UndefOrPoison);
+}
+
+bool canCreatePoison(Register Reg, const MachineRegisterInfo &MRI,
+                     bool ConsiderFlagsAndMetadata = true) {
+  return ::canCreateUndefOrPoison(Reg, MRI, ConsiderFlagsAndMetadata,
+                                  UndefPoisonKind::PoisonOnly);
+}
+
+bool llvm::isGuaranteedNotToBeUndefOrPoison(Register Reg,
+                                            const MachineRegisterInfo &MRI,
+                                            unsigned Depth) {
+  return ::isGuaranteedNotToBeUndefOrPoison(Reg, MRI, Depth,
+                                            UndefPoisonKind::UndefOrPoison);
+}
+
+bool llvm::isGuaranteedNotToBePoison(Register Reg,
+                                     const MachineRegisterInfo &MRI,
+                                     unsigned Depth) {
+  return ::isGuaranteedNotToBeUndefOrPoison(Reg, MRI, Depth,
+                                            UndefPoisonKind::PoisonOnly);
+}
+
+bool llvm::isGuaranteedNotToBeUndef(Register Reg,
+                                    const MachineRegisterInfo &MRI,
+                                    unsigned Depth) {
+  return ::isGuaranteedNotToBeUndefOrPoison(Reg, MRI, Depth,
+                                            UndefPoisonKind::UndefOnly);
+}
+
+Type *llvm::getTypeForLLT(LLT Ty, LLVMContext &C) {
+  if (Ty.isVector())
+    return VectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
+                           Ty.getElementCount());
+  return IntegerType::get(C, Ty.getSizeInBits());
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
index a2b5cbf7bad9..65bf7161441b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -63,6 +63,7 @@
 #include "llvm/CodeGen/GlobalMerge.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -134,6 +135,12 @@ static cl::opt<cl::boolOrDefault>
     EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
                                 cl::desc("Enable global merge pass on external linkage"));
+static cl::opt<unsigned>
+    GlobalMergeMinDataSize("global-merge-min-data-size",
+                           cl::desc("The minimum size in bytes of each global "
+                                    "that should be considered in merging."),
+                           cl::init(0), cl::Hidden);
+
 STATISTIC(NumMerged, "Number of globals merged");
 namespace {
@@ -198,6 +205,19 @@ public:
   }
   bool doInitialization(Module &M) override {
+    auto GetSmallDataLimit = [](Module &M) -> std::optional<uint64_t> {
+      Metadata *SDL = M.getModuleFlag("SmallDataLimit");
+      if (!SDL)
+        return std::nullopt;
+      return mdconst::extract<ConstantInt>(SDL)->getZExtValue();
+    };
+    if (GlobalMergeMinDataSize.getNumOccurrences())
+      Opt.MinSize = GlobalMergeMinDataSize;
+    else if (auto SDL = GetSmallDataLimit(M); SDL && *SDL > 0)
+      Opt.MinSize = *SDL + 1;
+    else
+      Opt.MinSize = 0;
+    GlobalMergeImpl P(TM,
Opt); return P.run(M); } @@ -309,10 +329,9 @@ bool GlobalMergeImpl::doMerge(SmallVectorImpl<GlobalVariable *> &Globals, for (size_t GI = 0, GE = Globals.size(); GI != GE; ++GI) { GlobalVariable *GV = Globals[GI]; - // Reset the encountered sets for this global... - std::fill(EncounteredUGS.begin(), EncounteredUGS.end(), 0); - // ...and grow it in case we created new sets for the previous global. - EncounteredUGS.resize(UsedGlobalSets.size()); + // Reset the encountered sets for this global and grow it in case we created + // new sets for the previous global. + EncounteredUGS.assign(UsedGlobalSets.size(), 0); // We might need to create a set that only consists of the current global. // Keep track of its index into UsedGlobalSets. @@ -623,7 +642,7 @@ bool GlobalMergeImpl::run(Module &M) { IsMachO = Triple(M.getTargetTriple()).isOSBinFormatMachO(); auto &DL = M.getDataLayout(); - DenseMap<std::pair<unsigned, StringRef>, SmallVector<GlobalVariable *, 16>> + MapVector<std::pair<unsigned, StringRef>, SmallVector<GlobalVariable *, 0>> Globals, ConstGlobals, BSSGlobals; bool Changed = false; setMustKeepGlobalVariables(M); @@ -641,7 +660,7 @@ bool GlobalMergeImpl::run(Module &M) { continue; // It's not safe to merge globals that may be preempted - if (TM && !TM->shouldAssumeDSOLocal(M, &GV)) + if (TM && !TM->shouldAssumeDSOLocal(&GV)) continue; if (!(Opt.MergeExternal && GV.hasExternalLinkage()) && @@ -671,7 +690,8 @@ bool GlobalMergeImpl::run(Module &M) { continue; Type *Ty = GV.getValueType(); - if (DL.getTypeAllocSize(Ty) < Opt.MaxOffset) { + TypeSize AllocSize = DL.getTypeAllocSize(Ty); + if (AllocSize < Opt.MaxOffset && AllocSize >= Opt.MinSize) { if (TM && TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSS()) BSSGlobals[{AddressSpace, Section}].push_back(&GV); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp index e7b14d700a44..9205eabcf568 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp @@ -240,7 +240,7 @@ bool HardwareLoopsLegacy::runOnFunction(Function &F) { auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - auto &DL = F.getParent()->getDataLayout(); + auto &DL = F.getDataLayout(); auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); auto *TLI = TLIP ? 
&TLIP->getTLI(F) : nullptr; @@ -275,7 +275,7 @@ PreservedAnalyses HardwareLoopsPass::run(Function &F, auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F); auto &AC = AM.getResult<AssumptionAnalysis>(F); auto *ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F); - auto &DL = F.getParent()->getDataLayout(); + auto &DL = F.getDataLayout(); HardwareLoopsImpl Impl(SE, LI, true, DT, DL, TTI, TLI, AC, ORE, Opts); bool Changed = Impl.run(F); @@ -291,7 +291,7 @@ PreservedAnalyses HardwareLoopsPass::run(Function &F, } bool HardwareLoopsImpl::run(Function &F) { - LLVMContext &Ctx = F.getParent()->getContext(); + LLVMContext &Ctx = F.getContext(); for (Loop *L : LI) if (L->isOutermost()) TryConvertLoop(L, Ctx); @@ -503,6 +503,8 @@ Value *HardwareLoop::InitLoopCount() { Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) { IRBuilder<> Builder(BeginBB->getTerminator()); + if (BeginBB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP)) + Builder.setIsFPConstrained(true); Type *Ty = LoopCountInit->getType(); bool UsePhi = UsePHICounter || Opts.ForcePhi; Intrinsic::ID ID = UseLoopGuard @@ -535,6 +537,9 @@ Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) { void HardwareLoop::InsertLoopDec() { IRBuilder<> CondBuilder(ExitBranch); + if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr( + Attribute::StrictFP)) + CondBuilder.setIsFPConstrained(true); Function *DecFunc = Intrinsic::getDeclaration(M, Intrinsic::loop_decrement, @@ -557,6 +562,9 @@ void HardwareLoop::InsertLoopDec() { Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) { IRBuilder<> CondBuilder(ExitBranch); + if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr( + Attribute::StrictFP)) + CondBuilder.setIsFPConstrained(true); Function *DecFunc = Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg, @@ -572,7 +580,7 @@ PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) { BasicBlock *Preheader = L->getLoopPreheader(); BasicBlock *Header = L->getHeader(); BasicBlock *Latch = ExitBranch->getParent(); - IRBuilder<> Builder(Header->getFirstNonPHI()); + IRBuilder<> Builder(Header, Header->getFirstNonPHIIt()); PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2); Index->addIncoming(NumElts, Preheader); Index->addIncoming(EltsRem, Latch); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp index e8e276a8558d..f3789569b78f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp @@ -209,8 +209,8 @@ namespace { } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<MachineBlockFrequencyInfo>(); - AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); + AU.addRequired<MachineBranchProbabilityInfoWrapperPass>(); AU.addRequired<ProfileSummaryInfoWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -432,7 +432,7 @@ char IfConverter::ID = 0; char &llvm::IfConverterID = IfConverter::ID; INITIALIZE_PASS_BEGIN(IfConverter, DEBUG_TYPE, "If Converter", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(IfConverter, DEBUG_TYPE, "If Converter", false, false) @@ -444,8 +444,9 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TLI = ST.getTargetLowering(); TII 
= ST.getInstrInfo(); TRI = ST.getRegisterInfo(); - MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>()); - MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + MBFIWrapper MBFI( + getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()); + MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI(); ProfileSummaryInfo *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); MRI = &MF.getRegInfo(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp index f7b931a3bdac..05a7387b1232 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp @@ -100,7 +100,7 @@ FunctionPass *llvm::createIndirectBrExpandPass() { } bool runImpl(Function &F, const TargetLowering *TLI, DomTreeUpdater *DTU) { - auto &DL = F.getParent()->getDataLayout(); + auto &DL = F.getDataLayout(); SmallVector<IndirectBrInst *, 1> IndirectBrs; @@ -113,7 +113,7 @@ bool runImpl(Function &F, const TargetLowering *TLI, DomTreeUpdater *DTU) { // Handle the degenerate case of no successors by replacing the indirectbr // with unreachable as there is no successor available. if (IBr->getNumSuccessors() == 0) { - (void)new UnreachableInst(F.getContext(), IBr); + (void)new UnreachableInst(F.getContext(), IBr->getIterator()); IBr->eraseFromParent(); continue; } @@ -183,7 +183,7 @@ bool runImpl(Function &F, const TargetLowering *TLI, DomTreeUpdater *DTU) { for (BasicBlock *SuccBB : IBr->successors()) Updates.push_back({DominatorTree::Delete, IBr->getParent(), SuccBB}); } - (void)new UnreachableInst(F.getContext(), IBr); + (void)new UnreachableInst(F.getContext(), IBr->getIterator()); IBr->eraseFromParent(); } if (DTU) { @@ -207,9 +207,10 @@ bool runImpl(Function &F, const TargetLowering *TLI, DomTreeUpdater *DTU) { } auto GetSwitchValue = [CommonITy](IndirectBrInst *IBr) { - return CastInst::CreatePointerCast( - IBr->getAddress(), CommonITy, - Twine(IBr->getAddress()->getName()) + ".switch_cast", IBr); + return CastInst::CreatePointerCast(IBr->getAddress(), CommonITy, + Twine(IBr->getAddress()->getName()) + + ".switch_cast", + IBr->getIterator()); }; SmallVector<DominatorTree::UpdateType, 8> Updates; @@ -243,7 +244,7 @@ bool runImpl(Function &F, const TargetLowering *TLI, DomTreeUpdater *DTU) { Updates.reserve(IndirectBrs.size() + 2 * IndirectBrSuccs.size()); for (auto *IBr : IndirectBrs) { SwitchPN->addIncoming(GetSwitchValue(IBr), IBr->getParent()); - BranchInst::Create(SwitchBB, IBr); + BranchInst::Create(SwitchBB, IBr->getIterator()); if (DTU) { Updates.push_back({DominatorTree::Insert, IBr->getParent(), SwitchBB}); for (BasicBlock *SuccBB : IBr->successors()) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InitUndef.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InitUndef.cpp new file mode 100644 index 000000000000..51c50ff872ef --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/InitUndef.cpp @@ -0,0 +1,277 @@ +//===- InitUndef.cpp - Initialize undef value to pseudo ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function pass that initializes undef values with
+// temporary pseudo instructions, to prevent register allocation from
+// producing a constraint-violating result for the affected instruction. It
+// also rewrites NoReg tied operands back to IMPLICIT_DEF.
+//
+// Certain instructions have register-overlap constraints and trap with an
+// illegal-instruction fault if those constraints are violated. We model the
+// constraint with early-clobber operands, but early-clobber cannot stop the
+// register allocator from assigning the same (or an overlapping) register
+// when the input register is an undef value. Converting the IMPLICIT_DEF to
+// a temporary pseudo instruction, and removing it later, prevents that. This
+// is not the best way to resolve the issue, and it may change program order
+// or increase register pressure, so ideally we should model the constraint
+// correctly; until we do, this is the only way to prevent the problem.
+//
+// When the subregister liveness option is enabled, the same issue is also
+// triggered when only part of a register is undef. Pseudo-initializing the
+// whole register would generate redundant COPY instructions, so the pass
+// currently generates INSERT_SUBREG instead to make sure the whole register
+// is defined when the program encounters an operation with an early-clobber
+// constraint.
+//
+// See also: https://github.com/llvm/llvm-project/issues/50157
+//
+// Additionally, this pass rewrites tied operands of instructions from NoReg
+// to IMPLICIT_DEF. (Note that this is a non-overlapping set of operands to
+// the above.) We use NoReg to side-step a MachineCSE optimization quality
+// problem but need to convert back before TwoAddressInstruction. See pr64282
+// for context.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/DetectDeadLanes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" +#include "llvm/MC/MCRegister.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "init-undef" +#define INIT_UNDEF_NAME "Init Undef Pass" + +namespace { + +class InitUndef : public MachineFunctionPass { + const TargetInstrInfo *TII; + MachineRegisterInfo *MRI; + const TargetSubtargetInfo *ST; + const TargetRegisterInfo *TRI; + + // Newly added vregs, assumed to be fully rewritten + SmallSet<Register, 8> NewRegs; + SmallVector<MachineInstr *, 8> DeadInsts; + +public: + static char ID; + + InitUndef() : MachineFunctionPass(ID) {} + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { return INIT_UNDEF_NAME; } + +private: + bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB, + const DeadLaneDetector &DLD); + bool handleSubReg(MachineFunction &MF, MachineInstr &MI, + const DeadLaneDetector &DLD); + bool fixupIllOperand(MachineInstr *MI, MachineOperand &MO); + bool handleReg(MachineInstr *MI); +}; + +} // end anonymous namespace + +char InitUndef::ID = 0; +INITIALIZE_PASS(InitUndef, DEBUG_TYPE, INIT_UNDEF_NAME, false, false) +char &llvm::InitUndefID = InitUndef::ID; + +static bool isEarlyClobberMI(MachineInstr &MI) { + return llvm::any_of(MI.defs(), [](const MachineOperand &DefMO) { + return DefMO.isReg() && DefMO.isEarlyClobber(); + }); +} + +static bool findImplictDefMIFromReg(Register Reg, MachineRegisterInfo *MRI) { + for (auto &DefMI : MRI->def_instructions(Reg)) { + if (DefMI.getOpcode() == TargetOpcode::IMPLICIT_DEF) + return true; + } + return false; +} + +bool InitUndef::handleReg(MachineInstr *MI) { + bool Changed = false; + for (auto &UseMO : MI->uses()) { + if (!UseMO.isReg()) + continue; + if (UseMO.isTied()) + continue; + if (!UseMO.getReg().isVirtual()) + continue; + if (!TRI->doesRegClassHavePseudoInitUndef(MRI->getRegClass(UseMO.getReg()))) + continue; + + if (UseMO.isUndef() || findImplictDefMIFromReg(UseMO.getReg(), MRI)) + Changed |= fixupIllOperand(MI, UseMO); + } + return Changed; +} + +bool InitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI, + const DeadLaneDetector &DLD) { + bool Changed = false; + + for (MachineOperand &UseMO : MI.uses()) { + if (!UseMO.isReg()) + continue; + if (!UseMO.getReg().isVirtual()) + continue; + if (UseMO.isTied()) + continue; + if (!TRI->doesRegClassHavePseudoInitUndef(MRI->getRegClass(UseMO.getReg()))) + continue; + + Register Reg = UseMO.getReg(); + if (NewRegs.count(Reg)) + continue; + DeadLaneDetector::VRegInfo Info = + DLD.getVRegInfo(Register::virtReg2Index(Reg)); + + if (Info.UsedLanes == Info.DefinedLanes) + continue; + + const TargetRegisterClass *TargetRegClass = + TRI->getLargestSuperClass(MRI->getRegClass(Reg)); + + LaneBitmask NeedDef = Info.UsedLanes & ~Info.DefinedLanes; + + LLVM_DEBUG({ + dbgs() << "Instruction has undef subregister.\n"; + dbgs() << printReg(Reg, 
nullptr) + << " Used: " << PrintLaneMask(Info.UsedLanes) + << " Def: " << PrintLaneMask(Info.DefinedLanes) + << " Need Def: " << PrintLaneMask(NeedDef) << "\n"; + }); + + SmallVector<unsigned> SubRegIndexNeedInsert; + TRI->getCoveringSubRegIndexes(*MRI, TargetRegClass, NeedDef, + SubRegIndexNeedInsert); + + Register LatestReg = Reg; + for (auto ind : SubRegIndexNeedInsert) { + Changed = true; + const TargetRegisterClass *SubRegClass = TRI->getLargestSuperClass( + TRI->getSubRegisterClass(TargetRegClass, ind)); + Register TmpInitSubReg = MRI->createVirtualRegister(SubRegClass); + LLVM_DEBUG(dbgs() << "Register Class ID" << SubRegClass->getID() << "\n"); + BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), + TII->get(TII->getUndefInitOpcode(SubRegClass->getID())), + TmpInitSubReg); + Register NewReg = MRI->createVirtualRegister(TargetRegClass); + BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), + TII->get(TargetOpcode::INSERT_SUBREG), NewReg) + .addReg(LatestReg) + .addReg(TmpInitSubReg) + .addImm(ind); + LatestReg = NewReg; + } + + UseMO.setReg(LatestReg); + } + + return Changed; +} + +bool InitUndef::fixupIllOperand(MachineInstr *MI, MachineOperand &MO) { + + LLVM_DEBUG( + dbgs() << "Emitting PseudoInitUndef Instruction for implicit register " + << MO.getReg() << '\n'); + + const TargetRegisterClass *TargetRegClass = + TRI->getLargestSuperClass(MRI->getRegClass(MO.getReg())); + LLVM_DEBUG(dbgs() << "Register Class ID" << TargetRegClass->getID() << "\n"); + unsigned Opcode = TII->getUndefInitOpcode(TargetRegClass->getID()); + Register NewReg = MRI->createVirtualRegister(TargetRegClass); + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(Opcode), NewReg); + MO.setReg(NewReg); + if (MO.isUndef()) + MO.setIsUndef(false); + return true; +} + +bool InitUndef::processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB, + const DeadLaneDetector &DLD) { + bool Changed = false; + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { + MachineInstr &MI = *I; + + // If we used NoReg to represent the passthru, switch this back to being + // an IMPLICIT_DEF before TwoAddressInstructions. + unsigned UseOpIdx; + if (MI.getNumDefs() != 0 && MI.isRegTiedToUseOperand(0, &UseOpIdx)) { + MachineOperand &UseMO = MI.getOperand(UseOpIdx); + if (UseMO.getReg() == MCRegister::NoRegister) { + const TargetRegisterClass *RC = + TII->getRegClass(MI.getDesc(), UseOpIdx, TRI, MF); + Register NewDest = MRI->createVirtualRegister(RC); + // We don't have a way to update dead lanes, so keep track of the + // new register so that we avoid querying it later. + NewRegs.insert(NewDest); + BuildMI(MBB, I, I->getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), + NewDest); + UseMO.setReg(NewDest); + Changed = true; + } + } + + if (isEarlyClobberMI(MI)) { + if (MRI->subRegLivenessEnabled()) + Changed |= handleSubReg(MF, MI, DLD); + Changed |= handleReg(&MI); + } + } + return Changed; +} + +bool InitUndef::runOnMachineFunction(MachineFunction &MF) { + ST = &MF.getSubtarget(); + + // supportsInitUndef is implemented to reflect if an architecture has support + // for the InitUndef pass. Support comes from having the relevant Pseudo + // instructions that can be used to initialize the register. The function + // returns false by default so requires an implementation per architecture. + // Support can be added by overriding the function in a way that best fits + // the architecture. 
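The supportsInitUndef() hook described in the comment above is a pure opt-in: the base implementation returns false, and a target that provides the needed pseudo instructions overrides it. A standalone mock of that control flow (both class names here are hypothetical stand-ins, not LLVM's):

    #include <iostream>

    // Stand-in for TargetSubtargetInfo: the hook defaults to "no support".
    struct SubtargetMock {
      virtual ~SubtargetMock() = default;
      virtual bool supportsInitUndef() const { return false; }
    };

    // A target with suitable pseudo instructions opts in by overriding it.
    struct OptedInSubtargetMock : SubtargetMock {
      bool supportsInitUndef() const override { return true; }
    };

    int main() {
      OptedInSubtargetMock ST;
      // Mirrors the early-out at the top of InitUndef::runOnMachineFunction.
      if (!ST.supportsInitUndef())
        return 0;
      std::cout << "InitUndef runs for this target\n";
    }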
+ if (!ST->supportsInitUndef()) + return false; + + MRI = &MF.getRegInfo(); + TII = ST->getInstrInfo(); + TRI = MRI->getTargetRegisterInfo(); + + bool Changed = false; + DeadLaneDetector DLD(MRI, TRI); + DLD.computeSubRegisterLaneBitInfo(); + + for (MachineBasicBlock &BB : MF) + Changed |= processBasicBlock(MF, BB, DLD); + + for (auto *DeadMI : DeadInsts) + DeadMI->eraseFromParent(); + DeadInsts.clear(); + + return Changed; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp index c46b1fe18ca7..81ae805d64e1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp @@ -70,8 +70,6 @@ STATISTIC(NumFolded, "Number of folded stack accesses"); STATISTIC(NumFoldedLoads, "Number of folded loads"); STATISTIC(NumRemats, "Number of rematerialized defs for spilling"); -static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden, - cl::desc("Disable inline spill hoisting")); static cl::opt<bool> RestrictStatepointRemat("restrict-statepoint-remat", cl::init(false), cl::Hidden, @@ -133,12 +131,13 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate { public: HoistSpillHelper(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) - : MF(mf), LIS(pass.getAnalysis<LiveIntervals>()), + : MF(mf), LIS(pass.getAnalysis<LiveIntervalsWrapperPass>().getLIS()), LSS(pass.getAnalysis<LiveStacks>()), - MDT(pass.getAnalysis<MachineDominatorTree>()), VRM(vrm), - MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()), + MDT(pass.getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()), + VRM(vrm), MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()), TRI(*mf.getSubtarget().getRegisterInfo()), - MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()), + MBFI( + pass.getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()), IPA(LIS, mf.getNumBlockIDs()) {} void addToMergeableSpills(MachineInstr &Spill, int StackSlot, @@ -190,12 +189,13 @@ class InlineSpiller : public Spiller { public: InlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF, VirtRegMap &VRM, VirtRegAuxInfo &VRAI) - : MF(MF), LIS(Pass.getAnalysis<LiveIntervals>()), + : MF(MF), LIS(Pass.getAnalysis<LiveIntervalsWrapperPass>().getLIS()), LSS(Pass.getAnalysis<LiveStacks>()), - MDT(Pass.getAnalysis<MachineDominatorTree>()), VRM(VRM), - MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()), + MDT(Pass.getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()), + VRM(VRM), MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()), TRI(*MF.getSubtarget().getRegisterInfo()), - MBFI(Pass.getAnalysis<MachineBlockFrequencyInfo>()), + MBFI( + Pass.getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()), HSpiller(Pass, MF, VRM), VRAI(VRAI) {} void spill(LiveRangeEdit &) override; @@ -869,7 +869,7 @@ static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B, // destination that is marked as an early clobber, print the // early-clobber slot index. if (VReg) { - MachineOperand *MO = I->findRegisterDefOperand(VReg); + MachineOperand *MO = I->findRegisterDefOperand(VReg, /*TRI=*/nullptr); if (MO && MO->isEarlyClobber()) Idx = Idx.getRegSlot(true); } @@ -1381,7 +1381,7 @@ void HoistSpillHelper::rmRedundantSpills( // earlier spill with smaller SlotIndex. 
for (auto *const CurrentSpill : Spills) { MachineBasicBlock *Block = CurrentSpill->getParent(); - MachineDomTreeNode *Node = MDT.getBase().getNode(Block); + MachineDomTreeNode *Node = MDT.getNode(Block); MachineInstr *PrevSpill = SpillBBToSpill[Node]; if (PrevSpill) { SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill); @@ -1389,9 +1389,9 @@ void HoistSpillHelper::rmRedundantSpills( MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill; MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill : CurrentSpill; SpillsToRm.push_back(SpillToRm); - SpillBBToSpill[MDT.getBase().getNode(Block)] = SpillToKeep; + SpillBBToSpill[MDT.getNode(Block)] = SpillToKeep; } else { - SpillBBToSpill[MDT.getBase().getNode(Block)] = CurrentSpill; + SpillBBToSpill[MDT.getNode(Block)] = CurrentSpill; } } for (auto *const SpillToRm : SpillsToRm) @@ -1465,7 +1465,7 @@ void HoistSpillHelper::getVisitOrders( // Sort the nodes in WorkSet in top-down order and save the nodes // in Orders. Orders will be used for hoisting in runHoistSpills. unsigned idx = 0; - Orders.push_back(MDT.getBase().getNode(Root)); + Orders.push_back(MDT.getNode(Root)); do { MachineDomTreeNode *Node = Orders[idx++]; for (MachineDomTreeNode *Child : Node->children()) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp index ae197ee5553a..fb76f44c2501 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp @@ -136,14 +136,12 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { // Use advanceTo only when possible. if (PrevPos != Start) { if (!PrevPos.isValid() || Start < PrevPos) { - for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { - RegUnitInfo &RUI = RegUnits[i]; + for (RegUnitInfo &RUI : RegUnits) { RUI.VirtI.find(Start); RUI.FixedI = RUI.Fixed->find(Start); } } else { - for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { - RegUnitInfo &RUI = RegUnits[i]; + for (RegUnitInfo &RUI : RegUnits) { RUI.VirtI.advanceTo(Start); if (RUI.FixedI != RUI.Fixed->end()) RUI.FixedI = RUI.Fixed->advanceTo(RUI.FixedI, Start); @@ -162,8 +160,8 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { BI->First = BI->Last = SlotIndex(); // Check for first interference from virtregs. - for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { - LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI; + for (RegUnitInfo &RUI : RegUnits) { + LiveIntervalUnion::SegmentIter &I = RUI.VirtI; if (!I.valid()) continue; SlotIndex StartI = I.start(); @@ -174,9 +172,9 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { } // Same thing for fixed interference. - for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { - LiveInterval::const_iterator I = RegUnits[i].FixedI; - LiveInterval::const_iterator E = RegUnits[i].Fixed->end(); + for (RegUnitInfo &RUI : RegUnits) { + LiveInterval::const_iterator I = RUI.FixedI; + LiveInterval::const_iterator E = RUI.Fixed->end(); if (I == E) continue; SlotIndex StartI = I->start; @@ -213,8 +211,8 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { } // Check for last interference in block. 
- for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { - LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI; + for (RegUnitInfo &RUI : RegUnits) { + LiveIntervalUnion::SegmentIter &I = RUI.VirtI; if (!I.valid() || I.start() >= Stop) continue; I.advanceTo(Stop); @@ -229,9 +227,9 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { } // Fixed interference. - for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { - LiveInterval::iterator &I = RegUnits[i].FixedI; - LiveRange *LR = RegUnits[i].Fixed; + for (RegUnitInfo &RUI : RegUnits) { + LiveInterval::iterator &I = RUI.FixedI; + LiveRange *LR = RUI.Fixed; if (I == LR->end() || I->start >= Stop) continue; I = LR->advanceTo(I, Stop); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 2a0daf404c97..8c9065aec7fa 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -200,28 +200,6 @@ FunctionPass *llvm::createInterleavedAccessPass() { return new InterleavedAccess(); } -/// Check if the mask is a DE-interleave mask of the given factor -/// \p Factor like: -/// <Index, Index+Factor, ..., Index+(NumElts-1)*Factor> -static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor, - unsigned &Index) { - // Check all potential start indices from 0 to (Factor - 1). - for (Index = 0; Index < Factor; Index++) { - unsigned i = 0; - - // Check that elements are in ascending order by Factor. Ignore undef - // elements. - for (; i < Mask.size(); i++) - if (Mask[i] >= 0 && static_cast<unsigned>(Mask[i]) != Index + i * Factor) - break; - - if (i == Mask.size()) - return true; - } - - return false; -} - /// Check if the mask is a DE-interleave mask for an interleaved load. /// /// E.g. DE-interleave masks (Factor = 2) could be: @@ -238,7 +216,7 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor, // Make sure we don't produce a load wider than the input load. 
if (Mask.size() * Factor > NumLoadElements) return false; - if (isDeInterleaveMaskOfFactor(Mask, Factor, Index)) + if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index)) return true; } @@ -333,8 +311,8 @@ bool InterleavedAccessImpl::lowerInterleavedLoad( for (auto *Shuffle : Shuffles) { if (Shuffle->getType() != VecTy) return false; - if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor, - Index)) + if (!ShuffleVectorInst::isDeInterleaveMaskOfFactor( + Shuffle->getShuffleMask(), Factor, Index)) return false; assert(Shuffle->getShuffleMask().size() <= NumLoadElements); @@ -343,8 +321,8 @@ bool InterleavedAccessImpl::lowerInterleavedLoad( for (auto *Shuffle : BinOpShuffles) { if (Shuffle->getType() != VecTy) return false; - if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor, - Index)) + if (!ShuffleVectorInst::isDeInterleaveMaskOfFactor( + Shuffle->getShuffleMask(), Factor, Index)) return false; assert(Shuffle->getShuffleMask().size() <= NumLoadElements); @@ -388,14 +366,15 @@ bool InterleavedAccessImpl::replaceBinOpShuffles( return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements(); })); + BasicBlock::iterator insertPos = SVI->getIterator(); auto *NewSVI1 = new ShuffleVectorInst(BI->getOperand(0), PoisonValue::get(BIOp0Ty), - Mask, SVI->getName(), SVI); + Mask, SVI->getName(), insertPos); auto *NewSVI2 = new ShuffleVectorInst( BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask, - SVI->getName(), SVI); + SVI->getName(), insertPos); BinaryOperator *NewBI = BinaryOperator::CreateWithCopiedFlags( - BI->getOpcode(), NewSVI1, NewSVI2, BI, BI->getName(), SVI); + BI->getOpcode(), NewSVI1, NewSVI2, BI, BI->getName(), insertPos); SVI->replaceAllUsesWith(NewBI); LLVM_DEBUG(dbgs() << " Replaced: " << *BI << "\n And : " << *SVI << "\n With : " << *NewSVI1 << "\n And : " @@ -556,9 +535,9 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) { if (auto *II = dyn_cast<IntrinsicInst>(&I)) { // At present, we only have intrinsics to represent (de)interleaving // with a factor of 2. - if (II->getIntrinsicID() == Intrinsic::experimental_vector_deinterleave2) + if (II->getIntrinsicID() == Intrinsic::vector_deinterleave2) Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts); - if (II->getIntrinsicID() == Intrinsic::experimental_vector_interleave2) + if (II->getIntrinsicID() == Intrinsic::vector_interleave2) Changed |= lowerInterleaveIntrinsic(II, DeadInsts); } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp index bbb0b654dc67..10208bb91799 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -64,10 +64,10 @@ struct VectorInfo; struct InterleavedLoadCombineImpl { public: InterleavedLoadCombineImpl(Function &F, DominatorTree &DT, MemorySSA &MSSA, + const TargetTransformInfo &TTI, const TargetMachine &TM) : F(F), DT(DT), MSSA(MSSA), - TLI(*TM.getSubtargetImpl(F)->getTargetLowering()), - TTI(TM.getTargetTransformInfo(F)) {} + TLI(*TM.getSubtargetImpl(F)->getTargetLowering()), TTI(TTI) {} /// Scan the function for interleaved load candidates and execute the /// replacement if applicable. 
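isDeInterleaveMaskOfFactor, which the InterleavedAccessPass.cpp hunks above relocate from a static helper into ShuffleVectorInst, accepts a mask whose defined elements step by Factor from some start Index. A standalone sketch that reproduces the deleted helper's logic together with a small driver:

    #include <iostream>
    #include <vector>

    // Logic reproduced from the helper removed above; negative mask
    // entries stand for undef lanes and are ignored.
    static bool isDeInterleaveMaskOfFactor(const std::vector<int> &Mask,
                                           unsigned Factor, unsigned &Index) {
      // Check all potential start indices from 0 to (Factor - 1).
      for (Index = 0; Index < Factor; Index++) {
        unsigned i = 0;
        // Elements must ascend by Factor from Index.
        for (; i < Mask.size(); i++)
          if (Mask[i] >= 0 &&
              static_cast<unsigned>(Mask[i]) != Index + i * Factor)
            break;
        if (i == Mask.size())
          return true;
      }
      return false;
    }

    int main() {
      unsigned Index;
      // <1, 3, 5, 7> extracts the odd lanes of a factor-2 interleaving.
      std::vector<int> Mask = {1, 3, 5, 7};
      if (isDeInterleaveMaskOfFactor(Mask, /*Factor=*/2, Index))
        std::cout << "de-interleave mask, start index " << Index << '\n';
    }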
@@ -87,7 +87,7 @@ private: const TargetLowering &TLI; /// Target Transform Information - const TargetTransformInfo TTI; + const TargetTransformInfo &TTI; /// Find the instruction in sets LIs that dominates all others, return nullptr /// if there is none. @@ -893,7 +893,7 @@ public: ConstantInt::get(Type::getInt32Ty(LI->getContext()), 0), ConstantInt::get(Type::getInt32Ty(LI->getContext()), i), }; - int64_t Ofs = DL.getIndexedOffsetInType(Result.VTy, ArrayRef(Idx, 2)); + int64_t Ofs = DL.getIndexedOffsetInType(Result.VTy, Idx); Result.EI[i] = ElementInfo(Offset + Ofs, i == 0 ? LI : nullptr); } @@ -1256,7 +1256,7 @@ bool InterleavedLoadCombineImpl::run() { bool changed = false; unsigned MaxFactor = TLI.getMaxSupportedInterleaveFactor(); - auto &DL = F.getParent()->getDataLayout(); + auto &DL = F.getDataLayout(); // Start with the highest factor to avoid combining and recombining. for (unsigned Factor = MaxFactor; Factor >= 2; Factor--) { @@ -1329,6 +1329,7 @@ struct InterleavedLoadCombine : public FunctionPass { return InterleavedLoadCombineImpl( F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(), getAnalysis<MemorySSAWrapperPass>().getMSSA(), + getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F), TPC->getTM<TargetMachine>()) .run(); } @@ -1336,6 +1337,7 @@ struct InterleavedLoadCombine : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MemorySSAWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); FunctionPass::getAnalysisUsage(AU); } @@ -1348,7 +1350,8 @@ InterleavedLoadCombinePass::run(Function &F, FunctionAnalysisManager &FAM) { auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); auto &MemSSA = FAM.getResult<MemorySSAAnalysis>(F).getMSSA(); - bool Changed = InterleavedLoadCombineImpl(F, DT, MemSSA, *TM).run(); + auto &TTI = FAM.getResult<TargetIRAnalysis>(F); + bool Changed = InterleavedLoadCombineImpl(F, DT, MemSSA, TTI, *TM).run(); return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); } @@ -1360,6 +1363,7 @@ INITIALIZE_PASS_BEGIN( false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END( InterleavedLoadCombine, DEBUG_TYPE, "Combine interleaved loads into wide loads and shufflevector instructions", diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp index 61920a0e04ab..45fba4341ad0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -243,6 +243,11 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } + case Intrinsic::allow_runtime_check: + case Intrinsic::allow_ubsan_check: + CI->replaceAllUsesWith(ConstantInt::getTrue(CI->getType())); + return; + case Intrinsic::ctpop: CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI)); break; @@ -312,6 +317,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0)); break; } + case Intrinsic::readsteadycounter: { + errs() << "WARNING: this target does not support the llvm.readsteadycounter" + << " intrinsic. 
It is being lowered to a constant 0\n"; + CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0)); + break; + } case Intrinsic::dbg_declare: case Intrinsic::dbg_label: @@ -466,7 +477,7 @@ bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) { Function *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty); Value *Op = CI->getArgOperand(0); - Op = CallInst::Create(Int, Op, CI->getName(), CI); + Op = CallInst::Create(Int, Op, CI->getName(), CI->getIterator()); CI->replaceAllUsesWith(Op); CI->eraseFromParent(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp index 62a381918875..e2aaebedf5a4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp @@ -227,7 +227,7 @@ bool runImpl(Module &M) { // FIXME: it would be nice to make CI scheduling boundary, although in // practice it does not matter much. auto *CI = CallInst::Create(getCheckFunctionType(Ctx), CheckFunction, - {Flag}, "", &*F.begin()->getFirstInsertionPt()); + {Flag}, "", F.begin()->getFirstInsertionPt()); CI->addParamAttr(0, Attribute::NoUndef); if (UseX86FastCall) { CI->setCallingConv(CallingConv::X86_FastCall); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/KCFI.cpp b/contrib/llvm-project/llvm/lib/CodeGen/KCFI.cpp index bffa02ca8afd..af19319bc1bb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/KCFI.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/KCFI.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" using namespace llvm; @@ -88,7 +89,7 @@ bool KCFI::emitCheck(MachineBasicBlock &MBB, } bool KCFI::runOnMachineFunction(MachineFunction &MF) { - const Module *M = MF.getMMI().getModule(); + const Module *M = MF.getFunction().getParent(); if (!M->getModuleFlag("kcfi")) return false; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp index 42cabb58e518..d0dfafeaef56 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -77,10 +77,6 @@ void LLVMTargetMachine::initAsmInfo() { TmpAsmInfo->setPreserveAsmComments(Options.MCOptions.PreserveAsmComments); - TmpAsmInfo->setCompressDebugSections(Options.CompressDebugSections); - - TmpAsmInfo->setRelaxELFRelocations(Options.RelaxELFRelocations); - TmpAsmInfo->setFullRegisterNames(Options.MCOptions.PPCUseFullRegisterNames); if (Options.ExceptionModel != ExceptionHandling::None) @@ -154,9 +150,6 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer( raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, CodeGenFileType FileType, MCContext &Context) { - if (Options.MCOptions.MCSaveTempLabels) - Context.setAllowTemporaryLabels(false); - const MCSubtargetInfo &STI = *getMCSubtargetInfo(); const MCAsmInfo &MAI = *getMCAsmInfo(); const MCRegisterInfo &MRI = *getMCRegisterInfo(); @@ -174,26 +167,11 @@ Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer( if (Options.MCOptions.ShowMCEncoding) MCE.reset(getTarget().createMCCodeEmitter(MII, Context)); - bool UseDwarfDirectory = false; - switch (Options.MCOptions.MCUseDwarfDirectory) { - case MCTargetOptions::DisableDwarfDirectory: - UseDwarfDirectory 
= false; - break; - case MCTargetOptions::EnableDwarfDirectory: - UseDwarfDirectory = true; - break; - case MCTargetOptions::DefaultDwarfDirectory: - UseDwarfDirectory = MAI.enableDwarfFileDirectoryDefault(); - break; - } - std::unique_ptr<MCAsmBackend> MAB( getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions)); auto FOut = std::make_unique<formatted_raw_ostream>(Out); MCStreamer *S = getTarget().createAsmStreamer( - Context, std::move(FOut), Options.MCOptions.AsmVerbose, - UseDwarfDirectory, InstPrinter, std::move(MCE), std::move(MAB), - Options.MCOptions.ShowMCInst); + Context, std::move(FOut), InstPrinter, std::move(MCE), std::move(MAB)); AsmStreamer.reset(S); break; } @@ -215,9 +193,7 @@ Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer( T, Context, std::unique_ptr<MCAsmBackend>(MAB), DwoOut ? MAB->createDwoObjectWriter(Out, *DwoOut) : MAB->createObjectWriter(Out), - std::unique_ptr<MCCodeEmitter>(MCE), STI, Options.MCOptions.MCRelaxAll, - Options.MCOptions.MCIncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ true)); + std::unique_ptr<MCCodeEmitter>(MCE), STI)); break; } case CodeGenFileType::Null: @@ -276,8 +252,6 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, // libunwind is unable to load compact unwind dynamically, so we must generate // DWARF unwind info for the JIT. Options.MCOptions.EmitDwarfUnwind = EmitDwarfUnwindType::Always; - if (Options.MCOptions.MCSaveTempLabels) - Ctx->setAllowTemporaryLabels(false); // Create the code emitter for the target if it exists. If not, .o file // emission fails. @@ -285,17 +259,15 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, const MCRegisterInfo &MRI = *getMCRegisterInfo(); std::unique_ptr<MCCodeEmitter> MCE( getTarget().createMCCodeEmitter(*getMCInstrInfo(), *Ctx)); - std::unique_ptr<MCAsmBackend> MAB( - getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions)); + MCAsmBackend *MAB = + getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions); if (!MCE || !MAB) return true; const Triple &T = getTargetTriple(); std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer( - T, *Ctx, std::move(MAB), MAB->createObjectWriter(Out), std::move(MCE), - STI, Options.MCOptions.MCRelaxAll, - Options.MCOptions.MCIncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ true)); + T, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), MAB->createObjectWriter(Out), + std::move(MCE), STI)); // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. 
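Note: both streamer-construction paths above drop their per-call option arguments (MCSaveTempLabels, the UseDwarfDirectory switch, MCRelaxAll, MCIncrementalLinkerCompatible, DWARFMustBeAtTheEnd); those knobs now travel inside MCTargetOptions. The construction collapses to the shape below, condensed from the addPassesToEmitMC hunk (a sketch, not a drop-in; error handling omitted):

  // Keep a raw MCAsmBackend pointer so the object writer can be created
  // before ownership passes into the streamer.
  MCAsmBackend *MAB =
      getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions);
  std::unique_ptr<MCStreamer> S(getTarget().createMCObjectStreamer(
      T, *Ctx, std::unique_ptr<MCAsmBackend>(MAB),
      MAB->createObjectWriter(Out), std::move(MCE), STI));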
FunctionPass *Printer = diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp index 39b44b917d9e..2561f2e5c9bb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp @@ -23,8 +23,8 @@ using namespace llvm; INITIALIZE_PASS_BEGIN(LazyMachineBlockFrequencyInfoPass, DEBUG_TYPE, "Lazy Machine Block Frequency Analysis", true, true) -INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_END(LazyMachineBlockFrequencyInfoPass, DEBUG_TYPE, "Lazy Machine Block Frequency Analysis", true, true) @@ -36,14 +36,9 @@ LazyMachineBlockFrequencyInfoPass::LazyMachineBlockFrequencyInfoPass() *PassRegistry::getPassRegistry()); } -void LazyMachineBlockFrequencyInfoPass::print(raw_ostream &OS, - const Module *M) const { - getBFI().print(OS, M); -} - void LazyMachineBlockFrequencyInfoPass::getAnalysisUsage( AnalysisUsage &AU) const { - AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineBranchProbabilityInfoWrapperPass>(); AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -56,15 +51,18 @@ void LazyMachineBlockFrequencyInfoPass::releaseMemory() { MachineBlockFrequencyInfo & LazyMachineBlockFrequencyInfoPass::calculateIfNotAvailable() const { - auto *MBFI = getAnalysisIfAvailable<MachineBlockFrequencyInfo>(); - if (MBFI) { + auto *MBFIWrapper = + getAnalysisIfAvailable<MachineBlockFrequencyInfoWrapperPass>(); + if (MBFIWrapper) { LLVM_DEBUG(dbgs() << "MachineBlockFrequencyInfo is available\n"); - return *MBFI; + return MBFIWrapper->getMBFI(); } - auto &MBPI = getAnalysis<MachineBranchProbabilityInfo>(); - auto *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); - auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>(); + auto &MBPI = getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI(); + auto *MLIWrapper = getAnalysisIfAvailable<MachineLoopInfoWrapperPass>(); + auto *MLI = MLIWrapper ? &MLIWrapper->getLI() : nullptr; + auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>(); + auto *MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr; LLVM_DEBUG(dbgs() << "Building MachineBlockFrequencyInfo on the fly\n"); LLVM_DEBUG(if (MLI) dbgs() << "LoopInfo is available\n"); @@ -82,7 +80,7 @@ LazyMachineBlockFrequencyInfoPass::calculateIfNotAvailable() const { // Generate LoopInfo from it. 
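Note: the LazyMachineBlockFrequencyInfo hunks show the legacy-pass-manager migration pattern that recurs throughout this merge: analyses are reached through *WrapperPass shims and unwrapped explicitly, with a null check where the analysis may be absent. The two-step unwrap, as used in the hunk above (sketch):

  auto *MDTWrapper =
      getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
  auto *MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;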
OwnedMLI = std::make_unique<MachineLoopInfo>(); - OwnedMLI->getBase().analyze(MDT->getBase()); + OwnedMLI->analyze(MDT->getBase()); MLI = OwnedMLI.get(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp index 47c19c3d8ec4..6dbd2ca00f31 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp @@ -340,8 +340,8 @@ LLVM_DUMP_METHOD void LexicalScope::dump(unsigned Indent) const { if (!Children.empty()) err << std::string(Indent + 2, ' ') << "Children ...\n"; - for (unsigned i = 0, e = Children.size(); i != e; ++i) - if (Children[i] != this) - Children[i]->dump(Indent + 2); + for (const LexicalScope *Child : Children) + if (Child != this) + Child->dump(Indent + 2); } #endif diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index cfc8c28b99e5..0a6ce6a13581 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -183,6 +183,7 @@ public: /// information from it. (XXX make it const?) MLocTracker *MTracker; MachineFunction &MF; + const DebugVariableMap &DVMap; bool ShouldEmitDebugEntryValues; /// Record of all changes in variable locations at a block position. Awkwardly @@ -191,7 +192,9 @@ public: struct Transfer { MachineBasicBlock::instr_iterator Pos; /// Position to insert DBG_VALUes MachineBasicBlock *MBB; /// non-null if we should insert after. - SmallVector<MachineInstr *, 4> Insts; /// Vector of DBG_VALUEs to insert. + /// Vector of DBG_VALUEs to insert. Store with their DebugVariableID so that + /// they can be sorted into a stable order for emission at a later time. + SmallVector<std::pair<DebugVariableID, MachineInstr *>, 4> Insts; }; /// Stores the resolved operands (machine locations and constants) and @@ -227,15 +230,15 @@ public: /// Map from LocIdxes to which DebugVariables are based that location. /// Mantained while stepping through the block. Not accurate if /// VarLocs[Idx] != MTracker->LocIdxToIDNum[Idx]. - DenseMap<LocIdx, SmallSet<DebugVariable, 4>> ActiveMLocs; + DenseMap<LocIdx, SmallSet<DebugVariableID, 4>> ActiveMLocs; /// Map from DebugVariable to it's current location and qualifying meta /// information. To be used in conjunction with ActiveMLocs to construct /// enough information for the DBG_VALUEs for a particular LocIdx. - DenseMap<DebugVariable, ResolvedDbgValue> ActiveVLocs; + DenseMap<DebugVariableID, ResolvedDbgValue> ActiveVLocs; /// Temporary cache of DBG_VALUEs to be entered into the Transfers collection. - SmallVector<MachineInstr *, 4> PendingDbgValues; + SmallVector<std::pair<DebugVariableID, MachineInstr *>, 4> PendingDbgValues; /// Record of a use-before-def: created when a value that's live-in to the /// current block isn't available in any machine location, but it will be @@ -244,12 +247,12 @@ public: /// Value of this variable, def'd in block. SmallVector<DbgOp> Values; /// Identity of this variable. - DebugVariable Var; + DebugVariableID VarID; /// Additional variable properties. 
DbgValueProperties Properties; - UseBeforeDef(ArrayRef<DbgOp> Values, const DebugVariable &Var, + UseBeforeDef(ArrayRef<DbgOp> Values, DebugVariableID VarID, const DbgValueProperties &Properties) - : Values(Values.begin(), Values.end()), Var(Var), + : Values(Values.begin(), Values.end()), VarID(VarID), Properties(Properties) {} }; @@ -260,15 +263,16 @@ public: /// The set of variables that are in UseBeforeDefs and can become a location /// once the relevant value is defined. An element being erased from this /// collection prevents the use-before-def materializing. - DenseSet<DebugVariable> UseBeforeDefVariables; + DenseSet<DebugVariableID> UseBeforeDefVariables; const TargetRegisterInfo &TRI; const BitVector &CalleeSavedRegs; TransferTracker(const TargetInstrInfo *TII, MLocTracker *MTracker, - MachineFunction &MF, const TargetRegisterInfo &TRI, + MachineFunction &MF, const DebugVariableMap &DVMap, + const TargetRegisterInfo &TRI, const BitVector &CalleeSavedRegs, const TargetPassConfig &TPC) - : TII(TII), MTracker(MTracker), MF(MF), TRI(TRI), + : TII(TII), MTracker(MTracker), MF(MF), DVMap(DVMap), TRI(TRI), CalleeSavedRegs(CalleeSavedRegs) { TLI = MF.getSubtarget().getTargetLowering(); auto &TM = TPC.getTM<TargetMachine>(); @@ -316,6 +320,13 @@ public: bool isBest() const { return getQuality() == LocationQuality::Best; } }; + using ValueLocPair = std::pair<ValueIDNum, LocationAndQuality>; + + static inline bool ValueToLocSort(const ValueLocPair &A, + const ValueLocPair &B) { + return A.first < B.first; + }; + // Returns the LocationQuality for the location L iff the quality of L is // is strictly greater than the provided minimum quality. std::optional<LocationQuality> @@ -344,8 +355,8 @@ public: /// \p DbgOpStore is the map containing the DbgOpID->DbgOp mapping needed to /// determine the values used by Value. void loadVarInloc(MachineBasicBlock &MBB, DbgOpIDMap &DbgOpStore, - const DenseMap<ValueIDNum, LocationAndQuality> &ValueToLoc, - DebugVariable Var, DbgValue Value) { + const SmallVectorImpl<ValueLocPair> &ValueToLoc, + DebugVariableID VarID, DbgValue Value) { SmallVector<DbgOp> DbgOps; SmallVector<ResolvedDbgOp> ResolvedDbgOps; bool IsValueValid = true; @@ -373,9 +384,17 @@ public: continue; } - // If the value has no location, we can't make a variable location. + // Search for the desired ValueIDNum, to examine the best location found + // for it. Use an empty ValueLocPair to search for an entry in ValueToLoc. const ValueIDNum &Num = Op.ID; - auto ValuesPreferredLoc = ValueToLoc.find(Num); + ValueLocPair Probe(Num, LocationAndQuality()); + auto ValuesPreferredLoc = std::lower_bound( + ValueToLoc.begin(), ValueToLoc.end(), Probe, ValueToLocSort); + + // There must be a legitimate entry found for Num. + assert(ValuesPreferredLoc != ValueToLoc.end() && + ValuesPreferredLoc->first == Num); + if (ValuesPreferredLoc->second.isIllegal()) { // If it's a def that occurs in this block, register it as a // use-before-def to be resolved as we step through the block. @@ -386,7 +405,7 @@ public: static_cast<unsigned>(Num.getInst())); continue; } - recoverAsEntryValue(Var, Value.Properties, Num); + recoverAsEntryValue(VarID, Value.Properties, Num); IsValueValid = false; break; } @@ -404,8 +423,7 @@ public: // Add UseBeforeDef entry for the last value to be defined in this block. 
if (LastUseBeforeDef) { - addUseBeforeDef(Var, Value.Properties, DbgOps, - LastUseBeforeDef); + addUseBeforeDef(VarID, Value.Properties, DbgOps, LastUseBeforeDef); return; } @@ -413,13 +431,15 @@ public: // the transfer. for (const ResolvedDbgOp &Op : ResolvedDbgOps) if (!Op.IsConst) - ActiveMLocs[Op.Loc].insert(Var); + ActiveMLocs[Op.Loc].insert(VarID); auto NewValue = ResolvedDbgValue{ResolvedDbgOps, Value.Properties}; - auto Result = ActiveVLocs.insert(std::make_pair(Var, NewValue)); + auto Result = ActiveVLocs.insert(std::make_pair(VarID, NewValue)); if (!Result.second) Result.first->second = NewValue; + auto &[Var, DILoc] = DVMap.lookupDVID(VarID); PendingDbgValues.push_back( - MTracker->emitLoc(ResolvedDbgOps, Var, Value.Properties)); + std::make_pair(VarID, &*MTracker->emitLoc(ResolvedDbgOps, Var, DILoc, + Value.Properties))); } /// Load object with live-in variable values. \p mlocs contains the live-in @@ -430,7 +450,7 @@ public: /// FIXME: could just examine mloctracker instead of passing in \p mlocs? void loadInlocs(MachineBasicBlock &MBB, ValueTable &MLocs, DbgOpIDMap &DbgOpStore, - const SmallVectorImpl<std::pair<DebugVariable, DbgValue>> &VLocs, + const SmallVectorImpl<std::pair<DebugVariableID, DbgValue>> &VLocs, unsigned NumLocs) { ActiveMLocs.clear(); ActiveVLocs.clear(); @@ -439,8 +459,9 @@ public: UseBeforeDefs.clear(); UseBeforeDefVariables.clear(); - // Map of the preferred location for each value. - DenseMap<ValueIDNum, LocationAndQuality> ValueToLoc; + // Mapping of the preferred locations for each value. Collected into this + // vector then sorted for easy searching. + SmallVector<ValueLocPair, 16> ValueToLoc; // Initialized the preferred-location map with illegal locations, to be // filled in later. @@ -448,8 +469,10 @@ public: if (VLoc.second.Kind == DbgValue::Def) for (DbgOpID OpID : VLoc.second.getDbgOpIDs()) if (!OpID.ID.IsConst) - ValueToLoc.insert({DbgOpStore.find(OpID).ID, LocationAndQuality()}); + ValueToLoc.push_back( + {DbgOpStore.find(OpID).ID, LocationAndQuality()}); + llvm::sort(ValueToLoc, ValueToLocSort); ActiveMLocs.reserve(VLocs.size()); ActiveVLocs.reserve(VLocs.size()); @@ -464,8 +487,10 @@ public: VarLocs.push_back(VNum); // Is there a variable that wants a location for this value? If not, skip. - auto VIt = ValueToLoc.find(VNum); - if (VIt == ValueToLoc.end()) + ValueLocPair Probe(VNum, LocationAndQuality()); + auto VIt = std::lower_bound(ValueToLoc.begin(), ValueToLoc.end(), Probe, + ValueToLocSort); + if (VIt == ValueToLoc.end() || VIt->first != VNum) continue; auto &Previous = VIt->second; @@ -486,11 +511,11 @@ public: /// Record that \p Var has value \p ID, a value that becomes available /// later in the function. - void addUseBeforeDef(const DebugVariable &Var, + void addUseBeforeDef(DebugVariableID VarID, const DbgValueProperties &Properties, const SmallVectorImpl<DbgOp> &DbgOps, unsigned Inst) { - UseBeforeDefs[Inst].emplace_back(DbgOps, Var, Properties); - UseBeforeDefVariables.insert(Var); + UseBeforeDefs[Inst].emplace_back(DbgOps, VarID, Properties); + UseBeforeDefVariables.insert(VarID); } /// After the instruction at index \p Inst and position \p pos has been @@ -509,7 +534,7 @@ public: // Populate ValueToLoc with illegal default mappings for every value used by // any UseBeforeDef variables for this instruction. 
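Note: in the hunks above, ValueToLoc switches from a DenseMap to a vector that is sorted once (llvm::sort) and probed with std::lower_bound, which suits the build-once-then-search pattern of loadInlocs/loadVarInloc. A self-contained sketch of the idiom, with plain std types standing in for the LLVM ones:

  #include <algorithm>
  #include <cstdint>
  #include <utility>
  #include <vector>

  using Entry = std::pair<uint64_t, int>; // (value number, best location)

  static const int *findLoc(const std::vector<Entry> &Table, uint64_t Num) {
    // Precondition: Table is sorted by .first. Probe with a neutral payload.
    auto It = std::lower_bound(
        Table.begin(), Table.end(), Entry{Num, 0},
        [](const Entry &A, const Entry &B) { return A.first < B.first; });
    return (It != Table.end() && It->first == Num) ? &It->second : nullptr;
  }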
for (auto &Use : MIt->second) { - if (!UseBeforeDefVariables.count(Use.Var)) + if (!UseBeforeDefVariables.count(Use.VarID)) continue; for (DbgOp &Op : Use.Values) { @@ -548,7 +573,7 @@ public: // Using the map of values to locations, produce a final set of values for // this variable. for (auto &Use : MIt->second) { - if (!UseBeforeDefVariables.count(Use.Var)) + if (!UseBeforeDefVariables.count(Use.VarID)) continue; SmallVector<ResolvedDbgOp> DbgOps; @@ -571,8 +596,9 @@ public: continue; // Otherwise, we're good to go. - PendingDbgValues.push_back( - MTracker->emitLoc(DbgOps, Use.Var, Use.Properties)); + auto &[Var, DILoc] = DVMap.lookupDVID(Use.VarID); + PendingDbgValues.push_back(std::make_pair( + Use.VarID, MTracker->emitLoc(DbgOps, Var, DILoc, Use.Properties))); } flushDbgValues(pos, nullptr); } @@ -622,7 +648,7 @@ public: return Reg != SP && Reg != FP; } - bool recoverAsEntryValue(const DebugVariable &Var, + bool recoverAsEntryValue(DebugVariableID VarID, const DbgValueProperties &Prop, const ValueIDNum &Num) { // Is this variable location a candidate to be an entry value. First, @@ -643,6 +669,8 @@ public: DIExpr = *NonVariadicExpression; } + auto &[Var, DILoc] = DVMap.lookupDVID(VarID); + // Is the variable appropriate for entry values (i.e., is a parameter). if (!isEntryValueVariable(Var, DIExpr)) return false; @@ -656,9 +684,8 @@ public: DIExpression::prepend(DIExpr, DIExpression::EntryValue); Register Reg = MTracker->LocIdxToLocID[Num.getLoc()]; MachineOperand MO = MachineOperand::CreateReg(Reg, false); - - PendingDbgValues.push_back( - emitMOLoc(MO, Var, {NewExpr, Prop.Indirect, false})); + PendingDbgValues.push_back(std::make_pair( + VarID, &*emitMOLoc(MO, Var, {NewExpr, Prop.Indirect, false}))); return true; } @@ -667,19 +694,20 @@ public: DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(), MI.getDebugLoc()->getInlinedAt()); DbgValueProperties Properties(MI); + DebugVariableID VarID = DVMap.getDVID(Var); // Ignore non-register locations, we don't transfer those. if (MI.isUndefDebugValue() || all_of(MI.debug_operands(), [](const MachineOperand &MO) { return !MO.isReg(); })) { - auto It = ActiveVLocs.find(Var); + auto It = ActiveVLocs.find(VarID); if (It != ActiveVLocs.end()) { for (LocIdx Loc : It->second.loc_indices()) - ActiveMLocs[Loc].erase(Var); + ActiveMLocs[Loc].erase(VarID); ActiveVLocs.erase(It); } // Any use-before-defs no longer apply. - UseBeforeDefVariables.erase(Var); + UseBeforeDefVariables.erase(VarID); return; } @@ -705,14 +733,15 @@ public: SmallVectorImpl<ResolvedDbgOp> &NewLocs) { DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(), MI.getDebugLoc()->getInlinedAt()); + DebugVariableID VarID = DVMap.getDVID(Var); // Any use-before-defs no longer apply. - UseBeforeDefVariables.erase(Var); + UseBeforeDefVariables.erase(VarID); // Erase any previous location. - auto It = ActiveVLocs.find(Var); + auto It = ActiveVLocs.find(VarID); if (It != ActiveVLocs.end()) { for (LocIdx Loc : It->second.loc_indices()) - ActiveMLocs[Loc].erase(Var); + ActiveMLocs[Loc].erase(VarID); } // If there _is_ no new location, all we had to do was erase. 
@@ -722,7 +751,7 @@ public: return; } - SmallVector<std::pair<LocIdx, DebugVariable>> LostMLocs; + SmallVector<std::pair<LocIdx, DebugVariableID>> LostMLocs; for (ResolvedDbgOp &Op : NewLocs) { if (Op.IsConst) continue; @@ -749,17 +778,17 @@ public: for (const auto &LostMLoc : LostMLocs) ActiveMLocs[LostMLoc.first].erase(LostMLoc.second); LostMLocs.clear(); - It = ActiveVLocs.find(Var); + It = ActiveVLocs.find(VarID); ActiveMLocs[NewLoc.asU64()].clear(); VarLocs[NewLoc.asU64()] = MTracker->readMLoc(NewLoc); } - ActiveMLocs[NewLoc].insert(Var); + ActiveMLocs[NewLoc].insert(VarID); } if (It == ActiveVLocs.end()) { ActiveVLocs.insert( - std::make_pair(Var, ResolvedDbgValue(NewLocs, Properties))); + std::make_pair(VarID, ResolvedDbgValue(NewLocs, Properties))); } else { It->second.Ops.assign(NewLocs); It->second.Properties = Properties; @@ -802,21 +831,21 @@ public: // explicitly undef, then stop here. if (!NewLoc && !MakeUndef) { // Try and recover a few more locations with entry values. - for (const auto &Var : ActiveMLocIt->second) { - auto &Prop = ActiveVLocs.find(Var)->second.Properties; - recoverAsEntryValue(Var, Prop, OldValue); + for (DebugVariableID VarID : ActiveMLocIt->second) { + auto &Prop = ActiveVLocs.find(VarID)->second.Properties; + recoverAsEntryValue(VarID, Prop, OldValue); } flushDbgValues(Pos, nullptr); return; } // Examine all the variables based on this location. - DenseSet<DebugVariable> NewMLocs; + DenseSet<DebugVariableID> NewMLocs; // If no new location has been found, every variable that depends on this // MLoc is dead, so end their existing MLoc->Var mappings as well. - SmallVector<std::pair<LocIdx, DebugVariable>> LostMLocs; - for (const auto &Var : ActiveMLocIt->second) { - auto ActiveVLocIt = ActiveVLocs.find(Var); + SmallVector<std::pair<LocIdx, DebugVariableID>> LostMLocs; + for (DebugVariableID VarID : ActiveMLocIt->second) { + auto ActiveVLocIt = ActiveVLocs.find(VarID); // Re-state the variable location: if there's no replacement then NewLoc // is std::nullopt and a $noreg DBG_VALUE will be created. Otherwise, a // DBG_VALUE identifying the alternative location will be emitted. @@ -835,19 +864,21 @@ public: replace_copy(ActiveVLocIt->second.Ops, DbgOps.begin(), OldOp, NewOp); } - PendingDbgValues.push_back(MTracker->emitLoc(DbgOps, Var, Properties)); + auto &[Var, DILoc] = DVMap.lookupDVID(VarID); + PendingDbgValues.push_back(std::make_pair( + VarID, &*MTracker->emitLoc(DbgOps, Var, DILoc, Properties))); // Update machine locations <=> variable locations maps. Defer updating // ActiveMLocs to avoid invalidating the ActiveMLocIt iterator. if (!NewLoc) { for (LocIdx Loc : ActiveVLocIt->second.loc_indices()) { if (Loc != MLoc) - LostMLocs.emplace_back(Loc, Var); + LostMLocs.emplace_back(Loc, VarID); } ActiveVLocs.erase(ActiveVLocIt); } else { ActiveVLocIt->second.Ops = DbgOps; - NewMLocs.insert(Var); + NewMLocs.insert(VarID); } } @@ -871,8 +902,8 @@ public: // Commit ActiveMLoc changes. ActiveMLocIt->second.clear(); if (!NewMLocs.empty()) - for (auto &Var : NewMLocs) - ActiveMLocs[*NewLoc].insert(Var); + for (DebugVariableID VarID : NewMLocs) + ActiveMLocs[*NewLoc].insert(VarID); } /// Transfer variables based on \p Src to be based on \p Dst. This handles @@ -895,17 +926,18 @@ public: // For each variable based on Src; create a location at Dst. 
ResolvedDbgOp SrcOp(Src); ResolvedDbgOp DstOp(Dst); - for (const auto &Var : MovingVars) { - auto ActiveVLocIt = ActiveVLocs.find(Var); + for (DebugVariableID VarID : MovingVars) { + auto ActiveVLocIt = ActiveVLocs.find(VarID); assert(ActiveVLocIt != ActiveVLocs.end()); // Update all instances of Src in the variable's tracked values to Dst. std::replace(ActiveVLocIt->second.Ops.begin(), ActiveVLocIt->second.Ops.end(), SrcOp, DstOp); - MachineInstr *MI = MTracker->emitLoc(ActiveVLocIt->second.Ops, Var, + auto &[Var, DILoc] = DVMap.lookupDVID(VarID); + MachineInstr *MI = MTracker->emitLoc(ActiveVLocIt->second.Ops, Var, DILoc, ActiveVLocIt->second.Properties); - PendingDbgValues.push_back(MI); + PendingDbgValues.push_back(std::make_pair(VarID, MI)); } ActiveMLocs[Src].clear(); flushDbgValues(Pos, nullptr); @@ -1156,11 +1188,9 @@ LLVM_DUMP_METHOD void MLocTracker::dump_mloc_map() { MachineInstrBuilder MLocTracker::emitLoc(const SmallVectorImpl<ResolvedDbgOp> &DbgOps, - const DebugVariable &Var, + const DebugVariable &Var, const DILocation *DILoc, const DbgValueProperties &Properties) { - DebugLoc DL = DILocation::get(Var.getVariable()->getContext(), 0, 0, - Var.getVariable()->getScope(), - const_cast<DILocation *>(Var.getInlinedAt())); + DebugLoc DL = DebugLoc(DILoc); const MCInstrDesc &Desc = Properties.IsVariadic ? TII.get(TargetOpcode::DBG_VALUE_LIST) @@ -1356,10 +1386,11 @@ InstrRefBasedLDV::findLocationForMemOperand(const MachineInstr &MI) { // from the stack at some point. Happily the memory operand will tell us // the size written to the stack. auto *MemOperand = *MI.memoperands_begin(); - unsigned SizeInBits = MemOperand->getSizeInBits(); + LocationSize SizeInBits = MemOperand->getSizeInBits(); + assert(SizeInBits.hasValue() && "Expected to find a valid size!"); // Find that position in the stack indexes we're tracking. - auto IdxIt = MTracker->StackSlotIdxes.find({SizeInBits, 0}); + auto IdxIt = MTracker->StackSlotIdxes.find({SizeInBits.getValue(), 0}); if (IdxIt == MTracker->StackSlotIdxes.end()) // That index is not tracked. This is suprising, and unlikely to ever // occur, but the safe action is to indicate the variable is optimised out. @@ -1705,7 +1736,8 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI, LastUseBeforeDef = std::max(LastUseBeforeDef, NewID.getInst()); } if (IsValidUseBeforeDef) { - TTracker->addUseBeforeDef(V, {MI.getDebugExpression(), false, true}, + DebugVariableID VID = DVMap.insertDVID(V, MI.getDebugLoc().get()); + TTracker->addUseBeforeDef(VID, {MI.getDebugExpression(), false, true}, DbgOps, LastUseBeforeDef); } } @@ -1714,9 +1746,11 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI, // This DBG_VALUE is potentially a $noreg / undefined location, if // FoundLoc is illegal. // (XXX -- could morph the DBG_INSTR_REF in the future). 
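Note: the transferMlocs hunk above retargets every tracked operand of a variable from the source location to the destination with a single std::replace per variable. A self-contained sketch of that one step, with ints standing in for ResolvedDbgOp locations:

  #include <algorithm>
  #include <cassert>
  #include <vector>

  int main() {
    std::vector<int> Ops = {3, 7, 3};           // 3 = Src location, 7 = other
    std::replace(Ops.begin(), Ops.end(), 3, 9); // retarget Src -> Dst (9)
    assert((Ops == std::vector<int>{9, 7, 9}));
  }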
- MachineInstr *DbgMI = MTracker->emitLoc(NewLocs, V, Properties); + MachineInstr *DbgMI = + MTracker->emitLoc(NewLocs, V, MI.getDebugLoc().get(), Properties); + DebugVariableID ID = DVMap.getDVID(V); - TTracker->PendingDbgValues.push_back(DbgMI); + TTracker->PendingDbgValues.push_back(std::make_pair(ID, DbgMI)); TTracker->flushDbgValues(MI.getIterator(), nullptr); return true; } @@ -3091,7 +3125,8 @@ void InstrRefBasedLDV::getBlocksForScope( } void InstrRefBasedLDV::buildVLocValueMap( - const DILocation *DILoc, const SmallSet<DebugVariable, 4> &VarsWeCareAbout, + const DILocation *DILoc, + const SmallSet<DebugVariableID, 4> &VarsWeCareAbout, SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks, LiveInsT &Output, FuncValueTable &MOutLocs, FuncValueTable &MInLocs, SmallVectorImpl<VLocTracker> &AllTheVLocs) { @@ -3108,12 +3143,8 @@ void InstrRefBasedLDV::buildVLocValueMap( SmallPtrSet<const MachineBasicBlock *, 8> BlocksToExplore; // The order in which to examine them (RPO). - SmallVector<MachineBasicBlock *, 8> BlockOrders; - - // RPO ordering function. - auto Cmp = [&](MachineBasicBlock *A, MachineBasicBlock *B) { - return BBToOrder[A] < BBToOrder[B]; - }; + SmallVector<MachineBasicBlock *, 16> BlockOrders; + SmallVector<unsigned, 32> BlockOrderNums; getBlocksForScope(DILoc, BlocksToExplore, AssignBlocks); @@ -3131,11 +3162,16 @@ void InstrRefBasedLDV::buildVLocValueMap( for (const auto *MBB : BlocksToExplore) MutBlocksToExplore.insert(const_cast<MachineBasicBlock *>(MBB)); - // Picks out relevants blocks RPO order and sort them. + // Picks out relevants blocks RPO order and sort them. Sort their + // order-numbers and map back to MBB pointers later, to avoid repeated + // DenseMap queries during comparisons. for (const auto *MBB : BlocksToExplore) - BlockOrders.push_back(const_cast<MachineBasicBlock *>(MBB)); + BlockOrderNums.push_back(BBToOrder[MBB]); - llvm::sort(BlockOrders, Cmp); + llvm::sort(BlockOrderNums); + for (unsigned int I : BlockOrderNums) + BlockOrders.push_back(OrderToBB[I]); + BlockOrderNums.clear(); unsigned NumBlocks = BlockOrders.size(); // Allocate some vectors for storing the live ins and live outs. Large. @@ -3166,7 +3202,7 @@ void InstrRefBasedLDV::buildVLocValueMap( // between blocks. This keeps the locality of working on one lexical scope at // at time, but avoids re-processing variable values because some other // variable has been assigned. - for (const auto &Var : VarsWeCareAbout) { + for (DebugVariableID VarID : VarsWeCareAbout) { // Re-initialize live-ins and live-outs, to clear the remains of previous // variables live-ins / live-outs. for (unsigned int I = 0; I < NumBlocks; ++I) { @@ -3180,7 +3216,7 @@ void InstrRefBasedLDV::buildVLocValueMap( SmallPtrSet<MachineBasicBlock *, 32> DefBlocks; for (const MachineBasicBlock *ExpMBB : BlocksToExplore) { auto &TransferFunc = AllTheVLocs[ExpMBB->getNumber()].Vars; - if (TransferFunc.contains(Var)) + if (TransferFunc.contains(VarID)) DefBlocks.insert(const_cast<MachineBasicBlock *>(ExpMBB)); } @@ -3190,7 +3226,7 @@ void InstrRefBasedLDV::buildVLocValueMap( // only one value definition, things are very simple. if (DefBlocks.size() == 1) { placePHIsForSingleVarDefinition(MutBlocksToExplore, *DefBlocks.begin(), - AllTheVLocs, Var, Output); + AllTheVLocs, VarID, Output); continue; } @@ -3263,7 +3299,7 @@ void InstrRefBasedLDV::buildVLocValueMap( // Do transfer function. 
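Note: buildVLocValueMap stops sorting MachineBasicBlock pointers with a comparator that queried BBToOrder on every comparison; it now sorts the plain RPO numbers and maps back through the (now vector-typed) OrderToBB. Condensed from the hunk above (sketch):

  SmallVector<unsigned, 32> BlockOrderNums;
  for (const auto *MBB : BlocksToExplore)
    BlockOrderNums.push_back(BBToOrder[MBB]); // one map lookup per block...
  llvm::sort(BlockOrderNums);                 // ...instead of one per compare
  for (unsigned I : BlockOrderNums)
    BlockOrders.push_back(OrderToBB[I]);      // OrderToBB: RPO index -> block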
auto &VTracker = AllTheVLocs[MBB->getNumber()]; - auto TransferIt = VTracker.Vars.find(Var); + auto TransferIt = VTracker.Vars.find(VarID); if (TransferIt != VTracker.Vars.end()) { // Erase on empty transfer (DBG_VALUE $noreg). if (TransferIt->second.Kind == DbgValue::Undef) { @@ -3325,9 +3361,11 @@ void InstrRefBasedLDV::buildVLocValueMap( continue; if (BlockLiveIn->Kind == DbgValue::VPHI) BlockLiveIn->Kind = DbgValue::Def; + [[maybe_unused]] auto &[Var, DILoc] = DVMap.lookupDVID(VarID); assert(BlockLiveIn->Properties.DIExpr->getFragmentInfo() == - Var.getFragment() && "Fragment info missing during value prop"); - Output[MBB->getNumber()].push_back(std::make_pair(Var, *BlockLiveIn)); + Var.getFragment() && + "Fragment info missing during value prop"); + Output[MBB->getNumber()].push_back(std::make_pair(VarID, *BlockLiveIn)); } } // Per-variable loop. @@ -3338,7 +3376,7 @@ void InstrRefBasedLDV::buildVLocValueMap( void InstrRefBasedLDV::placePHIsForSingleVarDefinition( const SmallPtrSetImpl<MachineBasicBlock *> &InScopeBlocks, MachineBasicBlock *AssignMBB, SmallVectorImpl<VLocTracker> &AllTheVLocs, - const DebugVariable &Var, LiveInsT &Output) { + DebugVariableID VarID, LiveInsT &Output) { // If there is a single definition of the variable, then working out it's // value everywhere is very simple: it's every block dominated by the // definition. At the dominance frontier, the usual algorithm would: @@ -3351,7 +3389,7 @@ void InstrRefBasedLDV::placePHIsForSingleVarDefinition( // Pick out the variables value from the block transfer function. VLocTracker &VLocs = AllTheVLocs[AssignMBB->getNumber()]; - auto ValueIt = VLocs.Vars.find(Var); + auto ValueIt = VLocs.Vars.find(VarID); const DbgValue &Value = ValueIt->second; // If it's an explicit assignment of "undef", that means there is no location @@ -3366,7 +3404,7 @@ void InstrRefBasedLDV::placePHIsForSingleVarDefinition( if (!DomTree->properlyDominates(AssignMBB, ScopeBlock)) continue; - Output[ScopeBlock->getNumber()].push_back({Var, Value}); + Output[ScopeBlock->getNumber()].push_back({VarID, Value}); } // All blocks that aren't dominated have no live-in value, thus no variable @@ -3395,16 +3433,24 @@ void InstrRefBasedLDV::initialSetup(MachineFunction &MF) { return DL.getLine() != 0; return false; }; - // Collect a set of all the artificial blocks. - for (auto &MBB : MF) + + // Collect a set of all the artificial blocks. Collect the size too, ilist + // size calls are O(n). + unsigned int Size = 0; + for (auto &MBB : MF) { + ++Size; if (none_of(MBB.instrs(), hasNonArtificialLocation)) ArtificialBlocks.insert(&MBB); + } // Compute mappings of block <=> RPO order. 
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF); unsigned int RPONumber = 0; + OrderToBB.reserve(Size); + BBToOrder.reserve(Size); + BBNumToRPO.reserve(Size); auto processMBB = [&](MachineBasicBlock *MBB) { - OrderToBB[RPONumber] = MBB; + OrderToBB.push_back(MBB); BBToOrder[MBB] = RPONumber; BBNumToRPO[MBB->getNumber()] = RPONumber; ++RPONumber; @@ -3485,9 +3531,9 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit( const ScopeToVarsT &ScopeToVars, ScopeToAssignBlocksT &ScopeToAssignBlocks, LiveInsT &Output, FuncValueTable &MOutLocs, FuncValueTable &MInLocs, SmallVectorImpl<VLocTracker> &AllTheVLocs, MachineFunction &MF, - DenseMap<DebugVariable, unsigned> &AllVarsNumbering, const TargetPassConfig &TPC) { - TTracker = new TransferTracker(TII, MTracker, MF, *TRI, CalleeSavedRegs, TPC); + TTracker = + new TransferTracker(TII, MTracker, MF, DVMap, *TRI, CalleeSavedRegs, TPC); unsigned NumLocs = MTracker->getNumLocs(); VTracker = nullptr; @@ -3592,31 +3638,24 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit( if (MInLocs.hasTableFor(*MBB)) EjectBlock(*MBB); - return emitTransfers(AllVarsNumbering); + return emitTransfers(); } -bool InstrRefBasedLDV::emitTransfers( - DenseMap<DebugVariable, unsigned> &AllVarsNumbering) { +bool InstrRefBasedLDV::emitTransfers() { // Go through all the transfers recorded in the TransferTracker -- this is // both the live-ins to a block, and any movements of values that happen // in the middle. - for (const auto &P : TTracker->Transfers) { + for (auto &P : TTracker->Transfers) { // We have to insert DBG_VALUEs in a consistent order, otherwise they // appear in DWARF in different orders. Use the order that they appear // when walking through each block / each instruction, stored in - // AllVarsNumbering. - SmallVector<std::pair<unsigned, MachineInstr *>> Insts; - for (MachineInstr *MI : P.Insts) { - DebugVariable Var(MI->getDebugVariable(), MI->getDebugExpression(), - MI->getDebugLoc()->getInlinedAt()); - Insts.emplace_back(AllVarsNumbering.find(Var)->second, MI); - } - llvm::sort(Insts, llvm::less_first()); + // DVMap. + llvm::sort(P.Insts, llvm::less_first()); // Insert either before or after the designated point... if (P.MBB) { MachineBasicBlock &MBB = *P.MBB; - for (const auto &Pair : Insts) + for (const auto &Pair : P.Insts) MBB.insert(P.Pos, Pair.second); } else { // Terminators, like tail calls, can clobber things. Don't try and place @@ -3625,7 +3664,7 @@ bool InstrRefBasedLDV::emitTransfers( continue; MachineBasicBlock &MBB = *P.Pos->getParent(); - for (const auto &Pair : Insts) + for (const auto &Pair : P.Insts) MBB.insertAfterBundle(P.Pos, Pair.second); } } @@ -3680,7 +3719,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, initialSetup(MF); MLocTransfer.resize(MaxNumBlocks); - vlocs.resize(MaxNumBlocks, VLocTracker(OverlapFragments, EmptyExpr)); + vlocs.resize(MaxNumBlocks, VLocTracker(DVMap, OverlapFragments, EmptyExpr)); SavedLiveIns.resize(MaxNumBlocks); produceMLocTransferFunction(MF, MLocTransfer, MaxNumBlocks); @@ -3723,24 +3762,19 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, // Walk back through each block / instruction, collecting DBG_VALUE // instructions and recording what machine value their operands refer to. 
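Note: with transfers stored as (DebugVariableID, MachineInstr *) pairs, the emitTransfers hunk above can recover a deterministic DBG_VALUE emission order by sorting on the ID alone (llvm::less_first), instead of renumbering variables through the old AllVarsNumbering map. Self-contained sketch of why that works, assuming IDs are handed out in first-appearance order as insertDVID does:

  #include <algorithm>
  #include <string>
  #include <utility>
  #include <vector>

  int main() {
    std::vector<std::pair<unsigned, std::string>> Insts = {
        {2u, "DBG_VALUE c"}, {0u, "DBG_VALUE a"}, {1u, "DBG_VALUE b"}};
    // Equivalent of llvm::less_first: compare only the ID.
    std::sort(Insts.begin(), Insts.end(),
              [](const auto &A, const auto &B) { return A.first < B.first; });
    // Insts now: a, b, c -- stable regardless of map iteration order.
  }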
- for (auto &OrderPair : OrderToBB) { - MachineBasicBlock &MBB = *OrderPair.second; - CurBB = MBB.getNumber(); + for (MachineBasicBlock *MBB : OrderToBB) { + CurBB = MBB->getNumber(); VTracker = &vlocs[CurBB]; - VTracker->MBB = &MBB; - MTracker->loadFromArray(MInLocs[MBB], CurBB); + VTracker->MBB = MBB; + MTracker->loadFromArray(MInLocs[*MBB], CurBB); CurInst = 1; - for (auto &MI : MBB) { + for (auto &MI : *MBB) { process(MI, &MOutLocs, &MInLocs); ++CurInst; } MTracker->reset(); } - // Number all variables in the order that they appear, to be used as a stable - // insertion order later. - DenseMap<DebugVariable, unsigned> AllVarsNumbering; - // Map from one LexicalScope to all the variables in that scope. ScopeToVarsT ScopeToVars; @@ -3759,16 +3793,15 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, auto *VTracker = &vlocs[MBB->getNumber()]; // Collect each variable with a DBG_VALUE in this block. for (auto &idx : VTracker->Vars) { - const auto &Var = idx.first; - const DILocation *ScopeLoc = VTracker->Scopes[Var]; + DebugVariableID VarID = idx.first; + const DILocation *ScopeLoc = VTracker->Scopes[VarID]; assert(ScopeLoc != nullptr); auto *Scope = LS.findLexicalScope(ScopeLoc); // No insts in scope -> shouldn't have been recorded. assert(Scope != nullptr); - AllVarsNumbering.insert(std::make_pair(Var, AllVarsNumbering.size())); - ScopeToVars[Scope].insert(Var); + ScopeToVars[Scope].insert(VarID); ScopeToAssignBlocks[Scope].insert(VTracker->MBB); ScopeToDILocation[Scope] = ScopeLoc; ++VarAssignCount; @@ -3792,7 +3825,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, // the "else" block of this condition. Changed = depthFirstVLocAndEmit( MaxNumBlocks, ScopeToDILocation, ScopeToVars, ScopeToAssignBlocks, - SavedLiveIns, MOutLocs, MInLocs, vlocs, MF, AllVarsNumbering, *TPC); + SavedLiveIns, MOutLocs, MInLocs, vlocs, MF, *TPC); } delete MTracker; @@ -3811,6 +3844,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, SeenFragments.clear(); SeenDbgPHIs.clear(); DbgOpStore.clear(); + DVMap.clear(); return Changed; } @@ -3906,7 +3940,7 @@ public: DenseMap<BlockValueNum, LDVSSAPhi *> PHIs; /// Map of which blocks generate Undef values -- blocks that are not /// dominated by any Def. - DenseMap<MachineBasicBlock *, BlockValueNum> UndefMap; + DenseMap<MachineBasicBlock *, BlockValueNum> PoisonMap; /// Map of machine blocks to our own records of them. DenseMap<MachineBasicBlock *, LDVSSABlock *> BlockMap; /// Machine location where any PHI must occur. @@ -3922,7 +3956,7 @@ public: delete Block.second; PHIs.clear(); - UndefMap.clear(); + PoisonMap.clear(); BlockMap.clear(); } @@ -4016,15 +4050,15 @@ public: Preds->push_back(BB->Updater.getSSALDVBlock(Pred)); } - /// GetUndefVal - Normally creates an IMPLICIT_DEF instruction with a new + /// GetPoisonVal - Normally creates an IMPLICIT_DEF instruction with a new /// register. For LiveDebugValues, represents a block identified as not having /// any DBG_PHI predecessors. - static BlockValueNum GetUndefVal(LDVSSABlock *BB, LDVSSAUpdater *Updater) { + static BlockValueNum GetPoisonVal(LDVSSABlock *BB, LDVSSAUpdater *Updater) { // Create a value number for this block -- it needs to be unique and in the - // "undef" collection, so that we know it's not real. Use a number + // "poison" collection, so that we know it's not real. Use a number // representing a PHI into this block. 
BlockValueNum Num = ValueIDNum(BB->BB.getNumber(), 0, Updater->Loc).asU64(); - Updater->UndefMap[&BB->BB] = Num; + Updater->PoisonMap[&BB->BB] = Num; return Num; } @@ -4187,7 +4221,7 @@ std::optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl( // Are all these things actually defined? for (auto &PHIIt : PHI->IncomingValues) { // Any undef input means DBG_PHIs didn't dominate the use point. - if (Updater.UndefMap.contains(&PHIIt.first->BB)) + if (Updater.PoisonMap.contains(&PHIIt.first->BB)) return std::nullopt; ValueIDNum ValueToCheck; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h index 6d77a6972f09..d9851ad13eab 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h @@ -35,6 +35,44 @@ class DbgOpIDMap; using namespace llvm; +using DebugVariableID = unsigned; +using VarAndLoc = std::pair<DebugVariable, const DILocation *>; + +/// Mapping from DebugVariable to/from a unique identifying number. Each +/// DebugVariable consists of three pointers, and after a small amount of +/// work to identify overlapping fragments of variables we mostly only use +/// DebugVariables as identities of variables. It's much more compile-time +/// efficient to use an ID number instead, which this class provides. +class DebugVariableMap { + DenseMap<DebugVariable, unsigned> VarToIdx; + SmallVector<VarAndLoc> IdxToVar; + +public: + DebugVariableID getDVID(const DebugVariable &Var) const { + auto It = VarToIdx.find(Var); + assert(It != VarToIdx.end()); + return It->second; + } + + DebugVariableID insertDVID(DebugVariable &Var, const DILocation *Loc) { + unsigned Size = VarToIdx.size(); + auto ItPair = VarToIdx.insert({Var, Size}); + if (ItPair.second) { + IdxToVar.push_back({Var, Loc}); + return Size; + } + + return ItPair.first->second; + } + + const VarAndLoc &lookupDVID(DebugVariableID ID) const { return IdxToVar[ID]; } + + void clear() { + VarToIdx.clear(); + IdxToVar.clear(); + } +}; + /// Handle-class for a particular "location". This value-type uniquely /// symbolises a register or stack location, allowing manipulation of locations /// without concern for where that location is. Practically, this allows us to @@ -985,7 +1023,7 @@ public: /// information in \pProperties, for variable Var. Don't insert it anywhere, /// just return the builder for it. MachineInstrBuilder emitLoc(const SmallVectorImpl<ResolvedDbgOp> &DbgOps, - const DebugVariable &Var, + const DebugVariable &Var, const DILocation *DILoc, const DbgValueProperties &Properties); }; @@ -1003,38 +1041,45 @@ using OverlapMap = /// identified. class VLocTracker { public: + /// Ref to function-wide map of DebugVariable <=> ID-numbers. + DebugVariableMap &DVMap; /// Map DebugVariable to the latest Value it's defined to have. /// Needs to be a MapVector because we determine order-in-the-input-MIR from - /// the order in this container. + /// the order in this container. (FIXME: likely no longer true as the ordering + /// is now provided by DebugVariableMap). /// We only retain the last DbgValue in each block for each variable, to /// determine the blocks live-out variable value. The Vars container forms the /// transfer function for this block, as part of the dataflow analysis. The /// movement of values between locations inside of a block is handled at a /// much later stage, in the TransferTracker class. 
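Note: the new DebugVariableMap above interns each variable once and hands back a dense unsigned ID, so downstream containers hash and compare one integer instead of a three-pointer DebugVariable. The usage shape implied by the signatures in the hunk (sketch; Var and MI are assumed in-scope values):

  DebugVariableMap DVMap;
  // First sight: assigns the next free ID and records the creation location.
  DebugVariableID ID = DVMap.insertDVID(Var, MI.getDebugLoc().get());
  // Later: cheap lookups in both directions.
  assert(DVMap.getDVID(Var) == ID);
  const auto &[SameVar, Loc] = DVMap.lookupDVID(ID);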
- MapVector<DebugVariable, DbgValue> Vars; - SmallDenseMap<DebugVariable, const DILocation *, 8> Scopes; + MapVector<DebugVariableID, DbgValue> Vars; + SmallDenseMap<DebugVariableID, const DILocation *, 8> Scopes; MachineBasicBlock *MBB = nullptr; const OverlapMap &OverlappingFragments; DbgValueProperties EmptyProperties; public: - VLocTracker(const OverlapMap &O, const DIExpression *EmptyExpr) - : OverlappingFragments(O), EmptyProperties(EmptyExpr, false, false) {} + VLocTracker(DebugVariableMap &DVMap, const OverlapMap &O, + const DIExpression *EmptyExpr) + : DVMap(DVMap), OverlappingFragments(O), + EmptyProperties(EmptyExpr, false, false) {} void defVar(const MachineInstr &MI, const DbgValueProperties &Properties, const SmallVectorImpl<DbgOpID> &DebugOps) { assert(MI.isDebugValueLike()); DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(), MI.getDebugLoc()->getInlinedAt()); + // Either insert or fetch an ID number for this variable. + DebugVariableID VarID = DVMap.insertDVID(Var, MI.getDebugLoc().get()); DbgValue Rec = (DebugOps.size() > 0) ? DbgValue(DebugOps, Properties) : DbgValue(Properties, DbgValue::Undef); // Attempt insertion; overwrite if it's already mapped. - auto Result = Vars.insert(std::make_pair(Var, Rec)); + auto Result = Vars.insert(std::make_pair(VarID, Rec)); if (!Result.second) Result.first->second = Rec; - Scopes[Var] = MI.getDebugLoc().get(); + Scopes[VarID] = MI.getDebugLoc().get(); considerOverlaps(Var, MI.getDebugLoc().get()); } @@ -1056,13 +1101,15 @@ public: DebugVariable Overlapped(Var.getVariable(), OptFragmentInfo, Var.getInlinedAt()); + // Produce an ID number for this overlapping fragment of a variable. + DebugVariableID OverlappedID = DVMap.insertDVID(Overlapped, Loc); DbgValue Rec = DbgValue(EmptyProperties, DbgValue::Undef); // Attempt insertion; overwrite if it's already mapped. - auto Result = Vars.insert(std::make_pair(Overlapped, Rec)); + auto Result = Vars.insert(std::make_pair(OverlappedID, Rec)); if (!Result.second) Result.first->second = Rec; - Scopes[Overlapped] = Loc; + Scopes[OverlappedID] = Loc; } } @@ -1093,7 +1140,7 @@ public: /// variables to their values. using LiveIdxT = DenseMap<const MachineBasicBlock *, DbgValue *>; - using VarAndLoc = std::pair<DebugVariable, DbgValue>; + using VarAndLoc = std::pair<DebugVariableID, DbgValue>; /// Type for a live-in value: the predecessor block, and its value. using InValueT = std::pair<MachineBasicBlock *, DbgValue *>; @@ -1106,7 +1153,8 @@ public: using ScopeToDILocT = DenseMap<const LexicalScope *, const DILocation *>; /// Mapping from lexical scopes to variables in that scope. - using ScopeToVarsT = DenseMap<const LexicalScope *, SmallSet<DebugVariable, 4>>; + using ScopeToVarsT = + DenseMap<const LexicalScope *, SmallSet<DebugVariableID, 4>>; /// Mapping from lexical scopes to blocks where variables in that scope are /// assigned. Such blocks aren't necessarily "in" the lexical scope, it's @@ -1153,7 +1201,7 @@ private: SmallPtrSet<MachineBasicBlock *, 16> ArtificialBlocks; // Mapping of blocks to and from their RPOT order. - DenseMap<unsigned int, MachineBasicBlock *> OrderToBB; + SmallVector<MachineBasicBlock *> OrderToBB; DenseMap<const MachineBasicBlock *, unsigned int> BBToOrder; DenseMap<unsigned, unsigned> BBNumToRPO; @@ -1200,6 +1248,11 @@ private: DbgOpIDMap DbgOpStore; + /// Mapping between DebugVariables and unique ID numbers. This is a more + /// efficient way to represent the identity of a variable, versus a plain + /// DebugVariable. 
+ DebugVariableMap DVMap; + /// True if we need to examine call instructions for stack clobbers. We /// normally assume that they don't clobber SP, but stack probes on Windows /// do. @@ -1330,9 +1383,9 @@ private: /// performance as it doesn't have to find the dominance frontier between /// different assignments. void placePHIsForSingleVarDefinition( - const SmallPtrSetImpl<MachineBasicBlock *> &InScopeBlocks, - MachineBasicBlock *MBB, SmallVectorImpl<VLocTracker> &AllTheVLocs, - const DebugVariable &Var, LiveInsT &Output); + const SmallPtrSetImpl<MachineBasicBlock *> &InScopeBlocks, + MachineBasicBlock *MBB, SmallVectorImpl<VLocTracker> &AllTheVLocs, + DebugVariableID Var, LiveInsT &Output); /// Calculate the iterated-dominance-frontier for a set of defs, using the /// existing LLVM facilities for this. Works for a single "value" or @@ -1381,7 +1434,7 @@ private: /// scope, but which do contain DBG_VALUEs, which VarLocBasedImpl tracks /// locations through. void buildVLocValueMap(const DILocation *DILoc, - const SmallSet<DebugVariable, 4> &VarsWeCareAbout, + const SmallSet<DebugVariableID, 4> &VarsWeCareAbout, SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks, LiveInsT &Output, FuncValueTable &MOutLocs, FuncValueTable &MInLocs, @@ -1414,10 +1467,8 @@ private: const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders); /// Take collections of DBG_VALUE instructions stored in TTracker, and - /// install them into their output blocks. Preserves a stable order of - /// DBG_VALUEs produced (which would otherwise cause nondeterminism) through - /// the AllVarsNumbering order. - bool emitTransfers(DenseMap<DebugVariable, unsigned> &AllVarsNumbering); + /// install them into their output blocks. + bool emitTransfers(); /// Boilerplate computation of some initial sets, artifical blocks and /// RPOT block ordering. @@ -1437,13 +1488,14 @@ private: /// block information can be fully computed before exploration finishes, /// allowing us to emit it and free data structures earlier than otherwise. /// It's also good for locality. - bool depthFirstVLocAndEmit( - unsigned MaxNumBlocks, const ScopeToDILocT &ScopeToDILocation, - const ScopeToVarsT &ScopeToVars, ScopeToAssignBlocksT &ScopeToBlocks, - LiveInsT &Output, FuncValueTable &MOutLocs, FuncValueTable &MInLocs, - SmallVectorImpl<VLocTracker> &AllTheVLocs, MachineFunction &MF, - DenseMap<DebugVariable, unsigned> &AllVarsNumbering, - const TargetPassConfig &TPC); + bool depthFirstVLocAndEmit(unsigned MaxNumBlocks, + const ScopeToDILocT &ScopeToDILocation, + const ScopeToVarsT &ScopeToVars, + ScopeToAssignBlocksT &ScopeToBlocks, + LiveInsT &Output, FuncValueTable &MOutLocs, + FuncValueTable &MInLocs, + SmallVectorImpl<VLocTracker> &AllTheVLocs, + MachineFunction &MF, const TargetPassConfig &TPC); bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree, TargetPassConfig *TPC, unsigned InputBBLimit, @@ -1473,6 +1525,11 @@ public: } std::optional<LocIdx> findLocationForMemOperand(const MachineInstr &MI); + + // Utility for unit testing, don't use directly. 
+ DebugVariableMap &getDVMap() { + return DVMap; + } }; } // namespace LiveDebugValues diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp index bf730be00a9a..e146fb7e5768 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp @@ -86,7 +86,7 @@ /// lookup the VarLoc in the VarLocMap. Rather than operate directly on machine /// locations, the dataflow analysis in this pass identifies locations by their /// indices in the VarLocMap, meaning all the variable locations in a block can -/// be described by a sparse vector of VarLocMap indicies. +/// be described by a sparse vector of VarLocMap indices. /// /// All the storage for the dataflow analysis is local to the ExtendRanges /// method and passed down to helper methods. "OutLocs" and "InLocs" record the diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp index 7cb90af5ff17..d1341f116a54 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -18,7 +18,7 @@ // //===----------------------------------------------------------------------===// -#include "LiveDebugVariables.h" +#include "llvm/CodeGen/LiveDebugVariables.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IntervalMap.h" @@ -78,14 +78,14 @@ char LiveDebugVariables::ID = 0; INITIALIZE_PASS_BEGIN(LiveDebugVariables, DEBUG_TYPE, "Debug Variable Analysis", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_END(LiveDebugVariables, DEBUG_TYPE, "Debug Variable Analysis", false, false) void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<MachineDominatorTree>(); - AU.addRequiredTransitive<LiveIntervals>(); + AU.addRequired<MachineDominatorTreeWrapperPass>(); + AU.addRequiredTransitive<LiveIntervalsWrapperPass>(); AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -764,9 +764,9 @@ void LDVImpl::print(raw_ostream &OS) { #endif void UserValue::mapVirtRegs(LDVImpl *LDV) { - for (unsigned i = 0, e = locations.size(); i != e; ++i) - if (locations[i].isReg() && locations[i].getReg().isVirtual()) - LDV->mapVirtReg(locations[i].getReg(), this); + for (const MachineOperand &MO : locations) + if (MO.isReg() && MO.getReg().isVirtual()) + LDV->mapVirtReg(MO.getReg(), this); } UserValue * @@ -1254,16 +1254,16 @@ void LDVImpl::computeIntervals() { LexicalScopes LS; LS.initialize(*MF); - for (unsigned i = 0, e = userValues.size(); i != e; ++i) { - userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS, LS); - userValues[i]->mapVirtRegs(this); + for (const auto &UV : userValues) { + UV->computeIntervals(MF->getRegInfo(), *TRI, *LIS, LS); + UV->mapVirtRegs(this); } } bool LDVImpl::runOnMachineFunction(MachineFunction &mf, bool InstrRef) { clear(); MF = &mf; - LIS = &pass.getAnalysis<LiveIntervals>(); + LIS = &pass.getAnalysis<LiveIntervalsWrapperPass>().getLIS(); TRI = mf.getSubtarget().getRegisterInfo(); LLVM_DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: " << mf.getName() << " **********\n"); diff --git 
a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h deleted file mode 100644 index 9998ce9e8dad..000000000000 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h +++ /dev/null @@ -1,68 +0,0 @@ -//===- LiveDebugVariables.h - Tracking debug info variables -----*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file provides the interface to the LiveDebugVariables analysis. -// -// The analysis removes DBG_VALUE instructions for virtual registers and tracks -// live user variables in a data structure that can be updated during register -// allocation. -// -// After register allocation new DBG_VALUE instructions are emitted to reflect -// the new locations of user variables. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H -#define LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H - -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Support/Compiler.h" - -namespace llvm { - -template <typename T> class ArrayRef; -class LiveIntervals; -class VirtRegMap; - -class LLVM_LIBRARY_VISIBILITY LiveDebugVariables : public MachineFunctionPass { - void *pImpl = nullptr; - -public: - static char ID; // Pass identification, replacement for typeid - - LiveDebugVariables(); - ~LiveDebugVariables() override; - - /// splitRegister - Move any user variables in OldReg to the live ranges in - /// NewRegs where they are live. Mark the values as unavailable where no new - /// register is live. - void splitRegister(Register OldReg, ArrayRef<Register> NewRegs, - LiveIntervals &LIS); - - /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes - /// that happened during register allocation. - /// @param VRM Rename virtual registers according to map. - void emitDebugValues(VirtRegMap *VRM); - - /// dump - Print data structures to dbgs(). 
- void dump() const; - -private: - bool runOnMachineFunction(MachineFunction &) override; - void releaseMemory() override; - void getAnalysisUsage(AnalysisUsage &) const override; - - MachineFunctionProperties getSetProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::TracksDebugUserValues); - } -}; - -} // end namespace llvm - -#endif // LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp index 68fff9bc221d..33270807f260 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp @@ -57,14 +57,39 @@ using namespace llvm; #define DEBUG_TYPE "regalloc" -char LiveIntervals::ID = 0; -char &llvm::LiveIntervalsID = LiveIntervals::ID; -INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", "Live Interval Analysis", - false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_END(LiveIntervals, "liveintervals", - "Live Interval Analysis", false, false) +AnalysisKey LiveIntervalsAnalysis::Key; + +LiveIntervalsAnalysis::Result +LiveIntervalsAnalysis::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + return Result(MF, MFAM.getResult<SlotIndexesAnalysis>(MF), + MFAM.getResult<MachineDominatorTreeAnalysis>(MF)); +} + +PreservedAnalyses +LiveIntervalsPrinterPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + OS << "Live intervals for machine function: " << MF.getName() << ":\n"; + MFAM.getResult<LiveIntervalsAnalysis>(MF).print(OS); + return PreservedAnalyses::all(); +} + +char LiveIntervalsWrapperPass::ID = 0; +char &llvm::LiveIntervalsID = LiveIntervalsWrapperPass::ID; +INITIALIZE_PASS_BEGIN(LiveIntervalsWrapperPass, "liveintervals", + "Live Interval Analysis", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) +INITIALIZE_PASS_END(LiveIntervalsWrapperPass, "liveintervals", + "Live Interval Analysis", false, false) + +bool LiveIntervalsWrapperPass::runOnMachineFunction(MachineFunction &MF) { + LIS.Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI(); + LIS.DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); + LIS.analyze(MF); + LLVM_DEBUG(dump()); + return false; +} #ifndef NDEBUG static cl::opt<bool> EnablePrecomputePhysRegs( @@ -83,24 +108,24 @@ cl::opt<bool> UseSegmentSetForPhysRegs( } // end namespace llvm -void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { +void LiveIntervalsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addPreserved<LiveVariables>(); + AU.addPreserved<LiveVariablesWrapperPass>(); AU.addPreservedID(MachineLoopInfoID); AU.addRequiredTransitiveID(MachineDominatorsID); AU.addPreservedID(MachineDominatorsID); - AU.addPreserved<SlotIndexes>(); - AU.addRequiredTransitive<SlotIndexes>(); + AU.addPreserved<SlotIndexesWrapperPass>(); + AU.addRequiredTransitive<SlotIndexesWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } -LiveIntervals::LiveIntervals() : MachineFunctionPass(ID) { - initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); +LiveIntervalsWrapperPass::LiveIntervalsWrapperPass() : MachineFunctionPass(ID) { + initializeLiveIntervalsWrapperPassPass(*PassRegistry::getPassRegistry()); } -LiveIntervals::~LiveIntervals() { delete LICalc; } +LiveIntervals::~LiveIntervals() { 
clear(); } -void LiveIntervals::releaseMemory() { +void LiveIntervals::clear() { // Free the live intervals themselves. for (unsigned i = 0, e = VirtRegIntervals.size(); i != e; ++i) delete VirtRegIntervals[Register::index2VirtReg(i)]; @@ -117,16 +142,14 @@ void LiveIntervals::releaseMemory() { VNInfoAllocator.Reset(); } -bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { +void LiveIntervals::analyze(MachineFunction &fn) { MF = &fn; MRI = &MF->getRegInfo(); TRI = MF->getSubtarget().getRegisterInfo(); TII = MF->getSubtarget().getInstrInfo(); - Indexes = &getAnalysis<SlotIndexes>(); - DomTree = &getAnalysis<MachineDominatorTree>(); if (!LICalc) - LICalc = new LiveIntervalCalc(); + LICalc = std::make_unique<LiveIntervalCalc>(); // Allocate space for all virtual registers. VirtRegIntervals.resize(MRI->getNumVirtRegs()); @@ -141,11 +164,9 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i) getRegUnit(i); } - LLVM_DEBUG(dump()); - return false; } -void LiveIntervals::print(raw_ostream &OS, const Module* ) const { +void LiveIntervals::print(raw_ostream &OS) const { OS << "********** INTERVALS **********\n"; // Dump the regunits. @@ -179,6 +200,10 @@ LLVM_DUMP_METHOD void LiveIntervals::dumpInstrs() const { } #endif +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void LiveIntervals::dump() const { print(dbgs()); } +#endif + LiveInterval *LiveIntervals::createInterval(Register reg) { float Weight = reg.isPhysical() ? huge_valf : 0.0F; return new LiveInterval(reg, Weight); @@ -1536,8 +1561,7 @@ void LiveIntervals::handleMoveIntoNewBundle(MachineInstr &BundleStart, // Fix up dead defs const SlotIndex Index = getInstructionIndex(BundleStart); - for (unsigned Idx = 0, E = BundleStart.getNumOperands(); Idx != E; ++Idx) { - MachineOperand &MO = BundleStart.getOperand(Idx); + for (MachineOperand &MO : BundleStart.operands()) { if (!MO.isReg()) continue; Register Reg = MO.getReg(); @@ -1666,13 +1690,27 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.getReg().isVirtual()) { Register Reg = MO.getReg(); - // If the new instructions refer to subregs but the old instructions did - // not, throw away any old live interval so it will be recomputed with - // subranges. if (MO.getSubReg() && hasInterval(Reg) && - !getInterval(Reg).hasSubRanges() && - MRI->shouldTrackSubRegLiveness(Reg)) - removeInterval(Reg); + MRI->shouldTrackSubRegLiveness(Reg)) { + LiveInterval &LI = getInterval(Reg); + if (!LI.hasSubRanges()) { + // If the new instructions refer to subregs but the old instructions + // did not, throw away any old live interval so it will be + // recomputed with subranges. + removeInterval(Reg); + } else if (MO.isDef()) { + // Similarly if a subreg def has no precise subrange match then + // assume we need to recompute all subranges. + unsigned SubReg = MO.getSubReg(); + LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg); + if (llvm::none_of(LI.subranges(), + [Mask](LiveInterval::SubRange &SR) { + return SR.LaneMask == Mask; + })) { + removeInterval(Reg); + } + } + } if (!hasInterval(Reg)) { createAndComputeVirtRegInterval(Reg); // Don't bother to repair a freshly calculated live interval. 
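The LiveIntervals hunks above are representative of a pattern this merge applies across CodeGen: the analysis logic becomes a plain result class (LiveIntervals, produced by LiveIntervalsAnalysis under the new pass manager), and the legacy pass manager keeps only a thin LiveIntervalsWrapperPass that calls analyze() and hands out the result. A minimal consumer-side sketch of what downstream legacy passes change to, using only the accessors visible in the diff (ExamplePass itself is hypothetical, and INITIALIZE_PASS registration is omitted for brevity):

// Legacy-PM consumer after the split: require the wrapper pass and
// unwrap the LiveIntervals result with getLIS().
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {
class ExamplePass : public MachineFunctionPass {
public:
  static char ID;
  ExamplePass() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Previously: AU.addRequired<LiveIntervals>();
    AU.addRequired<LiveIntervalsWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Previously: LiveIntervals &LIS = getAnalysis<LiveIntervals>();
    LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
    LIS.print(errs()); // the result object now owns print()/dump()
    return false;      // analysis consumer; nothing modified
  }
};
} // end anonymous namespace

char ExamplePass::ID = 0;

This renaming is why the hunks below touch every getAnalysis<LiveIntervals>() and INITIALIZE_PASS_DEPENDENCY(LiveIntervals) site in LiveRegMatrix, LiveStacks, LiveVariables, and the other files in this directory.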
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp index 643370f0573d..7b7b5459ad7b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -414,7 +414,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { DeadRemats->insert(MI); const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); MI->substituteRegister(Dest, NewLI.reg(), 0, TRI); - MI->getOperand(0).setIsDead(true); + assert(MI->registerDefIsDead(NewLI.reg(), &TRI)); } else { if (TheDelegate) TheDelegate->LRE_WillEraseInstruction(MI); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp index 6df7e5c10862..c8c722359a4c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -38,7 +38,7 @@ STATISTIC(NumUnassigned , "Number of registers unassigned"); char LiveRegMatrix::ID = 0; INITIALIZE_PASS_BEGIN(LiveRegMatrix, "liveregmatrix", "Live Register Matrix", false, false) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_DEPENDENCY(VirtRegMap) INITIALIZE_PASS_END(LiveRegMatrix, "liveregmatrix", "Live Register Matrix", false, false) @@ -47,14 +47,14 @@ LiveRegMatrix::LiveRegMatrix() : MachineFunctionPass(ID) {} void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequiredTransitive<LiveIntervals>(); + AU.addRequiredTransitive<LiveIntervalsWrapperPass>(); AU.addRequiredTransitive<VirtRegMap>(); MachineFunctionPass::getAnalysisUsage(AU); } bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getSubtarget().getRegisterInfo(); - LIS = &getAnalysis<LiveIntervals>(); + LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); VRM = &getAnalysis<VirtRegMap>(); unsigned NumRegUnits = TRI->getNumRegUnits(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveStacks.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveStacks.cpp index 8fc5a929d77b..ae36b2819a35 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveStacks.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveStacks.cpp @@ -23,7 +23,7 @@ using namespace llvm; char LiveStacks::ID = 0; INITIALIZE_PASS_BEGIN(LiveStacks, DEBUG_TYPE, "Live Stack Slot Analysis", false, false) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) INITIALIZE_PASS_END(LiveStacks, DEBUG_TYPE, "Live Stack Slot Analysis", false, false) @@ -31,8 +31,8 @@ char &llvm::LiveStacksID = LiveStacks::ID; void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addPreserved<SlotIndexes>(); - AU.addRequiredTransitive<SlotIndexes>(); + AU.addPreserved<SlotIndexesWrapperPass>(); + AU.addRequiredTransitive<SlotIndexesWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp index b85526cfb380..f17d60dc22dd 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp @@ -41,21 +41,49 @@ #include <algorithm> using namespace llvm; -char LiveVariables::ID = 0; -char &llvm::LiveVariablesID = LiveVariables::ID; -INITIALIZE_PASS_BEGIN(LiveVariables, "livevars", - "Live Variable Analysis", false, false) 
-INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim) -INITIALIZE_PASS_END(LiveVariables, "livevars", - "Live Variable Analysis", false, false) +AnalysisKey LiveVariablesAnalysis::Key; + +LiveVariablesAnalysis::Result +LiveVariablesAnalysis::run(MachineFunction &MF, + MachineFunctionAnalysisManager &) { + return Result(MF); +} + +PreservedAnalyses +LiveVariablesPrinterPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + OS << "Live variables in machine function: " << MF.getName() << '\n'; + MFAM.getResult<LiveVariablesAnalysis>(MF).print(OS); + return PreservedAnalyses::all(); +} +char LiveVariablesWrapperPass::ID = 0; +char &llvm::LiveVariablesID = LiveVariablesWrapperPass::ID; +INITIALIZE_PASS_BEGIN(LiveVariablesWrapperPass, "livevars", + "Live Variable Analysis", false, false) +INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim) +INITIALIZE_PASS_END(LiveVariablesWrapperPass, "livevars", + "Live Variable Analysis", false, false) -void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const { +void LiveVariablesWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(UnreachableMachineBlockElimID); AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } +LiveVariables::LiveVariables(MachineFunction &MF) + : MF(&MF), MRI(&MF.getRegInfo()), TRI(MF.getSubtarget().getRegisterInfo()) { + analyze(MF); +} + +void LiveVariables::print(raw_ostream &OS) const { + for (size_t I = 0, E = VirtRegInfo.size(); I != E; ++I) { + const Register Reg = Register::index2VirtReg(I); + OS << "Virtual register '%" << I << "':\n"; + VirtRegInfo[Reg].print(OS); + } +} + MachineInstr * LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const { for (MachineInstr *MI : Kills) @@ -64,20 +92,22 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const { return nullptr; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const { - dbgs() << " Alive in blocks: "; +void LiveVariables::VarInfo::print(raw_ostream &OS) const { + OS << " Alive in blocks: "; for (unsigned AB : AliveBlocks) - dbgs() << AB << ", "; - dbgs() << "\n Killed by:"; + OS << AB << ", "; + OS << "\n Killed by:"; if (Kills.empty()) - dbgs() << " No instructions.\n"; + OS << " No instructions.\n\n"; else { for (unsigned i = 0, e = Kills.size(); i != e; ++i) - dbgs() << "\n #" << i << ": " << *Kills[i]; - dbgs() << "\n"; + OS << "\n #" << i << ": " << *Kills[i]; + OS << "\n"; } } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const { print(dbgs()); } #endif /// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg. @@ -258,7 +288,7 @@ void LiveVariables::HandlePhysRegUse(Register Reg, MachineInstr &MI) { } } } else if (LastDef && !PhysRegUse[Reg] && - !LastDef->findRegisterDefOperand(Reg)) + !LastDef->findRegisterDefOperand(Reg, /*TRI=*/nullptr)) // Last def defines the super register, add an implicit def of reg. 
LastDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/, true/*IsImp*/)); @@ -361,7 +391,8 @@ bool LiveVariables::HandlePhysRegKill(Register Reg, MachineInstr *MI) { continue; bool NeedDef = true; if (PhysRegDef[Reg] == PhysRegDef[SubReg]) { - MachineOperand *MO = PhysRegDef[Reg]->findRegisterDefOperand(SubReg); + MachineOperand *MO = + PhysRegDef[Reg]->findRegisterDefOperand(SubReg, /*TRI=*/nullptr); if (MO) { NeedDef = false; assert(!MO->isDead()); @@ -388,7 +419,7 @@ bool LiveVariables::HandlePhysRegKill(Register Reg, MachineInstr *MI) { true/*IsImp*/, true/*IsKill*/)); else { MachineOperand *MO = - LastRefOrPartRef->findRegisterDefOperand(Reg, false, false, TRI); + LastRefOrPartRef->findRegisterDefOperand(Reg, TRI, false, false); bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg; // If the last reference is the last def, then it's not used at all. // That is, unless we are currently processing the last reference itself. @@ -396,7 +427,7 @@ bool LiveVariables::HandlePhysRegKill(Register Reg, MachineInstr *MI) { if (NeedEC) { // If we are adding a subreg def and the superreg def is marked early // clobber, add an early clobber marker to the subreg def. - MO = LastRefOrPartRef->findRegisterDefOperand(Reg); + MO = LastRefOrPartRef->findRegisterDefOperand(Reg, /*TRI=*/nullptr); if (MO) MO->setIsEarlyClobber(); } @@ -594,7 +625,7 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, unsigned NumRegs) { HandlePhysRegDef(i, nullptr, Defs); } -bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { +void LiveVariables::analyze(MachineFunction &mf) { MF = &mf; MRI = &mf.getRegInfo(); TRI = MF->getSubtarget().getRegisterInfo(); @@ -648,8 +679,6 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { PhysRegDef.clear(); PhysRegUse.clear(); PHIVarInfo.clear(); - - return false; } void LiveVariables::recomputeForSingleDefVirtReg(Register Reg) { @@ -727,7 +756,7 @@ void LiveVariables::recomputeForSingleDefVirtReg(Register Reg) { if (MI.isPHI()) break; if (MI.readsVirtualRegister(Reg)) { - assert(!MI.killsRegister(Reg)); + assert(!MI.killsRegister(Reg, /*TRI=*/nullptr)); MI.addRegisterKilled(Reg, nullptr); VI.Kills.push_back(&MI); break; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index e491ed12034d..0bb7953efd52 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/LocalStackSlotAllocation.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -71,7 +72,7 @@ namespace { int getFrameIndex() const { return FrameIdx; } }; - class LocalStackSlotPass: public MachineFunctionPass { + class LocalStackSlotImpl { SmallVector<int64_t, 16> LocalOffsets; /// StackObjSet - A set of stack object indexes @@ -87,13 +88,20 @@ namespace { bool insertFrameReferenceRegisters(MachineFunction &Fn); public: + bool runOnMachineFunction(MachineFunction &MF); + }; + + class LocalStackSlotPass : public MachineFunctionPass { + public: static char ID; // Pass identification, replacement for typeid explicit LocalStackSlotPass() : MachineFunctionPass(ID) { initializeLocalStackSlotPassPass(*PassRegistry::getPassRegistry()); } - bool runOnMachineFunction(MachineFunction &MF) override; + bool 
runOnMachineFunction(MachineFunction &MF) override { + return LocalStackSlotImpl().runOnMachineFunction(MF); + } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -103,13 +111,24 @@ namespace { } // end anonymous namespace +PreservedAnalyses +LocalStackSlotAllocationPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &) { + bool Changed = LocalStackSlotImpl().runOnMachineFunction(MF); + if (!Changed) + return PreservedAnalyses::all(); + auto PA = getMachineFunctionPassPreservedAnalyses(); + PA.preserveSet<CFGAnalyses>(); + return PA; +} + char LocalStackSlotPass::ID = 0; char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID; INITIALIZE_PASS(LocalStackSlotPass, DEBUG_TYPE, "Local Stack Slot Allocation", false, false) -bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { +bool LocalStackSlotImpl::runOnMachineFunction(MachineFunction &MF) { MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); unsigned LocalObjectCount = MFI.getObjectIndexEnd(); @@ -139,7 +158,7 @@ bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { } /// AdjustStackOffset - Helper function used to adjust the stack frame offset. -void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, +void LocalStackSlotImpl::AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, int64_t &Offset, bool StackGrowsDown, Align &MaxAlign) { // If the stack grows down, add the object size to find the lowest address. @@ -171,7 +190,7 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, /// AssignProtectedObjSet - Helper function to assign large stack objects (i.e., /// those required to be close to the Stack Protector) to stack offsets. -void LocalStackSlotPass::AssignProtectedObjSet( +void LocalStackSlotImpl::AssignProtectedObjSet( const StackObjSet &UnassignedObjs, SmallSet<int, 16> &ProtectedObjs, MachineFrameInfo &MFI, bool StackGrowsDown, int64_t &Offset, Align &MaxAlign) { @@ -183,7 +202,7 @@ void LocalStackSlotPass::AssignProtectedObjSet( /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the /// abstract stack objects. -void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { +void LocalStackSlotImpl::calculateFrameObjectOffsets(MachineFunction &Fn) { // Loop over all of the stack objects, assigning sequential addresses... MachineFrameInfo &MFI = Fn.getFrameInfo(); const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); @@ -281,7 +300,7 @@ lookupCandidateBaseReg(unsigned BaseReg, return TRI->isFrameOffsetLegal(&MI, BaseReg, Offset); } -bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { +bool LocalStackSlotImpl::insertFrameReferenceRegisters(MachineFunction &Fn) { // Scan the function's instructions looking for frame index references. // For each, ask the target if it wants a virtual base register for it // based on what we can tell it about where the local will end up in the diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp deleted file mode 100644 index cd85bf606989..000000000000 --- a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp +++ /dev/null @@ -1,66 +0,0 @@ -//===-- llvm/CodeGen/LowLevelType.cpp -------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file This file implements the more header-heavy bits of the LLT class to -/// avoid polluting users' namespaces. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/LowLevelType.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -LLT::LLT(MVT VT) { - if (VT.isVector()) { - bool asVector = VT.getVectorMinNumElements() > 1 || VT.isScalableVector(); - init(/*IsPointer=*/false, asVector, /*IsScalar=*/!asVector, - VT.getVectorElementCount(), VT.getVectorElementType().getSizeInBits(), - /*AddressSpace=*/0); - } else if (VT.isValid() && !VT.isScalableTargetExtVT()) { - // Aggregates are no different from real scalars as far as GlobalISel is - // concerned. - init(/*IsPointer=*/false, /*IsVector=*/false, /*IsScalar=*/true, - ElementCount::getFixed(0), VT.getSizeInBits(), /*AddressSpace=*/0); - } else { - IsScalar = false; - IsPointer = false; - IsVector = false; - RawData = 0; - } -} - -void LLT::print(raw_ostream &OS) const { - if (isVector()) { - OS << "<"; - OS << getElementCount() << " x " << getElementType() << ">"; - } else if (isPointer()) - OS << "p" << getAddressSpace(); - else if (isValid()) { - assert(isScalar() && "unexpected type"); - OS << "s" << getScalarSizeInBits(); - } else - OS << "LLT_invalid"; -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void LLT::dump() const { - print(dbgs()); - dbgs() << '\n'; -} -#endif - -const constexpr LLT::BitFieldInfo LLT::ScalarSizeFieldInfo; -const constexpr LLT::BitFieldInfo LLT::PointerSizeFieldInfo; -const constexpr LLT::BitFieldInfo LLT::PointerAddressSpaceFieldInfo; -const constexpr LLT::BitFieldInfo LLT::VectorElementsFieldInfo; -const constexpr LLT::BitFieldInfo LLT::VectorScalableFieldInfo; -const constexpr LLT::BitFieldInfo LLT::VectorSizeFieldInfo; -const constexpr LLT::BitFieldInfo LLT::PointerVectorElementsFieldInfo; -const constexpr LLT::BitFieldInfo LLT::PointerVectorScalableFieldInfo; -const constexpr LLT::BitFieldInfo LLT::PointerVectorSizeFieldInfo; -const constexpr LLT::BitFieldInfo LLT::PointerVectorAddressSpaceFieldInfo; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelTypeUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelTypeUtils.cpp index bc2ea3f05b6d..1602cd99c383 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelTypeUtils.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelTypeUtils.cpp @@ -39,6 +39,9 @@ LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) { return LLT::scalar(SizeInBits); } + if (Ty.isTokenTy()) + return LLT::token(); + return LLT(); } @@ -48,7 +51,7 @@ MVT llvm::getMVTForLLT(LLT Ty) { return MVT::getVectorVT( MVT::getIntegerVT(Ty.getElementType().getSizeInBits()), - Ty.getNumElements()); + Ty.getElementCount()); } EVT llvm::getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp index af0b0a20c856..ec36b669ac01 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp @@ -139,8 +139,7 @@ bool addEmuTlsVar(Module &M, const GlobalVariable *GV) { IntegerType *WordType = DL.getIntPtrType(C); PointerType *InitPtrType = PointerType::getUnqual(C); Type *ElementTypes[4] = 
{WordType, WordType, VoidPtrType, InitPtrType}; - ArrayRef<Type*> ElementTypeArray(ElementTypes, 4); - StructType *EmuTlsVarType = StructType::create(ElementTypeArray); + StructType *EmuTlsVarType = StructType::create(ElementTypes); EmuTlsVar = cast<GlobalVariable>( M.getOrInsertGlobal(EmuTlsVarName, EmuTlsVarType)); copyLinkageVisibility(M, GV, EmuTlsVar); @@ -170,9 +169,7 @@ bool addEmuTlsVar(Module &M, const GlobalVariable *GV) { ConstantInt::get(WordType, DL.getTypeStoreSize(GVType)), ConstantInt::get(WordType, GVAlignment.value()), NullPtr, EmuTlsTmplVar ? EmuTlsTmplVar : NullPtr}; - ArrayRef<Constant*> ElementValueArray(ElementValues, 4); - EmuTlsVar->setInitializer( - ConstantStruct::get(EmuTlsVarType, ElementValueArray)); + EmuTlsVar->setInitializer(ConstantStruct::get(EmuTlsVarType, ElementValues)); Align MaxAlignment = std::max(DL.getABITypeAlign(WordType), DL.getABITypeAlign(VoidPtrType)); EmuTlsVar->setAlignment(MaxAlignment); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 870611248466..0809f88fde56 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -212,7 +212,10 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("reassoc", MIToken::kw_reassoc) .Case("nuw", MIToken::kw_nuw) .Case("nsw", MIToken::kw_nsw) + .Case("nusw", MIToken::kw_nusw) .Case("exact", MIToken::kw_exact) + .Case("nneg", MIToken::kw_nneg) + .Case("disjoint", MIToken::kw_disjoint) .Case("nofpexcept", MIToken::kw_nofpexcept) .Case("unpredictable", MIToken::kw_unpredictable) .Case("debug-location", MIToken::kw_debug_location) @@ -239,6 +242,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("intrinsic", MIToken::kw_intrinsic) .Case("target-index", MIToken::kw_target_index) .Case("half", MIToken::kw_half) + .Case("bfloat", MIToken::kw_bfloat) .Case("float", MIToken::kw_float) .Case("double", MIToken::kw_double) .Case("x86_fp80", MIToken::kw_x86_fp80) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h index 0f344da52182..22547483a8a8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -69,11 +69,14 @@ struct MIToken { kw_contract, kw_afn, kw_reassoc, + kw_nusw, kw_nuw, kw_nsw, kw_exact, kw_nofpexcept, kw_unpredictable, + kw_nneg, + kw_disjoint, kw_debug_location, kw_debug_instr_number, kw_dbg_instr_ref, @@ -97,6 +100,7 @@ struct MIToken { kw_intrinsic, kw_target_index, kw_half, + kw_bfloat, kw_float, kw_double, kw_x86_fp80, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp index ede4291fe26d..1d16729aa338 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -24,7 +24,6 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" -#include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MIRFormatter.h" #include "llvm/CodeGen/MIRPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -41,6 +40,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGenTypes/LowLevelType.h" #include 
"llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -1471,7 +1471,9 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Token.is(MIToken::kw_exact) || Token.is(MIToken::kw_nofpexcept) || Token.is(MIToken::kw_noconvergent) || - Token.is(MIToken::kw_unpredictable)) { + Token.is(MIToken::kw_unpredictable) || + Token.is(MIToken::kw_nneg) || + Token.is(MIToken::kw_disjoint)) { // clang-format on // Mine frame and fast math flags if (Token.is(MIToken::kw_frame_setup)) @@ -1504,6 +1506,10 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Flags |= MachineInstr::Unpredictable; if (Token.is(MIToken::kw_noconvergent)) Flags |= MachineInstr::NoConvergent; + if (Token.is(MIToken::kw_nneg)) + Flags |= MachineInstr::NonNeg; + if (Token.is(MIToken::kw_disjoint)) + Flags |= MachineInstr::Disjoint; lex(); } @@ -1919,10 +1925,13 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) { if (Token.range().front() == 's') { auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue(); - if (!verifyScalarSize(ScalarSize)) - return error("invalid size for scalar type"); - - Ty = LLT::scalar(ScalarSize); + if (ScalarSize) { + if (!verifyScalarSize(ScalarSize)) + return error("invalid size for scalar type"); + Ty = LLT::scalar(ScalarSize); + } else { + Ty = LLT::token(); + } lex(); return false; } else if (Token.range().front() == 'p') { @@ -1980,7 +1989,7 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) { if (Token.range().front() == 's') { auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue(); if (!verifyScalarSize(ScalarSize)) - return error("invalid size for scalar type"); + return error("invalid size for scalar element in vector"); Ty = LLT::scalar(ScalarSize); } else if (Token.range().front() == 'p') { const DataLayout &DL = MF.getDataLayout(); @@ -2181,10 +2190,10 @@ static bool parseGlobalValue(const MIToken &Token, unsigned GVIdx; if (getUnsigned(Token, GVIdx, ErrCB)) return true; - if (GVIdx >= PFS.IRSlots.GlobalValues.size()) + GV = PFS.IRSlots.GlobalValues.get(GVIdx); + if (!GV) return ErrCB(Token.location(), Twine("use of undefined global value '@") + Twine(GVIdx) + "'"); - GV = PFS.IRSlots.GlobalValues[GVIdx]; break; } default: @@ -2293,48 +2302,14 @@ bool MIParser::parseMDNode(MDNode *&Node) { } bool MIParser::parseDIExpression(MDNode *&Expr) { - assert(Token.is(MIToken::md_diexpr)); + unsigned Read; + Expr = llvm::parseDIExpressionBodyAtBeginning( + CurrentSource, Read, Error, *PFS.MF.getFunction().getParent(), + &PFS.IRSlots); + CurrentSource = CurrentSource.slice(Read, StringRef::npos); lex(); - - // FIXME: Share this parsing with the IL parser. 
- SmallVector<uint64_t, 8> Elements; - - if (expectAndConsume(MIToken::lparen)) - return true; - - if (Token.isNot(MIToken::rparen)) { - do { - if (Token.is(MIToken::Identifier)) { - if (unsigned Op = dwarf::getOperationEncoding(Token.stringValue())) { - lex(); - Elements.push_back(Op); - continue; - } - if (unsigned Enc = dwarf::getAttributeEncoding(Token.stringValue())) { - lex(); - Elements.push_back(Enc); - continue; - } - return error(Twine("invalid DWARF op '") + Token.stringValue() + "'"); - } - - if (Token.isNot(MIToken::IntegerLiteral) || - Token.integerValue().isSigned()) - return error("expected unsigned integer"); - - auto &U = Token.integerValue(); - if (U.ugt(UINT64_MAX)) - return error("element too large, limit is " + Twine(UINT64_MAX)); - Elements.push_back(U.getZExtValue()); - lex(); - - } while (consumeIfPresent(MIToken::comma)); - } - - if (expectAndConsume(MIToken::rparen)) - return true; - - Expr = DIExpression::get(MF.getFunction().getContext(), Elements); + if (!Expr) + return error(Error.getMessage()); return false; } @@ -2907,6 +2882,7 @@ bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx, case MIToken::IntegerLiteral: return parseImmediateOperand(Dest); case MIToken::kw_half: + case MIToken::kw_bfloat: case MIToken::kw_float: case MIToken::kw_double: case MIToken::kw_x86_fp80: diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 78d7e62797ce..a5d6a40392d0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetFrameLowering.h" @@ -97,13 +98,15 @@ public: /// Create an empty function with the given name. Function *createDummyFunction(StringRef Name, Module &M); - bool parseMachineFunctions(Module &M, MachineModuleInfo &MMI); + bool parseMachineFunctions(Module &M, MachineModuleInfo &MMI, + ModuleAnalysisManager *FAM = nullptr); /// Parse the machine function in the current YAML document. /// /// /// Return true if an error occurred. - bool parseMachineFunction(Module &M, MachineModuleInfo &MMI); + bool parseMachineFunction(Module &M, MachineModuleInfo &MMI, + ModuleAnalysisManager *FAM); /// Initialize the machine function to the state that's described in the MIR /// file. @@ -275,13 +278,14 @@ MIRParserImpl::parseIRModule(DataLayoutCallbackTy DataLayoutCallback) { return M; } -bool MIRParserImpl::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) { +bool MIRParserImpl::parseMachineFunctions(Module &M, MachineModuleInfo &MMI, + ModuleAnalysisManager *MAM) { if (NoMIRDocuments) return false; // Parse the machine functions. do { - if (parseMachineFunction(M, MMI)) + if (parseMachineFunction(M, MMI, MAM)) return true; In.nextDocument(); } while (In.setCurrentDocument()); @@ -303,7 +307,8 @@ Function *MIRParserImpl::createDummyFunction(StringRef Name, Module &M) { return F; } -bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) { +bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI, + ModuleAnalysisManager *MAM) { // Parse the yaml. 
yaml::MachineFunction YamlMF; yaml::EmptyContext Ctx; @@ -327,14 +332,28 @@ bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) { "' isn't defined in the provided LLVM IR"); } } - if (MMI.getMachineFunction(*F) != nullptr) - return error(Twine("redefinition of machine function '") + FunctionName + - "'"); - // Create the MachineFunction. - MachineFunction &MF = MMI.getOrCreateMachineFunction(*F); - if (initializeMachineFunction(YamlMF, MF)) - return true; + if (!MAM) { + if (MMI.getMachineFunction(*F) != nullptr) + return error(Twine("redefinition of machine function '") + FunctionName + + "'"); + + // Create the MachineFunction. + MachineFunction &MF = MMI.getOrCreateMachineFunction(*F); + if (initializeMachineFunction(YamlMF, MF)) + return true; + } else { + auto &FAM = + MAM->getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + if (FAM.getCachedResult<MachineFunctionAnalysis>(*F)) + return error(Twine("redefinition of machine function '") + FunctionName + + "'"); + + // Create the MachineFunction. + MachineFunction &MF = FAM.getResult<MachineFunctionAnalysis>(*F).getMF(); + if (initializeMachineFunction(YamlMF, MF)) + return true; + } return false; } @@ -425,11 +444,11 @@ bool MIRParserImpl::initializeCallSiteInfo( Register Reg; if (parseNamedRegisterReference(PFS, Reg, ArgRegPair.Reg.Value, Error)) return error(Error, ArgRegPair.Reg.SourceRange); - CSInfo.emplace_back(Reg, ArgRegPair.ArgNo); + CSInfo.ArgRegPairs.emplace_back(Reg, ArgRegPair.ArgNo); } if (TM.Options.EmitCallSiteInfo) - MF.addCallArgsForwardingRegs(&*CallI, std::move(CSInfo)); + MF.addCallSiteInfo(&*CallI, std::move(CSInfo)); } if (YamlMF.CallSitesInfo.size() && !TM.Options.EmitCallSiteInfo) @@ -574,7 +593,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, // FIXME: This is a temporary workaround until the reserved registers can be // serialized. 
MachineRegisterInfo &MRI = MF.getRegInfo(); - MRI.freezeReservedRegs(MF); + MRI.freezeReservedRegs(); computeFunctionProperties(MF); @@ -607,7 +626,7 @@ bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS, Twine(VReg.ID.Value) + "'"); Info.Explicit = true; - if (StringRef(VReg.Class.Value).equals("_")) { + if (VReg.Class.Value == "_") { Info.Kind = VRegInfo::GENERIC; Info.D.RegBank = nullptr; } else { @@ -760,6 +779,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, MFI.setHasVAStart(YamlMFI.HasVAStart); MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc); MFI.setHasTailCall(YamlMFI.HasTailCall); + MFI.setCalleeSavedInfoValid(YamlMFI.IsCalleeSavedInfoValid); MFI.setLocalFrameSize(YamlMFI.LocalFrameSize); if (!YamlMFI.SavePoint.Value.empty()) { MachineBasicBlock *MBB = nullptr; @@ -1101,6 +1121,11 @@ bool MIRParser::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) { return Impl->parseMachineFunctions(M, MMI); } +bool MIRParser::parseMachineFunctions(Module &M, ModuleAnalysisManager &MAM) { + auto &MMI = MAM.getResult<MachineModuleAnalysis>(M).getMMI(); + return Impl->parseMachineFunctions(M, MMI, &MAM); +} + std::unique_ptr<MIRParser> llvm::createMIRParserFromFile( StringRef Filename, SMDiagnostic &Error, LLVMContext &Context, std::function<void(Function &)> ProcessIRFunction) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp index fee237104022..48c3e0d7a97e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp @@ -18,7 +18,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -34,6 +33,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGenTypes/LowLevelType.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" @@ -69,6 +69,8 @@ static cl::opt<bool> SimplifyMIR( static cl::opt<bool> PrintLocations("mir-debug-loc", cl::Hidden, cl::init(true), cl::desc("Print MIR debug-locations")); +extern cl::opt<bool> WriteNewDbgInfoFormat; + namespace { /// This structure describes how to print out stack object references. @@ -366,6 +368,7 @@ void MIRPrinter::convert(ModuleSlotTracker &MST, YamlMFI.HasVAStart = MFI.hasVAStart(); YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc(); YamlMFI.HasTailCall = MFI.hasTailCall(); + YamlMFI.IsCalleeSavedInfoValid = MFI.isCalleeSavedInfoValid(); YamlMFI.LocalFrameSize = MFI.getLocalFrameSize(); if (MFI.getSavePoint()) { raw_string_ostream StrOS(YamlMFI.SavePoint.Value); @@ -540,7 +543,7 @@ void MIRPrinter::convertCallSiteObjects(yaml::MachineFunction &YMF, std::distance(CallI->getParent()->instr_begin(), CallI); YmlCS.CallLocation = CallLocation; // Construct call arguments and theirs forwarding register info. 
- for (auto ArgReg : CSInfo.second) { + for (auto ArgReg : CSInfo.second.ArgRegPairs) { yaml::CallSiteInfo::ArgRegPair YmlArgReg; YmlArgReg.ArgNo = ArgReg.ArgNo; printRegMIR(ArgReg.Reg, YmlArgReg.Reg, TRI); @@ -567,7 +570,7 @@ void MIRPrinter::convertMachineMetadataNodes(yaml::MachineFunction &YMF, std::string NS; raw_string_ostream StrOS(NS); MD.second->print(StrOS, MST, MF.getFunction().getParent()); - YMF.MachineMetadataNodes.push_back(StrOS.str()); + YMF.MachineMetadataNodes.push_back(NS); } } @@ -585,7 +588,7 @@ void MIRPrinter::convert(yaml::MachineFunction &MF, yaml::MachineConstantPoolValue YamlConstant; YamlConstant.ID = ID++; - YamlConstant.Value = StrOS.str(); + YamlConstant.Value = Str; YamlConstant.Alignment = Constant.getAlign(); YamlConstant.IsTargetSpecific = Constant.isMachineConstantPoolEntry(); @@ -605,7 +608,7 @@ void MIRPrinter::convert(ModuleSlotTracker &MST, for (const auto *MBB : Table.MBBs) { raw_string_ostream StrOS(Str); StrOS << printMBBReference(*MBB); - Entry.Blocks.push_back(StrOS.str()); + Entry.Blocks.push_back(Str); Str.clear(); } YamlJTI.Entries.push_back(Entry); @@ -694,7 +697,9 @@ void MIPrinter::print(const MachineBasicBlock &MBB) { // fallthrough. if ((!MBB.succ_empty() && !SimplifyMIR) || !canPredictProbs || !canPredictSuccessors(MBB)) { - OS.indent(2) << "successors: "; + OS.indent(2) << "successors:"; + if (!MBB.succ_empty()) + OS << " "; for (auto I = MBB.succ_begin(), E = MBB.succ_end(); I != E; ++I) { if (I != MBB.succ_begin()) OS << ", "; @@ -726,11 +731,10 @@ void MIPrinter::print(const MachineBasicBlock &MBB) { HasLineAttributes = true; } - if (HasLineAttributes) + if (HasLineAttributes && !MBB.empty()) OS << "\n"; bool IsInBundle = false; - for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; ++I) { - const MachineInstr &MI = *I; + for (const MachineInstr &MI : MBB.instrs()) { if (IsInBundle && !MI.isInsideBundle()) { OS.indent(2) << "}\n"; IsInBundle = false; @@ -805,6 +809,12 @@ void MIPrinter::print(const MachineInstr &MI) { OS << "unpredictable "; if (MI.getFlag(MachineInstr::NoConvergent)) OS << "noconvergent "; + if (MI.getFlag(MachineInstr::NonNeg)) + OS << "nneg "; + if (MI.getFlag(MachineInstr::Disjoint)) + OS << "disjoint "; + if (MI.getFlag(MachineInstr::NoUSWrap)) + OS << "nusw "; OS << TII->getName(MI.getOpcode()); if (I < E) @@ -849,6 +859,13 @@ void MIPrinter::print(const MachineInstr &MI) { PCSections->printAsOperand(OS, MST); NeedComma = true; } + if (MDNode *MMRA = MI.getMMRAMetadata()) { + if (NeedComma) + OS << ','; + OS << " mmra "; + MMRA->printAsOperand(OS, MST); + NeedComma = true; + } if (uint32_t CFIType = MI.getCFIType()) { if (NeedComma) OS << ','; @@ -981,29 +998,19 @@ void MIRFormatter::printIRValue(raw_ostream &OS, const Value &V, } void llvm::printMIR(raw_ostream &OS, const Module &M) { - // RemoveDIs: as there's no textual form for DPValues yet, print debug-info - // in dbg.value format. - bool IsNewDbgInfoFormat = M.IsNewDbgInfoFormat; - if (IsNewDbgInfoFormat) - const_cast<Module &>(M).convertFromNewDbgValues(); + ScopedDbgInfoFormatSetter FormatSetter(const_cast<Module &>(M), + WriteNewDbgInfoFormat); yaml::Output Out(OS); Out << const_cast<Module &>(M); - - if (IsNewDbgInfoFormat) - const_cast<Module &>(M).convertToNewDbgValues(); } void llvm::printMIR(raw_ostream &OS, const MachineFunction &MF) { - // RemoveDIs: as there's no textual form for DPValues yet, print debug-info + // RemoveDIs: as there's no textual form for DbgRecords yet, print debug-info // in dbg.value format. 
- bool IsNewDbgInfoFormat = MF.getFunction().IsNewDbgInfoFormat; - if (IsNewDbgInfoFormat) - const_cast<Function &>(MF.getFunction()).convertFromNewDbgValues(); + ScopedDbgInfoFormatSetter FormatSetter( + const_cast<Function &>(MF.getFunction()), WriteNewDbgInfoFormat); MIRPrinter Printer(OS); Printer.print(MF); - - if (IsNewDbgInfoFormat) - const_cast<Function &>(MF.getFunction()).convertToNewDbgValues(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrintingPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrintingPass.cpp index 1b5a9ade0871..f70c0731ffaf 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrintingPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrintingPass.cpp @@ -15,11 +15,20 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/InitializePasses.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" using namespace llvm; +PreservedAnalyses PrintMIRPreparePass::run(Module &M, ModuleAnalysisManager &) { + printMIR(OS, M); + return PreservedAnalyses::all(); +} + +PreservedAnalyses PrintMIRPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &) { + printMIR(OS, MF); + return PreservedAnalyses::all(); +} + namespace { /// This pass prints out the LLVM IR to an output stream using the MIR @@ -43,7 +52,7 @@ struct MIRPrintingPass : public MachineFunctionPass { std::string Str; raw_string_ostream StrOS(Str); printMIR(StrOS, MF); - MachineFunctions.append(StrOS.str()); + MachineFunctions.append(Str); return false; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp index 42d0aba4b166..ce82f280c1c5 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp @@ -69,10 +69,10 @@ char MIRProfileLoaderPass::ID = 0; INITIALIZE_PASS_BEGIN(MIRProfileLoaderPass, DEBUG_TYPE, "Load MIR Sample Profile", /* cfg = */ false, /* is_analysis = */ false) -INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) INITIALIZE_PASS_END(MIRProfileLoaderPass, DEBUG_TYPE, "Load MIR Sample Profile", /* cfg = */ false, /* is_analysis = */ false) @@ -363,26 +363,28 @@ bool MIRProfileLoaderPass::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Func: " << MF.getFunction().getName() << "\n"); - MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(); MIRSampleLoader->setInitVals( - &getAnalysis<MachineDominatorTree>(), - &getAnalysis<MachinePostDominatorTree>(), &getAnalysis<MachineLoopInfo>(), - MBFI, &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE()); + &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(), + &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree(), + &getAnalysis<MachineLoopInfoWrapperPass>().getLI(), MBFI, + &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE()); MF.RenumberBlocks(); if (ViewBFIBefore && ViewBlockLayoutWithBFI != 
GVDT_None && (ViewBlockFreqFuncName.empty() || - MF.getFunction().getName().equals(ViewBlockFreqFuncName))) { + MF.getFunction().getName() == ViewBlockFreqFuncName)) { MBFI->view("MIR_Prof_loader_b." + MF.getName(), false); } bool Changed = MIRSampleLoader->runOnFunction(MF); if (Changed) - MBFI->calculate(MF, *MBFI->getMBPI(), *&getAnalysis<MachineLoopInfo>()); + MBFI->calculate(MF, *MBFI->getMBPI(), + *&getAnalysis<MachineLoopInfoWrapperPass>().getLI()); if (ViewBFIAfter && ViewBlockLayoutWithBFI != GVDT_None && (ViewBlockFreqFuncName.empty() || - MF.getFunction().getName().equals(ViewBlockFreqFuncName))) { + MF.getFunction().getName() == ViewBlockFreqFuncName)) { MBFI->view("MIR_prof_loader_a." + MF.getName(), false); } @@ -399,10 +401,10 @@ bool MIRProfileLoaderPass::doInitialization(Module &M) { void MIRProfileLoaderPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired<MachineBlockFrequencyInfo>(); - AU.addRequired<MachineDominatorTree>(); - AU.addRequired<MachinePostDominatorTree>(); - AU.addRequiredTransitive<MachineLoopInfo>(); + AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); + AU.addRequired<MachineDominatorTreeWrapperPass>(); + AU.addRequired<MachinePostDominatorTreeWrapperPass>(); + AU.addRequiredTransitive<MachineLoopInfoWrapperPass>(); AU.addRequired<MachineOptimizationRemarkEmitterPass>(); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp index 812d57984e6c..ccfc4565d3a9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp @@ -123,7 +123,7 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) { llvm::transform(MI.uses(), std::back_inserter(MIOperands), GetHashableMO); for (const auto *Op : MI.memoperands()) { - MIOperands.push_back((unsigned)Op->getSize()); + MIOperands.push_back((unsigned)Op->getSize().getValue()); MIOperands.push_back((unsigned)Op->getFlags()); MIOperands.push_back((unsigned)Op->getOffset()); MIOperands.push_back((unsigned)Op->getSuccessOrdering()); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp index 114e7910dc27..4f0fab8e58bf 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/PassRegistry.h" @@ -109,7 +110,7 @@ public: AU.setPreservesAll(); AU.addRequired<RegAllocEvictionAdvisorAnalysis>(); AU.addRequired<RegAllocPriorityAdvisorAnalysis>(); - AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -212,7 +213,7 @@ static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences}; M(float, mbb_frequencies, MBBFrequencyShape, \ "A vector of machine basic block frequencies") \ M(int64_t, mbb_mapping, InstructionsShape, \ - "A vector of indicies mapping instructions to MBBs") + "A vector of indices mapping instructions to MBBs") #else #define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M) #define RA_EVICT_REST_DEVELOPMENT_FEATURES(M) @@ -387,8 +388,8 @@ private: 
std::vector<TensorSpec> InputFeatures; void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<MachineBlockFrequencyInfo>(); - AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); + AU.addRequired<MachineLoopInfoWrapperPass>(); RegAllocEvictionAdvisorAnalysis::getAnalysisUsage(AU); } @@ -405,8 +406,9 @@ private: InteractiveChannelBaseName + ".in"); } return std::make_unique<MLEvictAdvisor>( - MF, RA, Runner.get(), getAnalysis<MachineBlockFrequencyInfo>(), - getAnalysis<MachineLoopInfo>()); + MF, RA, Runner.get(), + getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(), + getAnalysis<MachineLoopInfoWrapperPass>().getLI()); } std::unique_ptr<MLModelRunner> Runner; }; @@ -494,8 +496,8 @@ private: std::vector<TensorSpec> TrainingInputFeatures; void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<MachineBlockFrequencyInfo>(); - AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); + AU.addRequired<MachineLoopInfoWrapperPass>(); RegAllocEvictionAdvisorAnalysis::getAnalysisUsage(AU); } @@ -543,8 +545,9 @@ private: if (Log) Log->switchContext(MF.getName()); return std::make_unique<DevelopmentModeEvictAdvisor>( - MF, RA, Runner.get(), getAnalysis<MachineBlockFrequencyInfo>(), - getAnalysis<MachineLoopInfo>(), Log.get()); + MF, RA, Runner.get(), + getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(), + getAnalysis<MachineLoopInfoWrapperPass>().getLI(), Log.get()); } std::unique_ptr<MLModelRunner> Runner; @@ -1138,7 +1141,8 @@ bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) { auto GetReward = [&]() { if (!CachedReward) CachedReward = static_cast<float>( - calculateRegAllocScore(MF, getAnalysis<MachineBlockFrequencyInfo>()) + calculateRegAllocScore( + MF, getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()) .getScore()); return *CachedReward; }; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.h b/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.h index e36a41154096..0213801cd61b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.h @@ -17,6 +17,7 @@ #include "llvm/Analysis/MLModelRunner.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/SlotIndexes.h" +#include <map> using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocPriorityAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocPriorityAdvisor.cpp index 422781593a9c..9638df81770c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocPriorityAdvisor.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocPriorityAdvisor.cpp @@ -37,6 +37,7 @@ #include "llvm/Analysis/ModelUnderTrainingRunner.h" #include "llvm/Analysis/NoInferenceModelRunner.h" #include "llvm/Analysis/Utils/TrainingLogger.h" +#include "llvm/IR/Module.h" #endif using namespace llvm; @@ -133,7 +134,7 @@ public: private: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); - AU.addRequired<SlotIndexes>(); + AU.addRequired<SlotIndexesWrapperPass>(); RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU); } @@ -150,7 +151,7 @@ private: InteractiveChannelBaseName + ".in"); } return std::make_unique<MLPriorityAdvisor>( - MF, RA, &getAnalysis<SlotIndexes>(), Runner.get()); + MF, RA, &getAnalysis<SlotIndexesWrapperPass>().getSI(), Runner.get()); } std::unique_ptr<MLModelRunner> Runner; }; @@ -214,7 +215,7 @@ public: 
private: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); - AU.addRequired<SlotIndexes>(); + AU.addRequired<SlotIndexesWrapperPass>(); RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU); } @@ -265,7 +266,8 @@ private: } return std::make_unique<DevelopmentModePriorityAdvisor>( - MF, RA, &getAnalysis<SlotIndexes>(), Runner.get(), Log.get()); + MF, RA, &getAnalysis<SlotIndexesWrapperPass>().getSI(), Runner.get(), + Log.get()); } std::unique_ptr<MLModelRunner> Runner; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp index 4410fb7ecd23..d681d00b5d8c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -80,10 +80,11 @@ MCSymbol *MachineBasicBlock::getSymbol() const { } CachedMCSymbol = Ctx.getOrCreateSymbol(MF->getName() + Suffix); } else { - const StringRef Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix(); - CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" + - Twine(MF->getFunctionNumber()) + - "_" + Twine(getNumber())); + // If the block occurs as label in inline assembly, parsing the assembly + // needs an actual label name => set AlwaysEmit in these cases. + CachedMCSymbol = Ctx.createBlockSymbol( + "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber()), + /*AlwaysEmit=*/hasLabelMustBeEmitted()); } } return CachedMCSymbol; @@ -104,10 +105,9 @@ MCSymbol *MachineBasicBlock::getEndSymbol() const { if (!CachedEndMCSymbol) { const MachineFunction *MF = getParent(); MCContext &Ctx = MF->getContext(); - auto Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix(); - CachedEndMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB_END" + - Twine(MF->getFunctionNumber()) + - "_" + Twine(getNumber())); + CachedEndMCSymbol = Ctx.createBlockSymbol( + "BB_END" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber()), + /*AlwaysEmit=*/false); } return CachedEndMCSymbol; } @@ -315,6 +315,12 @@ bool MachineBasicBlock::isLegalToHoistInto() const { return true; } +bool MachineBasicBlock::hasName() const { + if (const BasicBlock *LBB = getBasicBlock()) + return LBB->hasName(); + return false; +} + StringRef MachineBasicBlock::getName() const { if (const BasicBlock *LBB = getBasicBlock()) return LBB->getName(); @@ -1129,15 +1135,24 @@ public: } }; +#define GET_RESULT(RESULT, GETTER, INFIX) \ + [MF, P, MFAM]() { \ + if (P) { \ + auto *Wrapper = P->getAnalysisIfAvailable<RESULT##INFIX##WrapperPass>(); \ + return Wrapper ? 
&Wrapper->GETTER() : nullptr; \ + } \ + return MFAM->getCachedResult<RESULT##Analysis>(*MF); \ + }() + MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( - MachineBasicBlock *Succ, Pass &P, + MachineBasicBlock *Succ, Pass *P, MachineFunctionAnalysisManager *MFAM, std::vector<SparseBitVector<>> *LiveInSets) { + assert((P || MFAM) && "Need a way to get analysis results!"); if (!canSplitCriticalEdge(Succ)) return nullptr; MachineFunction *MF = getParent(); MachineBasicBlock *PrevFallthrough = getNextNode(); - DebugLoc DL; // FIXME: this is nowhere MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); NMBB->setCallFrameSize(Succ->getCallFrameSize()); @@ -1156,8 +1171,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( << " -- " << printMBBReference(*NMBB) << " -- " << printMBBReference(*Succ) << '\n'); - LiveIntervals *LIS = P.getAnalysisIfAvailable<LiveIntervals>(); - SlotIndexes *Indexes = P.getAnalysisIfAvailable<SlotIndexes>(); + LiveIntervals *LIS = GET_RESULT(LiveIntervals, getLIS, ); + SlotIndexes *Indexes = GET_RESULT(SlotIndexes, getSI, ); if (LIS) LIS->insertMBBInMaps(NMBB); else if (Indexes) @@ -1166,7 +1181,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( // On some targets like Mips, branches may kill virtual registers. Make sure // that LiveVariables is properly updated after updateTerminator replaces the // terminators. - LiveVariables *LV = P.getAnalysisIfAvailable<LiveVariables>(); + LiveVariables *LV = GET_RESULT(LiveVariables, getLV, ); // Collect a list of virtual registers killed by the terminators. SmallVector<Register, 4> KilledRegs; @@ -1218,6 +1233,15 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( SlotIndexUpdateDelegate SlotUpdater(*MF, Indexes); SmallVector<MachineOperand, 4> Cond; const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo(); + + // In original 'this' BB, there must be a branch instruction targeting at + // Succ. We can not find it out since currently getBranchDestBlock was not + // implemented for all targets. However, if the merged DL has column or line + // number, the scope and non-zero column and line number is same with that + // branch instruction so we can safely use it. + DebugLoc DL, MergedDL = findBranchDebugLoc(); + if (MergedDL && (MergedDL.getLine() || MergedDL.getCol())) + DL = MergedDL; TII->insertBranch(*NMBB, Succ, nullptr, Cond, DL); } @@ -1322,24 +1346,23 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs); } - if (MachineDominatorTree *MDT = - P.getAnalysisIfAvailable<MachineDominatorTree>()) + if (auto *MDT = GET_RESULT(MachineDominatorTree, getDomTree, )) MDT->recordSplitCriticalEdge(this, Succ, NMBB); - if (MachineLoopInfo *MLI = P.getAnalysisIfAvailable<MachineLoopInfo>()) + if (MachineLoopInfo *MLI = GET_RESULT(MachineLoop, getLI, Info)) if (MachineLoop *TIL = MLI->getLoopFor(this)) { // If one or the other blocks were not in a loop, the new block is not // either, and thus LI doesn't need to be updated. if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) { if (TIL == DestLoop) { // Both in the same loop, the NMBB joins loop. - DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase()); + DestLoop->addBasicBlockToLoop(NMBB, *MLI); } else if (TIL->contains(DestLoop)) { // Edge from an outer loop to an inner loop. Add to the outer loop. 
- TIL->addBasicBlockToLoop(NMBB, MLI->getBase()); + TIL->addBasicBlockToLoop(NMBB, *MLI); } else if (DestLoop->contains(TIL)) { // Edge from an inner loop to an outer loop. Add to the outer loop. - DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase()); + DestLoop->addBasicBlockToLoop(NMBB, *MLI); } else { // Edge from two loops with no containment relation. Because these // are natural loops, we know that the destination block must be the @@ -1348,7 +1371,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( assert(DestLoop->getHeader() == Succ && "Should not create irreducible loops!"); if (MachineLoop *P = DestLoop->getParentLoop()) - P->addBasicBlockToLoop(NMBB, MLI->getBase()); + P->addBasicBlockToLoop(NMBB, *MLI); } } } @@ -1466,10 +1489,9 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old, // Scan the operands of this machine instruction, replacing any uses of Old // with New. - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) - if (I->getOperand(i).isMBB() && - I->getOperand(i).getMBB() == Old) - I->getOperand(i).setMBB(New); + for (MachineOperand &MO : I->operands()) + if (MO.isMBB() && MO.getMBB() == Old) + MO.setMBB(New); } // Update the successor information. @@ -1720,6 +1742,12 @@ void MachineBasicBlock::clearLiveIns() { LiveIns.clear(); } +void MachineBasicBlock::clearLiveIns( + std::vector<RegisterMaskPair> &OldLiveIns) { + assert(OldLiveIns.empty() && "Vector must be empty"); + std::swap(LiveIns, OldLiveIns); +} + MachineBasicBlock::livein_iterator MachineBasicBlock::livein_begin() const { assert(getParent()->getProperties().hasProperty( MachineFunctionProperties::Property::TracksLiveness) && diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 7ee72e214426..9daacfd39978 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -161,32 +161,69 @@ struct DOTGraphTraits<MachineBlockFrequencyInfo *> } // end namespace llvm -INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, DEBUG_TYPE, +AnalysisKey MachineBlockFrequencyAnalysis::Key; + +MachineBlockFrequencyAnalysis::Result +MachineBlockFrequencyAnalysis::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + auto &MBPI = MFAM.getResult<MachineBranchProbabilityAnalysis>(MF); + auto &MLI = MFAM.getResult<MachineLoopAnalysis>(MF); + return Result(MF, MBPI, MLI); +} + +PreservedAnalyses +MachineBlockFrequencyPrinterPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + auto &MBFI = MFAM.getResult<MachineBlockFrequencyAnalysis>(MF); + OS << "Machine block frequency for machine function: " << MF.getName() + << '\n'; + MBFI.print(OS); + return PreservedAnalyses::all(); +} + +INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfoWrapperPass, DEBUG_TYPE, "Machine Block Frequency Analysis", true, true) -INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(MachineBlockFrequencyInfo, DEBUG_TYPE, +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) +INITIALIZE_PASS_END(MachineBlockFrequencyInfoWrapperPass, DEBUG_TYPE, "Machine Block Frequency Analysis", true, true) -char MachineBlockFrequencyInfo::ID = 0; +char MachineBlockFrequencyInfoWrapperPass::ID = 0; -MachineBlockFrequencyInfo::MachineBlockFrequencyInfo() 
+MachineBlockFrequencyInfoWrapperPass::MachineBlockFrequencyInfoWrapperPass() : MachineFunctionPass(ID) { - initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry()); + initializeMachineBlockFrequencyInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); } +MachineBlockFrequencyInfo::MachineBlockFrequencyInfo() = default; + MachineBlockFrequencyInfo::MachineBlockFrequencyInfo( - MachineFunction &F, - MachineBranchProbabilityInfo &MBPI, - MachineLoopInfo &MLI) : MachineFunctionPass(ID) { + MachineBlockFrequencyInfo &&) = default; + +MachineBlockFrequencyInfo::MachineBlockFrequencyInfo( + MachineFunction &F, MachineBranchProbabilityInfo &MBPI, + MachineLoopInfo &MLI) { calculate(F, MBPI, MLI); } MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() = default; -void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<MachineBranchProbabilityInfo>(); - AU.addRequired<MachineLoopInfo>(); +bool MachineBlockFrequencyInfo::invalidate( + MachineFunction &MF, const PreservedAnalyses &PA, + MachineFunctionAnalysisManager::Invalidator &) { + // Check whether the analysis, all analyses on machine functions, or the + // machine function's CFG have been preserved. + auto PAC = PA.getChecker<MachineBlockFrequencyAnalysis>(); + return !PAC.preserved() && + !PAC.preservedSet<AllAnalysesOn<MachineFunction>>() && + !PAC.preservedSet<CFGAnalyses>(); +} + +void MachineBlockFrequencyInfoWrapperPass::getAnalysisUsage( + AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfoWrapperPass>(); + AU.addRequired<MachineLoopInfoWrapperPass>(); AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -198,24 +235,26 @@ void MachineBlockFrequencyInfo::calculate( MBFI.reset(new ImplType); MBFI->calculate(F, MBPI, MLI); if (ViewMachineBlockFreqPropagationDAG != GVDT_None && - (ViewBlockFreqFuncName.empty() || - F.getName().equals(ViewBlockFreqFuncName))) { + (ViewBlockFreqFuncName.empty() || F.getName() == ViewBlockFreqFuncName)) { view("MachineBlockFrequencyDAGS." 
+ F.getName()); } if (PrintMachineBlockFreq && - (PrintBFIFuncName.empty() || F.getName().equals(PrintBFIFuncName))) { + (PrintBFIFuncName.empty() || F.getName() == PrintBFIFuncName)) { MBFI->print(dbgs()); } } -bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) { +bool MachineBlockFrequencyInfoWrapperPass::runOnMachineFunction( + MachineFunction &F) { MachineBranchProbabilityInfo &MBPI = - getAnalysis<MachineBranchProbabilityInfo>(); - MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); - calculate(F, MBPI, MLI); + getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI(); + MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI(); + MBFI.calculate(F, MBPI, MLI); return false; } +void MachineBlockFrequencyInfo::print(raw_ostream &OS) { MBFI->print(OS); } + void MachineBlockFrequencyInfo::releaseMemory() { MBFI.reset(); } /// Pop up a ghostview window with the current block frequency propagation @@ -280,7 +319,7 @@ BlockFrequency MachineBlockFrequencyInfo::getEntryFreq() const { Printable llvm::printBlockFreq(const MachineBlockFrequencyInfo &MBFI, BlockFrequency Freq) { return Printable([&MBFI, Freq](raw_ostream &OS) { - printBlockFreqImpl(OS, MBFI.getEntryFreq(), Freq); + printRelativeBlockFreq(OS, MBFI.getEntryFreq(), Freq); }); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp index a7a839688ddf..4c864ca15ccc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -480,14 +480,16 @@ class MachineBlockPlacement : public MachineFunctionPass { BlockFilterSet *BlockFilter); bool repeatedlyTailDuplicateBlock( MachineBasicBlock *BB, MachineBasicBlock *&LPred, - const MachineBasicBlock *LoopHeaderBB, - BlockChain &Chain, BlockFilterSet *BlockFilter, - MachineFunction::iterator &PrevUnplacedBlockIt); - bool maybeTailDuplicateBlock( - MachineBasicBlock *BB, MachineBasicBlock *LPred, - BlockChain &Chain, BlockFilterSet *BlockFilter, + const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain, + BlockFilterSet *BlockFilter, MachineFunction::iterator &PrevUnplacedBlockIt, - bool &DuplicatedToLPred); + BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt); + bool + maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred, + BlockChain &Chain, BlockFilterSet *BlockFilter, + MachineFunction::iterator &PrevUnplacedBlockIt, + BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt, + bool &DuplicatedToLPred); bool hasBetterLayoutPredecessor( const MachineBasicBlock *BB, const MachineBasicBlock *Succ, const BlockChain &SuccChain, BranchProbability SuccProb, @@ -498,10 +500,13 @@ class MachineBlockPlacement : public MachineFunctionPass { const BlockFilterSet *BlockFilter); MachineBasicBlock *selectBestCandidateBlock( const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList); - MachineBasicBlock *getFirstUnplacedBlock( - const BlockChain &PlacedChain, - MachineFunction::iterator &PrevUnplacedBlockIt, - const BlockFilterSet *BlockFilter); + MachineBasicBlock * + getFirstUnplacedBlock(const BlockChain &PlacedChain, + MachineFunction::iterator &PrevUnplacedBlockIt); + MachineBasicBlock * + getFirstUnplacedBlock(const BlockChain &PlacedChain, + BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt, + const BlockFilterSet *BlockFilter); /// Add a basic block to the work list if it is appropriate. 
/// @@ -603,11 +608,11 @@ public: } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<MachineBranchProbabilityInfo>(); - AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineBranchProbabilityInfoWrapperPass>(); + AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); if (TailDupPlacement) - AU.addRequired<MachinePostDominatorTree>(); - AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachinePostDominatorTreeWrapperPass>(); + AU.addRequired<MachineLoopInfoWrapperPass>(); AU.addRequired<ProfileSummaryInfoWrapperPass>(); AU.addRequired<TargetPassConfig>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -622,10 +627,10 @@ char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID; INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE, "Branch Probability Basic Block Placement", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) -INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) -INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE, "Branch Probability Basic Block Placement", false, false) @@ -1761,7 +1766,7 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( return BestBlock; } -/// Retrieve the first unplaced basic block. +/// Retrieve the first unplaced basic block in the entire function. /// /// This routine is called when we are unable to use the CFG to walk through /// all of the basic blocks and form a chain due to unnatural loops in the CFG. @@ -1770,12 +1775,10 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( /// re-scanning the entire sequence on repeated calls to this routine. MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( const BlockChain &PlacedChain, - MachineFunction::iterator &PrevUnplacedBlockIt, - const BlockFilterSet *BlockFilter) { + MachineFunction::iterator &PrevUnplacedBlockIt) { + for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F->end(); I != E; ++I) { - if (BlockFilter && !BlockFilter->count(&*I)) - continue; if (BlockToChain[&*I] != &PlacedChain) { PrevUnplacedBlockIt = I; // Now select the head of the chain to which the unplaced block belongs @@ -1787,6 +1790,31 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( return nullptr; } +/// Retrieve the first unplaced basic block among the blocks in BlockFilter. +/// +/// This is similar to getFirstUnplacedBlock for the entire function, but since +/// the size of BlockFilter is typically far less than the number of blocks in +/// the entire function, iterating through the BlockFilter is more efficient. +/// When processing the entire function, using the version without BlockFilter +/// has a complexity of #(loops in function) * #(blocks in function), while this +/// version has a complexity of sum(#(loops in block) foreach block in function) +/// which is always smaller. For a long function that is mostly sequential in +/// structure, the complexity is amortized to 1 * #(blocks in function).
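To put rough numbers on the complexity claim in that comment (an illustrative calculation, not from the patch): for a function of 10,000 blocks containing 100 loops of roughly 100 blocks each, the unfiltered getFirstUnplacedBlock can cost up to 100 * 10,000 = 1,000,000 block visits across all loop chains, while the filtered version visits each BlockFilter element once per enclosing loop, about 100 * 100 = 10,000 visits in total.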
+MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( + const BlockChain &PlacedChain, + BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt, + const BlockFilterSet *BlockFilter) { + assert(BlockFilter); + for (; PrevUnplacedBlockInFilterIt != BlockFilter->end(); + ++PrevUnplacedBlockInFilterIt) { + BlockChain *C = BlockToChain[*PrevUnplacedBlockInFilterIt]; + if (C != &PlacedChain) { + return *C->begin(); + } + } + return nullptr; +} + void MachineBlockPlacement::fillWorkLists( const MachineBasicBlock *MBB, SmallPtrSetImpl<BlockChain *> &UpdatedPreds, @@ -1826,6 +1854,9 @@ void MachineBlockPlacement::buildChain( assert(HeadBB && "BB must not be null.\n"); assert(BlockToChain[HeadBB] == &Chain && "BlockToChainMap mis-match.\n"); MachineFunction::iterator PrevUnplacedBlockIt = F->begin(); + BlockFilterSet::iterator PrevUnplacedBlockInFilterIt; + if (BlockFilter) + PrevUnplacedBlockInFilterIt = BlockFilter->begin(); const MachineBasicBlock *LoopHeaderBB = HeadBB; markChainSuccessors(Chain, LoopHeaderBB, BlockFilter); @@ -1855,7 +1886,11 @@ void MachineBlockPlacement::buildChain( BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList); if (!BestSucc) { - BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt, BlockFilter); + if (BlockFilter) + BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockInFilterIt, + BlockFilter); + else + BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt); if (!BestSucc) break; @@ -1867,7 +1902,8 @@ void MachineBlockPlacement::buildChain( // Check for that now. if (allowTailDupPlacement() && BestSucc && ShouldTailDup) { repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain, - BlockFilter, PrevUnplacedBlockIt); + BlockFilter, PrevUnplacedBlockIt, + PrevUnplacedBlockInFilterIt); // If the chosen successor was duplicated into BB, don't bother laying // it out, just go round the loop again with BB as the chain end. if (!BB->isSuccessor(BestSucc)) @@ -2923,8 +2959,8 @@ void MachineBlockPlacement::alignBlocks() { unsigned MDAlign = 1; MDNode *LoopID = L->getLoopID(); if (LoopID) { - for (unsigned I = 1, E = LoopID->getNumOperands(); I < E; ++I) { - MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(I)); + for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) { + MDNode *MD = dyn_cast<MDNode>(MDO); if (MD == nullptr) continue; MDString *S = dyn_cast<MDString>(MD->getOperand(0)); @@ -3017,14 +3053,14 @@ void MachineBlockPlacement::alignBlocks() { /// @return true if \p BB was removed. 
bool MachineBlockPlacement::repeatedlyTailDuplicateBlock( MachineBasicBlock *BB, MachineBasicBlock *&LPred, - const MachineBasicBlock *LoopHeaderBB, - BlockChain &Chain, BlockFilterSet *BlockFilter, - MachineFunction::iterator &PrevUnplacedBlockIt) { + const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain, + BlockFilterSet *BlockFilter, MachineFunction::iterator &PrevUnplacedBlockIt, + BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt) { bool Removed, DuplicatedToLPred; bool DuplicatedToOriginalLPred; - Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter, - PrevUnplacedBlockIt, - DuplicatedToLPred); + Removed = maybeTailDuplicateBlock( + BB, LPred, Chain, BlockFilter, PrevUnplacedBlockIt, + PrevUnplacedBlockInFilterIt, DuplicatedToLPred); if (!Removed) return false; DuplicatedToOriginalLPred = DuplicatedToLPred; @@ -3045,9 +3081,9 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock( if (ChainEnd == Chain.begin()) break; DupPred = *std::prev(ChainEnd); - Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter, - PrevUnplacedBlockIt, - DuplicatedToLPred); + Removed = maybeTailDuplicateBlock( + DupBB, DupPred, Chain, BlockFilter, PrevUnplacedBlockIt, + PrevUnplacedBlockInFilterIt, DuplicatedToLPred); } // If BB was duplicated into LPred, it is now scheduled. But because it was // removed, markChainSuccessors won't be called for its chain. Instead we @@ -3074,9 +3110,9 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock( /// \p DuplicatedToLPred - True if the block was duplicated into LPred. /// \return - True if the block was duplicated into all preds and removed. bool MachineBlockPlacement::maybeTailDuplicateBlock( - MachineBasicBlock *BB, MachineBasicBlock *LPred, - BlockChain &Chain, BlockFilterSet *BlockFilter, - MachineFunction::iterator &PrevUnplacedBlockIt, + MachineBasicBlock *BB, MachineBasicBlock *LPred, BlockChain &Chain, + BlockFilterSet *BlockFilter, MachineFunction::iterator &PrevUnplacedBlockIt, + BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt, bool &DuplicatedToLPred) { DuplicatedToLPred = false; if (!shouldTailDuplicate(BB)) @@ -3118,7 +3154,25 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock( // Handle the filter set if (BlockFilter) { - BlockFilter->remove(RemBB); + auto It = llvm::find(*BlockFilter, RemBB); + // Erase RemBB from BlockFilter, and keep PrevUnplacedBlockInFilterIt + // pointing to the same element as before. + if (It != BlockFilter->end()) { + if (It < PrevUnplacedBlockInFilterIt) { + const MachineBasicBlock *PrevBB = *PrevUnplacedBlockInFilterIt; + // BlockFilter is a SmallVector so all elements after RemBB are + // shifted to the front by 1 after its deletion. + auto Distance = PrevUnplacedBlockInFilterIt - It - 1; + PrevUnplacedBlockInFilterIt = BlockFilter->erase(It) + Distance; + assert(*PrevUnplacedBlockInFilterIt == PrevBB); + (void)PrevBB; + } else if (It == PrevUnplacedBlockInFilterIt) + // The block pointed by PrevUnplacedBlockInFilterIt is erased, we + // have to set it to the next element. + PrevUnplacedBlockInFilterIt = BlockFilter->erase(It); + else + BlockFilter->erase(It); + } } // Remove the block from loop info. 
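The iterator-rebasing logic in that erase is subtle, so here is a minimal self-contained sketch of the same pattern; a plain SmallVector stands in for the SmallVector-backed BlockFilterSet, and the values are arbitrary:

    #include "llvm/ADT/SmallVector.h"
    #include <cassert>

    void eraseBeforeSavedIterator() {
      llvm::SmallVector<int, 8> V = {1, 2, 3, 4, 5};
      auto *Saved = V.begin() + 3; // points at 4
      auto *It = V.begin() + 1;    // erase the 2, which sits before Saved
      // erase() shifts every later element left by one, so rebase Saved
      // by the number of elements left strictly between It and Saved.
      auto Distance = Saved - It - 1;
      Saved = V.erase(It) + Distance;
      assert(*Saved == 4 && "Saved still points at the same element");
    }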
@@ -3371,10 +3425,10 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { return false; F = &MF; - MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI(); MBFI = std::make_unique<MBFIWrapper>( - getAnalysis<MachineBlockFrequencyInfo>()); - MLI = &getAnalysis<MachineLoopInfo>(); + getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()); + MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); TII = MF.getSubtarget().getInstrInfo(); TLI = MF.getSubtarget().getTargetLowering(); MPDT = nullptr; @@ -3417,7 +3471,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { TailDupSize = TII->getTailDuplicateSize(PassConfig->getOptLevel()); if (allowTailDupPlacement()) { - MPDT = &getAnalysis<MachinePostDominatorTree>(); + MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree(); bool OptForSize = MF.getFunction().hasOptSize() || llvm::shouldOptimizeForSize(&MF, PSI, &MBFI->getMBFI()); if (OptForSize) @@ -3449,7 +3503,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { ComputedEdges.clear(); // Must redo the post-dominator tree if blocks were changed. if (MPDT) - MPDT->runOnMachineFunction(MF); + MPDT->recalculate(MF); ChainAllocator.DestroyAll(); buildCFGChains(); } @@ -3500,7 +3554,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { } if (ViewBlockLayoutWithBFI != GVDT_None && (ViewBlockFreqFuncName.empty() || - F->getFunction().getName().equals(ViewBlockFreqFuncName))) { + F->getFunction().getName() == ViewBlockFreqFuncName)) { if (RenumberBlocksBeforeView) MF.RenumberBlocks(); MBFI->view("MBP." + MF.getName(), false); @@ -3672,8 +3726,8 @@ public: bool runOnMachineFunction(MachineFunction &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<MachineBranchProbabilityInfo>(); - AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineBranchProbabilityInfoWrapperPass>(); + AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -3687,8 +3741,8 @@ char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID; INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats", "Basic Block Placement Stats", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) -INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass) INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats", "Basic Block Placement Stats", false, false) @@ -3700,8 +3754,8 @@ bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) { if (!isFunctionInPrintList(F.getName())) return false; - MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); - MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI(); + MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(); for (MachineBasicBlock &MBB : F) { BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp index a84377d70855..56ffffff6224 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ 
b/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -18,9 +18,11 @@ using namespace llvm; -INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfo, "machine-branch-prob", +INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfoWrapperPass, + "machine-branch-prob", "Machine Branch Probability Analysis", false, true) -INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob", +INITIALIZE_PASS_END(MachineBranchProbabilityInfoWrapperPass, + "machine-branch-prob", "Machine Branch Probability Analysis", false, true) namespace llvm { @@ -37,15 +39,45 @@ cl::opt<unsigned> ProfileLikelyProb( cl::init(51), cl::Hidden); } // namespace llvm -char MachineBranchProbabilityInfo::ID = 0; +MachineBranchProbabilityAnalysis::Result +MachineBranchProbabilityAnalysis::run(MachineFunction &, + MachineFunctionAnalysisManager &) { + return MachineBranchProbabilityInfo(); +} + +PreservedAnalyses +MachineBranchProbabilityPrinterPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + OS << "Printing analysis 'Machine Branch Probability Analysis' for machine " + "function '" + << MF.getName() << "':\n"; + auto &MBPI = MFAM.getResult<MachineBranchProbabilityAnalysis>(MF); + for (const MachineBasicBlock &MBB : MF) { + for (const MachineBasicBlock *Succ : MBB.successors()) + MBPI.printEdgeProbability(OS << " ", &MBB, Succ); + } + return PreservedAnalyses::all(); +} + +char MachineBranchProbabilityInfoWrapperPass::ID = 0; -MachineBranchProbabilityInfo::MachineBranchProbabilityInfo() +MachineBranchProbabilityInfoWrapperPass:: + MachineBranchProbabilityInfoWrapperPass() : ImmutablePass(ID) { PassRegistry &Registry = *PassRegistry::getPassRegistry(); - initializeMachineBranchProbabilityInfoPass(Registry); + initializeMachineBranchProbabilityInfoWrapperPassPass(Registry); } -void MachineBranchProbabilityInfo::anchor() {} +void MachineBranchProbabilityInfoWrapperPass::anchor() {} + +AnalysisKey MachineBranchProbabilityAnalysis::Key; + +bool MachineBranchProbabilityInfo::invalidate( + MachineFunction &, const PreservedAnalyses &PA, + MachineFunctionAnalysisManager::Invalidator &) { + auto PAC = PA.getChecker<MachineBranchProbabilityAnalysis>(); + return !PAC.preservedWhenStateless(); +} BranchProbability MachineBranchProbabilityInfo::getEdgeProbability( const MachineBasicBlock *Src, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp index 26a8d00e6626..27bbf5599b60 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp @@ -92,10 +92,10 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired<AAResultsWrapperPass>(); AU.addPreservedID(MachineLoopInfoID); - AU.addRequired<MachineDominatorTree>(); - AU.addPreserved<MachineDominatorTree>(); - AU.addRequired<MachineBlockFrequencyInfo>(); - AU.addPreserved<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineDominatorTreeWrapperPass>(); + AU.addPreserved<MachineDominatorTreeWrapperPass>(); + AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); + AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>(); } MachineFunctionProperties getRequiredProperties() const override { @@ -166,7 +166,7 @@ char &llvm::MachineCSEID = MachineCSE::ID; INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE, "Machine Common Subexpression Elimination", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) 
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE, "Machine Common Subexpression Elimination", false, false) @@ -184,7 +184,7 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI, continue; bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg); MachineInstr *DefMI = MRI->getVRegDef(Reg); - if (!DefMI->isCopy()) + if (!DefMI || !DefMI->isCopy()) continue; Register SrcReg = DefMI->getOperand(1).getReg(); if (!SrcReg.isVirtual()) @@ -709,7 +709,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) { for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II) for (auto ImplicitDef : ImplicitDefs) if (MachineOperand *MO = II->findRegisterUseOperand( - ImplicitDef, /*isKill=*/true, TRI)) + ImplicitDef, TRI, /*isKill=*/true)) MO->setIsKill(false); } else { // If the instructions aren't in the same BB, bail out and clear the @@ -943,8 +943,8 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); - DT = &getAnalysis<MachineDominatorTree>(); - MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); + MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(); LookAheadLimit = TII->getMachineCSELookAheadLimit(); bool ChangedPRE, ChangedCSE; ChangedPRE = PerformSimplePRE(DT); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp index 874f726d2947..9b703d5401cb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp index c65937935ed8..1a19e053d30f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp @@ -99,17 +99,16 @@ private: const MachineBasicBlock &MBB); unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot, MachineTraceMetrics::Trace BlockTrace); - bool - improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root, - MachineTraceMetrics::Trace BlockTrace, - SmallVectorImpl<MachineInstr *> &InsInstrs, - SmallVectorImpl<MachineInstr *> &DelInstrs, - DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, - MachineCombinerPattern Pattern, bool SlackIsAccurate); + bool improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root, + MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, + unsigned Pattern, bool SlackIsAccurate); bool reduceRegisterPressure(MachineInstr &Root, MachineBasicBlock *MBB, SmallVectorImpl<MachineInstr *> &InsInstrs, SmallVectorImpl<MachineInstr *> &DelInstrs, - MachineCombinerPattern Pattern); + unsigned Pattern); bool preservesResourceLen(MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl<MachineInstr *> &InsInstrs, @@ -123,7 +122,8 @@ private: MachineTraceMetrics::Trace BlockTrace); void verifyPatternOrder(MachineBasicBlock *MBB, MachineInstr &Root, - 
SmallVector<MachineCombinerPattern, 16> &Patterns); + SmallVector<unsigned, 16> &Patterns); + CombinerObjective getCombinerObjective(unsigned Pattern); }; } @@ -132,16 +132,16 @@ char &llvm::MachineCombinerID = MachineCombiner::ID; INITIALIZE_PASS_BEGIN(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) INITIALIZE_PASS_END(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner", false, false) void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addPreserved<MachineDominatorTree>(); - AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); + AU.addPreserved<MachineDominatorTreeWrapperPass>(); + AU.addRequired<MachineLoopInfoWrapperPass>(); + AU.addPreserved<MachineLoopInfoWrapperPass>(); AU.addRequired<MachineTraceMetrics>(); AU.addPreserved<MachineTraceMetrics>(); AU.addRequired<LazyMachineBlockFrequencyInfoPass>(); @@ -155,9 +155,6 @@ MachineCombiner::getOperandDef(const MachineOperand &MO) { // We need a virtual register definition. if (MO.isReg() && MO.getReg().isVirtual()) DefInstr = MRI->getUniqueVRegDef(MO.getReg()); - // PHI's have no depth etc. - if (DefInstr && DefInstr->isPHI()) - DefInstr = nullptr; return DefInstr; } @@ -232,8 +229,10 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, assert(DefInstr && "There must be a definition for a new virtual register"); DepthOp = InstrDepth[II->second]; - int DefIdx = DefInstr->findRegisterDefOperandIdx(MO.getReg()); - int UseIdx = InstrPtr->findRegisterUseOperandIdx(MO.getReg()); + int DefIdx = + DefInstr->findRegisterDefOperandIdx(MO.getReg(), /*TRI=*/nullptr); + int UseIdx = + InstrPtr->findRegisterUseOperandIdx(MO.getReg(), /*TRI=*/nullptr); LatencyOp = TSchedModel.computeOperandLatency(DefInstr, DefIdx, InstrPtr, UseIdx); } else { @@ -244,8 +243,12 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth; if (!isTransientMI(DefInstr)) LatencyOp = TSchedModel.computeOperandLatency( - DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()), - InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg())); + DefInstr, + DefInstr->findRegisterDefOperandIdx(MO.getReg(), + /*TRI=*/nullptr), + InstrPtr, + InstrPtr->findRegisterUseOperandIdx(MO.getReg(), + /*TRI=*/nullptr)); } } IDepth = std::max(IDepth, DepthOp + LatencyOp); @@ -283,8 +286,10 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, unsigned LatencyOp = 0; if (UseMO && BlockTrace.isDepInTrace(*Root, *UseMO)) { LatencyOp = TSchedModel.computeOperandLatency( - NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO, - UseMO->findRegisterUseOperandIdx(MO.getReg())); + NewRoot, + NewRoot->findRegisterDefOperandIdx(MO.getReg(), /*TRI=*/nullptr), + UseMO, + UseMO->findRegisterUseOperandIdx(MO.getReg(), /*TRI=*/nullptr)); } else { LatencyOp = TSchedModel.computeInstrLatency(NewRoot); } @@ -293,36 +298,17 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, return NewRootLatency; } -/// The combiner's goal may differ based on which pattern it is attempting -/// to optimize. -enum class CombinerObjective { - MustReduceDepth, // The data dependency chain must be improved. - MustReduceRegisterPressure, // The register pressure must be reduced. - Default // The critical path must not be lengthened. 
-}; - -static CombinerObjective getCombinerObjective(MachineCombinerPattern P) { +CombinerObjective MachineCombiner::getCombinerObjective(unsigned Pattern) { // TODO: If C++ ever gets a real enum class, make this part of the // MachineCombinerPattern class. - switch (P) { + switch (Pattern) { case MachineCombinerPattern::REASSOC_AX_BY: case MachineCombinerPattern::REASSOC_AX_YB: case MachineCombinerPattern::REASSOC_XA_BY: case MachineCombinerPattern::REASSOC_XA_YB: - case MachineCombinerPattern::REASSOC_XY_AMM_BMM: - case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: - case MachineCombinerPattern::SUBADD_OP1: - case MachineCombinerPattern::SUBADD_OP2: - case MachineCombinerPattern::FMADD_AX: - case MachineCombinerPattern::FMADD_XA: - case MachineCombinerPattern::FMSUB: - case MachineCombinerPattern::FNMSUB: return CombinerObjective::MustReduceDepth; - case MachineCombinerPattern::REASSOC_XY_BCA: - case MachineCombinerPattern::REASSOC_XY_BAC: - return CombinerObjective::MustReduceRegisterPressure; default: - return CombinerObjective::Default; + return TII->getCombinerObjective(Pattern); } } @@ -352,8 +338,7 @@ std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences( bool MachineCombiner::reduceRegisterPressure( MachineInstr &Root, MachineBasicBlock *MBB, SmallVectorImpl<MachineInstr *> &InsInstrs, - SmallVectorImpl<MachineInstr *> &DelInstrs, - MachineCombinerPattern Pattern) { + SmallVectorImpl<MachineInstr *> &DelInstrs, unsigned Pattern) { // FIXME: for now, we don't do any check for the register pressure patterns. // We treat them as always profitable. But we can do better if we make // RegPressureTracker class be aware of TIE attribute. Then we can get an @@ -371,8 +356,7 @@ bool MachineCombiner::improvesCriticalPathLen( MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl<MachineInstr *> &InsInstrs, SmallVectorImpl<MachineInstr *> &DelInstrs, - DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, - MachineCombinerPattern Pattern, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned Pattern, bool SlackIsAccurate) { // Get depth and latency of NewRoot and Root. unsigned NewRootDepth = @@ -496,13 +480,14 @@ bool MachineCombiner::preservesResourceLen( /// \param Pattern is used to call target hook finalizeInsInstrs /// \param IncrementalUpdate if true, compute instruction depths incrementally, /// otherwise invalidate the trace -static void insertDeleteInstructions( - MachineBasicBlock *MBB, MachineInstr &MI, - SmallVectorImpl<MachineInstr *> &InsInstrs, - SmallVectorImpl<MachineInstr *> &DelInstrs, - MachineTraceMetrics::Ensemble *TraceEnsemble, - SparseSet<LiveRegUnit> &RegUnits, const TargetInstrInfo *TII, - MachineCombinerPattern Pattern, bool IncrementalUpdate) { +static void +insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, + MachineTraceMetrics::Ensemble *TraceEnsemble, + SparseSet<LiveRegUnit> &RegUnits, + const TargetInstrInfo *TII, unsigned Pattern, + bool IncrementalUpdate) { // If we want to fix up some placeholder for some target, do it now. // We need this because in genAlternativeCodeSequence, we have not decided the // better pattern InsInstrs or DelInstrs, so we don't want generate some @@ -537,9 +522,9 @@ static void insertDeleteInstructions( // Check that the difference between original and new latency is decreasing for // later patterns. This helps to discover sub-optimal pattern orderings. 
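The getCombinerObjective switch above now defers non-generic patterns to a TargetInstrInfo hook. A hypothetical backend override might look like the sketch below; MyTargetInstrInfo and FMA_CHAIN are invented for illustration, and only the hook's name, its unsigned Pattern parameter, and the CombinerObjective result come from the diff:

    // Sketch: a backend classifying one of its own combiner pattern IDs.
    CombinerObjective
    MyTargetInstrInfo::getCombinerObjective(unsigned Pattern) const {
      switch (Pattern) {
      case MyTargetCombinerPattern::FMA_CHAIN: // hypothetical target ID
        return CombinerObjective::MustReduceDepth;
      default:
        // Fall back to the generic classification.
        return TargetInstrInfo::getCombinerObjective(Pattern);
      }
    }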
-void MachineCombiner::verifyPatternOrder( - MachineBasicBlock *MBB, MachineInstr &Root, - SmallVector<MachineCombinerPattern, 16> &Patterns) { +void MachineCombiner::verifyPatternOrder(MachineBasicBlock *MBB, + MachineInstr &Root, + SmallVector<unsigned, 16> &Patterns) { long PrevLatencyDiff = std::numeric_limits<long>::max(); (void)PrevLatencyDiff; // Variable is used in assert only. for (auto P : Patterns) { @@ -593,7 +578,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { while (BlockIter != MBB->end()) { auto &MI = *BlockIter++; - SmallVector<MachineCombinerPattern, 16> Patterns; + SmallVector<unsigned, 16> Patterns; // The motivating example is: // // MUL Other MUL_op1 MUL_op2 Other @@ -741,7 +726,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { SchedModel = STI->getSchedModel(); TSchedModel.init(STI); MRI = &MF.getRegInfo(); - MLI = &getAnalysis<MachineLoopInfo>(); + MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); Traces = &getAnalysis<MachineTraceMetrics>(); PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); MBFI = (PSI && PSI->hasProfileSummary()) ? diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineConvergenceVerifier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineConvergenceVerifier.cpp new file mode 100644 index 000000000000..3d3c55faa824 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineConvergenceVerifier.cpp @@ -0,0 +1,99 @@ +//===- MachineConvergenceVerifier.cpp - Verify convergencectrl ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineConvergenceVerifier.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSSAContext.h" +#include "llvm/IR/GenericConvergenceVerifierImpl.h" + +using namespace llvm; + +template <> +auto GenericConvergenceVerifier<MachineSSAContext>::getConvOp( + const MachineInstr &MI) -> ConvOpKind { + switch (MI.getOpcode()) { + default: + return CONV_NONE; + case TargetOpcode::CONVERGENCECTRL_ENTRY: + return CONV_ENTRY; + case TargetOpcode::CONVERGENCECTRL_ANCHOR: + return CONV_ANCHOR; + case TargetOpcode::CONVERGENCECTRL_LOOP: + return CONV_LOOP; + } +} + +template <> +void GenericConvergenceVerifier< + MachineSSAContext>::checkConvergenceTokenProduced(const MachineInstr &MI) { + Check(!MI.hasImplicitDef(), + "Convergence control tokens are defined explicitly.", + {Context.print(&MI)}); + const MachineOperand &Def = MI.getOperand(0); + const MachineRegisterInfo &MRI = Context.getFunction()->getRegInfo(); + Check(MRI.getUniqueVRegDef(Def.getReg()), + "Convergence control tokens must have unique definitions.", + {Context.print(&MI)}); +} + +template <> +const MachineInstr * +GenericConvergenceVerifier<MachineSSAContext>::findAndCheckConvergenceTokenUsed( + const MachineInstr &MI) { + const MachineRegisterInfo &MRI = Context.getFunction()->getRegInfo(); + const MachineInstr *TokenDef = nullptr; + + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.isUse()) + continue; + Register OpReg = MO.getReg(); + if (!OpReg.isVirtual()) + continue; + + const 
MachineInstr *Def = MRI.getUniqueVRegDef(OpReg); + if (!Def) + continue; + if (getConvOp(*Def) == CONV_NONE) + continue; + + CheckOrNull( + MI.isConvergent(), + "Convergence control tokens can only be used by convergent operations.", + {Context.print(OpReg), Context.print(&MI)}); + + CheckOrNull(!TokenDef, + "An operation can use at most one convergence control token.", + {Context.print(OpReg), Context.print(&MI)}); + + TokenDef = Def; + } + + if (TokenDef) + Tokens[&MI] = TokenDef; + + return TokenDef; +} + +template <> +bool GenericConvergenceVerifier<MachineSSAContext>::isInsideConvergentFunction( + const MachineInstr &MI) { + // The class MachineFunction does not have any property to indicate whether it + // is convergent. Trivially return true so that the check always passes. + return true; +} + +template <> +bool GenericConvergenceVerifier<MachineSSAContext>::isConvergent( + const MachineInstr &MI) { + return MI.isConvergent(); +} + +template class llvm::GenericConvergenceVerifier<MachineSSAContext>; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp index 9a0ab300b21b..b34e0939d1c7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -65,6 +65,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" +#include "llvm/MC/MCRegister.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" @@ -112,7 +113,7 @@ class CopyTracker { bool Avail; }; - DenseMap<MCRegister, CopyInfo> Copies; + DenseMap<MCRegUnit, CopyInfo> Copies; public: /// Mark all of the given registers and their subregisters as unavailable for @@ -251,7 +252,7 @@ public: return !Copies.empty(); } - MachineInstr *findCopyForUnit(MCRegister RegUnit, + MachineInstr *findCopyForUnit(MCRegUnit RegUnit, const TargetRegisterInfo &TRI, bool MustBeAvailable = false) { auto CI = Copies.find(RegUnit); @@ -262,7 +263,7 @@ public: return CI->second.MI; } - MachineInstr *findCopyDefViaUnit(MCRegister RegUnit, + MachineInstr *findCopyDefViaUnit(MCRegUnit RegUnit, const TargetRegisterInfo &TRI) { auto CI = Copies.find(RegUnit); if (CI == Copies.end()) @@ -411,6 +412,7 @@ private: typedef enum { DebugUse = false, RegularUse = true } DebugType; void ReadRegister(MCRegister Reg, MachineInstr &Reader, DebugType DT); + void readSuccessorLiveIns(const MachineBasicBlock &MBB); void ForwardCopyPropagateBlock(MachineBasicBlock &MBB); void BackwardCopyPropagateBlock(MachineBasicBlock &MBB); void EliminateSpillageCopies(MachineBasicBlock &MBB); @@ -463,6 +465,22 @@ void MachineCopyPropagation::ReadRegister(MCRegister Reg, MachineInstr &Reader, } } +void MachineCopyPropagation::readSuccessorLiveIns( + const MachineBasicBlock &MBB) { + if (MaybeDeadCopies.empty()) + return; + + // If a copy result is livein to a successor, it is not dead. + for (const MachineBasicBlock *Succ : MBB.successors()) { + for (const auto &LI : Succ->liveins()) { + for (MCRegUnit Unit : TRI->regunits(LI.PhysReg)) { + if (MachineInstr *Copy = Tracker.findCopyForUnit(Unit, *TRI)) + MaybeDeadCopies.remove(Copy); + } + } + } +} + /// Return true if \p PreviousCopy did copy register \p Src to register \p Def. 
/// This fact may have been obscured by sub register usage or may not be true at /// all even though Src and Def are subregisters of the registers used in @@ -640,7 +658,7 @@ bool MachineCopyPropagation::hasImplicitOverlap(const MachineInstr &MI, /// The umull instruction is unpredictable unless RdHi and RdLo are different. bool MachineCopyPropagation::hasOverlappingMultipleDef( const MachineInstr &MI, const MachineOperand &MODef, Register Def) { - for (const MachineOperand &MIDef : MI.defs()) { + for (const MachineOperand &MIDef : MI.all_defs()) { if ((&MIDef != &MODef) && MIDef.isReg() && TRI->regsOverlap(Def, MIDef.getReg())) return true; @@ -720,7 +738,7 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) { // cannot cope with that. if (isCopyInstr(MI, *TII, UseCopyInstr) && MI.modifiesRegister(CopySrcReg, TRI) && - !MI.definesRegister(CopySrcReg)) { + !MI.definesRegister(CopySrcReg, /*TRI=*/nullptr)) { LLVM_DEBUG(dbgs() << "MCP: Copy source overlap with dest in " << MI); continue; } @@ -914,10 +932,17 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr); } - // If MBB doesn't have successors, delete the copies whose defs are not used. - // If MBB does have successors, then conservative assume the defs are live-out - // since we don't want to trust live-in lists. - if (MBB.succ_empty()) { + bool TracksLiveness = MRI->tracksLiveness(); + + // If liveness is tracked, we can use the live-in lists to know which + // copies aren't dead. + if (TracksLiveness) + readSuccessorLiveIns(MBB); + + // If MBB doesn't have successors, delete copies whose defs are not used. + // If MBB does have successors, we can only delete copies if we are able to + // use liveness information from successors to confirm they are really dead. + if (MBB.succ_empty() || TracksLiveness) { for (MachineInstr *MaybeDead : MaybeDeadCopies) { LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: "; MaybeDead->dump()); @@ -948,8 +973,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { } static bool isBackwardPropagatableCopy(const DestSourcePair &CopyOperands, - const MachineRegisterInfo &MRI, - const TargetInstrInfo &TII) { + const MachineRegisterInfo &MRI) { Register Def = CopyOperands.Destination->getReg(); Register Src = CopyOperands.Source->getReg(); @@ -1036,7 +1060,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( if (!TRI->regsOverlap(DefReg, SrcReg)) { // Unlike forward cp, we don't invoke propagateDefs here, // just let forward cp do COPY-to-COPY propagation. - if (isBackwardPropagatableCopy(*CopyOperands, *MRI, *TII)) { + if (isBackwardPropagatableCopy(*CopyOperands, *MRI)) { Tracker.invalidateRegister(SrcReg.asMCReg(), *TRI, *TII, UseCopyInstr); Tracker.invalidateRegister(DefReg.asMCReg(), *TRI, *TII, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp index c264e199cf47..bffdd51bfbca 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp @@ -65,6 +65,7 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI, // all the others.
Function *DbgValF = M.getFunction("llvm.dbg.value"); DbgValueInst *EarliestDVI = nullptr; + DbgVariableRecord *EarliestDVR = nullptr; DenseMap<unsigned, DILocalVariable *> Line2Var; DIExpression *Expr = nullptr; if (DbgValF) { @@ -80,6 +81,20 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI, Expr = DVI->getExpression(); } } + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) { + if (!DVR.isDbgValue()) + continue; + unsigned Line = DVR.getDebugLoc().getLine(); + assert(Line != 0 && "debugify should not insert line 0 locations"); + Line2Var[Line] = DVR.getVariable(); + if (!EarliestDVR || Line < EarliestDVR->getDebugLoc().getLine()) + EarliestDVR = &DVR; + Expr = DVR.getExpression(); + } + } + } if (Line2Var.empty()) return true; @@ -109,7 +124,8 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI, // Find a suitable local variable for the DBG_VALUE. unsigned Line = MI.getDebugLoc().getLine(); if (!Line2Var.count(Line)) - Line = EarliestDVI->getDebugLoc().getLine(); + Line = EarliestDVI ? EarliestDVI->getDebugLoc().getLine() + : EarliestDVR->getDebugLoc().getLine(); DILocalVariable *LocalVar = Line2Var[Line]; assert(LocalVar && "No variable for current line?"); VarSet.insert(LocalVar); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDomTreeUpdater.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDomTreeUpdater.cpp new file mode 100644 index 000000000000..afffafb245e6 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDomTreeUpdater.cpp @@ -0,0 +1,66 @@ +//===- MachineDomTreeUpdater.cpp -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the MachineDomTreeUpdater class, which provides a +// uniform way to update dominator tree related data structures. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineDomTreeUpdater.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/GenericDomTreeUpdaterImpl.h" +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/Support/GenericDomTree.h" +#include <algorithm> +#include <functional> +#include <utility> + +namespace llvm { + +template class GenericDomTreeUpdater< + MachineDomTreeUpdater, MachineDominatorTree, MachinePostDominatorTree>; + +template void +GenericDomTreeUpdater<MachineDomTreeUpdater, MachineDominatorTree, + MachinePostDominatorTree>::recalculate(MachineFunction + &MF); + +bool MachineDomTreeUpdater::forceFlushDeletedBB() { + if (DeletedBBs.empty()) + return false; + + for (auto *BB : DeletedBBs) { + eraseDelBBNode(BB); + BB->eraseFromParent(); + } + DeletedBBs.clear(); + return true; +} + +// The DT and PDT require the nodes related to updates +// are not deleted when update functions are called. +// So MachineBasicBlock deletions must be pended when the +// UpdateStrategy is Lazy. When the UpdateStrategy is +// Eager, the MachineBasicBlock will be deleted immediately. 
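A hedged usage sketch of the pending-deletion behavior that comment describes (the caller is hypothetical; MDT, PDT, Pred and DeadMBB are assumed context, and applyUpdates and flush come from the GenericDomTreeUpdater base instantiated above):

    // Sketch: lazily delete an unreachable block while keeping DT/PDT sane.
    MachineDomTreeUpdater MDTU(MDT, PDT,
                               MachineDomTreeUpdater::UpdateStrategy::Lazy);
    MDTU.applyUpdates({{MachineDominatorTree::Delete, Pred, DeadMBB}});
    MDTU.deleteBB(DeadMBB); // pended: the block is not erased yet
    MDTU.flush();           // trees updated, DeadMBB erased here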
+void MachineDomTreeUpdater::deleteBB(MachineBasicBlock *DelBB) { + validateDeleteBB(DelBB); + if (Strategy == UpdateStrategy::Lazy) { + DeletedBBs.insert(DelBB); + return; + } + + eraseDelBBNode(DelBB); + DelBB->eraseFromParent(); +} + +void MachineDomTreeUpdater::validateDeleteBB(MachineBasicBlock *DelBB) { + assert(DelBB && "Invalid push_back of nullptr DelBB."); + assert(DelBB->pred_empty() && "DelBB has one or more predecessors."); +} + +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp index 346cfedde390..6a8ede4feb93 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp @@ -26,7 +26,7 @@ char MachineDominanceFrontier::ID = 0; INITIALIZE_PASS_BEGIN(MachineDominanceFrontier, "machine-domfrontier", "Machine Dominance Frontier Construction", true, true) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_END(MachineDominanceFrontier, "machine-domfrontier", "Machine Dominance Frontier Construction", true, true) @@ -38,7 +38,8 @@ char &llvm::MachineDominanceFrontierID = MachineDominanceFrontier::ID; bool MachineDominanceFrontier::runOnMachineFunction(MachineFunction &) { releaseMemory(); - Base.analyze(getAnalysis<MachineDominatorTree>().getBase()); + Base.analyze( + getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree().getBase()); return false; } @@ -48,6 +49,6 @@ void MachineDominanceFrontier::releaseMemory() { void MachineDominanceFrontier::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineDominatorTreeWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp index 0632cde9c6f4..a2cc8fdfa7c9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp @@ -18,6 +18,7 @@ #include "llvm/Pass.h" #include "llvm/PassRegistry.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/GenericDomTreeConstruction.h" using namespace llvm; @@ -37,51 +38,86 @@ static cl::opt<bool, true> VerifyMachineDomInfoX( namespace llvm { template class DomTreeNodeBase<MachineBasicBlock>; template class DominatorTreeBase<MachineBasicBlock, false>; // DomTreeBase + +namespace DomTreeBuilder { +template void Calculate<MBBDomTree>(MBBDomTree &DT); +template void CalculateWithUpdates<MBBDomTree>(MBBDomTree &DT, MBBUpdates U); + +template void InsertEdge<MBBDomTree>(MBBDomTree &DT, MachineBasicBlock *From, + MachineBasicBlock *To); + +template void DeleteEdge<MBBDomTree>(MBBDomTree &DT, MachineBasicBlock *From, + MachineBasicBlock *To); + +template void ApplyUpdates<MBBDomTree>(MBBDomTree &DT, MBBDomTreeGraphDiff &, + MBBDomTreeGraphDiff *); + +template bool Verify<MBBDomTree>(const MBBDomTree &DT, + MBBDomTree::VerificationLevel VL); +} // namespace DomTreeBuilder } -char MachineDominatorTree::ID = 0; +bool MachineDominatorTree::invalidate( + MachineFunction &, const PreservedAnalyses &PA, + MachineFunctionAnalysisManager::Invalidator &) { + // Check whether the analysis, all analyses on machine functions, or the + // machine function's CFG have been preserved. 
+ auto PAC = PA.getChecker<MachineDominatorTreeAnalysis>(); + return !PAC.preserved() && + !PAC.preservedSet<AllAnalysesOn<MachineFunction>>() && + !PAC.preservedSet<CFGAnalyses>(); +} -INITIALIZE_PASS(MachineDominatorTree, "machinedomtree", - "MachineDominator Tree Construction", true, true) +AnalysisKey MachineDominatorTreeAnalysis::Key; -char &llvm::MachineDominatorsID = MachineDominatorTree::ID; +MachineDominatorTreeAnalysis::Result +MachineDominatorTreeAnalysis::run(MachineFunction &MF, + MachineFunctionAnalysisManager &) { + return MachineDominatorTree(MF); +} -void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(AU); +PreservedAnalyses +MachineDominatorTreePrinterPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + OS << "MachineDominatorTree for machine function: " << MF.getName() << '\n'; + MFAM.getResult<MachineDominatorTreeAnalysis>(MF).print(OS); + return PreservedAnalyses::all(); } -bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) { - calculate(F); - return false; +char MachineDominatorTreeWrapperPass::ID = 0; + +INITIALIZE_PASS(MachineDominatorTreeWrapperPass, "machinedomtree", + "MachineDominator Tree Construction", true, true) + +MachineDominatorTreeWrapperPass::MachineDominatorTreeWrapperPass() + : MachineFunctionPass(ID) { + initializeMachineDominatorTreeWrapperPassPass( + *PassRegistry::getPassRegistry()); } void MachineDominatorTree::calculate(MachineFunction &F) { CriticalEdgesToSplit.clear(); NewBBs.clear(); - DT.reset(new DomTreeBase<MachineBasicBlock>()); - DT->recalculate(F); + recalculate(F); } -MachineDominatorTree::MachineDominatorTree() - : MachineFunctionPass(ID) { - initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); -} +char &llvm::MachineDominatorsID = MachineDominatorTreeWrapperPass::ID; -void MachineDominatorTree::releaseMemory() { - CriticalEdgesToSplit.clear(); - DT.reset(nullptr); +bool MachineDominatorTreeWrapperPass::runOnMachineFunction(MachineFunction &F) { + DT = MachineDominatorTree(F); + return false; } -void MachineDominatorTree::verifyAnalysis() const { - if (DT && VerifyMachineDomInfo) - if (!DT->verify(MachineDomTree::VerificationLevel::Basic)) { - errs() << "MachineDominatorTree verification failed\n"; - abort(); - } +void MachineDominatorTreeWrapperPass::releaseMemory() { DT.reset(); } + +void MachineDominatorTreeWrapperPass::verifyAnalysis() const { + if (VerifyMachineDomInfo && DT) + if (!DT->verify(MachineDominatorTree::VerificationLevel::Basic)) + report_fatal_error("MachineDominatorTree verification failed!"); } -void MachineDominatorTree::print(raw_ostream &OS, const Module*) const { +void MachineDominatorTreeWrapperPass::print(raw_ostream &OS, + const Module *) const { if (DT) DT->print(OS); } @@ -103,7 +139,7 @@ void MachineDominatorTree::applySplitCriticalEdges() const { for (CriticalEdge &Edge : CriticalEdgesToSplit) { // Update dominator information. 
MachineBasicBlock *Succ = Edge.ToBB; - MachineDomTreeNode *SuccDTNode = DT->getNode(Succ); + MachineDomTreeNode *SuccDTNode = Base::getNode(Succ); for (MachineBasicBlock *PredBB : Succ->predecessors()) { if (PredBB == Edge.NewBB) @@ -126,7 +162,7 @@ void MachineDominatorTree::applySplitCriticalEdges() const { "than one predecessor!"); PredBB = *PredBB->pred_begin(); } - if (!DT->dominates(SuccDTNode, DT->getNode(PredBB))) { + if (!Base::dominates(SuccDTNode, Base::getNode(PredBB))) { IsNewIDom[Idx] = false; break; } @@ -138,13 +174,16 @@ void MachineDominatorTree::applySplitCriticalEdges() const { Idx = 0; for (CriticalEdge &Edge : CriticalEdgesToSplit) { // We know FromBB dominates NewBB. - MachineDomTreeNode *NewDTNode = DT->addNewBlock(Edge.NewBB, Edge.FromBB); + MachineDomTreeNode *NewDTNode = + const_cast<MachineDominatorTree *>(this)->Base::addNewBlock( + Edge.NewBB, Edge.FromBB); // If all the other predecessors of "Succ" are dominated by "Succ" itself // then the new block is the new immediate dominator of "Succ". Otherwise, // the new block doesn't dominate anything. if (IsNewIDom[Idx]) - DT->changeImmediateDominator(DT->getNode(Edge.ToBB), NewDTNode); + const_cast<MachineDominatorTree *>(this)->Base::changeImmediateDominator( + Base::getNode(Edge.ToBB), NewDTNode); ++Idx; } NewBBs.clear(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp index 280d3a6a41ed..853de4c88cae 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp @@ -184,7 +184,8 @@ uint64_t MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { return alignTo(Offset, StackAlign); } -void MachineFrameInfo::computeMaxCallFrameSize(const MachineFunction &MF) { +void MachineFrameInfo::computeMaxCallFrameSize( + MachineFunction &MF, std::vector<MachineBasicBlock::iterator> *FrameSDOps) { const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode(); unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); @@ -192,18 +193,14 @@ void MachineFrameInfo::computeMaxCallFrameSize(const MachineFunction &MF) { "Can only compute MaxCallFrameSize if Setup/Destroy opcode are known"); MaxCallFrameSize = 0; - for (const MachineBasicBlock &MBB : MF) { - for (const MachineInstr &MI : MBB) { + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { unsigned Opcode = MI.getOpcode(); if (Opcode == FrameSetupOpcode || Opcode == FrameDestroyOpcode) { unsigned Size = TII.getFrameSize(MI); MaxCallFrameSize = std::max(MaxCallFrameSize, Size); - AdjustsStack = true; - } else if (MI.isInlineAsm()) { - // Some inline asm's need a stack frame, as indicated by operand 1. - unsigned ExtraInfo = MI.getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); - if (ExtraInfo & InlineAsm::Extra_IsAlignStack) - AdjustsStack = true; + if (FrameSDOps != nullptr) + FrameSDOps->push_back(&MI); } } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp index 57af571ed9bf..7f6a75208d25 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp @@ -200,10 +200,11 @@ void MachineFunction::init() { // explicitly asked us not to. 
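For context on the new FrameSDOps out-parameter of computeMaxCallFrameSize above, a sketch of a prologue/epilogue-insertion style caller (assumed context: MFI, MF and the target frame lowering TFI are in scope; eliminateCallFramePseudoInstr is the existing TargetFrameLowering API):

    // Sketch: gather call-frame setup/destroy pseudos while computing the
    // max call frame size, then lower them; deciding AdjustsStack is now
    // the caller's responsibility rather than a side effect.
    std::vector<MachineBasicBlock::iterator> FrameSDOps;
    MFI.computeMaxCallFrameSize(MF, &FrameSDOps);
    for (MachineBasicBlock::iterator I : FrameSDOps)
      TFI->eliminateCallFramePseudoInstr(MF, *I->getParent(), I);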
bool CanRealignSP = STI->getFrameLowering()->isStackRealignable() && !F.hasFnAttribute("no-realign-stack"); + bool ForceRealignSP = F.hasFnAttribute(Attribute::StackAlignment) || + F.hasFnAttribute("stackrealign"); FrameInfo = new (Allocator) MachineFrameInfo( getFnStackAlignment(STI, F), /*StackRealignable=*/CanRealignSP, - /*ForcedRealign=*/CanRealignSP && - F.hasFnAttribute(Attribute::StackAlignment)); + /*ForcedRealign=*/ForceRealignSP && CanRealignSP); setUnsafeStackSize(F, *FrameInfo); @@ -306,7 +307,7 @@ void MachineFunction::clear() { } const DataLayout &MachineFunction::getDataLayout() const { - return F.getParent()->getDataLayout(); + return F.getDataLayout(); } /// Get the JumpTableInfo for this function. @@ -467,6 +468,7 @@ MachineFunction::CreateMachineBasicBlock(const BasicBlock *BB, // `-basic-block-sections=list` to allow robust mapping of profiles to basic // blocks. if (Target.getBBSectionsType() == BasicBlockSection::Labels || + Target.Options.BBAddrMap || Target.getBBSectionsType() == BasicBlockSection::List) MBB->setBBID(BBID.has_value() ? *BBID : UniqueBBID{NextBBID++, 0}); return MBB; @@ -483,13 +485,17 @@ void MachineFunction::deleteMachineBasicBlock(MachineBasicBlock *MBB) { } MachineMemOperand *MachineFunction::getMachineMemOperand( - MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, - Align base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges, + MachinePointerInfo PtrInfo, MachineMemOperand::Flags F, LocationSize Size, + Align BaseAlignment, const AAMDNodes &AAInfo, const MDNode *Ranges, SyncScope::ID SSID, AtomicOrdering Ordering, AtomicOrdering FailureOrdering) { + assert((!Size.hasValue() || + Size.getValue().getKnownMinValue() != ~UINT64_C(0)) && + "Unexpected an unknown size to be represented using " + "LocationSize::beforeOrAfter()"); return new (Allocator) - MachineMemOperand(PtrInfo, f, s, base_alignment, AAInfo, Ranges, - SSID, Ordering, FailureOrdering); + MachineMemOperand(PtrInfo, F, Size, BaseAlignment, AAInfo, Ranges, SSID, + Ordering, FailureOrdering); } MachineMemOperand *MachineFunction::getMachineMemOperand( @@ -502,8 +508,14 @@ MachineMemOperand *MachineFunction::getMachineMemOperand( Ordering, FailureOrdering); } -MachineMemOperand *MachineFunction::getMachineMemOperand( - const MachineMemOperand *MMO, const MachinePointerInfo &PtrInfo, uint64_t Size) { +MachineMemOperand * +MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, + const MachinePointerInfo &PtrInfo, + LocationSize Size) { + assert((!Size.hasValue() || + Size.getValue().getKnownMinValue() != ~UINT64_C(0)) && + "Unexpected an unknown size to be represented using " + "LocationSize::beforeOrAfter()"); return new (Allocator) MachineMemOperand(PtrInfo, MMO->getFlags(), Size, MMO->getBaseAlign(), AAMDNodes(), nullptr, MMO->getSyncScopeID(), @@ -562,10 +574,10 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, MachineInstr::ExtraInfo *MachineFunction::createMIExtraInfo( ArrayRef<MachineMemOperand *> MMOs, MCSymbol *PreInstrSymbol, MCSymbol *PostInstrSymbol, MDNode *HeapAllocMarker, MDNode *PCSections, - uint32_t CFIType) { + uint32_t CFIType, MDNode *MMRAs) { return MachineInstr::ExtraInfo::create(Allocator, MMOs, PreInstrSymbol, PostInstrSymbol, HeapAllocMarker, - PCSections, CFIType); + PCSections, CFIType, MMRAs); } const char *MachineFunction::createExternalSymbolName(StringRef Name) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp 
b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp new file mode 100644 index 000000000000..24eb360723da --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -0,0 +1,47 @@ +//===- MachineFunctionAnalysis.cpp ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the definitions of the MachineFunctionAnalysis +// members. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +AnalysisKey MachineFunctionAnalysis::Key; + +bool MachineFunctionAnalysis::Result::invalidate( + Function &, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &) { + // Unless it is invalidated explicitly, it should remain preserved. + auto PAC = PA.getChecker<MachineFunctionAnalysis>(); + return !PAC.preservedWhenStateless(); +} + +MachineFunctionAnalysis::Result +MachineFunctionAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { + auto &Context = F.getContext(); + const TargetSubtargetInfo &STI = *TM->getSubtargetImpl(F); + auto &MMI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F) + .getCachedResult<MachineModuleAnalysis>(*F.getParent()) + ->getMMI(); + auto MF = std::make_unique<MachineFunction>( + F, *TM, STI, Context.generateMachineFunctionNum(F), MMI); + MF->initTargetMachineFunctionInfo(STI); + + // MRI callback for target specific initializations. + TM->registerMachineRegisterInfoCallback(*MF); + + return Result(std::move(MF)); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp index d57a912f418b..62ac3e32d24d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" #include "llvm/IR/PrintPasses.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp index c31c065b1976..0f88a7b74160 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -39,7 +39,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); - AU.addUsedIfAvailable<SlotIndexes>(); + AU.addUsedIfAvailable<SlotIndexesWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -47,7 +47,8 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass { if (!isFunctionInPrintList(MF.getName())) return false; OS << "# " << Banner << ":\n"; - MF.print(OS, getAnalysisIfAvailable<SlotIndexes>()); + auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>(); + MF.print(OS, SIWrapper ? 
&SIWrapper->getSI() : nullptr); return false; } }; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp index 38c1c56d2823..edb7a13f4487 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp @@ -109,12 +109,6 @@ static bool isColdBlock(const MachineBasicBlock &MBB, const MachineBlockFrequencyInfo *MBFI, ProfileSummaryInfo *PSI) { std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB); - - // Temporary hack to cope with AArch64's jump table encoding - const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo(); - if (!TII.isMBBSafeToSplitToCold(MBB)) - return false; - // For instrumentation profiles and sample profiles, we use different ways // to judge whether a block is cold and should be split. if (PSI->hasInstrumentationProfile() || PSI->hasCSInstrumentationProfile()) { @@ -156,7 +150,7 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { MachineBlockFrequencyInfo *MBFI = nullptr; ProfileSummaryInfo *PSI = nullptr; if (UseProfileData) { - MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(); PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); // If we don't have a good profile (sample profile is not deemed // as a "good profile") and the function is not hot, then early @@ -178,7 +172,8 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { if (MBB.isEHPad()) LandingPads.push_back(&MBB); - else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) && !SplitAllEHCode) + else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) && + TII.isMBBSafeToSplitToCold(MBB) && !SplitAllEHCode) MBB.setSectionID(MBBSectionID::ColdSectionID); } @@ -190,7 +185,7 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { // Here we have UseProfileData == true. 
bool HasHotLandingPads = false; for (const MachineBasicBlock *LP : LandingPads) { - if (!isColdBlock(*LP, MBFI, PSI)) + if (!isColdBlock(*LP, MBFI, PSI) || !TII.isMBBSafeToSplitToCold(*LP)) HasHotLandingPads = true; } if (!HasHotLandingPads) { @@ -205,7 +200,7 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineModuleInfoWrapperPass>(); - AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); AU.addRequired<ProfileSummaryInfoWrapperPass>(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp index 27eae372f8ad..be64e9c8452f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp @@ -18,7 +18,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -34,11 +33,13 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGenTypes/LowLevelType.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" @@ -97,7 +98,7 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) { MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &TID, DebugLoc DL, bool NoImp) : MCID(&TID), NumOperands(0), Flags(0), AsmPrinterFlags(0), - DbgLoc(std::move(DL)), DebugInstrNum(0) { + DbgLoc(std::move(DL)), DebugInstrNum(0), Opcode(TID.Opcode) { assert(DbgLoc.hasTrivialDestructor() && "Expected trivial destructor"); // Reserve space for the expected number of operands. @@ -116,7 +117,8 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &TID, /// uniqueness. 
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) : MCID(&MI.getDesc()), NumOperands(0), Flags(0), AsmPrinterFlags(0), - Info(MI.Info), DbgLoc(MI.getDebugLoc()), DebugInstrNum(0) { + Info(MI.Info), DbgLoc(MI.getDebugLoc()), DebugInstrNum(0), + Opcode(MI.getOpcode()) { assert(DbgLoc.hasTrivialDestructor() && "Expected trivial destructor"); CapOperands = OperandCapacity::get(MI.getNumOperands()); @@ -142,6 +144,7 @@ void MachineInstr::setDesc(const MCInstrDesc &TID) { if (getParent()) getMF()->handleChangeDesc(*this, TID); MCID = &TID; + Opcode = TID.Opcode; } void MachineInstr::moveBefore(MachineInstr *MovePos) { @@ -317,14 +320,15 @@ void MachineInstr::setExtraInfo(MachineFunction &MF, MCSymbol *PreInstrSymbol, MCSymbol *PostInstrSymbol, MDNode *HeapAllocMarker, MDNode *PCSections, - uint32_t CFIType) { + uint32_t CFIType, MDNode *MMRAs) { bool HasPreInstrSymbol = PreInstrSymbol != nullptr; bool HasPostInstrSymbol = PostInstrSymbol != nullptr; bool HasHeapAllocMarker = HeapAllocMarker != nullptr; bool HasPCSections = PCSections != nullptr; bool HasCFIType = CFIType != 0; + bool HasMMRAs = MMRAs != nullptr; int NumPointers = MMOs.size() + HasPreInstrSymbol + HasPostInstrSymbol + - HasHeapAllocMarker + HasPCSections + HasCFIType; + HasHeapAllocMarker + HasPCSections + HasCFIType + HasMMRAs; // Drop all extra info if there is none. if (NumPointers <= 0) { @@ -336,11 +340,11 @@ void MachineInstr::setExtraInfo(MachineFunction &MF, // out of line because PointerSumType cannot hold more than 4 tag types with // 32-bit pointers. // FIXME: Maybe we should make the symbols in the extra info mutable? - else if (NumPointers > 1 || HasHeapAllocMarker || HasPCSections || + else if (NumPointers > 1 || HasMMRAs || HasHeapAllocMarker || HasPCSections || HasCFIType) { Info.set<EIIK_OutOfLine>( MF.createMIExtraInfo(MMOs, PreInstrSymbol, PostInstrSymbol, - HeapAllocMarker, PCSections, CFIType)); + HeapAllocMarker, PCSections, CFIType, MMRAs)); return; } @@ -358,7 +362,8 @@ void MachineInstr::dropMemRefs(MachineFunction &MF) { return; setExtraInfo(MF, {}, getPreInstrSymbol(), getPostInstrSymbol(), - getHeapAllocMarker(), getPCSections(), getCFIType()); + getHeapAllocMarker(), getPCSections(), getCFIType(), + getMMRAMetadata()); } void MachineInstr::setMemRefs(MachineFunction &MF, @@ -369,7 +374,8 @@ void MachineInstr::setMemRefs(MachineFunction &MF, } setExtraInfo(MF, MMOs, getPreInstrSymbol(), getPostInstrSymbol(), - getHeapAllocMarker(), getPCSections(), getCFIType()); + getHeapAllocMarker(), getPCSections(), getCFIType(), + getMMRAMetadata()); } void MachineInstr::addMemOperand(MachineFunction &MF, @@ -393,7 +399,8 @@ void MachineInstr::cloneMemRefs(MachineFunction &MF, const MachineInstr &MI) { if (getPreInstrSymbol() == MI.getPreInstrSymbol() && getPostInstrSymbol() == MI.getPostInstrSymbol() && getHeapAllocMarker() == MI.getHeapAllocMarker() && - getPCSections() == MI.getPCSections()) { + getPCSections() == MI.getPCSections() && + getMMRAMetadata() == MI.getMMRAMetadata()) { Info = MI.Info; return; } @@ -478,7 +485,8 @@ void MachineInstr::setPreInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) { } setExtraInfo(MF, memoperands(), Symbol, getPostInstrSymbol(), - getHeapAllocMarker(), getPCSections(), getCFIType()); + getHeapAllocMarker(), getPCSections(), getCFIType(), + getMMRAMetadata()); } void MachineInstr::setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) { @@ -493,7 +501,8 @@ void MachineInstr::setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) { } 
setExtraInfo(MF, memoperands(), getPreInstrSymbol(), Symbol, - getHeapAllocMarker(), getPCSections(), getCFIType()); + getHeapAllocMarker(), getPCSections(), getCFIType(), + getMMRAMetadata()); } void MachineInstr::setHeapAllocMarker(MachineFunction &MF, MDNode *Marker) { @@ -502,7 +511,7 @@ void MachineInstr::setHeapAllocMarker(MachineFunction &MF, MDNode *Marker) { return; setExtraInfo(MF, memoperands(), getPreInstrSymbol(), getPostInstrSymbol(), - Marker, getPCSections(), getCFIType()); + Marker, getPCSections(), getCFIType(), getMMRAMetadata()); } void MachineInstr::setPCSections(MachineFunction &MF, MDNode *PCSections) { @@ -511,7 +520,8 @@ void MachineInstr::setPCSections(MachineFunction &MF, MDNode *PCSections) { return; setExtraInfo(MF, memoperands(), getPreInstrSymbol(), getPostInstrSymbol(), - getHeapAllocMarker(), PCSections, getCFIType()); + getHeapAllocMarker(), PCSections, getCFIType(), + getMMRAMetadata()); } void MachineInstr::setCFIType(MachineFunction &MF, uint32_t Type) { @@ -520,7 +530,16 @@ void MachineInstr::setCFIType(MachineFunction &MF, uint32_t Type) { return; setExtraInfo(MF, memoperands(), getPreInstrSymbol(), getPostInstrSymbol(), - getHeapAllocMarker(), getPCSections(), Type); + getHeapAllocMarker(), getPCSections(), Type, getMMRAMetadata()); +} + +void MachineInstr::setMMRAMetadata(MachineFunction &MF, MDNode *MMRAs) { + // Do nothing if old and new metadata are the same. + if (MMRAs == getMMRAMetadata()) + return; + + setExtraInfo(MF, memoperands(), getPreInstrSymbol(), getPostInstrSymbol(), + getHeapAllocMarker(), getPCSections(), getCFIType(), MMRAs); } void MachineInstr::cloneInstrSymbols(MachineFunction &MF, @@ -536,6 +555,7 @@ void MachineInstr::cloneInstrSymbols(MachineFunction &MF, setPostInstrSymbol(MF, MI.getPostInstrSymbol()); setHeapAllocMarker(MF, MI.getHeapAllocMarker()); setPCSections(MF, MI.getPCSections()); + setMMRAMetadata(MF, MI.getMMRAMetadata()); } uint32_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const { @@ -553,6 +573,27 @@ uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) { MIFlags |= MachineInstr::MIFlag::NoSWrap; if (OB->hasNoUnsignedWrap()) MIFlags |= MachineInstr::MIFlag::NoUWrap; + } else if (const TruncInst *TI = dyn_cast<TruncInst>(&I)) { + if (TI->hasNoSignedWrap()) + MIFlags |= MachineInstr::MIFlag::NoSWrap; + if (TI->hasNoUnsignedWrap()) + MIFlags |= MachineInstr::MIFlag::NoUWrap; + } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) { + if (GEP->hasNoUnsignedSignedWrap()) + MIFlags |= MachineInstr::MIFlag::NoUSWrap; + if (GEP->hasNoUnsignedWrap()) + MIFlags |= MachineInstr::MIFlag::NoUWrap; + } + + // Copy the nonneg flag. + if (const PossiblyNonNegInst *PNI = dyn_cast<PossiblyNonNegInst>(&I)) { + if (PNI->hasNonNeg()) + MIFlags |= MachineInstr::MIFlag::NonNeg; + // Copy the disjoint flag. + } else if (const PossiblyDisjointInst *PD = + dyn_cast<PossiblyDisjointInst>(&I)) { + if (PD->isDisjoint()) + MIFlags |= MachineInstr::MIFlag::Disjoint; } // Copy the exact flag. @@ -1000,8 +1041,7 @@ unsigned MachineInstr::getBundleSize() const { /// Returns true if the MachineInstr has an implicit-use operand of exactly /// the given register (not considering sub/super-registers). 
bool MachineInstr::hasRegisterImplicitUseOperand(Register Reg) const { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - const MachineOperand &MO = getOperand(i); + for (const MachineOperand &MO : operands()) { if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == Reg) return true; } @@ -1011,8 +1051,9 @@ bool MachineInstr::hasRegisterImplicitUseOperand(Register Reg) const { /// findRegisterUseOperandIdx() - Returns the index of the operand that is a /// use of the specific register or -1 if it is not found. It further tightens /// the search criteria to a use that kills the register if isKill is true. -int MachineInstr::findRegisterUseOperandIdx( - Register Reg, bool isKill, const TargetRegisterInfo *TRI) const { +int MachineInstr::findRegisterUseOperandIdx(Register Reg, + const TargetRegisterInfo *TRI, + bool isKill) const { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); if (!MO.isReg() || !MO.isUse()) @@ -1059,9 +1100,9 @@ MachineInstr::readsWritesVirtualRegister(Register Reg, /// the specified register or -1 if it is not found. If isDead is true, defs /// that are not dead are skipped. If TargetRegisterInfo is non-null, then it /// also checks if there is a def of a super-register. -int -MachineInstr::findRegisterDefOperandIdx(Register Reg, bool isDead, bool Overlap, - const TargetRegisterInfo *TRI) const { +int MachineInstr::findRegisterDefOperandIdx(Register Reg, + const TargetRegisterInfo *TRI, + bool isDead, bool Overlap) const { bool isPhys = Reg.isPhysical(); for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); @@ -1302,10 +1343,11 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA, int64_t OffsetB = MMOb->getOffset(); int64_t MinOffset = std::min(OffsetA, OffsetB); - uint64_t WidthA = MMOa->getSize(); - uint64_t WidthB = MMOb->getSize(); - bool KnownWidthA = WidthA != MemoryLocation::UnknownSize; - bool KnownWidthB = WidthB != MemoryLocation::UnknownSize; + LocationSize WidthA = MMOa->getSize(); + LocationSize WidthB = MMOb->getSize(); + bool KnownWidthA = WidthA.hasValue(); + bool KnownWidthB = WidthB.hasValue(); + bool BothMMONonScalable = !WidthA.isScalable() && !WidthB.isScalable(); const Value *ValA = MMOa->getValue(); const Value *ValB = MMOb->getValue(); @@ -1321,11 +1363,13 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA, SameVal = true; } - if (SameVal) { + if (SameVal && BothMMONonScalable) { if (!KnownWidthA || !KnownWidthB) return true; int64_t MaxOffset = std::max(OffsetA, OffsetB); - int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB; + int64_t LowWidth = (MinOffset == OffsetA) + ? WidthA.getValue().getKnownMinValue() + : WidthB.getValue().getKnownMinValue(); return (MinOffset + LowWidth > MaxOffset); } @@ -1338,15 +1382,29 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA, assert((OffsetA >= 0) && "Negative MachineMemOperand offset"); assert((OffsetB >= 0) && "Negative MachineMemOperand offset"); + // If a scalable LocationSize has a non-zero offset, Width + Offset does not + // work at the moment + if ((WidthA.isScalable() && OffsetA > 0) || + (WidthB.isScalable() && OffsetB > 0)) + return true; + int64_t OverlapA = - KnownWidthA ? WidthA + OffsetA - MinOffset : MemoryLocation::UnknownSize; + KnownWidthA ? WidthA.getValue().getKnownMinValue() + OffsetA - MinOffset + : MemoryLocation::UnknownSize; int64_t OverlapB = - KnownWidthB ? 
WidthB + OffsetB - MinOffset : MemoryLocation::UnknownSize; + KnownWidthB ? WidthB.getValue().getKnownMinValue() + OffsetB - MinOffset + : MemoryLocation::UnknownSize; + + LocationSize LocA = (WidthA.isScalable() || !KnownWidthA) + ? WidthA + : LocationSize::precise(OverlapA); + LocationSize LocB = (WidthB.isScalable() || !KnownWidthB) + ? WidthB + : LocationSize::precise(OverlapB); return !AA->isNoAlias( - MemoryLocation(ValA, OverlapA, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), - MemoryLocation(ValB, OverlapB, - UseTBAA ? MMOb->getAAInfo() : AAMDNodes())); + MemoryLocation(ValA, LocA, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), + MemoryLocation(ValB, LocB, UseTBAA ? MMOb->getAAInfo() : AAMDNodes())); } bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other, @@ -1689,6 +1747,10 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "nofpexcept "; if (getFlag(MachineInstr::NoMerge)) OS << "nomerge "; + if (getFlag(MachineInstr::NonNeg)) + OS << "nneg "; + if (getFlag(MachineInstr::Disjoint)) + OS << "disjoint "; // Print the opcode name. if (TII) @@ -1843,6 +1905,14 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << " pcsections "; PCSections->printAsOperand(OS, MST); } + if (MDNode *MMRA = getMMRAMetadata()) { + if (!FirstOp) { + FirstOp = false; + OS << ','; + } + OS << " mmra "; + MMRA->printAsOperand(OS, MST); + } if (uint32_t CFIType = getCFIType()) { if (!FirstOp) OS << ','; @@ -2073,7 +2143,7 @@ void MachineInstr::setRegisterDefReadUndef(Register Reg, bool IsUndef) { void MachineInstr::addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo) { if (Reg.isPhysical()) { - MachineOperand *MO = findRegisterDefOperand(Reg, false, false, RegInfo); + MachineOperand *MO = findRegisterDefOperand(Reg, RegInfo, false, false); if (MO) return; } else { @@ -2146,7 +2216,7 @@ void MachineInstr::emitError(StringRef Msg) const { if (const MachineBasicBlock *MBB = getParent()) if (const MachineFunction *MF = MBB->getParent()) - return MF->getMMI().getModule()->getContext().emitError(LocCookie, Msg); + return MF->getFunction().getContext().emitError(LocCookie, Msg); report_fatal_error(Msg); } @@ -2354,18 +2424,23 @@ void MachineInstr::changeDebugValuesDefReg(Register Reg) { using MMOList = SmallVector<const MachineMemOperand *, 2>; -static unsigned getSpillSlotSize(const MMOList &Accesses, - const MachineFrameInfo &MFI) { - unsigned Size = 0; - for (const auto *A : Accesses) +static LocationSize getSpillSlotSize(const MMOList &Accesses, + const MachineFrameInfo &MFI) { + uint64_t Size = 0; + for (const auto *A : Accesses) { if (MFI.isSpillSlotObjectIndex( cast<FixedStackPseudoSourceValue>(A->getPseudoValue()) - ->getFrameIndex())) - Size += A->getSize(); + ->getFrameIndex())) { + LocationSize S = A->getSize(); + if (!S.hasValue()) + return LocationSize::beforeOrAfterPointer(); + Size += S.getValue(); + } + } return Size; } -std::optional<unsigned> +std::optional<LocationSize> MachineInstr::getSpillSize(const TargetInstrInfo *TII) const { int FI; if (TII->isStoreToStackSlotPostFE(*this, FI)) { @@ -2376,7 +2451,7 @@ MachineInstr::getSpillSize(const TargetInstrInfo *TII) const { return std::nullopt; } -std::optional<unsigned> +std::optional<LocationSize> MachineInstr::getFoldedSpillSize(const TargetInstrInfo *TII) const { MMOList Accesses; if (TII->hasStoreToStackSlot(*this, Accesses)) @@ -2384,7 +2459,7 @@ MachineInstr::getFoldedSpillSize(const TargetInstrInfo *TII) const { return std::nullopt; } -std::optional<unsigned> 
+std::optional<LocationSize> MachineInstr::getRestoreSize(const TargetInstrInfo *TII) const { int FI; if (TII->isLoadFromStackSlotPostFE(*this, FI)) { @@ -2395,7 +2470,7 @@ MachineInstr::getRestoreSize(const TargetInstrInfo *TII) const { return std::nullopt; } -std::optional<unsigned> +std::optional<LocationSize> MachineInstr::getFoldedRestoreSize(const TargetInstrInfo *TII) const { MMOList Accesses; if (TII->hasLoadFromStackSlot(*this, Accesses)) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp index 6eeed8b5c3f7..92189f636068 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -177,26 +177,25 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, } } - for (unsigned i = 0, e = Defs.size(); i != e; ++i) { - MachineOperand &MO = *Defs[i]; - Register Reg = MO.getReg(); + for (MachineOperand *MO : Defs) { + Register Reg = MO->getReg(); if (!Reg) continue; if (LocalDefSet.insert(Reg).second) { LocalDefs.push_back(Reg); - if (MO.isDead()) { + if (MO->isDead()) { DeadDefSet.insert(Reg); } } else { // Re-defined inside the bundle, it's no longer killed. KilledDefSet.erase(Reg); - if (!MO.isDead()) + if (!MO->isDead()) // Previously defined but dead. DeadDefSet.erase(Reg); } - if (!MO.isDead() && Reg.isPhysical()) { + if (!MO->isDead() && Reg.isPhysical()) { for (MCPhysReg SubReg : TRI->subregs(Reg)) { if (LocalDefSet.insert(SubReg).second) LocalDefs.push_back(SubReg); @@ -312,8 +311,7 @@ llvm::AnalyzeVirtRegLanesInBundle(const MachineInstr &MI, Register Reg, LaneBitmask UseMask, DefMask; - for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { - const MachineOperand &MO = *O; + for (const MachineOperand &MO : const_mi_bundle_ops(MI)) { if (!MO.isReg() || MO.getReg() != Reg) continue; @@ -339,9 +337,7 @@ PhysRegInfo llvm::AnalyzePhysRegInBundle(const MachineInstr &MI, Register Reg, PhysRegInfo PRI = {false, false, false, false, false, false, false, false}; assert(Reg.isPhysical() && "analyzePhysReg not given a physical register!"); - for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { - const MachineOperand &MO = *O; - + for (const MachineOperand &MO : const_mi_bundle_ops(MI)) { if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) { PRI.Clobbered = true; continue; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp index efc19f8fdbf8..f24ab187ef40 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp @@ -155,7 +155,7 @@ namespace { } // Track 'estimated' register pressure. - SmallSet<Register, 32> RegSeen; + SmallDenseSet<Register> RegSeen; SmallVector<unsigned, 8> RegPressure; // Register pressure "limit" per register pressure set. 
If the pressure @@ -188,12 +188,12 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineLoopInfoWrapperPass>(); if (DisableHoistingToHotterBlocks != UseBFI::None) - AU.addRequired<MachineBlockFrequencyInfo>(); - AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); + AU.addRequired<MachineDominatorTreeWrapperPass>(); AU.addRequired<AAResultsWrapperPass>(); - AU.addPreserved<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfoWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -223,8 +223,8 @@ namespace { void HoistPostRA(MachineInstr *MI, unsigned Def, MachineLoop *CurLoop, MachineBasicBlock *CurPreheader); - void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, - BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs, + void ProcessMI(MachineInstr *MI, BitVector &RUDefs, BitVector &RUClobbers, + SmallDenseSet<int> &StoredFIs, SmallVectorImpl<CandidateInfo> &Candidates, MachineLoop *CurLoop); @@ -323,18 +323,18 @@ char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID; INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE, "Machine Loop Invariant Code Motion", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE, "Machine Loop Invariant Code Motion", false, false) INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm", "Early Machine Loop Invariant Code Motion", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm", "Early Machine Loop Invariant Code Motion", false, false) @@ -373,9 +373,9 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) { // Get our Loop information... if (DisableHoistingToHotterBlocks != UseBFI::None) - MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); - MLI = &getAnalysis<MachineLoopInfo>(); - DT = &getAnalysis<MachineDominatorTree>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(); + MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); + DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); if (HoistConstLoads) @@ -423,11 +423,64 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { return false; } +static void applyBitsNotInRegMaskToRegUnitsMask(const TargetRegisterInfo &TRI, + BitVector &RUs, + const uint32_t *Mask) { + // FIXME: This intentionally works in reverse due to some issues with the + // Register Units infrastructure. + // + // This is used to apply callee-saved-register masks to the clobbered regunits + // mask. 
+ // + // The right way to approach this is to start with a BitVector full of ones, + // then reset all the bits of the regunits of each register that is set in the + // mask (registers preserved), then OR the resulting bits with the Clobbers + // mask. This correctly prioritizes the saved registers, so if a RU is shared + // between a register that is preserved, and one that is NOT preserved, that + // RU will not be set in the output vector (the clobbers). + // + // What we have to do for now is the opposite: we have to assume that the + // regunits of all registers that are NOT preserved are clobbered, even if + // those regunits are preserved by another register. So if a RU is shared + // as described previously, that RU will be set. + // + // This is to work around an issue which appears in AArch64, but isn't + // exclusive to that target: AArch64's Qn registers (128 bits) have Dn + // registers (lower 64 bits). A few Dn registers are preserved by some calling + // conventions, but Qn and Dn share exactly the same reg units. + // + // If we do this the right way, Qn will be marked as NOT clobbered even though + // its upper 64 bits are NOT preserved. The conservative approach handles this + // correctly at the cost of some missed optimizations on other targets. + // + // This is caused by how RegUnits are handled within TableGen. Ideally, Qn + // should have an extra RegUnit to model the "unknown" bits not covered by the + // subregs. + BitVector RUsFromRegsNotInMask(TRI.getNumRegUnits()); + const unsigned NumRegs = TRI.getNumRegs(); + const unsigned MaskWords = (NumRegs + 31) / 32; + for (unsigned K = 0; K < MaskWords; ++K) { + const uint32_t Word = Mask[K]; + for (unsigned Bit = 0; Bit < 32; ++Bit) { + const unsigned PhysReg = (K * 32) + Bit; + if (PhysReg == NumRegs) + break; + + if (PhysReg && !((Word >> Bit) & 1)) { + for (MCRegUnitIterator RUI(PhysReg, &TRI); RUI.isValid(); ++RUI) + RUsFromRegsNotInMask.set(*RUI); + } + } + } + + RUs |= RUsFromRegsNotInMask; +} + /// Examine the instruction for a potential LICM candidate. Also /// gather register def and frame object update information. -void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, - BitVector &PhysRegClobbers, - SmallSet<int, 32> &StoredFIs, +void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &RUDefs, + BitVector &RUClobbers, + SmallDenseSet<int> &StoredFIs, SmallVectorImpl<CandidateInfo> &Candidates, MachineLoop *CurLoop) { bool RuledOut = false; @@ -448,7 +501,7 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, // We can't hoist an instruction defining a physreg that is clobbered in // the loop. if (MO.isRegMask()) { - PhysRegClobbers.setBitsNotInMask(MO.getRegMask()); + applyBitsNotInRegMaskToRegUnitsMask(*TRI, RUClobbers, MO.getRegMask()); continue; } @@ -460,16 +513,22 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, assert(Reg.isPhysical() && "Not expecting virtual register!"); if (!MO.isDef()) { - if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg))) - // If it's using a non-loop-invariant register, then it's obviously not - // safe to hoist. - HasNonInvariantUse = true; + if (!HasNonInvariantUse) { + for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) { + // If it's using a non-loop-invariant register, then it's obviously + // not safe to hoist. 
+ if (RUDefs.test(*RUI) || RUClobbers.test(*RUI)) { + HasNonInvariantUse = true; + break; + } + } + } continue; } if (MO.isImplicit()) { - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - PhysRegClobbers.set(*AI); + for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) + RUClobbers.set(*RUI); if (!MO.isDead()) // Non-dead implicit def? This cannot be hoisted. RuledOut = true; @@ -488,19 +547,18 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, // If we have already seen another instruction that defines the same // register, then this is not safe. Two defs are indicated by setting a // PhysRegClobbers bit. - for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) { - if (PhysRegDefs.test(*AS)) - PhysRegClobbers.set(*AS); + for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) { + if (RUDefs.test(*RUI)) { + RUClobbers.set(*RUI); + RuledOut = true; + } else if (RUClobbers.test(*RUI)) { + // MI's defined register is seen defined by another instruction in + // the loop, so it cannot be a LICM candidate. + RuledOut = true; + } + + RUDefs.set(*RUI); } - // Need a second loop because MCRegAliasIterator can visit the same - // register twice. - for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) - PhysRegDefs.set(*AS); - - if (PhysRegClobbers.test(Reg)) - // MI defined register is seen defined by another instruction in - // the loop, it cannot be a LICM candidate. - RuledOut = true; } // Only consider reloads for now and remats which do not have register @@ -521,12 +579,12 @@ void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop, if (!Preheader) return; - unsigned NumRegs = TRI->getNumRegs(); - BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop. - BitVector PhysRegClobbers(NumRegs); // Regs defined more than once. + unsigned NumRegUnits = TRI->getNumRegUnits(); + BitVector RUDefs(NumRegUnits); // RUs defined once in the loop. + BitVector RUClobbers(NumRegUnits); // RUs defined more than once. SmallVector<CandidateInfo, 32> Candidates; - SmallSet<int, 32> StoredFIs; + SmallDenseSet<int> StoredFIs; // Walk the entire region, count the number of defs for each register, and // collect potential LICM candidates. @@ -540,22 +598,21 @@ void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop, // FIXME: That means a reload that's reused in successor block(s) will not // be LICM'ed. for (const auto &LI : BB->liveins()) { - for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) - PhysRegDefs.set(*AI); + for (MCRegUnitIterator RUI(LI.PhysReg, TRI); RUI.isValid(); ++RUI) + RUDefs.set(*RUI); } // Funclet entry blocks will clobber all registers if (const uint32_t *Mask = BB->getBeginClobberMask(TRI)) - PhysRegClobbers.setBitsNotInMask(Mask); + applyBitsNotInRegMaskToRegUnitsMask(*TRI, RUClobbers, Mask); SpeculationState = SpeculateUnknown; for (MachineInstr &MI : *BB) - ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates, - CurLoop); + ProcessMI(&MI, RUDefs, RUClobbers, StoredFIs, Candidates, CurLoop); } // Gather the registers read / clobbered by the terminator. 
- BitVector TermRegs(NumRegs); + BitVector TermRUs(NumRegUnits); MachineBasicBlock::iterator TI = Preheader->getFirstTerminator(); if (TI != Preheader->end()) { for (const MachineOperand &MO : TI->operands()) { @@ -564,8 +621,8 @@ void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop, Register Reg = MO.getReg(); if (!Reg) continue; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - TermRegs.set(*AI); + for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) + TermRUs.set(*RUI); } } @@ -583,24 +640,36 @@ void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop, continue; unsigned Def = Candidate.Def; - if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) { - bool Safe = true; - MachineInstr *MI = Candidate.MI; - for (const MachineOperand &MO : MI->all_uses()) { - if (!MO.getReg()) - continue; - Register Reg = MO.getReg(); - if (PhysRegDefs.test(Reg) || - PhysRegClobbers.test(Reg)) { + bool Safe = true; + for (MCRegUnitIterator RUI(Def, TRI); RUI.isValid(); ++RUI) { + if (RUClobbers.test(*RUI) || TermRUs.test(*RUI)) { + Safe = false; + break; + } + } + + if (!Safe) + continue; + + MachineInstr *MI = Candidate.MI; + for (const MachineOperand &MO : MI->all_uses()) { + if (!MO.getReg()) + continue; + for (MCRegUnitIterator RUI(MO.getReg(), TRI); RUI.isValid(); ++RUI) { + if (RUDefs.test(*RUI) || RUClobbers.test(*RUI)) { // If it's using a non-loop-invariant register, then it's obviously // not safe to hoist. Safe = false; break; } } - if (Safe) - HoistPostRA(MI, Candidate.Def, CurLoop, CurPreheader); + + if (!Safe) + break; } + + if (Safe) + HoistPostRA(MI, Candidate.Def, CurLoop, CurPreheader); } } @@ -1264,15 +1333,33 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI, // If we have a COPY with other uses in the loop, hoist to allow the users to // also be hoisted. - if (MI.isCopy() && MI.getOperand(0).isReg() && - MI.getOperand(0).getReg().isVirtual() && MI.getOperand(1).isReg() && - MI.getOperand(1).getReg().isVirtual() && - IsLoopInvariantInst(MI, CurLoop) && - any_of(MRI->use_nodbg_instructions(MI.getOperand(0).getReg()), - [&CurLoop](MachineInstr &UseMI) { - return CurLoop->contains(&UseMI); - })) - return true; + // TODO: Handle all isCopyLike? + if (MI.isCopy() || MI.isRegSequence()) { + Register DefReg = MI.getOperand(0).getReg(); + if (DefReg.isVirtual() && + all_of(MI.uses(), + [this](const MachineOperand &UseOp) { + return !UseOp.isReg() || UseOp.getReg().isVirtual() || + MRI->isConstantPhysReg(UseOp.getReg()); + }) && + IsLoopInvariantInst(MI, CurLoop) && + any_of(MRI->use_nodbg_instructions(DefReg), + [&CurLoop, this, DefReg, Cost](MachineInstr &UseMI) { + if (!CurLoop->contains(&UseMI)) + return false; + + // COPY is a cheap instruction, but if moving it won't cause + // high RP we're fine to hoist it even if the user can't be + // hoisted later. Otherwise we want to check whether the user + // is hoistable + if (CanCauseHighRegPressure(Cost, false) && + !CurLoop->isLoopInvariant(UseMI, DefReg)) + return false; + + return true; + })) + return true; + } // High register pressure situation, only hoist if the instruction is going // to be remat'ed. 
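The conservative regunit treatment described in the applyBitsNotInRegMaskToRegUnitsMask comment above is easier to picture with a toy model. The following self-contained C++ sketch is not part of the patch: the register names, unit numbering, and mask representation are illustrative assumptions that mirror the Qn/Dn situation the comment describes, where an unpreserved register shares a unit with a preserved one.

#include <bitset>
#include <cstdio>
#include <vector>

// Illustrative model: Q0 (128-bit) and its subregister D0 (low 64 bits)
// share a single register unit, like the AArch64 layout in the comment.
struct Reg {
  const char *Name;
  std::vector<unsigned> Units; // regunits covered by this register
  bool PreservedByMask;        // bit set in the callee-saved regmask
};

int main() {
  std::bitset<4> Clobbers; // one bit per regunit
  std::vector<Reg> Regs = {
      {"D0", {0}, /*PreservedByMask=*/true},  // callee-saved half
      {"Q0", {0}, /*PreservedByMask=*/false}, // full register, not preserved
      {"X1", {1}, /*PreservedByMask=*/false},
  };

  // Conservative direction used by the patch: every register NOT preserved
  // by the mask clobbers all of its units, even units it shares with a
  // preserved register.
  for (const Reg &R : Regs)
    if (!R.PreservedByMask)
      for (unsigned U : R.Units)
        Clobbers.set(U);

  // Unit 0 ends up clobbered even though D0 is preserved, because Q0 shares
  // it and is not preserved. That is the deliberate over-approximation.
  std::printf("unit 0 clobbered: %d\n", (int)Clobbers.test(0)); // prints 1
  std::printf("unit 1 clobbered: %d\n", (int)Clobbers.test(1)); // prints 1
}

Running the toy shows unit 0 marked clobbered, which is exactly why the patch accepts some missed hoists in exchange for correctness on shared units.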
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp index aa1eb7c35425..1f596cd1bd2e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp @@ -230,7 +230,7 @@ bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) { if (MI.modifiesRegister(Reg, TRI)) { MBBDefs.erase(Reg); MBBKills.erase(Reg); - } else if (MI.findRegisterUseOperandIdx(Reg, true /*isKill*/, TRI) != -1) + } else if (MI.findRegisterUseOperandIdx(Reg, TRI, true /*isKill*/) != -1) // Keep track of register kills. MBBKills[Reg] = &MI; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp index bdbc57099aa8..a03c008e6045 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -30,31 +30,59 @@ using namespace llvm; template class llvm::LoopBase<MachineBasicBlock, MachineLoop>; template class llvm::LoopInfoBase<MachineBasicBlock, MachineLoop>; -char MachineLoopInfo::ID = 0; -MachineLoopInfo::MachineLoopInfo() : MachineFunctionPass(ID) { - initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); +AnalysisKey MachineLoopAnalysis::Key; + +MachineLoopAnalysis::Result +MachineLoopAnalysis::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + return MachineLoopInfo(MFAM.getResult<MachineDominatorTreeAnalysis>(MF)); +} + +PreservedAnalyses +MachineLoopPrinterPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + OS << "Machine loop info for machine function '" << MF.getName() << "':\n"; + MFAM.getResult<MachineLoopAnalysis>(MF).print(OS); + return PreservedAnalyses::all(); +} + +char MachineLoopInfoWrapperPass::ID = 0; +MachineLoopInfoWrapperPass::MachineLoopInfoWrapperPass() + : MachineFunctionPass(ID) { + initializeMachineLoopInfoWrapperPassPass(*PassRegistry::getPassRegistry()); } -INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops", - "Machine Natural Loop Construction", true, true) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_END(MachineLoopInfo, "machine-loops", - "Machine Natural Loop Construction", true, true) +INITIALIZE_PASS_BEGIN(MachineLoopInfoWrapperPass, "machine-loops", + "Machine Natural Loop Construction", true, true) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) +INITIALIZE_PASS_END(MachineLoopInfoWrapperPass, "machine-loops", + "Machine Natural Loop Construction", true, true) -char &llvm::MachineLoopInfoID = MachineLoopInfo::ID; +char &llvm::MachineLoopInfoID = MachineLoopInfoWrapperPass::ID; -bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) { - calculate(getAnalysis<MachineDominatorTree>()); +bool MachineLoopInfoWrapperPass::runOnMachineFunction(MachineFunction &) { + LI.calculate(getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()); return false; } +bool MachineLoopInfo::invalidate( + MachineFunction &, const PreservedAnalyses &PA, + MachineFunctionAnalysisManager::Invalidator &) { + // Check whether the analysis, all analyses on functions, or the function's + // CFG have been preserved. 
+ auto PAC = PA.getChecker<MachineLoopAnalysis>(); + return !PAC.preserved() && + !PAC.preservedSet<AllAnalysesOn<MachineFunction>>() && + !PAC.preservedSet<CFGAnalyses>(); +} + void MachineLoopInfo::calculate(MachineDominatorTree &MDT) { releaseMemory(); - LI.analyze(MDT.getBase()); + analyze(MDT.getBase()); } -void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { +void MachineLoopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineDominatorTreeWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -198,7 +226,25 @@ MDNode *MachineLoop::getLoopID() const { return LoopID; } -bool MachineLoop::isLoopInvariant(MachineInstr &I) const { +bool MachineLoop::isLoopInvariantImplicitPhysReg(Register Reg) const { + MachineFunction *MF = getHeader()->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + + if (MRI->isConstantPhysReg(Reg)) + return true; + + if (!MF->getSubtarget() + .getRegisterInfo() + ->shouldAnalyzePhysregInMachineLoopInfo(Reg)) + return false; + + return !llvm::any_of( + MRI->def_instructions(Reg), + [this](const MachineInstr &MI) { return this->contains(&MI); }); +} + +bool MachineLoop::isLoopInvariant(MachineInstr &I, + const Register ExcludeReg) const { MachineFunction *MF = I.getParent()->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); const TargetSubtargetInfo &ST = MF->getSubtarget(); @@ -213,6 +259,9 @@ bool MachineLoop::isLoopInvariant(MachineInstr &I) const { Register Reg = MO.getReg(); if (Reg == 0) continue; + if (ExcludeReg == Reg) + continue; + // An instruction that uses or defines a physical register can't e.g. be // hoisted, so mark this as not invariant. if (Reg.isPhysical()) { @@ -222,7 +271,7 @@ bool MachineLoop::isLoopInvariant(MachineInstr &I) const { // it could get allocated to something with a def during allocation. // However, if the physreg is known to always be caller saved/restored // then this use is safe to hoist. - if (!MRI->isConstantPhysReg(Reg) && + if (!isLoopInvariantImplicitPhysReg(Reg) && !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())) && !TII->isIgnorableUse(MO)) return false; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp index 921feb253d64..b950f4fdbcf7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -185,7 +185,7 @@ INITIALIZE_PASS(MachineModuleInfoWrapperPass, "machinemoduleinfo", "Machine Module Information", false, false) char MachineModuleInfoWrapperPass::ID = 0; -static unsigned getLocCookie(const SMDiagnostic &SMD, const SourceMgr &SrcMgr, +static uint64_t getLocCookie(const SMDiagnostic &SMD, const SourceMgr &SrcMgr, std::vector<const MDNode *> &LocInfos) { // Look up a LocInfo for the buffer this diagnostic is coming from. unsigned BufNum = SrcMgr.FindBufferContainingLoc(SMD.getLoc()); @@ -195,7 +195,7 @@ static unsigned getLocCookie(const SMDiagnostic &SMD, const SourceMgr &SrcMgr, // If the inline asm had metadata associated with it, pull out a location // cookie corresponding to which line the error occurred on. 
- unsigned LocCookie = 0; + uint64_t LocCookie = 0; if (LocInfo) { unsigned ErrorLine = SMD.getLineNo() - 1; if (ErrorLine >= LocInfo->getNumOperands()) @@ -213,13 +213,12 @@ static unsigned getLocCookie(const SMDiagnostic &SMD, const SourceMgr &SrcMgr, bool MachineModuleInfoWrapperPass::doInitialization(Module &M) { MMI.initialize(); MMI.TheModule = &M; - // FIXME: Do this for new pass manager. LLVMContext &Ctx = M.getContext(); MMI.getContext().setDiagnosticHandler( [&Ctx, &M](const SMDiagnostic &SMD, bool IsInlineAsm, const SourceMgr &SrcMgr, std::vector<const MDNode *> &LocInfos) { - unsigned LocCookie = 0; + uint64_t LocCookie = 0; if (IsInlineAsm) LocCookie = getLocCookie(SMD, SrcMgr, LocInfos); Ctx.diagnose( @@ -237,11 +236,21 @@ bool MachineModuleInfoWrapperPass::doFinalization(Module &M) { AnalysisKey MachineModuleAnalysis::Key; -MachineModuleInfo MachineModuleAnalysis::run(Module &M, - ModuleAnalysisManager &) { - MachineModuleInfo MMI(TM); +MachineModuleAnalysis::Result +MachineModuleAnalysis::run(Module &M, ModuleAnalysisManager &) { MMI.TheModule = &M; - MMI.DbgInfoAvailable = !DisableDebugInfoPrinting && - !M.debug_compile_units().empty(); - return MMI; + LLVMContext &Ctx = M.getContext(); + MMI.getContext().setDiagnosticHandler( + [&Ctx, &M](const SMDiagnostic &SMD, bool IsInlineAsm, + const SourceMgr &SrcMgr, + std::vector<const MDNode *> &LocInfos) { + unsigned LocCookie = 0; + if (IsInlineAsm) + LocCookie = getLocCookie(SMD, SrcMgr, LocInfos); + Ctx.diagnose( + DiagnosticInfoSrcMgr(SMD, M.getName(), IsInlineAsm, LocCookie)); + }); + MMI.DbgInfoAvailable = + !DisableDebugInfoPrinting && !M.debug_compile_units().empty(); + return Result(MMI); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp index 9c3b31935f6d..956317510dc7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCSymbol.h" using namespace llvm; @@ -41,3 +42,20 @@ MachineModuleInfoImpl::SymbolListTy MachineModuleInfoImpl::getSortedStubs( Map.clear(); return List; } + +using ExprStubPairTy = std::pair<MCSymbol *, const MCExpr *>; +static int SortAuthStubPair(const ExprStubPairTy *LHS, + const ExprStubPairTy *RHS) { + return LHS->first->getName().compare(RHS->first->getName()); +} + +MachineModuleInfoImpl::ExprStubListTy MachineModuleInfoImpl::getSortedExprStubs( + DenseMap<MCSymbol *, const MCExpr *> &ExprStubs) { + MachineModuleInfoImpl::ExprStubListTy List(ExprStubs.begin(), + ExprStubs.end()); + + array_pod_sort(List.begin(), List.end(), SortAuthStubPair); + + ExprStubs.clear(); + return List; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp index aa63411df965..965539ddaca8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp @@ -9,6 +9,7 @@ #include "llvm/CodeGen/MachineModuleSlotTracker.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Module.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp index 
c7c0a1c20d57..ace05902d5df 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp @@ -1101,24 +1101,27 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, assert(getFailureOrdering() == FailureOrdering && "Value truncated"); } -MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, - uint64_t s, Align a, +MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags F, + LocationSize TS, Align BaseAlignment, const AAMDNodes &AAInfo, const MDNode *Ranges, SyncScope::ID SSID, AtomicOrdering Ordering, AtomicOrdering FailureOrdering) - : MachineMemOperand(ptrinfo, f, - s == ~UINT64_C(0) ? LLT() : LLT::scalar(8 * s), a, - AAInfo, Ranges, SSID, Ordering, FailureOrdering) {} + : MachineMemOperand( + ptrinfo, F, + !TS.hasValue() ? LLT() + : TS.isScalable() + ? LLT::scalable_vector(1, 8 * TS.getValue().getKnownMinValue()) + : LLT::scalar(8 * TS.getValue().getKnownMinValue()), + BaseAlignment, AAInfo, Ranges, SSID, Ordering, FailureOrdering) {} void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { // The Value and Offset may differ due to CSE. But the flags and size // should be the same. assert(MMO->getFlags() == getFlags() && "Flags mismatch!"); - assert((MMO->getSize() == ~UINT64_C(0) || getSize() == ~UINT64_C(0) || + assert((!MMO->getSize().hasValue() || !getSize().hasValue() || MMO->getSize() == getSize()) && "Size mismatch!"); - if (MMO->getBaseAlign() >= getBaseAlign()) { // Update the alignment value. BaseAlign = MMO->getBaseAlign(); @@ -1240,7 +1243,8 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, << "unknown-address"; } MachineOperand::printOperandOffset(OS, getOffset()); - if (getSize() > 0 && getAlign() != getSize()) + if (!getSize().hasValue() || + getAlign() != getSize().getValue().getKnownMinValue()) OS << ", align " << getAlign().value(); if (getAlign() != getBaseAlign()) OS << ", basealign " << getBaseAlign().value(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp index 1c31eba909e7..039f07f2e5e3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -31,6 +31,14 @@ DiagnosticInfoMIROptimization::MachineArgument::MachineArgument( /*SkipDebugLoc=*/true); } +bool MachineOptimizationRemarkEmitter::invalidate( + MachineFunction &MF, const PreservedAnalyses &PA, + MachineFunctionAnalysisManager::Invalidator &Inv) { + // This analysis has no state and so can be trivially preserved but it needs + // a fresh view of BFI if it was constructed with one. + return MBFI && Inv.invalidate<MachineBlockFrequencyAnalysis>(MF, PA); +} + std::optional<uint64_t> MachineOptimizationRemarkEmitter::computeHotness(const MachineBasicBlock &MBB) { if (!MBFI) @@ -86,6 +94,18 @@ void MachineOptimizationRemarkEmitterPass::getAnalysisUsage( MachineFunctionPass::getAnalysisUsage(AU); } +AnalysisKey MachineOptimizationRemarkEmitterAnalysis::Key; + +MachineOptimizationRemarkEmitterAnalysis::Result +MachineOptimizationRemarkEmitterAnalysis::run( + MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) { + MachineBlockFrequencyInfo *MBFI = + MF.getFunction().getContext().getDiagnosticsHotnessRequested() + ? 
&MFAM.getResult<MachineBlockFrequencyAnalysis>(MF) + : nullptr; + return Result(MF, MBFI); +} + char MachineOptimizationRemarkEmitterPass::ID = 0; static const char ore_name[] = "Machine Optimization Remark Emitter"; #define ORE_NAME "machine-opt-remark-emitter" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp index b8d3b2e30e6e..c7ccf10e12b1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp @@ -69,6 +69,7 @@ #include "llvm/IR/DIBuilder.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -121,6 +122,12 @@ static cl::opt<unsigned> OutlinerBenefitThreshold( cl::desc( "The minimum size in bytes before an outlining candidate is accepted")); +static cl::opt<bool> OutlinerLeafDescendants( + "outliner-leaf-descendants", cl::init(true), cl::Hidden, + cl::desc("Consider all leaf descendants of internal nodes of the suffix " + "tree as candidates for outlining (if false, only leaf children " + "are considered)")); + namespace { /// Maps \p MachineInstrs to unsigned integers and stores the mappings. @@ -576,7 +583,7 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) { void MachineOutliner::findCandidates( InstructionMapper &Mapper, std::vector<OutlinedFunction> &FunctionList) { FunctionList.clear(); - SuffixTree ST(Mapper.UnsignedVec); + SuffixTree ST(Mapper.UnsignedVec, OutlinerLeafDescendants); // First, find all of the repeated substrings in the tree of minimum length // 2. @@ -584,7 +591,7 @@ void MachineOutliner::findCandidates( LLVM_DEBUG(dbgs() << "*** Discarding overlapping candidates *** \n"); LLVM_DEBUG( dbgs() << "Searching for overlaps in all repeated sequences...\n"); - for (const SuffixTree::RepeatedSubstring &RS : ST) { + for (SuffixTree::RepeatedSubstring &RS : ST) { CandidatesForRepeatedSeq.clear(); unsigned StringLen = RS.Length; LLVM_DEBUG(dbgs() << " Sequence length: " << StringLen << "\n"); @@ -593,6 +600,9 @@ void MachineOutliner::findCandidates( unsigned NumDiscarded = 0; unsigned NumKept = 0; #endif + // Sort the start indices so that we can efficiently check if candidates + // overlap with the ones we've already found for this sequence. + llvm::sort(RS.StartIndices); for (const unsigned &StartIdx : RS.StartIndices) { // Trick: Discard some candidates that would be incompatible with the // ones we've already found for this sequence. This will save us some @@ -616,17 +626,15 @@ void MachineOutliner::findCandidates( // * End before the other starts // * Start after the other ends unsigned EndIdx = StartIdx + StringLen - 1; - auto FirstOverlap = find_if( - CandidatesForRepeatedSeq, [StartIdx, EndIdx](const Candidate &C) { - return EndIdx >= C.getStartIdx() && StartIdx <= C.getEndIdx(); - }); - if (FirstOverlap != CandidatesForRepeatedSeq.end()) { + if (!CandidatesForRepeatedSeq.empty() && + StartIdx <= CandidatesForRepeatedSeq.back().getEndIdx()) { #ifndef NDEBUG ++NumDiscarded; - LLVM_DEBUG(dbgs() << " .. DISCARD candidate @ [" << StartIdx - << ", " << EndIdx << "]; overlaps with candidate @ [" - << FirstOverlap->getStartIdx() << ", " - << FirstOverlap->getEndIdx() << "]\n"); + LLVM_DEBUG(dbgs() << " .. 
DISCARD candidate @ [" << StartIdx << ", " + << EndIdx << "]; overlaps with candidate @ [" + << CandidatesForRepeatedSeq.back().getStartIdx() + << ", " << CandidatesForRepeatedSeq.back().getEndIdx() + << "]\n"); #endif continue; } @@ -717,8 +725,7 @@ MachineFunction *MachineOutliner::createOutlinedFunction( [](UWTableKind K, const outliner::Candidate &C) { return std::max(K, C.getMF()->getFunction().getUWTableKind()); }); - if (UW != UWTableKind::None) - F->setUWTableKind(UW); + F->setUWTableKind(UW); BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F); IRBuilder<> Builder(EntryBB); @@ -759,7 +766,7 @@ MachineFunction *MachineOutliner::createOutlinedFunction( MF.getProperties().set(MachineFunctionProperties::Property::NoPHIs); MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); MF.getProperties().set(MachineFunctionProperties::Property::TracksLiveness); - MF.getRegInfo().freezeReservedRegs(MF); + MF.getRegInfo().freezeReservedRegs(); // Compute live-in set for outlined fn const MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -797,8 +804,7 @@ MachineFunction *MachineOutliner::createOutlinedFunction( Mg.getNameWithPrefix(MangledNameStream, F, false); DISubprogram *OutlinedSP = DB.createFunction( - Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()), - Unit /* File */, + Unit /* Context */, F->getName(), StringRef(Dummy), Unit /* File */, 0 /* Line 0 is reserved for compiler-generated code. */, DB.createSubroutineType( DB.getOrCreateTypeArray(std::nullopt)), /* void type */ @@ -828,10 +834,12 @@ bool MachineOutliner::outline(Module &M, << "\n"); bool OutlinedSomething = false; - // Sort by benefit. The most beneficial functions should be outlined first. + // Sort by priority where priority := getNotOutlinedCost / getOutliningCost. + // The function with highest priority should be outlined first. stable_sort(FunctionList, [](const OutlinedFunction &LHS, const OutlinedFunction &RHS) { - return LHS.getBenefit() > RHS.getBenefit(); + return LHS.getNotOutlinedCost() * RHS.getOutliningCost() > + RHS.getNotOutlinedCost() * LHS.getOutliningCost(); }); // Walk over each function, outlining them as we go along. Functions are diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp index 914e6b19fde9..6d540808d4cc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp @@ -12,100 +12,154 @@ #include "llvm/CodeGen/MachinePassManager.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/PassManagerImpl.h" using namespace llvm; +AnalysisKey FunctionAnalysisManagerMachineFunctionProxy::Key; + namespace llvm { -template class AllAnalysesOn<MachineFunction>; template class AnalysisManager<MachineFunction>; template class PassManager<MachineFunction>; +template class InnerAnalysisManagerProxy<MachineFunctionAnalysisManager, + Module>; +template class InnerAnalysisManagerProxy<MachineFunctionAnalysisManager, + Function>; +template class OuterAnalysisManagerProxy<ModuleAnalysisManager, + MachineFunction>; +} // namespace llvm -Error MachineFunctionPassManager::run(Module &M, - MachineFunctionAnalysisManager &MFAM) { - // MachineModuleAnalysis is a module analysis pass that is never invalidated - // because we don't run any module pass in codegen pipeline. 
This is very - // important because the codegen state is stored in MMI which is the analysis - // result of MachineModuleAnalysis. MMI should not be recomputed. - auto &MMI = MFAM.getResult<MachineModuleAnalysis>(M); - - (void)RequireCodeGenSCCOrder; - assert(!RequireCodeGenSCCOrder && "not implemented"); - - // M is unused here - PassInstrumentation PI = MFAM.getResult<PassInstrumentationAnalysis>(M); - - // Add a PIC to verify machine functions. - if (VerifyMachineFunction) { - // No need to pop this callback later since MIR pipeline is flat which means - // current pipeline is the top-level pipeline. Callbacks are not used after - // current pipeline. - PI.pushBeforeNonSkippedPassCallback([&MFAM](StringRef PassID, Any IR) { - assert(llvm::any_cast<const MachineFunction *>(&IR)); - const MachineFunction *MF = llvm::any_cast<const MachineFunction *>(IR); - assert(MF && "Machine function should be valid for printing"); - std::string Banner = std::string("After ") + std::string(PassID); - verifyMachineFunction(&MFAM, Banner, *MF); - }); +bool FunctionAnalysisManagerMachineFunctionProxy::Result::invalidate( + MachineFunction &IR, const PreservedAnalyses &PA, + MachineFunctionAnalysisManager::Invalidator &Inv) { + // MachineFunction passes should not invalidate Function analyses. + // TODO: verify that PA doesn't invalidate Function analyses. + return false; +} + +template <> +bool MachineFunctionAnalysisManagerModuleProxy::Result::invalidate( + Module &M, const PreservedAnalyses &PA, + ModuleAnalysisManager::Invalidator &Inv) { + // If literally everything is preserved, we're done. + if (PA.areAllPreserved()) + return false; // This is still a valid proxy. + + // If this proxy isn't marked as preserved, then even if the result remains + // valid, the key itself may no longer be valid, so we clear everything. + // + // Note that in order to preserve this proxy, a module pass must ensure that + // the MFAM has been completely updated to handle the deletion of functions. + // Specifically, any MFAM-cached results for those functions need to have been + // forcibly cleared. When preserved, this proxy will only invalidate results + // cached on functions *still in the module* at the end of the module pass. + auto PAC = PA.getChecker<MachineFunctionAnalysisManagerModuleProxy>(); + if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Module>>()) { + InnerAM->clear(); + return true; } - for (auto &F : InitializationFuncs) { - if (auto Err = F(M, MFAM)) - return Err; + // FIXME: be more precise, see + // FunctionAnalysisManagerModuleProxy::Result::invalidate. + if (!PA.allAnalysesInSetPreserved<AllAnalysesOn<MachineFunction>>()) { + InnerAM->clear(); + return true; } - unsigned Idx = 0; - size_t Size = Passes.size(); - do { - // Run machine module passes - for (; MachineModulePasses.count(Idx) && Idx != Size; ++Idx) { - if (!PI.runBeforePass<Module>(*Passes[Idx], M)) - continue; - if (auto Err = MachineModulePasses.at(Idx)(M, MFAM)) - return Err; - PI.runAfterPass(*Passes[Idx], M, PreservedAnalyses::all()); - } - - // Finish running all passes. - if (Idx == Size) - break; - - // Run machine function passes - - // Get index range of machine function passes. - unsigned Begin = Idx; - for (; !MachineModulePasses.count(Idx) && Idx != Size; ++Idx) - ; - - for (Function &F : M) { - // Do not codegen any 'available_externally' functions at all, they have - // definitions outside the translation unit. 
- if (F.hasAvailableExternallyLinkage()) - continue; - - MachineFunction &MF = MMI.getOrCreateMachineFunction(F); - - for (unsigned I = Begin, E = Idx; I != E; ++I) { - auto *P = Passes[I].get(); - - if (!PI.runBeforePass<MachineFunction>(*P, MF)) - continue; - - // TODO: EmitSizeRemarks - PreservedAnalyses PassPA = P->run(MF, MFAM); - MFAM.invalidate(MF, PassPA); - PI.runAfterPass(*P, MF, PassPA); - } - } - } while (true); - - for (auto &F : FinalizationFuncs) { - if (auto Err = F(M, MFAM)) - return Err; + // Return false to indicate that this result is still a valid proxy. + return false; +} + +template <> +bool MachineFunctionAnalysisManagerFunctionProxy::Result::invalidate( + Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv) { + // If literally everything is preserved, we're done. + if (PA.areAllPreserved()) + return false; // This is still a valid proxy. + + // If this proxy isn't marked as preserved, then even if the result remains + // valid, the key itself may no longer be valid, so we clear everything. + // + // Note that in order to preserve this proxy, a module pass must ensure that + // the MFAM has been completely updated to handle the deletion of functions. + // Specifically, any MFAM-cached results for those functions need to have been + // forcibly cleared. When preserved, this proxy will only invalidate results + // cached on functions *still in the module* at the end of the module pass. + auto PAC = PA.getChecker<MachineFunctionAnalysisManagerFunctionProxy>(); + if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>()) { + InnerAM->clear(); + return true; + } + + // FIXME: be more precise, see + // FunctionAnalysisManagerModuleProxy::Result::invalidate. + if (!PA.allAnalysesInSetPreserved<AllAnalysesOn<MachineFunction>>()) { + InnerAM->clear(); + return true; } - return Error::success(); + // Return false to indicate that this result is still a valid proxy. + return false; } -} // namespace llvm +PreservedAnalyses +FunctionToMachineFunctionPassAdaptor::run(Function &F, + FunctionAnalysisManager &FAM) { + MachineFunctionAnalysisManager &MFAM = + FAM.getResult<MachineFunctionAnalysisManagerFunctionProxy>(F) + .getManager(); + PassInstrumentation PI = FAM.getResult<PassInstrumentationAnalysis>(F); + PreservedAnalyses PA = PreservedAnalyses::all(); + // Do not codegen any 'available_externally' functions at all, they have + // definitions outside the translation unit. 
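// Illustrative sketch: what the proxy invalidation above means for a module
// pass that wants cached MachineFunction analysis results to survive. Per
// the comments in invalidate(), the pass must (a) preserve the proxy itself
// and (b) preserve the AllAnalysesOn<MachineFunction> set, and it is only
// entitled to do so if it has cleared MFAM entries for any functions it
// deleted. ExampleModulePass is hypothetical:

#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/IR/PassManager.h"

struct ExampleModulePass : llvm::PassInfoMixin<ExampleModulePass> {
  llvm::PreservedAnalyses run(llvm::Module &M,
                              llvm::ModuleAnalysisManager &MAM) {
    // ... a transform that leaves machine functions untouched ...
    llvm::PreservedAnalyses PA;
    PA.preserve<llvm::MachineFunctionAnalysisManagerModuleProxy>();
    PA.preserveSet<llvm::AllAnalysesOn<llvm::MachineFunction>>();
    return PA;
  }
};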
+ if (F.isDeclaration() || F.hasAvailableExternallyLinkage()) + return PreservedAnalyses::all(); + + MachineFunction &MF = FAM.getResult<MachineFunctionAnalysis>(F).getMF(); + + if (!PI.runBeforePass<MachineFunction>(*Pass, MF)) + return PreservedAnalyses::all(); + PreservedAnalyses PassPA = Pass->run(MF, MFAM); + MFAM.invalidate(MF, PassPA); + PI.runAfterPass(*Pass, MF, PassPA); + PA.intersect(std::move(PassPA)); + + return PA; +} + +void FunctionToMachineFunctionPassAdaptor::printPipeline( + raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { + OS << "machine-function("; + Pass->printPipeline(OS, MapClassName2PassName); + OS << ')'; +} + +template <> +PreservedAnalyses +PassManager<MachineFunction>::run(MachineFunction &MF, + AnalysisManager<MachineFunction> &MFAM) { + PassInstrumentation PI = MFAM.getResult<PassInstrumentationAnalysis>(MF); + PreservedAnalyses PA = PreservedAnalyses::all(); + for (auto &Pass : Passes) { + if (!PI.runBeforePass<MachineFunction>(*Pass, MF)) + continue; + + PreservedAnalyses PassPA = Pass->run(MF, MFAM); + MFAM.invalidate(MF, PassPA); + PI.runAfterPass(*Pass, MF, PassPA); + PA.intersect(std::move(PassPA)); + } + return PA; +} + +PreservedAnalyses llvm::getMachineFunctionPassPreservedAnalyses() { + PreservedAnalyses PA; + // Machine function passes are not allowed to modify the LLVM + // representation, therefore we should preserve all IR analyses. + PA.template preserveSet<AllAnalysesOn<Module>>(); + PA.template preserveSet<AllAnalysesOn<Function>>(); + return PA; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp index 2d2d0bffe216..497e282bb976 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -68,6 +68,7 @@ #include "llvm/CodeGen/ScheduleDAGMutation.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" @@ -192,6 +193,10 @@ static cl::opt<int> cl::desc("Margin representing the unused percentage of " "the register pressure limit")); +static cl::opt<bool> + MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false), + cl::desc("Use the MVE code generator for software pipelining")); + namespace llvm { // A command line option to enable the CopyToPhi DAG mutation. @@ -206,6 +211,17 @@ cl::opt<int> SwpForceIssueWidth( cl::desc("Force pipeliner to use specified issue width."), cl::Hidden, cl::init(-1)); +/// A command line argument to set the window scheduling option. 
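// Illustrative sketch: how a machine pass is hoisted into the surrounding
// pass hierarchy with the FunctionToMachineFunctionPassAdaptor above. The
// create helper is assumed to be the usual companion of the adaptor class;
// ExampleMachinePass is hypothetical:
//
//   llvm::FunctionPassManager FPM;
//   FPM.addPass(
//       llvm::createFunctionToMachineFunctionPassAdaptor(ExampleMachinePass()));
//   llvm::ModulePassManager MPM;
//   MPM.addPass(llvm::createModuleToFunctionPassAdaptor(std::move(FPM)));
//
// With the printPipeline() above, this round-trips through the textual form
//   module(function(machine-function(example-machine-pass)))
// and, per the adaptor's run() method, declarations and available_externally
// functions are skipped before any MachineFunction is ever created.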
+cl::opt<WindowSchedulingFlag> WindowSchedulingOption( + "window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On), + cl::desc("Set how to use window scheduling algorithm."), + cl::values(clEnumValN(WindowSchedulingFlag::WS_Off, "off", + "Turn off window algorithm."), + clEnumValN(WindowSchedulingFlag::WS_On, "on", + "Use window algorithm after SMS algorithm fails."), + clEnumValN(WindowSchedulingFlag::WS_Force, "force", + "Use window algorithm instead of SMS algorithm."))); + } // end namespace llvm unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5; @@ -218,9 +234,9 @@ char &llvm::MachinePipelinerID = MachinePipeliner::ID; INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) @@ -247,8 +263,8 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) { return false; MF = &mf; - MLI = &getAnalysis<MachineLoopInfo>(); - MDT = &getAnalysis<MachineDominatorTree>(); + MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); + MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); TII = MF->getSubtarget().getInstrInfo(); RegClassInfo.runOnMachineFunction(*MF); @@ -292,8 +308,11 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) { } ++NumTrytoPipeline; + if (useSwingModuloScheduler()) + Changed = swingModuloScheduler(L); - Changed = swingModuloScheduler(L); + if (useWindowScheduler(Changed)) + Changed = runWindowScheduler(L); LI.LoopPipelinerInfo.reset(); return Changed; @@ -324,8 +343,8 @@ void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) { assert(LoopID->getNumOperands() > 0 && "requires atleast one operand"); assert(LoopID->getOperand(0) == LoopID && "invalid loop"); - for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { - MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); + for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) { + MDNode *MD = dyn_cast<MDNode>(MDO); if (MD == nullptr) continue; @@ -418,7 +437,8 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) { void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) { MachineRegisterInfo &MRI = MF->getRegInfo(); - SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes(); + SlotIndexes &Slots = + *getAnalysis<LiveIntervalsWrapperPass>().getLIS().getSlotIndexes(); for (MachineInstr &PI : B.phis()) { MachineOperand &DefOp = PI.getOperand(0); @@ -453,8 +473,9 @@ void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) { bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) { assert(L.getBlocks().size() == 1 && "SMS works on single blocks only."); - SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo, - II_setByPragma, LI.LoopPipelinerInfo.get()); + SwingSchedulerDAG SMS( + *this, L, getAnalysis<LiveIntervalsWrapperPass>().getLIS(), RegClassInfo, + II_setByPragma, LI.LoopPipelinerInfo.get()); MachineBasicBlock *MBB = L.getHeader(); // The kernel should not include any terminator instructions. 
These @@ -480,13 +501,39 @@ bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) { void MachinePipeliner::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AAResultsWrapperPass>(); AU.addPreserved<AAResultsWrapperPass>(); - AU.addRequired<MachineLoopInfo>(); - AU.addRequired<MachineDominatorTree>(); - AU.addRequired<LiveIntervals>(); + AU.addRequired<MachineLoopInfoWrapperPass>(); + AU.addRequired<MachineDominatorTreeWrapperPass>(); + AU.addRequired<LiveIntervalsWrapperPass>(); AU.addRequired<MachineOptimizationRemarkEmitterPass>(); + AU.addRequired<TargetPassConfig>(); MachineFunctionPass::getAnalysisUsage(AU); } +bool MachinePipeliner::runWindowScheduler(MachineLoop &L) { + MachineSchedContext Context; + Context.MF = MF; + Context.MLI = MLI; + Context.MDT = MDT; + Context.PassConfig = &getAnalysis<TargetPassConfig>(); + Context.AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + Context.LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); + Context.RegClassInfo->runOnMachineFunction(*MF); + WindowScheduler WS(&Context, L); + return WS.run(); +} + +bool MachinePipeliner::useSwingModuloScheduler() { + // SwingModuloScheduler does not work when WindowScheduler is forced. + return WindowSchedulingOption != WindowSchedulingFlag::WS_Force; +} + +bool MachinePipeliner::useWindowScheduler(bool Changed) { + // WindowScheduler does not work when it is off or when SwingModuloScheduler + // is successfully scheduled. + return WindowSchedulingOption == WindowSchedulingFlag::WS_Force || + (WindowSchedulingOption == WindowSchedulingFlag::WS_On && !Changed); +} + void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) { if (SwpForceII > 0) MII = SwpForceII; @@ -677,6 +724,11 @@ void SwingSchedulerDAG::schedule() { if (ExperimentalCodeGen && NewInstrChanges.empty()) { PeelingModuloScheduleExpander MSE(MF, MS, &LIS); MSE.expand(); + } else if (MVECodeGen && NewInstrChanges.empty() && + LoopPipelinerInfo->isMVEExpanderSupported() && + ModuloScheduleExpanderMVE::canApply(Loop)) { + ModuloScheduleExpanderMVE MSE(MF, MS, LIS); + MSE.expand(); } else { ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges)); MSE.expand(); @@ -768,7 +820,6 @@ static void getUnderlyingObjects(const MachineInstr *MI, Objs.clear(); return; } - Objs.push_back(V); } } @@ -920,7 +971,8 @@ void SwingSchedulerDAG::updatePhiDependences() { if (!MI->isPHI()) { SDep Dep(SU, SDep::Data, Reg); Dep.setLatency(0); - ST.adjustSchedDependency(SU, 0, &I, MO.getOperandNo(), Dep); + ST.adjustSchedDependency(SU, 0, &I, MO.getOperandNo(), Dep, + &SchedModel); I.addPred(Dep); } else { HasPhiUse = Reg; @@ -947,8 +999,8 @@ void SwingSchedulerDAG::updatePhiDependences() { RemoveDeps.push_back(PI); } } - for (int i = 0, e = RemoveDeps.size(); i != e; ++i) - I.removePred(RemoveDeps[i]); + for (const SDep &D : RemoveDeps) + I.removePred(D); } } @@ -989,18 +1041,18 @@ void SwingSchedulerDAG::changeDependences() { for (const SDep &P : I.Preds) if (P.getSUnit() == DefSU) Deps.push_back(P); - for (int i = 0, e = Deps.size(); i != e; i++) { - Topo.RemovePred(&I, Deps[i].getSUnit()); - I.removePred(Deps[i]); + for (const SDep &D : Deps) { + Topo.RemovePred(&I, D.getSUnit()); + I.removePred(D); } // Remove the chain dependence between the instructions. 
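// Net effect of the new -window-sched flag on each candidate loop, as
// implemented by useSwingModuloScheduler()/useWindowScheduler() above
// (the default is "on"):
//
//   -window-sched=off     SMS only; the window scheduler never runs.
//   -window-sched=on      SMS first; window scheduling runs only as a
//                         fallback when SMS fails to find a schedule.
//   -window-sched=force   SMS is skipped; every candidate loop goes
//                         straight to the window scheduler.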
Deps.clear(); for (auto &P : LastSU->Preds) if (P.getSUnit() == &I && P.getKind() == SDep::Order) Deps.push_back(P); - for (int i = 0, e = Deps.size(); i != e; i++) { - Topo.RemovePred(LastSU, Deps[i].getSUnit()); - LastSU->removePred(Deps[i]); + for (const SDep &D : Deps) { + Topo.RemovePred(LastSU, D.getSUnit()); + LastSU->removePred(D); } // Add a dependence between the new instruction and the instruction @@ -1248,7 +1300,7 @@ private: for (auto &MI : *OrigMBB) { if (MI.isDebugInstr()) continue; - for (auto Use : ROMap[&MI].Uses) { + for (auto &Use : ROMap[&MI].Uses) { auto Reg = Use.RegUnit; // Ignore the variable that appears only on one side of phi instruction // because it's used only at the first iteration. @@ -1269,7 +1321,7 @@ private: // Calculate the upper limit of each pressure set void computePressureSetLimit(const RegisterClassInfo &RCI) { for (unsigned PSet = 0; PSet < PSetNum; PSet++) - PressureSetLimit[PSet] = RCI.getRegPressureSetLimit(PSet); + PressureSetLimit[PSet] = TRI->getRegPressureSetLimit(MF, PSet); // We assume fixed registers, such as stack pointer, are already in use. // Therefore subtracting the weight of the fixed registers from the limit of @@ -1335,7 +1387,7 @@ private: Register Reg = getLoopPhiReg(*MI, OrigMBB); UpdateTargetRegs(Reg); } else { - for (auto Use : ROMap.find(MI)->getSecond().Uses) + for (auto &Use : ROMap.find(MI)->getSecond().Uses) UpdateTargetRegs(Use.RegUnit); } } @@ -1346,7 +1398,7 @@ private: DenseMap<Register, MachineInstr *> LastUseMI; for (MachineInstr *MI : llvm::reverse(OrderedInsts)) { - for (auto Use : ROMap.find(MI)->getSecond().Uses) { + for (auto &Use : ROMap.find(MI)->getSecond().Uses) { auto Reg = Use.RegUnit; if (!TargetRegs.contains(Reg)) continue; @@ -1439,7 +1491,7 @@ private: const unsigned Iter = I - Stage; - for (auto Def : ROMap.find(MI)->getSecond().Defs) + for (auto &Def : ROMap.find(MI)->getSecond().Defs) InsertReg(LiveRegSets[Iter], Def.RegUnit); for (auto LastUse : LastUses[MI]) { @@ -2411,47 +2463,43 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { // upon the scheduled time for any predecessors/successors. int EarlyStart = INT_MIN; int LateStart = INT_MAX; - // These values are set when the size of the schedule window is limited - // due to chain dependences. 
- int SchedEnd = INT_MAX; - int SchedStart = INT_MIN; - Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart, - II, this); + Schedule.computeStart(SU, &EarlyStart, &LateStart, II, this); LLVM_DEBUG({ dbgs() << "\n"; dbgs() << "Inst (" << SU->NodeNum << ") "; SU->getInstr()->dump(); dbgs() << "\n"; }); - LLVM_DEBUG({ - dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart, - LateStart, SchedEnd, SchedStart); - }); + LLVM_DEBUG( + dbgs() << format("\tes: %8x ls: %8x\n", EarlyStart, LateStart)); - if (EarlyStart > LateStart || SchedEnd < EarlyStart || - SchedStart > LateStart) + if (EarlyStart > LateStart) scheduleFound = false; - else if (EarlyStart != INT_MIN && LateStart == INT_MAX) { - SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1); - scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II); - } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) { - SchedStart = std::max(SchedStart, LateStart - (int)II + 1); - scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II); - } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) { - SchedEnd = - std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1)); - // When scheduling a Phi it is better to start at the late cycle and go - // backwards. The default order may insert the Phi too far away from - // its first dependence. - if (SU->getInstr()->isPHI()) - scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II); + else if (EarlyStart != INT_MIN && LateStart == INT_MAX) + scheduleFound = + Schedule.insert(SU, EarlyStart, EarlyStart + (int)II - 1, II); + else if (EarlyStart == INT_MIN && LateStart != INT_MAX) + scheduleFound = + Schedule.insert(SU, LateStart, LateStart - (int)II + 1, II); + else if (EarlyStart != INT_MIN && LateStart != INT_MAX) { + LateStart = std::min(LateStart, EarlyStart + (int)II - 1); + // When scheduling a Phi it is better to start at the late cycle and + // go backwards. The default order may insert the Phi too far away + // from its first dependence. + // Also, do backward search when all scheduled predecessors are + // loop-carried output/order dependencies. Empirically, there are also + // cases where scheduling becomes possible with backward search. + if (SU->getInstr()->isPHI() || + Schedule.onlyHasLoopCarriedOutputOrOrderPreds(SU, this)) + scheduleFound = Schedule.insert(SU, LateStart, EarlyStart, II); else - scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II); + scheduleFound = Schedule.insert(SU, EarlyStart, LateStart, II); } else { int FirstCycle = Schedule.getFirstCycle(); scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU), FirstCycle + getASAP(SU) + II - 1, II); } + // Even if we find a schedule, make sure the schedule doesn't exceed the // allowable number of stages. We keep trying if this happens. if (scheduleFound) @@ -2733,19 +2781,20 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep, if (!LoopDefS || !TII->getIncrementValue(*LoopDefS, D)) return true; - uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize(); - uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize(); + LocationSize AccessSizeS = (*SI->memoperands_begin())->getSize(); + LocationSize AccessSizeD = (*DI->memoperands_begin())->getSize(); // This is the main test, which checks the offset values and the loop // increment value to determine if the accesses may be loop carried. 
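// Why the insertion window in the schedulePipeline() rewrite above never
// needs to span more than II cycles: in modulo scheduling, resource
// reservations repeat every II cycles, so cycle c and cycle c + II land on
// the same row of the modulo reservation table. Scanning
// [EarlyStart, EarlyStart + II - 1] (or the mirrored backward range from
// LateStart) therefore already visits every distinct slot; e.g. with II = 3
// and EarlyStart = 10, trying cycles 10, 11, 12 is exhaustive, and 13 would
// just repeat 10's reservations. That is what lets the rewrite fold the old
// SchedStart/SchedEnd bounds into computeStart() and clamp with
// LateStart = std::min(LateStart, EarlyStart + (int)II - 1).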
- if (AccessSizeS == MemoryLocation::UnknownSize || - AccessSizeD == MemoryLocation::UnknownSize) + if (!AccessSizeS.hasValue() || !AccessSizeD.hasValue()) return true; - if (DeltaS != DeltaD || DeltaS < AccessSizeS || DeltaD < AccessSizeD) + if (DeltaS != DeltaD || DeltaS < AccessSizeS.getValue() || + DeltaD < AccessSizeD.getValue()) return true; - return (OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD); + return (OffsetS + (int64_t)AccessSizeS.getValue() < + OffsetD + (int64_t)AccessSizeD.getValue()); } void SwingSchedulerDAG::postProcessDAG() { @@ -2858,8 +2907,7 @@ static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) { /// Compute the scheduling start slot for the instruction. The start slot /// depends on any predecessor or successor nodes scheduled already. void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, - int *MinEnd, int *MaxStart, int II, - SwingSchedulerDAG *DAG) { + int II, SwingSchedulerDAG *DAG) { // Iterate over each instruction that has been scheduled already. The start // slot computation depends on whether the previously scheduled instruction // is a predecessor or successor of the specified instruction. @@ -2878,7 +2926,7 @@ void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart); if (DAG->isLoopCarriedDep(SU, Dep, false)) { int End = earliestCycleInChain(Dep) + (II - 1); - *MinEnd = std::min(*MinEnd, End); + *MinLateStart = std::min(*MinLateStart, End); } } else { int LateStart = cycle - Dep.getLatency() + @@ -2902,7 +2950,7 @@ void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, *MinLateStart = std::min(*MinLateStart, LateStart); if (DAG->isLoopCarriedDep(SU, Dep)) { int Start = latestCycleInChain(Dep) + 1 - II; - *MaxStart = std::max(*MaxStart, Start); + *MaxEarlyStart = std::max(*MaxEarlyStart, Start); } } else { int EarlyStart = cycle + Dep.getLatency() - @@ -3095,6 +3143,19 @@ bool SMSchedule::isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, return false; } +/// Return true if all scheduled predecessors are loop-carried output/order +/// dependencies. 
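// The memoperand size in isLoopCarriedDep() is now a LocationSize, so
// "unknown" is a queryable state rather than the UnknownSize sentinel. A
// minimal sketch of the new pattern (MMO stands in for a real
// MachineMemOperand pointer):
//
//   llvm::LocationSize Size = MMO->getSize();
//   if (!Size.hasValue())
//     return true;                            // unknown: assume loop carried
//   int64_t Bytes = (int64_t)Size.getValue(); // valid only after hasValue()
//
// hasValue() is false for imprecise sizes, which previously all had to be
// funneled through the single UnknownSize sentinel value.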
+bool SMSchedule::onlyHasLoopCarriedOutputOrOrderPreds( + SUnit *SU, SwingSchedulerDAG *DAG) const { + for (const SDep &Pred : SU->Preds) + if (InstrToCycle.count(Pred.getSUnit()) && !DAG->isBackedge(SU, Pred)) + return false; + for (const SDep &Succ : SU->Succs) + if (InstrToCycle.count(Succ.getSUnit()) && DAG->isBackedge(SU, Succ)) + return false; + return true; +} + /// Determine transitive dependences of unpipelineable instructions SmallSet<SUnit *, 8> SMSchedule::computeUnpipelineableNodes( SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePostDominators.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePostDominators.cpp index fb96d0efa4d4..51637130addc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePostDominators.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePostDominators.cpp @@ -13,67 +13,108 @@ #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/GenericDomTreeConstruction.h" using namespace llvm; namespace llvm { template class DominatorTreeBase<MachineBasicBlock, true>; // PostDomTreeBase +namespace DomTreeBuilder { + +template void Calculate<MBBPostDomTree>(MBBPostDomTree &DT); +template void InsertEdge<MBBPostDomTree>(MBBPostDomTree &DT, + MachineBasicBlock *From, + MachineBasicBlock *To); +template void DeleteEdge<MBBPostDomTree>(MBBPostDomTree &DT, + MachineBasicBlock *From, + MachineBasicBlock *To); +template void ApplyUpdates<MBBPostDomTree>(MBBPostDomTree &DT, + MBBPostDomTreeGraphDiff &, + MBBPostDomTreeGraphDiff *); +template bool Verify<MBBPostDomTree>(const MBBPostDomTree &DT, + MBBPostDomTree::VerificationLevel VL); + +} // namespace DomTreeBuilder extern bool VerifyMachineDomInfo; } // namespace llvm -char MachinePostDominatorTree::ID = 0; +AnalysisKey MachinePostDominatorTreeAnalysis::Key; -//declare initializeMachinePostDominatorTreePass -INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree", - "MachinePostDominator Tree Construction", true, true) +MachinePostDominatorTreeAnalysis::Result +MachinePostDominatorTreeAnalysis::run(MachineFunction &MF, + MachineFunctionAnalysisManager &) { + return MachinePostDominatorTree(MF); +} -MachinePostDominatorTree::MachinePostDominatorTree() - : MachineFunctionPass(ID), PDT(nullptr) { - initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry()); +PreservedAnalyses +MachinePostDominatorTreePrinterPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + OS << "MachinePostDominatorTree for machine function: " << MF.getName() + << '\n'; + MFAM.getResult<MachinePostDominatorTreeAnalysis>(MF).print(OS); + return PreservedAnalyses::all(); } -FunctionPass *MachinePostDominatorTree::createMachinePostDominatorTreePass() { - return new MachinePostDominatorTree(); +char MachinePostDominatorTreeWrapperPass::ID = 0; + +//declare initializeMachinePostDominatorTreePass +INITIALIZE_PASS(MachinePostDominatorTreeWrapperPass, "machinepostdomtree", + "MachinePostDominator Tree Construction", true, true) + +MachinePostDominatorTreeWrapperPass::MachinePostDominatorTreeWrapperPass() + : MachineFunctionPass(ID), PDT() { + initializeMachinePostDominatorTreeWrapperPassPass( + *PassRegistry::getPassRegistry()); } -bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) { - PDT = std::make_unique<PostDomTreeT>(); +bool MachinePostDominatorTreeWrapperPass::runOnMachineFunction( + MachineFunction &F) { + PDT = MachinePostDominatorTree(); 
PDT->recalculate(F); return false; } -void MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { +void MachinePostDominatorTreeWrapperPass::getAnalysisUsage( + AnalysisUsage &AU) const { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } +bool MachinePostDominatorTree::invalidate( + MachineFunction &, const PreservedAnalyses &PA, + MachineFunctionAnalysisManager::Invalidator &) { + // Check whether the analysis, all analyses on machine functions, or the + // machine function's CFG have been preserved. + auto PAC = PA.getChecker<MachinePostDominatorTreeAnalysis>(); + return !PAC.preserved() && + !PAC.preservedSet<AllAnalysesOn<MachineFunction>>() && + !PAC.preservedSet<CFGAnalyses>(); +} + MachineBasicBlock *MachinePostDominatorTree::findNearestCommonDominator( ArrayRef<MachineBasicBlock *> Blocks) const { assert(!Blocks.empty()); MachineBasicBlock *NCD = Blocks.front(); for (MachineBasicBlock *BB : Blocks.drop_front()) { - NCD = PDT->findNearestCommonDominator(NCD, BB); + NCD = Base::findNearestCommonDominator(NCD, BB); // Stop when the root is reached. - if (PDT->isVirtualRoot(PDT->getNode(NCD))) + if (isVirtualRoot(getNode(NCD))) return nullptr; } return NCD; } -void MachinePostDominatorTree::verifyAnalysis() const { - if (PDT && VerifyMachineDomInfo) - if (!PDT->verify(PostDomTreeT::VerificationLevel::Basic)) { - errs() << "MachinePostDominatorTree verification failed\n"; - - abort(); - } +void MachinePostDominatorTreeWrapperPass::verifyAnalysis() const { + if (VerifyMachineDomInfo && PDT && + !PDT->verify(MachinePostDominatorTree::VerificationLevel::Basic)) + report_fatal_error("MachinePostDominatorTree verification failed!"); } -void MachinePostDominatorTree::print(llvm::raw_ostream &OS, - const Module *M) const { +void MachinePostDominatorTreeWrapperPass::print(llvm::raw_ostream &OS, + const Module *M) const { PDT->print(OS); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegionInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegionInfo.cpp index 45cdcbfeab9f..f8268b8894ca 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegionInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegionInfo.cpp @@ -84,8 +84,9 @@ MachineRegionInfoPass::~MachineRegionInfoPass() = default; bool MachineRegionInfoPass::runOnMachineFunction(MachineFunction &F) { releaseMemory(); - auto DT = &getAnalysis<MachineDominatorTree>(); - auto PDT = &getAnalysis<MachinePostDominatorTree>(); + auto DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); + auto PDT = + &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree(); auto DF = &getAnalysis<MachineDominanceFrontier>(); RI.recalculate(F, DT, PDT, DF); @@ -109,8 +110,8 @@ void MachineRegionInfoPass::verifyAnalysis() const { void MachineRegionInfoPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired<MachineDominatorTree>(); - AU.addRequired<MachinePostDominatorTree>(); + AU.addRequired<MachineDominatorTreeWrapperPass>(); + AU.addRequired<MachinePostDominatorTreeWrapperPass>(); AU.addRequired<MachineDominanceFrontier>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -130,8 +131,8 @@ char &MachineRegionInfoPassID = MachineRegionInfoPass::ID; INITIALIZE_PASS_BEGIN(MachineRegionInfoPass, DEBUG_TYPE, "Detect single entry single exit regions", true, true) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) 
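// Illustrative sketch: consuming the new analysis and keeping it alive. Per
// MachinePostDominatorTree::invalidate() above, the result survives any pass
// that preserves the analysis itself, AllAnalysesOn<MachineFunction>, or
// CFGAnalyses. ExamplePass is hypothetical:

#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/CodeGen/MachinePostDominators.h"

struct ExamplePass : llvm::PassInfoMixin<ExamplePass> {
  llvm::PreservedAnalyses run(llvm::MachineFunction &MF,
                              llvm::MachineFunctionAnalysisManager &MFAM) {
    auto &PDT = MFAM.getResult<llvm::MachinePostDominatorTreeAnalysis>(MF);
    // ... query PDT.dominates(...) / findNearestCommonDominator(...) while
    // rewriting instructions in place ...
    (void)PDT;
    llvm::PreservedAnalyses PA =
        llvm::getMachineFunctionPassPreservedAnalyses();
    PA.preserveSet<llvm::CFGAnalyses>(); // no blocks added, removed, rewired
    return PA;
  }
};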
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier) INITIALIZE_PASS_END(MachineRegionInfoPass, DEBUG_TYPE, "Detect single entry single exit regions", true, true) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp index 087604af6a71..3caa96cd5e55 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -41,8 +41,10 @@ static cl::opt<bool> EnableSubRegLiveness("enable-subreg-liveness", cl::Hidden, void MachineRegisterInfo::Delegate::anchor() {} MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF) - : MF(MF), TracksSubRegLiveness(MF->getSubtarget().enableSubRegLiveness() && - EnableSubRegLiveness) { + : MF(MF), + TracksSubRegLiveness(EnableSubRegLiveness.getNumOccurrences() + ? EnableSubRegLiveness + : MF->getSubtarget().enableSubRegLiveness()) { unsigned NumRegs = getTargetRegisterInfo()->getNumRegs(); VRegInfo.reserve(256); RegAllocHints.reserve(256); @@ -167,6 +169,15 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass, return Reg; } +Register MachineRegisterInfo::createVirtualRegister(VRegAttrs RegAttr, + StringRef Name) { + Register Reg = createIncompleteVirtualRegister(Name); + VRegInfo[Reg].first = RegAttr.RCOrRB; + setType(Reg, RegAttr.Ty); + noteNewVirtualRegister(Reg); + return Reg; +} + Register MachineRegisterInfo::cloneVirtualRegister(Register VReg, StringRef Name) { Register Reg = createIncompleteVirtualRegister(Name); @@ -508,8 +519,8 @@ LLVM_DUMP_METHOD void MachineRegisterInfo::dumpUses(Register Reg) const { } #endif -void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) { - ReservedRegs = getTargetRegisterInfo()->getReservedRegs(MF); +void MachineRegisterInfo::freezeReservedRegs() { + ReservedRegs = getTargetRegisterInfo()->getReservedRegs(*MF); assert(ReservedRegs.size() == getTargetRegisterInfo()->getNumRegs() && "Invalid ReservedRegs vector from target"); } @@ -650,18 +661,3 @@ bool MachineRegisterInfo::isReservedRegUnit(unsigned Unit) const { } return false; } - -bool MachineRegisterInfo::isArgumentRegister(const MachineFunction &MF, - MCRegister Reg) const { - return getTargetRegisterInfo()->isArgumentRegister(MF, Reg); -} - -bool MachineRegisterInfo::isFixedRegister(const MachineFunction &MF, - MCRegister Reg) const { - return getTargetRegisterInfo()->isFixedRegister(MF, Reg); -} - -bool MachineRegisterInfo::isGeneralPurposeRegister(const MachineFunction &MF, - MCRegister Reg) const { - return getTargetRegisterInfo()->isGeneralPurposeRegister(MF, Reg); -} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp index 48076663ddf5..4cbb6ad3128b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp @@ -51,17 +51,13 @@ MachineSSAUpdater::~MachineSSAUpdater() { /// Initialize - Reset this object to get ready for a new set of SSA /// updates. 
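// The MachineRegisterInfo constructor change above is the standard cl::opt
// "explicit flag beats target default" idiom: getNumOccurrences()
// distinguishes an option left at its default from one set on the command
// line, including an explicit -enable-subreg-liveness=true on a target whose
// default is off, which the old boolean && could not honor. Minimal
// standalone form:

#include "llvm/Support/CommandLine.h"

static llvm::cl::opt<bool> ExampleFlag("example-flag", llvm::cl::Hidden,
                                       llvm::cl::init(true));

static bool effectiveValue(bool TargetDefault) {
  // Command line wins if present, in either direction; otherwise defer to
  // the target's preference.
  return ExampleFlag.getNumOccurrences() ? ExampleFlag : TargetDefault;
}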
-void MachineSSAUpdater::Initialize(const TargetRegisterClass *RC) { +void MachineSSAUpdater::Initialize(Register V) { if (!AV) AV = new AvailableValsTy(); else getAvailableVals(AV).clear(); - VRC = RC; -} - -void MachineSSAUpdater::Initialize(Register V) { - Initialize(MRI->getRegClass(V)); + RegAttrs = MRI->getVRegAttrs(V); } /// HasValueForBlock - Return true if the MachineSSAUpdater already has a value for @@ -93,8 +89,8 @@ Register LookForIdenticalPHI(MachineBasicBlock *BB, return Register(); AvailableValsTy AVals; - for (unsigned i = 0, e = PredValues.size(); i != e; ++i) - AVals[PredValues[i].first] = PredValues[i].second; + for (const auto &[SrcBB, SrcReg] : PredValues) + AVals[SrcBB] = SrcReg; while (I != BB->end() && I->isPHI()) { bool Same = true; for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) { @@ -115,13 +111,12 @@ Register LookForIdenticalPHI(MachineBasicBlock *BB, /// InsertNewDef - Insert an empty PHI or IMPLICIT_DEF instruction which define /// a value of the given register class at the start of the specified basic /// block. It returns the virtual register defined by the instruction. -static -MachineInstrBuilder InsertNewDef(unsigned Opcode, - MachineBasicBlock *BB, MachineBasicBlock::iterator I, - const TargetRegisterClass *RC, - MachineRegisterInfo *MRI, - const TargetInstrInfo *TII) { - Register NewVR = MRI->createVirtualRegister(RC); +static MachineInstrBuilder InsertNewDef(unsigned Opcode, MachineBasicBlock *BB, + MachineBasicBlock::iterator I, + MachineRegisterInfo::VRegAttrs RegAttrs, + MachineRegisterInfo *MRI, + const TargetInstrInfo *TII) { + Register NewVR = MRI->createVirtualRegister(RegAttrs); return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR); } @@ -158,9 +153,9 @@ Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB, if (ExistingValueOnly) return Register(); // Insert an implicit_def to represent an undef value. - MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, - BB, BB->getFirstTerminator(), - VRC, MRI, TII); + MachineInstr *NewDef = + InsertNewDef(TargetOpcode::IMPLICIT_DEF, BB, BB->getFirstTerminator(), + RegAttrs, MRI, TII); return NewDef->getOperand(0).getReg(); } @@ -197,12 +192,12 @@ Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB, // Otherwise, we do need a PHI: insert one now. MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin(); - MachineInstrBuilder InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB, - Loc, VRC, MRI, TII); + MachineInstrBuilder InsertedPHI = + InsertNewDef(TargetOpcode::PHI, BB, Loc, RegAttrs, MRI, TII); // Fill in all the predecessors of the PHI. - for (unsigned i = 0, e = PredValues.size(); i != e; ++i) - InsertedPHI.addReg(PredValues[i].second).addMBB(PredValues[i].first); + for (const auto &[SrcBB, SrcReg] : PredValues) + InsertedPHI.addReg(SrcReg).addMBB(SrcBB); // See if the PHI node can be merged to a single value. This can happen in // loop cases when we get a PHI of itself and one other value. @@ -214,7 +209,7 @@ Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB, // If the client wants to know about all new instructions, tell it. 
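// Illustrative sketch of driving the updated MachineSSAUpdater interface:
// the updater is now seeded from a register, carrying register class or
// bank *and* LLT type as VRegAttrs, instead of from a bare register class.
// All parameter names below are placeholders:

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"

static void rewriteUses(llvm::MachineFunction &MF,
                        llvm::MachineRegisterInfo &MRI,
                        llvm::Register OrigVReg,
                        llvm::MachineBasicBlock *PredA, llvm::Register ValueInA,
                        llvm::MachineBasicBlock *PredB, llvm::Register ValueInB) {
  llvm::MachineSSAUpdater Updater(MF);
  Updater.Initialize(OrigVReg);               // adopts OrigVReg's VRegAttrs
  Updater.AddAvailableValue(PredA, ValueInA); // def reaching the end of PredA
  Updater.AddAvailableValue(PredB, ValueInB);
  for (llvm::MachineOperand &UseMO :
       llvm::make_early_inc_range(MRI.use_operands(OrigVReg)))
    Updater.RewriteUse(UseMO); // may insert PHIs and, now, constraining COPYs
}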
if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); - LLVM_DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); + LLVM_DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI); return InsertedPHI.getReg(0); } @@ -241,6 +236,22 @@ void MachineSSAUpdater::RewriteUse(MachineOperand &U) { NewVR = GetValueInMiddleOfBlock(UseMI->getParent()); } + // Insert a COPY if needed to satisfy register class constraints for the using + // MO. Or, if possible, just constrain the class for NewVR to avoid the need + // for a COPY. + if (NewVR) { + const TargetRegisterClass *UseRC = + dyn_cast_or_null<const TargetRegisterClass *>(RegAttrs.RCOrRB); + if (UseRC && !MRI->constrainRegClass(NewVR, UseRC)) { + MachineBasicBlock *UseBB = UseMI->getParent(); + MachineInstr *InsertedCopy = + InsertNewDef(TargetOpcode::COPY, UseBB, UseBB->getFirstNonPHI(), + RegAttrs, MRI, TII) + .addReg(NewVR); + NewVR = InsertedCopy->getOperand(0).getReg(); + LLVM_DEBUG(dbgs() << " Inserted COPY: " << *InsertedCopy); + } + } U.setReg(NewVR); } @@ -295,15 +306,14 @@ public: append_range(*Preds, BB->predecessors()); } - /// GetUndefVal - Create an IMPLICIT_DEF instruction with a new register. + /// GetPoisonVal - Create an IMPLICIT_DEF instruction with a new register. /// Add it into the specified block and return the register. - static Register GetUndefVal(MachineBasicBlock *BB, + static Register GetPoisonVal(MachineBasicBlock *BB, MachineSSAUpdater *Updater) { - // Insert an implicit_def to represent an undef value. - MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, - BB, BB->getFirstNonPHI(), - Updater->VRC, Updater->MRI, - Updater->TII); + // Insert an implicit_def to represent a poison value. + MachineInstr *NewDef = + InsertNewDef(TargetOpcode::IMPLICIT_DEF, BB, BB->getFirstNonPHI(), + Updater->RegAttrs, Updater->MRI, Updater->TII); return NewDef->getOperand(0).getReg(); } @@ -312,9 +322,9 @@ public: static Register CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds, MachineSSAUpdater *Updater) { MachineBasicBlock::iterator Loc = BB->empty() ? 
BB->end() : BB->begin(); - MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, - Updater->VRC, Updater->MRI, - Updater->TII); + MachineInstr *PHI = + InsertNewDef(TargetOpcode::PHI, BB, Loc, Updater->RegAttrs, + Updater->MRI, Updater->TII); return PHI->getOperand(0).getReg(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp index f40e91819a48..a8a17101b9c9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp @@ -32,7 +32,6 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePassRegistry.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDAG.h" @@ -48,6 +47,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/Config/llvm-config.h" #include "llvm/InitializePasses.h" #include "llvm/MC/LaneBitmask.h" @@ -81,6 +81,26 @@ cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, cl::desc("Force top-down list scheduling")); cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden, cl::desc("Force bottom-up list scheduling")); +namespace MISchedPostRASched { +enum Direction { + TopDown, + BottomUp, + Bidirectional, +}; +} // end namespace MISchedPostRASched +cl::opt<MISchedPostRASched::Direction> PostRADirection( + "misched-postra-direction", cl::Hidden, + cl::desc("Post reg-alloc list scheduling direction"), + // Default to top-down because it was implemented first and existing targets + // expect that behavior by default. 
+ cl::init(MISchedPostRASched::TopDown), + cl::values( + clEnumValN(MISchedPostRASched::TopDown, "topdown", + "Force top-down post reg-alloc list scheduling"), + clEnumValN(MISchedPostRASched::BottomUp, "bottomup", + "Force bottom-up post reg-alloc list scheduling"), + clEnumValN(MISchedPostRASched::Bidirectional, "bidirectional", + "Force bidirectional post reg-alloc list scheduling"))); cl::opt<bool> DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout")); @@ -246,10 +266,10 @@ char &llvm::MachineSchedulerID = MachineScheduler::ID; INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) @@ -259,14 +279,14 @@ MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) { void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequired<MachineDominatorTree>(); - AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineDominatorTreeWrapperPass>(); + AU.addRequired<MachineLoopInfoWrapperPass>(); AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<TargetPassConfig>(); - AU.addRequired<SlotIndexes>(); - AU.addPreserved<SlotIndexes>(); - AU.addRequired<LiveIntervals>(); - AU.addPreserved<LiveIntervals>(); + AU.addRequired<SlotIndexesWrapperPass>(); + AU.addPreserved<SlotIndexesWrapperPass>(); + AU.addRequired<LiveIntervalsWrapperPass>(); + AU.addPreserved<LiveIntervalsWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -276,8 +296,8 @@ char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID; INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched", "PostRA Machine Instruction Scheduler", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(PostMachineScheduler, "postmisched", "PostRA Machine Instruction Scheduler", false, false) @@ -288,8 +308,8 @@ PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) { void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequired<MachineDominatorTree>(); - AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineDominatorTreeWrapperPass>(); + AU.addRequired<MachineLoopInfoWrapperPass>(); AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<TargetPassConfig>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -424,12 +444,12 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // Initialize the context of the pass. 
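// The post-RA list scheduler direction is now selectable (it was hard-wired
// top-down before this change):
//
//   -misched-postra-direction=topdown        default; the old behavior
//   -misched-postra-direction=bottomup
//   -misched-postra-direction=bidirectional
//
// The bottom-up and bidirectional modes depend on the Bot SchedBoundary and
// the pickNodeBidirectional() support that PostGenericScheduler grows below.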
MF = &mf; - MLI = &getAnalysis<MachineLoopInfo>(); - MDT = &getAnalysis<MachineDominatorTree>(); + MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); + MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); PassConfig = &getAnalysis<TargetPassConfig>(); AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); - LIS = &getAnalysis<LiveIntervals>(); + LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); if (VerifyScheduling) { LLVM_DEBUG(LIS->dump()); @@ -440,6 +460,14 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // Instantiate the selected scheduler for this target, function, and // optimization level. std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler()); + ScheduleDAGMI::DumpDirection D; + if (ForceTopDown) + D = ScheduleDAGMI::DumpDirection::TopDown; + else if (ForceBottomUp) + D = ScheduleDAGMI::DumpDirection::BottomUp; + else + D = ScheduleDAGMI::DumpDirection::Bidirectional; + Scheduler->setDumpDirection(D); scheduleRegions(*Scheduler, false); LLVM_DEBUG(LIS->dump()); @@ -463,7 +491,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { // Initialize the context of the pass. MF = &mf; - MLI = &getAnalysis<MachineLoopInfo>(); + MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); PassConfig = &getAnalysis<TargetPassConfig>(); AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); @@ -473,6 +501,14 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { // Instantiate the selected scheduler for this target, function, and // optimization level. std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler()); + ScheduleDAGMI::DumpDirection D; + if (PostRADirection == MISchedPostRASched::TopDown) + D = ScheduleDAGMI::DumpDirection::TopDown; + else if (PostRADirection == MISchedPostRASched::BottomUp) + D = ScheduleDAGMI::DumpDirection::BottomUp; + else + D = ScheduleDAGMI::DumpDirection::Bidirectional; + Scheduler->setDumpDirection(D); scheduleRegions(*Scheduler, true); if (VerifyScheduling) @@ -1125,12 +1161,14 @@ LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const { if (MISchedDumpScheduleTrace) { - if (ForceTopDown) + if (DumpDir == DumpDirection::TopDown) dumpScheduleTraceTopDown(); - else if (ForceBottomUp) + else if (DumpDir == DumpDirection::BottomUp) dumpScheduleTraceBottomUp(); - else { + else if (DumpDir == DumpDirection::Bidirectional) { dbgs() << "* Schedule table (Bidirectional): not implemented\n"; + } else { + dbgs() << "* Schedule table: DumpDirection not set.\n"; } } @@ -1626,7 +1664,8 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { if (ShouldTrackPressure) { // Update top scheduled pressure. RegisterOperands RegOpers; - RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false); + RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, + /*IgnoreDead=*/false); if (ShouldTrackLaneMasks) { // Adjust liveness and add missing dead+read-undef flags. SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot(); @@ -1660,7 +1699,8 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { } if (ShouldTrackPressure) { RegisterOperands RegOpers; - RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false); + RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, + /*IgnoreDead=*/false); if (ShouldTrackLaneMasks) { // Adjust liveness and add missing dead+read-undef flags. 
SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot(); @@ -1697,11 +1737,11 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation { SUnit *SU; SmallVector<const MachineOperand *, 4> BaseOps; int64_t Offset; - unsigned Width; + LocationSize Width; bool OffsetIsScalable; MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps, - int64_t Offset, bool OffsetIsScalable, unsigned Width) + int64_t Offset, bool OffsetIsScalable, LocationSize Width) : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset), Width(Width), OffsetIsScalable(OffsetIsScalable) {} @@ -1834,11 +1874,12 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( auto MemOpb = MemOpRecords[NextIdx]; unsigned ClusterLength = 2; - unsigned CurrentClusterBytes = MemOpa.Width + MemOpb.Width; + unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() + + MemOpb.Width.getValue().getKnownMinValue(); if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) { ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1; - CurrentClusterBytes = - SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width; + CurrentClusterBytes = SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + + MemOpb.Width.getValue().getKnownMinValue(); } if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset, @@ -1908,7 +1949,7 @@ void BaseMemOpClusterMutation::collectMemOpRecords( SmallVector<const MachineOperand *, 4> BaseOps; int64_t Offset; bool OffsetIsScalable; - unsigned Width; + LocationSize Width = 0; if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, OffsetIsScalable, Width, TRI)) { MemOpRecords.push_back( @@ -3224,14 +3265,10 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) { // are disabled, then these HazardRecs will be disabled. const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); if (!Top.HazardRec) { - Top.HazardRec = - DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( - Itin, DAG); + Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG); } if (!Bot.HazardRec) { - Bot.HazardRec = - DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( - Itin, DAG); + Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG); } TopCand.SU = nullptr; BotCand.SU = nullptr; @@ -3246,14 +3283,16 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, // Avoid setting up the register pressure tracker for small regions to save // compile time. As a rough heuristic, only track pressure when the number of - // schedulable instructions exceeds half the integer register file. + // schedulable instructions exceeds half the allocatable integer register file + // that is the largest legal integer regiser type. 
RegionPolicy.ShouldTrackPressure = true; - for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) { + for (unsigned VT = MVT::i64; VT > (unsigned)MVT::i1; --VT) { MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT; if (TLI->isTypeLegal(LegalIntVT)) { unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs( TLI->getRegClassFor(LegalIntVT)); RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2); + break; } } @@ -3682,7 +3721,7 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { TCand.reset(CandPolicy()); pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand); assert(TCand.SU == TopCand.SU && - "Last pick result should correspond to re-picking right now"); + "Last pick result should correspond to re-picking right now"); } #endif } @@ -3738,6 +3777,21 @@ SUnit *GenericScheduler::pickNode(bool &IsTopNode) { } } while (SU->isScheduled); + // If IsTopNode, then SU is in Top.Available and must be removed. Otherwise, + // if isTopReady(), then SU is in either Top.Available or Top.Pending. + // If !IsTopNode, then SU is in Bot.Available and must be removed. Otherwise, + // if isBottomReady(), then SU is in either Bot.Available or Bot.Pending. + // + // It is coincidental when !IsTopNode && isTopReady or when IsTopNode && + // isBottomReady. That is, it didn't factor into the decision to choose SU + // because it isTopReady or isBottomReady, respectively. In fact, if the + // RegionPolicy is OnlyTopDown or OnlyBottomUp, then the Bot queues and Top + // queues respectivley contain the original roots and don't get updated when + // picking a node. So if SU isTopReady on a OnlyBottomUp pick, then it was + // because we schduled everything but the top roots. Conversley, if SU + // isBottomReady on OnlyTopDown, then it was because we scheduled everything + // but the bottom roots. If its in a queue even coincidentally, it should be + // removed so it does not get re-picked in a subsequent pickNode call. if (SU->isTopReady()) Top.removeReady(SU); if (SU->isBottomReady()) @@ -3804,6 +3858,12 @@ ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) { // data and pass it to later mutations. Have a single mutation that gathers // the interesting nodes in one pass. DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); + + const TargetSubtargetInfo &STI = C->MF->getSubtarget(); + // Add MacroFusion mutation if fusions are not empty. + const auto &MacroFusions = STI.getMacroFusions(); + if (!MacroFusions.empty()) + DAG->addMutation(createMacroFusionDAGMutation(MacroFusions)); return DAG; } @@ -3826,15 +3886,31 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) { Rem.init(DAG, SchedModel); Top.init(DAG, SchedModel, &Rem); - BotRoots.clear(); + Bot.init(DAG, SchedModel, &Rem); // Initialize the HazardRecognizers. If itineraries don't exist, are empty, // or are disabled, then these HazardRecs will be disabled. 
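// Worked example of the revised initPolicy() pressure heuristic above:
// suppose the widest legal integer type is i64 and its register class has,
// say, 28 allocatable registers. Pressure tracking is then enabled only for
// regions with more than 28 / 2 = 14 schedulable instructions. The added
// break also matters: the search now stops at the widest legal integer type
// instead of letting the narrowest legal type examined last (the old loop
// started at i32 and counted down) make the final decision.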
const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); if (!Top.HazardRec) { - Top.HazardRec = - DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( - Itin, DAG); + Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG); + } + if (!Bot.HazardRec) { + Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG); + } +} + +void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs) { + if (PostRADirection == MISchedPostRASched::TopDown) { + RegionPolicy.OnlyTopDown = true; + RegionPolicy.OnlyBottomUp = false; + } else if (PostRADirection == MISchedPostRASched::BottomUp) { + RegionPolicy.OnlyTopDown = false; + RegionPolicy.OnlyBottomUp = true; + } else if (PostRADirection == MISchedPostRASched::Bidirectional) { + RegionPolicy.OnlyBottomUp = false; + RegionPolicy.OnlyTopDown = false; } } @@ -3842,7 +3918,7 @@ void PostGenericScheduler::registerRoots() { Rem.CriticalPath = DAG->ExitSU.getDepth(); // Some roots may not feed into ExitSU. Check all of them in case. - for (const SUnit *SU : BotRoots) { + for (const SUnit *SU : Bot.Available) { if (SU->getDepth() > Rem.CriticalPath) Rem.CriticalPath = SU->getDepth(); } @@ -3899,12 +3975,13 @@ bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand, return false; } -void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { - ReadyQueue &Q = Top.Available; +void PostGenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, + SchedCandidate &Cand) { + ReadyQueue &Q = Zone.Available; for (SUnit *SU : Q) { SchedCandidate TryCand(Cand.Policy); TryCand.SU = SU; - TryCand.AtTop = true; + TryCand.AtTop = Zone.isTop(); TryCand.initResourceDelta(DAG, SchedModel); if (tryCandidate(Cand, TryCand)) { Cand.setBest(TryCand); @@ -3913,32 +3990,137 @@ void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { } } +/// Pick the best candidate node from either the top or bottom queue. +SUnit *PostGenericScheduler::pickNodeBidirectional(bool &IsTopNode) { + // FIXME: This is similiar to GenericScheduler::pickNodeBidirectional. Factor + // out common parts. + + // Schedule as far as possible in the direction of no choice. This is most + // efficient, but also provides the best heuristics for CriticalPSets. + if (SUnit *SU = Bot.pickOnlyChoice()) { + IsTopNode = false; + tracePick(Only1, false); + return SU; + } + if (SUnit *SU = Top.pickOnlyChoice()) { + IsTopNode = true; + tracePick(Only1, true); + return SU; + } + // Set the bottom-up policy based on the state of the current bottom zone and + // the instructions outside the zone, including the top zone. + CandPolicy BotPolicy; + setPolicy(BotPolicy, /*IsPostRA=*/true, Bot, &Top); + // Set the top-down policy based on the state of the current top zone and + // the instructions outside the zone, including the bottom zone. + CandPolicy TopPolicy; + setPolicy(TopPolicy, /*IsPostRA=*/true, Top, &Bot); + + // See if BotCand is still valid (because we previously scheduled from Top). 
+ LLVM_DEBUG(dbgs() << "Picking from Bot:\n"); + if (!BotCand.isValid() || BotCand.SU->isScheduled || + BotCand.Policy != BotPolicy) { + BotCand.reset(CandPolicy()); + pickNodeFromQueue(Bot, BotCand); + assert(BotCand.Reason != NoCand && "failed to find the first candidate"); + } else { + LLVM_DEBUG(traceCandidate(BotCand)); +#ifndef NDEBUG + if (VerifyScheduling) { + SchedCandidate TCand; + TCand.reset(CandPolicy()); + pickNodeFromQueue(Bot, BotCand); + assert(TCand.SU == BotCand.SU && + "Last pick result should correspond to re-picking right now"); + } +#endif + } + + // Check if the top Q has a better candidate. + LLVM_DEBUG(dbgs() << "Picking from Top:\n"); + if (!TopCand.isValid() || TopCand.SU->isScheduled || + TopCand.Policy != TopPolicy) { + TopCand.reset(CandPolicy()); + pickNodeFromQueue(Top, TopCand); + assert(TopCand.Reason != NoCand && "failed to find the first candidate"); + } else { + LLVM_DEBUG(traceCandidate(TopCand)); +#ifndef NDEBUG + if (VerifyScheduling) { + SchedCandidate TCand; + TCand.reset(CandPolicy()); + pickNodeFromQueue(Top, TopCand); + assert(TCand.SU == TopCand.SU && + "Last pick result should correspond to re-picking right now"); + } +#endif + } + + // Pick best from BotCand and TopCand. + assert(BotCand.isValid()); + assert(TopCand.isValid()); + SchedCandidate Cand = BotCand; + TopCand.Reason = NoCand; + if (tryCandidate(Cand, TopCand)) { + Cand.setBest(TopCand); + LLVM_DEBUG(traceCandidate(Cand)); + } + + IsTopNode = Cand.AtTop; + tracePick(Cand); + return Cand.SU; +} + /// Pick the next node to schedule. SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) { if (DAG->top() == DAG->bottom()) { - assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage"); + assert(Top.Available.empty() && Top.Pending.empty() && + Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); return nullptr; } SUnit *SU; do { - SU = Top.pickOnlyChoice(); - if (SU) { - tracePick(Only1, true); + if (RegionPolicy.OnlyBottomUp) { + SU = Bot.pickOnlyChoice(); + if (SU) { + tracePick(Only1, true); + } else { + CandPolicy NoPolicy; + BotCand.reset(NoPolicy); + // Set the bottom-up policy based on the state of the current bottom + // zone and the instructions outside the zone, including the top zone. + setPolicy(BotCand.Policy, /*IsPostRA=*/true, Bot, nullptr); + pickNodeFromQueue(Bot, BotCand); + assert(BotCand.Reason != NoCand && "failed to find a candidate"); + tracePick(BotCand); + SU = BotCand.SU; + } + IsTopNode = false; + } else if (RegionPolicy.OnlyTopDown) { + SU = Top.pickOnlyChoice(); + if (SU) { + tracePick(Only1, true); + } else { + CandPolicy NoPolicy; + TopCand.reset(NoPolicy); + // Set the top-down policy based on the state of the current top zone + // and the instructions outside the zone, including the bottom zone. + setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr); + pickNodeFromQueue(Top, TopCand); + assert(TopCand.Reason != NoCand && "failed to find a candidate"); + tracePick(TopCand); + SU = TopCand.SU; + } + IsTopNode = true; } else { - CandPolicy NoPolicy; - SchedCandidate TopCand(NoPolicy); - // Set the top-down policy based on the state of the current top zone and - // the instructions outside the zone, including the bottom zone. 
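// The two VerifyScheduling re-picks above appear to intend the scratch
// candidate: as written they re-run pickNodeFromQueue() into BotCand/TopCand
// themselves and then assert against a TCand that was never filled in (the
// pre-RA GenericScheduler version fills TCand). The presumably intended
// shape, shown for the bottom zone:
//
//   SchedCandidate TCand;
//   TCand.reset(CandPolicy());
//   pickNodeFromQueue(Bot, TCand);
//   assert(TCand.SU == BotCand.SU &&
//          "Last pick result should correspond to re-picking right now");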
- setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr); - pickNodeFromQueue(TopCand); - assert(TopCand.Reason != NoCand && "failed to find a candidate"); - tracePick(TopCand); - SU = TopCand.SU; + SU = pickNodeBidirectional(IsTopNode); } } while (SU->isScheduled); - IsTopNode = true; - Top.removeReady(SU); + if (SU->isTopReady()) + Top.removeReady(SU); + if (SU->isBottomReady()) + Bot.removeReady(SU); LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr()); @@ -3948,13 +4130,25 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) { /// Called after ScheduleDAGMI has scheduled an instruction and updated /// scheduled/remaining flags in the DAG nodes. void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { - SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); - Top.bumpNode(SU); + if (IsTopNode) { + SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); + Top.bumpNode(SU); + } else { + SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle()); + Bot.bumpNode(SU); + } } ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) { - return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C), - /*RemoveKillFlags=*/true); + ScheduleDAGMI *DAG = + new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C), + /*RemoveKillFlags=*/true); + const TargetSubtargetInfo &STI = C->MF->getSubtarget(); + // Add MacroFusion mutation if fusions are not empty. + const auto &MacroFusions = STI.getMacroFusions(); + if (!MacroFusions.empty()) + DAG->addMutation(createMacroFusionDAGMutation(MacroFusions)); + return DAG; } //===----------------------------------------------------------------------===// @@ -4219,7 +4413,7 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { SS << "SU:" << SU->NodeNum; if (DFS) SS << " I:" << DFS->getNumInstrs(SU); - return SS.str(); + return Str; } static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) { @@ -4275,6 +4469,12 @@ unsigned ResourceSegments::getFirstAvailableAt( assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals), sortIntervals) && "Cannot execute on an un-sorted set of intervals."); + + // Zero resource usage is allowed by TargetSchedule.td but we do not construct + // a ResourceSegment interval for that situation. + if (AcquireAtCycle == ReleaseAtCycle) + return CurrCycle; + unsigned RetCycle = CurrCycle; ResourceSegments::IntervalTy NewInterval = IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle); @@ -4294,8 +4494,16 @@ unsigned ResourceSegments::getFirstAvailableAt( void ResourceSegments::add(ResourceSegments::IntervalTy A, const unsigned CutOff) { - assert(A.first < A.second && "Cannot add empty resource usage"); + assert(A.first <= A.second && "Cannot add negative resource usage"); assert(CutOff > 0 && "0-size interval history has no use."); + // Zero resource usage is allowed by TargetSchedule.td, in the case that the + // instruction needed the resource to be available but does not use it. + // However, ResourceSegment represents an interval that is closed on the left + // and open on the right. It is impossible to represent an empty interval when + // the left is closed. Do not add it to Intervals. 
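The half-open interval convention discussed above is easy to trip over, so here is a self-contained illustration. The usual overlap test for [first, second) intervals silently assumes both operands are non-empty; a zero-width reservation (AcquireAtCycle == ReleaseAtCycle) denotes the empty set yet would be reported as a conflict, which is one way to see why add() must filter it out up front:

#include <cassert>
#include <utility>

using Interval = std::pair<unsigned, unsigned>; // [first, second)

// Valid only for non-empty A and B.
static bool intersects(Interval A, Interval B) {
  return A.first < B.second && B.first < A.second;
}

int main() {
  Interval Busy{3, 6};               // resource held during cycles 3, 4, 5
  assert(intersects(Busy, {5, 7}));  // genuine overlap at cycle 5
  assert(!intersects(Busy, {6, 9})); // open right end: cycle 6 is free
  // [4, 4) is the empty set, yet the formula would flag a bogus conflict,
  // so such intervals are never stored in the first place.
  assert(intersects(Busy, {4, 4}));
  return 0;
}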
+ if (A.first == A.second) + return; + assert(all_of(_Intervals, [&A](const ResourceSegments::IntervalTy &Interval) -> bool { return !intersects(A, Interval); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp index e7e8f6026834..4b3ff57fb478 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp @@ -130,6 +130,14 @@ namespace { // Remember which edges have been considered for breaking. SmallSet<std::pair<MachineBasicBlock*, MachineBasicBlock*>, 8> CEBCandidates; + // Memorize the register that also wanted to sink into the same block along + // a different critical edge. + // {register to sink, sink-to block} -> the first sink-from block. + // We're recording the first sink-from block because that (critical) edge + // was deferred until we see another register that's going to sink into the + // same block. + DenseMap<std::pair<Register, MachineBasicBlock *>, MachineBasicBlock *> + CEMergeCandidates; // Remember which edges we are about to split. // This is different from CEBCandidates since those edges // will be split. @@ -138,7 +146,7 @@ namespace { DenseSet<Register> RegsToClearKillFlags; using AllSuccsCache = - DenseMap<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>; + SmallDenseMap<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>; /// DBG_VALUE pointer and flag. The flag is true if this DBG_VALUE is /// post-dominated by another DBG_VALUE of the same variable location. @@ -184,27 +192,30 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired<AAResultsWrapperPass>(); - AU.addRequired<MachineDominatorTree>(); - AU.addRequired<MachinePostDominatorTree>(); + AU.addRequired<MachineDominatorTreeWrapperPass>(); + AU.addRequired<MachinePostDominatorTreeWrapperPass>(); AU.addRequired<MachineCycleInfoWrapperPass>(); - AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineBranchProbabilityInfoWrapperPass>(); AU.addPreserved<MachineCycleInfoWrapperPass>(); - AU.addPreserved<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfoWrapperPass>(); if (UseBlockFreqInfo) - AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); AU.addRequired<TargetPassConfig>(); } void releaseMemory() override { CEBCandidates.clear(); + CEMergeCandidates.clear(); } private: bool ProcessBlock(MachineBasicBlock &MBB); void ProcessDbgInst(MachineInstr &MI); - bool isWorthBreakingCriticalEdge(MachineInstr &MI, - MachineBasicBlock *From, - MachineBasicBlock *To); + bool isLegalToBreakCriticalEdge(MachineInstr &MI, MachineBasicBlock *From, + MachineBasicBlock *To, bool BreakPHIEdge); + bool isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From, + MachineBasicBlock *To, + MachineBasicBlock *&DeferredFromBlock); bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To, MachineInstr &MI); @@ -273,8 +284,8 @@ char &llvm::MachineSinkingID = MachineSinking::ID; INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE, "Machine code sinking", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 
INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE, @@ -309,7 +320,7 @@ static bool blockPrologueInterferes(const MachineBasicBlock *BB, if (PI->readsRegister(Reg, TRI)) return true; // Check for interference with non-dead defs - auto *DefOp = PI->findRegisterDefOperand(Reg, false, true, TRI); + auto *DefOp = PI->findRegisterDefOperand(Reg, TRI, false, true); if (DefOp && !DefOp->isDead()) return true; } @@ -406,7 +417,7 @@ bool MachineSinking::PerformSinkAndFold(MachineInstr &MI, continue; } - if (Reg.isPhysical() && + if (Reg.isPhysical() && MO.isUse() && (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO))) continue; @@ -708,11 +719,13 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { TII = STI->getInstrInfo(); TRI = STI->getRegisterInfo(); MRI = &MF.getRegInfo(); - DT = &getAnalysis<MachineDominatorTree>(); - PDT = &getAnalysis<MachinePostDominatorTree>(); + DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); + PDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree(); CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo(); - MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr; - MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + MBFI = UseBlockFreqInfo + ? &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI() + : nullptr; + MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI(); AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); RegClassInfo.runOnMachineFunction(MF); TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); @@ -725,6 +738,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { // Process all basic blocks. CEBCandidates.clear(); + CEMergeCandidates.clear(); ToSplit.clear(); for (auto &MBB: MF) MadeChange |= ProcessBlock(MBB); @@ -873,9 +887,9 @@ void MachineSinking::ProcessDbgInst(MachineInstr &MI) { SeenDbgVars.insert(Var); } -bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI, - MachineBasicBlock *From, - MachineBasicBlock *To) { +bool MachineSinking::isWorthBreakingCriticalEdge( + MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To, + MachineBasicBlock *&DeferredFromBlock) { // FIXME: Need much better heuristics. // If the pass has already considered breaking this edge (during this pass @@ -887,6 +901,27 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI, if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI)) return true; + // Check and record the register and the destination block we want to sink + // into. Note that we want to do this before the next check on branch + // probability, because we want to record the initial candidate even if it's + // on a hot edge, so that other candidates that might not be on hot edges can + // be sunk as well. + for (const auto &MO : MI.all_defs()) { + Register Reg = MO.getReg(); + if (!Reg) + continue; + Register SrcReg = Reg.isVirtual() ? TRI->lookThruCopyLike(Reg, MRI) : Reg; + auto Key = std::make_pair(SrcReg, To); + auto Res = CEMergeCandidates.try_emplace(Key, From); + // We wanted to sink the same register into the same block, so consider it + // to be profitable. + if (!Res.second) { + // Return the source block that was previously held off.
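The bookkeeping this hunk adds can be modeled in isolation. A hedged sketch with plain ints standing in for registers and strings for blocks (recordSink is an invented name for this illustration): the first request to sink a register into a block across a critical edge only records the sink-from block; a second request for the same (register, sink-to block) key retrieves the deferred source so both edges can be considered together:

#include <map>
#include <optional>
#include <string>
#include <utility>

using Reg = int;
using Block = std::string;

std::map<std::pair<Reg, Block>, Block> CEMergeCandidates;

// Returns the previously deferred sink-from block, if any.
std::optional<Block> recordSink(Reg R, const Block &From, const Block &To) {
  auto [It, Inserted] = CEMergeCandidates.try_emplace({R, To}, From);
  if (Inserted)
    return std::nullopt; // first sighting: remember From, keep deferring
  return It->second;     // second sighting: splitting both edges pays off
}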
+ DeferredFromBlock = Res.first->second; + return true; + } + } + if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <= BranchProbability(SplitEdgeProbabilityThreshold, 100)) return true; @@ -921,15 +956,12 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI, return false; } -bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI, - MachineBasicBlock *FromBB, - MachineBasicBlock *ToBB, - bool BreakPHIEdge) { - if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB)) - return false; - +bool MachineSinking::isLegalToBreakCriticalEdge(MachineInstr &MI, + MachineBasicBlock *FromBB, + MachineBasicBlock *ToBB, + bool BreakPHIEdge) { // Avoid breaking back edge. From == To means backedge for single BB cycle. - if (!SplitEdges || FromBB == ToBB) + if (!SplitEdges || FromBB == ToBB || !FromBB->isSuccessor(ToBB)) return false; MachineCycle *FromCycle = CI->getCycle(FromBB); @@ -985,11 +1017,32 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI, return false; } - ToSplit.insert(std::make_pair(FromBB, ToBB)); - return true; } +bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI, + MachineBasicBlock *FromBB, + MachineBasicBlock *ToBB, + bool BreakPHIEdge) { + bool Status = false; + MachineBasicBlock *DeferredFromBB = nullptr; + if (isWorthBreakingCriticalEdge(MI, FromBB, ToBB, DeferredFromBB)) { + // If there is a DeferredFromBB, we consider FromBB only if _both_ + // of them are legal to split. + if ((!DeferredFromBB || + ToSplit.count(std::make_pair(DeferredFromBB, ToBB)) || + isLegalToBreakCriticalEdge(MI, DeferredFromBB, ToBB, BreakPHIEdge)) && + isLegalToBreakCriticalEdge(MI, FromBB, ToBB, BreakPHIEdge)) { + ToSplit.insert(std::make_pair(FromBB, ToBB)); + if (DeferredFromBB) + ToSplit.insert(std::make_pair(DeferredFromBB, ToBB)); + Status = true; + } + } + + return Status; +} + std::vector<unsigned> & MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) { // Currently to save compiling time, MBB's register pressure will not change @@ -1949,13 +2002,8 @@ static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB, for (unsigned DefReg : DefedRegsInCopy) for (MCPhysReg S : TRI->subregs_inclusive(DefReg)) SuccBB->removeLiveIn(S); - for (auto U : UsedOpsInCopy) { - Register SrcReg = MI->getOperand(U).getReg(); - LaneBitmask Mask; - for (MCRegUnitMaskIterator S(SrcReg, TRI); S.isValid(); ++S) - Mask |= (*S).second; - SuccBB->addLiveIn(SrcReg, Mask); - } + for (auto U : UsedOpsInCopy) + SuccBB->addLiveIn(MI->getOperand(U).getReg()); SuccBB->sortUniqueLiveIns(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp index 1cd90474898e..5abfbd5981fb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp @@ -200,7 +200,7 @@ stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs, for (const auto *Op : MI.memoperands()) { if (!HashMemOperands) break; - HashComponents.push_back(static_cast<unsigned>(Op->getSize())); + HashComponents.push_back(static_cast<unsigned>(Op->getSize().getValue())); HashComponents.push_back(static_cast<unsigned>(Op->getFlags())); HashComponents.push_back(static_cast<unsigned>(Op->getOffset())); HashComponents.push_back(static_cast<unsigned>(Op->getSuccessOrdering())); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 
3e6f36fe936f..bf3add010574 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -46,8 +46,8 @@ char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID; INITIALIZE_PASS_BEGIN(MachineTraceMetrics, DEBUG_TYPE, "Machine Trace Metrics", false, true) -INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_END(MachineTraceMetrics, DEBUG_TYPE, "Machine Trace Metrics", false, true) @@ -57,8 +57,8 @@ MachineTraceMetrics::MachineTraceMetrics() : MachineFunctionPass(ID) { void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired<MachineBranchProbabilityInfo>(); - AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineBranchProbabilityInfoWrapperPass>(); + AU.addRequired<MachineLoopInfoWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -68,7 +68,7 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { TII = ST.getInstrInfo(); TRI = ST.getRegisterInfo(); MRI = &MF->getRegInfo(); - Loops = &getAnalysis<MachineLoopInfo>(); + Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); SchedModel.init(&ST); BlockInfo.resize(MF->getNumBlockIDs()); ProcReleaseAtCycles.resize(MF->getNumBlockIDs() * @@ -939,15 +939,15 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height, } // Now we know the height of MI. Update any regunits read. - for (size_t I = 0, E = ReadOps.size(); I != E; ++I) { - MCRegister Reg = MI.getOperand(ReadOps[I]).getReg().asMCReg(); + for (unsigned Op : ReadOps) { + MCRegister Reg = MI.getOperand(Op).getReg().asMCReg(); for (MCRegUnit Unit : TRI->regunits(Reg)) { LiveRegUnit &LRU = RegUnits[Unit]; // Set the height to the highest reader of the unit. if (LRU.Cycle <= Height && LRU.MI != &MI) { LRU.Cycle = Height; LRU.MI = &MI; - LRU.Op = ReadOps[I]; + LRU.Op = Op; } } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp index 3e0fe2b1ba08..7548fc8141ec 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp @@ -155,7 +155,7 @@ template struct llvm::GenericUniformityAnalysisImplDeleter< MachineUniformityInfo llvm::computeMachineUniformityInfo( MachineFunction &F, const MachineCycleInfo &cycleInfo, - const MachineDomTree &domTree, bool HasBranchDivergence) { + const MachineDominatorTree &domTree, bool HasBranchDivergence) { assert(F.getRegInfo().isSSA() && "Expected to be run on SSA form!"); MachineUniformityInfo UI(domTree, cycleInfo); if (HasBranchDivergence) @@ -165,25 +165,6 @@ MachineUniformityInfo llvm::computeMachineUniformityInfo( namespace { -/// Legacy analysis pass which computes a \ref MachineUniformityInfo. 
-class MachineUniformityAnalysisPass : public MachineFunctionPass { - MachineUniformityInfo UI; - -public: - static char ID; - - MachineUniformityAnalysisPass(); - - MachineUniformityInfo &getUniformityInfo() { return UI; } - const MachineUniformityInfo &getUniformityInfo() const { return UI; } - - bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; - void print(raw_ostream &OS, const Module *M = nullptr) const override; - - // TODO: verify analysis -}; - class MachineUniformityInfoPrinterPass : public MachineFunctionPass { public: static char ID; @@ -206,19 +187,20 @@ MachineUniformityAnalysisPass::MachineUniformityAnalysisPass() INITIALIZE_PASS_BEGIN(MachineUniformityAnalysisPass, "machine-uniformity", "Machine Uniformity Info Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_END(MachineUniformityAnalysisPass, "machine-uniformity", "Machine Uniformity Info Analysis", true, true) void MachineUniformityAnalysisPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<MachineCycleInfoWrapperPass>(); - AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineDominatorTreeWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } bool MachineUniformityAnalysisPass::runOnMachineFunction(MachineFunction &MF) { - auto &DomTree = getAnalysis<MachineDominatorTree>().getBase(); + auto &DomTree = + getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree().getBase(); auto &CI = getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo(); // FIXME: Query TTI::hasBranchDivergence. -run-pass seems to end up with a // default NoTTI diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp index a015d9bbd2d3..d22fbe322ec3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp @@ -20,6 +20,7 @@ // -verify-machineinstrs. 
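A large share of this patch is the same mechanical migration: analyses that used to be passes themselves become results held by *WrapperPass classes and fetched through an explicit getter. Shown in miniature with stand-in types (illustrative only, not LLVM's actual declarations):

struct MachineDominatorTree { /* the analysis result itself */ };

struct MachineDominatorTreeWrapperPass {
  MachineDominatorTree DT;
  MachineDominatorTree &getDomTree() { return DT; }
};

// Call sites change accordingly:
//   before: DT = &getAnalysis<MachineDominatorTree>();
//   after:  DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
// Splitting the result type from the pass lets the legacy pass manager (via
// the wrapper) and the new pass manager (via an ...Analysis) share one
// result type.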
//===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineVerifier.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -38,8 +39,9 @@ #include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/LiveVariables.h" -#include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConvergenceVerifier.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -54,9 +56,11 @@ #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGenTypes/LowLevelType.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/EHPersonalities.h" @@ -90,6 +94,9 @@ using namespace llvm; namespace { struct MachineVerifier { + MachineVerifier(MachineFunctionAnalysisManager &MFAM, const char *b) + : MFAM(&MFAM), Banner(b) {} + MachineVerifier(Pass *pass, const char *b) : PASS(pass), Banner(b) {} MachineVerifier(const char *b, LiveVariables *LiveVars, @@ -100,6 +107,7 @@ namespace { unsigned verify(const MachineFunction &MF); + MachineFunctionAnalysisManager *MFAM = nullptr; Pass *const PASS = nullptr; const char *Banner; const MachineFunction *MF = nullptr; @@ -220,6 +228,11 @@ namespace { LiveStacks *LiveStks = nullptr; SlotIndexes *Indexes = nullptr; + // This is calculated only when trying to verify convergence control tokens. + // Similar to the LLVM IR verifier, we calculate this locally instead of + // relying on the pass manager. + MachineDominatorTree DT; + void visitMachineFunctionBefore(); void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB); void visitMachineBundleBefore(const MachineInstr *MI); @@ -294,21 +307,21 @@ namespace { void verifyProperties(const MachineFunction &MF); }; - struct MachineVerifierPass : public MachineFunctionPass { + struct MachineVerifierLegacyPass : public MachineFunctionPass { static char ID; // Pass ID, replacement for typeid const std::string Banner; - MachineVerifierPass(std::string banner = std::string()) - : MachineFunctionPass(ID), Banner(std::move(banner)) { - initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry()); - } + MachineVerifierLegacyPass(std::string banner = std::string()) + : MachineFunctionPass(ID), Banner(std::move(banner)) { + initializeMachineVerifierLegacyPassPass(*PassRegistry::getPassRegistry()); + } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addUsedIfAvailable<LiveStacks>(); - AU.addUsedIfAvailable<LiveVariables>(); - AU.addUsedIfAvailable<SlotIndexes>(); - AU.addUsedIfAvailable<LiveIntervals>(); + AU.addUsedIfAvailable<LiveVariablesWrapperPass>(); + AU.addUsedIfAvailable<SlotIndexesWrapperPass>(); + AU.addUsedIfAvailable<LiveIntervalsWrapperPass>(); AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -330,17 +343,31 @@ namespace { } // end anonymous namespace -char MachineVerifierPass::ID = 0; +PreservedAnalyses +MachineVerifierPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + // Skip functions that have known verification problems. 
+ // FIXME: Remove this mechanism when all problematic passes have been + // fixed. + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailsVerification)) + return PreservedAnalyses::all(); + unsigned FoundErrors = MachineVerifier(MFAM, Banner.c_str()).verify(MF); + if (FoundErrors) + report_fatal_error("Found " + Twine(FoundErrors) + " machine code errors."); + return PreservedAnalyses::all(); +} + +char MachineVerifierLegacyPass::ID = 0; -INITIALIZE_PASS(MachineVerifierPass, "machineverifier", +INITIALIZE_PASS(MachineVerifierLegacyPass, "machineverifier", "Verify generated machine code", false, false) FunctionPass *llvm::createMachineVerifierPass(const std::string &Banner) { - return new MachineVerifierPass(Banner); + return new MachineVerifierLegacyPass(Banner); } -void llvm::verifyMachineFunction(MachineFunctionAnalysisManager *, - const std::string &Banner, +void llvm::verifyMachineFunction(const std::string &Banner, const MachineFunction &MF) { // TODO: Use MFAM after porting below analyses. // LiveVariables *LiveVars; @@ -421,12 +448,23 @@ unsigned MachineVerifier::verify(const MachineFunction &MF) { MachineFunctionProperties::Property::TracksDebugUserValues); if (PASS) { - LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>(); + auto *LISWrapper = PASS->getAnalysisIfAvailable<LiveIntervalsWrapperPass>(); + LiveInts = LISWrapper ? &LISWrapper->getLIS() : nullptr; // We don't want to verify LiveVariables if LiveIntervals is available. + auto *LVWrapper = PASS->getAnalysisIfAvailable<LiveVariablesWrapperPass>(); if (!LiveInts) - LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>(); + LiveVars = LVWrapper ? &LVWrapper->getLV() : nullptr; LiveStks = PASS->getAnalysisIfAvailable<LiveStacks>(); - Indexes = PASS->getAnalysisIfAvailable<SlotIndexes>(); + auto *SIWrapper = PASS->getAnalysisIfAvailable<SlotIndexesWrapperPass>(); + Indexes = SIWrapper ? 
&SIWrapper->getSI() : nullptr; + } + if (MFAM) { + MachineFunction &Func = const_cast<MachineFunction &>(MF); + LiveInts = MFAM->getCachedResult<LiveIntervalsAnalysis>(Func); + if (!LiveInts) + LiveVars = MFAM->getCachedResult<LiveVariablesAnalysis>(Func); + // TODO: LiveStks = MFAM->getCachedResult<LiveStacksAnalysis>(Func); + Indexes = MFAM->getCachedResult<SlotIndexesAnalysis>(Func); } verifySlotIndexes(); @@ -1189,13 +1227,16 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { const MachineMemOperand &MMO = **MI->memoperands_begin(); if (MI->getOpcode() == TargetOpcode::G_ZEXTLOAD || MI->getOpcode() == TargetOpcode::G_SEXTLOAD) { - if (MMO.getSizeInBits() >= ValTy.getSizeInBits()) + if (TypeSize::isKnownGE(MMO.getSizeInBits().getValue(), + ValTy.getSizeInBits())) report("Generic extload must have a narrower memory type", MI); } else if (MI->getOpcode() == TargetOpcode::G_LOAD) { - if (MMO.getSize() > ValTy.getSizeInBytes()) + if (TypeSize::isKnownGT(MMO.getSize().getValue(), + ValTy.getSizeInBytes())) report("load memory size cannot exceed result size", MI); } else if (MI->getOpcode() == TargetOpcode::G_STORE) { - if (ValTy.getSizeInBytes() < MMO.getSize()) + if (TypeSize::isKnownLT(ValTy.getSizeInBytes(), + MMO.getSize().getValue())) report("store memory size cannot exceed value size", MI); } @@ -1289,12 +1330,22 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (!DstTy.isValid() || !PtrTy.isValid() || !OffsetTy.isValid()) break; - if (!PtrTy.getScalarType().isPointer()) + if (!PtrTy.isPointerOrPointerVector()) report("gep first operand must be a pointer", MI); - if (OffsetTy.getScalarType().isPointer()) + if (OffsetTy.isPointerOrPointerVector()) report("gep offset operand must not be a pointer", MI); + if (PtrTy.isPointerOrPointerVector()) { + const DataLayout &DL = MF->getDataLayout(); + unsigned AS = PtrTy.getAddressSpace(); + unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8; + if (OffsetTy.getScalarSizeInBits() != IndexSizeInBits) { + report("gep offset operand must match index size for address space", + MI); + } + } + // TODO: Is the offset allowed to be a scalar with a vector? break; } @@ -1305,7 +1356,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (!DstTy.isValid() || !SrcTy.isValid() || !MaskTy.isValid()) break; - if (!DstTy.getScalarType().isPointer()) + if (!DstTy.isPointerOrPointerVector()) report("ptrmask result type must be a pointer", MI); if (!MaskTy.getScalarType().isScalar()) @@ -1331,15 +1382,13 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (!DstTy.isValid() || !SrcTy.isValid()) break; - LLT DstElTy = DstTy.getScalarType(); - LLT SrcElTy = SrcTy.getScalarType(); - if (DstElTy.isPointer() || SrcElTy.isPointer()) + if (DstTy.isPointerOrPointerVector() || SrcTy.isPointerOrPointerVector()) report("Generic extend/truncate can not operate on pointers", MI); verifyVectorElementMatch(DstTy, SrcTy, MI); - unsigned DstSize = DstElTy.getSizeInBits(); - unsigned SrcSize = SrcElTy.getSizeInBits(); + unsigned DstSize = DstTy.getScalarSizeInBits(); + unsigned SrcSize = SrcTy.getScalarSizeInBits(); switch (MI->getOpcode()) { default: if (DstSize <= SrcSize) @@ -1400,7 +1449,8 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (DstTy.isVector()) { // This case is the converse of G_CONCAT_VECTORS. 
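Stepping back to the memory-size checks earlier in this hunk: the switch from plain comparisons to TypeSize::isKnownGT/isKnownGE matters for scalable vectors, where a size is a multiple of an unknown vscale >= 1, so two sizes are ordered only when the ordering holds for every vscale. A self-contained model of the predicate (simplified; the real TypeSize carries more API):

#include <cassert>

struct Size {
  unsigned Min;   // known minimum size
  bool Scalable;  // if true, the actual size is Min * vscale, vscale >= 1
};

// True only when LHS > RHS holds for every possible vscale.
static bool isKnownGT(Size L, Size R) {
  if (!R.Scalable || L.Scalable)
    return L.Min > R.Min; // same scalability, or only L can grow
  return false;           // fixed L vs scalable R: R may grow past L
}

int main() {
  assert(isKnownGT({128, false}, {64, false})); // ordinary fixed compare
  assert(isKnownGT({8, true}, {4, true}));      // vscale cancels out
  assert(!isKnownGT({128, false}, {4, true}));  // 4 * vscale may exceed 128
  return 0;
}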
if (!SrcTy.isVector() || SrcTy.getScalarType() != DstTy.getScalarType() || - SrcTy.getNumElements() != NumDsts * DstTy.getNumElements()) + SrcTy.isScalableVector() != DstTy.isScalableVector() || + SrcTy.getSizeInBits() != NumDsts * DstTy.getSizeInBits()) report("G_UNMERGE_VALUES source operand does not match vector " "destination operands", MI); @@ -1477,8 +1527,8 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2)) if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MO.getReg())) report("G_CONCAT_VECTOR source operand types are not homogeneous", MI); - if (DstTy.getNumElements() != - SrcTy.getNumElements() * (MI->getNumOperands() - 1)) + if (DstTy.getElementCount() != + SrcTy.getElementCount() * (MI->getNumOperands() - 1)) report("G_CONCAT_VECTOR num dest and source elements should match", MI); break; } @@ -1488,11 +1538,42 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { LLT SrcTy = MRI->getType(MI->getOperand(2).getReg()); if ((DstTy.isVector() != SrcTy.isVector()) || - (DstTy.isVector() && DstTy.getNumElements() != SrcTy.getNumElements())) + (DstTy.isVector() && + DstTy.getElementCount() != SrcTy.getElementCount())) report("Generic vector icmp/fcmp must preserve number of lanes", MI); break; } + case TargetOpcode::G_SCMP: + case TargetOpcode::G_UCMP: { + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); + LLT SrcTy2 = MRI->getType(MI->getOperand(2).getReg()); + + if (SrcTy.isPointerOrPointerVector() || SrcTy2.isPointerOrPointerVector()) { + report("Generic scmp/ucmp does not support pointers as operands", MI); + break; + } + + if (DstTy.isPointerOrPointerVector()) { + report("Generic scmp/ucmp does not support pointers as a result", MI); + break; + } + + if ((DstTy.isVector() != SrcTy.isVector()) || + (DstTy.isVector() && + DstTy.getElementCount() != SrcTy.getElementCount())) { + report("Generic vector scmp/ucmp must preserve number of lanes", MI); + break; + } + + if (SrcTy != SrcTy2) { + report("Generic scmp/ucmp must have same input types", MI); + break; + } + + break; + } case TargetOpcode::G_EXTRACT: { const MachineOperand &SrcOp = MI->getOperand(1); if (!SrcOp.isReg()) { @@ -1598,6 +1679,115 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { report("G_BSWAP size must be a multiple of 16 bits", MI); break; } + case TargetOpcode::G_VSCALE: { + if (!MI->getOperand(1).isCImm()) { + report("G_VSCALE operand must be cimm", MI); + break; + } + if (MI->getOperand(1).getCImm()->isZero()) { + report("G_VSCALE immediate cannot be zero", MI); + break; + } + break; + } + case TargetOpcode::G_INSERT_SUBVECTOR: { + const MachineOperand &Src0Op = MI->getOperand(1); + if (!Src0Op.isReg()) { + report("G_INSERT_SUBVECTOR first source must be a register", MI); + break; + } + + const MachineOperand &Src1Op = MI->getOperand(2); + if (!Src1Op.isReg()) { + report("G_INSERT_SUBVECTOR second source must be a register", MI); + break; + } + + const MachineOperand &IndexOp = MI->getOperand(3); + if (!IndexOp.isImm()) { + report("G_INSERT_SUBVECTOR index must be an immediate", MI); + break; + } + + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT Src0Ty = MRI->getType(Src0Op.getReg()); + LLT Src1Ty = MRI->getType(Src1Op.getReg()); + + if (!DstTy.isVector()) { + report("Destination type must be a vector", MI); + break; + } + + if (!Src0Ty.isVector()) { + report("First 
source must be a vector", MI); + break; + } + + if (!Src1Ty.isVector()) { + report("Second source must be a vector", MI); + break; + } + + if (DstTy != Src0Ty) { + report("Destination type must match the first source vector type", MI); + break; + } + + if (Src0Ty.getElementType() != Src1Ty.getElementType()) { + report("Element type of source vectors must be the same", MI); + break; + } + + if (IndexOp.getImm() != 0 && + Src1Ty.getElementCount().getKnownMinValue() % IndexOp.getImm() != 0) { + report("Index must be a multiple of the second source vector's " + "minimum vector length", + MI); + break; + } + break; + } + case TargetOpcode::G_EXTRACT_SUBVECTOR: { + const MachineOperand &SrcOp = MI->getOperand(1); + if (!SrcOp.isReg()) { + report("G_EXTRACT_SUBVECTOR first source must be a register", MI); + break; + } + + const MachineOperand &IndexOp = MI->getOperand(2); + if (!IndexOp.isImm()) { + report("G_EXTRACT_SUBVECTOR index must be an immediate", MI); + break; + } + + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(SrcOp.getReg()); + + if (!DstTy.isVector()) { + report("Destination type must be a vector", MI); + break; + } + + if (!SrcTy.isVector()) { + report("First source must be a vector", MI); + break; + } + + if (DstTy.getElementType() != SrcTy.getElementType()) { + report("Element type of vectors must be the same", MI); + break; + } + + if (IndexOp.getImm() != 0 && + SrcTy.getElementCount().getKnownMinValue() % IndexOp.getImm() != 0) { + report("Index must be a multiple of the source vector's minimum vector " + "length", + MI); + break; + } + + break; + } case TargetOpcode::G_SHUFFLE_VECTOR: { const MachineOperand &MaskOp = MI->getOperand(3); if (!MaskOp.isShuffleMask()) { @@ -1635,6 +1825,85 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } + + case TargetOpcode::G_SPLAT_VECTOR: { + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); + + if (!DstTy.isScalableVector()) { + report("Destination type must be a scalable vector", MI); + break; + } + + if (!SrcTy.isScalar()) { + report("Source type must be a scalar", MI); + break; + } + + if (TypeSize::isKnownGT(DstTy.getElementType().getSizeInBits(), + SrcTy.getSizeInBits())) { + report("Element type of the destination must be the same size or smaller " + "than the source type", + MI); + break; + } + + break; + } + case TargetOpcode::G_EXTRACT_VECTOR_ELT: { + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); + LLT IdxTy = MRI->getType(MI->getOperand(2).getReg()); + + if (!DstTy.isScalar() && !DstTy.isPointer()) { + report("Destination type must be a scalar or pointer", MI); + break; + } + + if (!SrcTy.isVector()) { + report("First source must be a vector", MI); + break; + } + + auto TLI = MF->getSubtarget().getTargetLowering(); + if (IdxTy.getSizeInBits() != + TLI->getVectorIdxTy(MF->getDataLayout()).getFixedSizeInBits()) { + report("Index type must match VectorIdxTy", MI); + break; + } + + break; + } + case TargetOpcode::G_INSERT_VECTOR_ELT: { + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT VecTy = MRI->getType(MI->getOperand(1).getReg()); + LLT ScaTy = MRI->getType(MI->getOperand(2).getReg()); + LLT IdxTy = MRI->getType(MI->getOperand(3).getReg()); + + if (!DstTy.isVector()) { + report("Destination type must be a vector", MI); + break; + } + + if (VecTy != DstTy) { + report("Destination type and vector type must match", MI); + 
break; + } + + if (!ScaTy.isScalar() && !ScaTy.isPointer()) { + report("Inserted element must be a scalar or pointer", MI); + break; + } + + auto TLI = MF->getSubtarget().getTargetLowering(); + if (IdxTy.getSizeInBits() != + TLI->getVectorIdxTy(MF->getDataLayout()).getFixedSizeInBits()) { + report("Index type must match VectorIdxTy", MI); + break; + } + + break; + } case TargetOpcode::G_DYN_STACKALLOC: { const MachineOperand &DstOp = MI->getOperand(0); const MachineOperand &AllocOp = MI->getOperand(1); @@ -1722,6 +1991,17 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } + case TargetOpcode::G_UBSANTRAP: { + const MachineOperand &KindOp = MI->getOperand(0); + if (!MI->getOperand(0).isImm()) { + report("Crash kind must be an immediate", &KindOp, 0); + break; + } + int64_t Kind = MI->getOperand(0).getImm(); + if (!isInt<8>(Kind)) + report("Crash kind must be 8 bit wide", &KindOp, 0); + break; + } case TargetOpcode::G_VECREDUCE_SEQ_FADD: case TargetOpcode::G_VECREDUCE_SEQ_FMUL: { LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); @@ -1847,6 +2127,12 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { report("Dst operand 0 must be a pointer", MI); break; } + case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE: { + const MachineOperand &AddrOp = MI->getOperand(1); + if (!AddrOp.isReg() || !MRI->getType(AddrOp.getReg()).isPointer()) + report("addr operand must be a pointer", &AddrOp, 1); + break; + } default: break; } @@ -2957,7 +3243,30 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock &MBB) { } } +static void +verifyConvergenceControl(const MachineFunction &MF, MachineDominatorTree &DT, + std::function<void(const Twine &Message)> FailureCB) { + MachineConvergenceVerifier CV; + CV.initialize(&errs(), FailureCB, MF); + + for (const auto &MBB : MF) { + CV.visit(MBB); + for (const auto &MI : MBB.instrs()) + CV.visit(MI); + } + + if (CV.sawTokens()) { + DT.recalculate(const_cast<MachineFunction &>(MF)); + CV.verify(DT); + } +} + void MachineVerifier::visitMachineFunctionAfter() { + auto FailureCB = [this](const Twine &Message) { + report(Message.str().c_str(), MF); + }; + verifyConvergenceControl(*MF, DT, FailureCB); + calcRegsPassed(); for (const MachineBasicBlock &MBB : *MF) @@ -3529,6 +3838,9 @@ void MachineVerifier::verifyStackFrame() { if (I.getOpcode() == FrameSetupOpcode) { if (BBState.ExitIsSetup) report("FrameSetup is after another FrameSetup", &I); + if (!MRI->isSSA() && !MF->getFrameInfo().adjustsStack()) + report("AdjustsStack not set in presence of a frame pseudo " + "instruction.", &I); BBState.ExitValue -= TII->getFrameTotalSize(I); BBState.ExitIsSetup = true; } @@ -3544,6 +3856,9 @@ void MachineVerifier::verifyStackFrame() { errs() << "FrameDestroy <" << Size << "> is after FrameSetup <" << AbsSPAdj << ">.\n"; } + if (!MRI->isSSA() && !MF->getFrameInfo().adjustsStack()) + report("AdjustsStack not set in presence of a frame pseudo " + "instruction.", &I); BBState.ExitValue += Size; BBState.ExitIsSetup = false; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp index 0bef513342ff..0f29ebe3ee79 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -22,6 +22,10 @@ #define DEBUG_TYPE "pipeliner" using namespace llvm; +static cl::opt<bool> SwapBranchTargetsMVE( + "pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false), + cl::desc("Swap target 
blocks of a conditional branch for MVE expander")); + void ModuloSchedule::print(raw_ostream &OS) { for (MachineInstr *MI : ScheduledInstrs) OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI; @@ -814,7 +818,7 @@ void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB, unsigned SplitReg = 0; for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI), KernelBB->instr_end())) - if (BBJ.readsRegister(Def)) { + if (BBJ.readsRegister(Def, /*TRI=*/nullptr)) { // We split the lifetime when we find the first use. if (SplitReg == 0) { SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def)); @@ -829,7 +833,7 @@ void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB, // Search through each of the epilog blocks for any uses to be renamed. for (auto &Epilog : EpilogBBs) for (auto &I : *Epilog) - if (I.readsRegister(Def)) + if (I.readsRegister(Def, /*TRI=*/nullptr)) I.substituteRegister(Def, SplitReg, 0, *TRI); break; } @@ -979,8 +983,8 @@ void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI, NewMMOs.push_back( MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize())); } else { - NewMMOs.push_back( - MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize)); + NewMMOs.push_back(MF.getMachineMemOperand( + MMO, 0, LocationSize::beforeOrAfterPointer())); } } NewMI.setMemRefs(MF, NewMMOs); @@ -1673,7 +1677,8 @@ void PeelingModuloScheduleExpander::moveStageBetweenBlocks( // we don't need the phi anymore. if (getStage(Def) == Stage) { Register PhiReg = MI.getOperand(0).getReg(); - assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg()) != -1); + assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg(), + /*TRI=*/nullptr) != -1); MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); MI.getOperand(0).setReg(PhiReg); PhiToDelete.push_back(&MI); @@ -1899,7 +1904,7 @@ Register PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB) { MachineInstr *MI = MRI.getUniqueVRegDef(Reg); - unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg); + unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr); return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg(); } @@ -2096,6 +2101,642 @@ void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() { MSE.cleanup(); } +MachineInstr *ModuloScheduleExpanderMVE::cloneInstr(MachineInstr *OldMI) { + MachineInstr *NewMI = MF.CloneMachineInstr(OldMI); + + // TODO: Offset information needs to be corrected. + NewMI->dropMemRefs(MF); + + return NewMI; +} + +/// Create a dedicated exit for Loop. Exit is the original exit for Loop. +/// If it is already a dedicated exit, return it. Otherwise, insert a new +/// block between them and return the new block.
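Before the function itself, a picture of what it does. If the exit block has other predecessors, the exiting edge gets its own block so that later phi insertion only has to reason about a single path:

//   before:  Loop --cond--> Exit <-- OtherPred
//   after:   Loop --cond--> NewExit --> Exit <-- OtherPred

A toy, self-contained version of the splice (predecessor lists only; the real code below also rewrites branches and phi uses):

#include <deque>
#include <string>
#include <vector>

struct ToyBlock {
  std::string Name;
  std::vector<ToyBlock *> Preds;
};

ToyBlock *dedicatedExit(ToyBlock &Loop, ToyBlock &Exit,
                        std::deque<ToyBlock> &Storage) {
  if (Exit.Preds.size() == 1)
    return &Exit;                        // already dedicated: nothing to do
  Storage.push_back({"NewExit", {&Loop}});
  ToyBlock *NewExit = &Storage.back();   // deque keeps addresses stable
  for (ToyBlock *&P : Exit.Preds)        // reroute Loop -> Exit via NewExit
    if (P == &Loop)
      P = NewExit;
  return NewExit;
}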
+static MachineBasicBlock *createDedicatedExit(MachineBasicBlock *Loop, + MachineBasicBlock *Exit) { + if (Exit->pred_size() == 1) + return Exit; + + MachineFunction *MF = Loop->getParent(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + + MachineBasicBlock *NewExit = + MF->CreateMachineBasicBlock(Loop->getBasicBlock()); + MF->insert(Loop->getIterator(), NewExit); + + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector<MachineOperand, 4> Cond; + TII->analyzeBranch(*Loop, TBB, FBB, Cond); + if (TBB == Loop) + FBB = NewExit; + else if (FBB == Loop) + TBB = NewExit; + else + llvm_unreachable("unexpected loop structure"); + TII->removeBranch(*Loop); + TII->insertBranch(*Loop, TBB, FBB, Cond, DebugLoc()); + Loop->replaceSuccessor(Exit, NewExit); + TII->insertUnconditionalBranch(*NewExit, Exit, DebugLoc()); + NewExit->addSuccessor(Exit); + + Exit->replacePhiUsesWith(Loop, NewExit); + + return NewExit; +} + +/// Insert branch code into the end of MBB. It branches to GreaterThan if the +/// remaining trip count for instructions in LastStage0Insts is greater than +/// RequiredTC, and to Otherwise otherwise. +void ModuloScheduleExpanderMVE::insertCondBranch(MachineBasicBlock &MBB, + int RequiredTC, + InstrMapTy &LastStage0Insts, + MachineBasicBlock &GreaterThan, + MachineBasicBlock &Otherwise) { + SmallVector<MachineOperand, 4> Cond; + LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC, MBB, Cond, + LastStage0Insts); + + if (SwapBranchTargetsMVE) { + // Set SwapBranchTargetsMVE to true if a target prefers to replace TBB and + // FBB for optimal performance. + if (TII->reverseBranchCondition(Cond)) + llvm_unreachable("can not reverse branch condition"); + TII->insertBranch(MBB, &Otherwise, &GreaterThan, Cond, DebugLoc()); + } else { + TII->insertBranch(MBB, &GreaterThan, &Otherwise, Cond, DebugLoc()); + } +} + +/// Generate a pipelined loop that is unrolled by using MVE algorithm and any +/// other necessary blocks. The control flow is modified to execute the +/// pipelined loop if the trip count satisfies the condition, otherwise the +/// original loop. The original loop is also used to execute the remainder +/// iterations which occur due to unrolling. +void ModuloScheduleExpanderMVE::generatePipelinedLoop() { + // The control flow for pipelining with MVE: + // + // OrigPreheader: + // // The block that is originally the loop preheader + // goto Check + // + // Check: + // // Check whether the trip count satisfies the requirements to pipeline. + // if (LoopCounter > NumStages + NumUnroll - 2) + // // The minimum number of iterations to pipeline = + // // iterations executed in prolog/epilog (NumStages-1) + + // // iterations executed in one kernel run (NumUnroll) + // goto Prolog + // // fallback to the original loop + // goto NewPreheader + // + // Prolog: + // // All prolog stages. There are no direct branches to the epilogue. + // goto NewKernel + // + // NewKernel: + // // NumUnroll copies of the kernel + // if (LoopCounter > MVE-1) + // goto NewKernel + // goto Epilog + // + // Epilog: + // // All epilog stages. + // if (LoopCounter > 0) + // // The remainder is executed in the original loop + // goto NewPreheader + // goto NewExit + // + // NewPreheader: + // // Newly created preheader for the original loop. + // // The initial values of the phis in the loop are merged from two paths. + // NewInitVal = Phi OrigInitVal, Check, PipelineLastVal, Epilog + // goto OrigKernel + // + // OrigKernel: + // // The original loop block. 
+ // if (LoopCounter != 0) + // goto OrigKernel + // goto NewExit + // + // NewExit: + // // Newly created dedicated exit for the original loop. + // // Merge values which are referenced after the loop + // Merged = Phi OrigVal, OrigKernel, PipelineVal, Epilog + // goto OrigExit + // + // OrigExit: + // // The block that is originally the loop exit. + // // If it is already a dedicated exit, NewExit is not created. + + // An example of where each stage is executed: + // Assume #Stages 3, #MVE 4, #Iterations 12 + // Iter 0 1 2 3 4 5 6 7 8 9 10-11 + // ------------------------------------------------- + // Stage 0 Prolog#0 + // Stage 1 0 Prolog#1 + // Stage 2 1 0 Kernel Unroll#0 Iter#0 + // Stage 2 1 0 Kernel Unroll#1 Iter#0 + // Stage 2 1 0 Kernel Unroll#2 Iter#0 + // Stage 2 1 0 Kernel Unroll#3 Iter#0 + // Stage 2 1 0 Kernel Unroll#0 Iter#1 + // Stage 2 1 0 Kernel Unroll#1 Iter#1 + // Stage 2 1 0 Kernel Unroll#2 Iter#1 + // Stage 2 1 0 Kernel Unroll#3 Iter#1 + // Stage 2 1 Epilog#0 + // Stage 2 Epilog#1 + // Stage 0-2 OrigKernel + + LoopInfo = TII->analyzeLoopForPipelining(OrigKernel); + assert(LoopInfo && "Must be able to analyze loop!"); + + calcNumUnroll(); + + Check = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock()); + Prolog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock()); + NewKernel = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock()); + Epilog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock()); + NewPreheader = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock()); + + MF.insert(OrigKernel->getIterator(), Check); + MF.insert(OrigKernel->getIterator(), Prolog); + MF.insert(OrigKernel->getIterator(), NewKernel); + MF.insert(OrigKernel->getIterator(), Epilog); + MF.insert(OrigKernel->getIterator(), NewPreheader); + + NewExit = createDedicatedExit(OrigKernel, OrigExit); + + NewPreheader->transferSuccessorsAndUpdatePHIs(OrigPreheader); + TII->insertUnconditionalBranch(*NewPreheader, OrigKernel, DebugLoc()); + + OrigPreheader->addSuccessor(Check); + TII->removeBranch(*OrigPreheader); + TII->insertUnconditionalBranch(*OrigPreheader, Check, DebugLoc()); + + Check->addSuccessor(Prolog); + Check->addSuccessor(NewPreheader); + + Prolog->addSuccessor(NewKernel); + + NewKernel->addSuccessor(NewKernel); + NewKernel->addSuccessor(Epilog); + + Epilog->addSuccessor(NewPreheader); + Epilog->addSuccessor(NewExit); + + InstrMapTy LastStage0Insts; + insertCondBranch(*Check, Schedule.getNumStages() + NumUnroll - 2, + LastStage0Insts, *Prolog, *NewPreheader); + + // VRMaps map (prolog/kernel/epilog phase#, original register#) to new + // register# + SmallVector<ValueMapTy> PrologVRMap, KernelVRMap, EpilogVRMap; + generateProlog(PrologVRMap); + generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts); + generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts); +} + +/// Replace MI's use operands according to the maps. +void ModuloScheduleExpanderMVE::updateInstrUse( + MachineInstr *MI, int StageNum, int PhaseNum, + SmallVectorImpl<ValueMapTy> &CurVRMap, + SmallVectorImpl<ValueMapTy> *PrevVRMap) { + // If MI is in the prolog/kernel/epilog block, CurVRMap is + // PrologVRMap/KernelVRMap/EpilogVRMap respectively. + // PrevVRMap is nullptr/PhiVRMap/KernelVRMap respectively. + // Refer to the appropriate map according to the stage difference between + // MI and the definition of an operand.
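The map discipline described in the comment above is easiest to see on a worked lookup. A hedged, self-contained sketch (simplified types; lookupRenamed is an invented name): with stage(def) = 0, stage(use) = 2 and no phi in between, DiffStage is 2, so a use placed in phase 3 of a block reads the name defined in phase 1 of the same block, and only smaller phases fall back to the previous block's maps:

#include <cassert>
#include <map>
#include <vector>

using ValueMap = std::map<int, int>; // original vreg -> renamed vreg

int lookupRenamed(int OrigReg, int PhaseNum, int DiffStage,
                  const std::vector<ValueMap> &CurVRMap,
                  const std::vector<ValueMap> &PrevVRMap) {
  if (PhaseNum >= DiffStage) {
    const ValueMap &M = CurVRMap[PhaseNum - DiffStage];
    auto It = M.find(OrigReg);
    if (It != M.end())
      return It->second; // defined by an earlier phase of this block
  }
  // Otherwise the def comes from the previous block (the kernel consults
  // PhiVRMap, the epilog consults KernelVRMap); the real code additionally
  // falls back to the loop's initial value on the very first iteration.
  return PrevVRMap[PrevVRMap.size() - (DiffStage - PhaseNum)].at(OrigReg);
}

int main() {
  std::vector<ValueMap> Cur(4), Prev(1);
  Cur[1][7] = 71;                                  // phase 1 renamed %7 -> %71
  assert(lookupRenamed(7, 3, 2, Cur, Prev) == 71); // phase 3 use sees phase 1
  return 0;
}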
+ + for (MachineOperand &UseMO : MI->uses()) { + if (!UseMO.isReg() || !UseMO.getReg().isVirtual()) + continue; + int DiffStage = 0; + Register OrigReg = UseMO.getReg(); + MachineInstr *DefInst = MRI.getVRegDef(OrigReg); + if (!DefInst || DefInst->getParent() != OrigKernel) + continue; + unsigned InitReg = 0; + unsigned DefReg = OrigReg; + if (DefInst->isPHI()) { + ++DiffStage; + unsigned LoopReg; + getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg); + // LoopReg is guaranteed to be defined within the loop by canApply() + DefReg = LoopReg; + DefInst = MRI.getVRegDef(LoopReg); + } + unsigned DefStageNum = Schedule.getStage(DefInst); + DiffStage += StageNum - DefStageNum; + Register NewReg; + if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].count(DefReg)) + // NewReg is defined in a previous phase of the same block + NewReg = CurVRMap[PhaseNum - DiffStage][DefReg]; + else if (!PrevVRMap) + // Since this is the first iteration, refer to the initial register of + // the loop + NewReg = InitReg; + else + // Cases where DiffStage is larger than PhaseNum. + // If MI is in the kernel block, the value is defined by the previous + // iteration and PhiVRMap is referenced. If MI is in the epilog block, the + // value is defined in the kernel block and KernelVRMap is referenced. + NewReg = (*PrevVRMap)[PrevVRMap->size() - (DiffStage - PhaseNum)][DefReg]; + + const TargetRegisterClass *NRC = + MRI.constrainRegClass(NewReg, MRI.getRegClass(OrigReg)); + if (NRC) + UseMO.setReg(NewReg); + else { + Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg)); + BuildMI(*OrigKernel, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), + SplitReg) + .addReg(NewReg); + UseMO.setReg(SplitReg); + } + } +} + +/// Return a phi if Reg is referenced by the phi. +/// canApply() guarantees that at most one such phi exists. +static MachineInstr *getLoopPhiUser(Register Reg, MachineBasicBlock *Loop) { + for (MachineInstr &Phi : Loop->phis()) { + unsigned InitVal, LoopVal; + getPhiRegs(Phi, Loop, InitVal, LoopVal); + if (LoopVal == Reg) + return &Phi; + } + return nullptr; +} + +/// Generate phis for registers defined by OrigMI. +void ModuloScheduleExpanderMVE::generatePhi( + MachineInstr *OrigMI, int UnrollNum, + SmallVectorImpl<ValueMapTy> &PrologVRMap, + SmallVectorImpl<ValueMapTy> &KernelVRMap, + SmallVectorImpl<ValueMapTy> &PhiVRMap) { + int StageNum = Schedule.getStage(OrigMI); + bool UsePrologReg; + if (Schedule.getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum) + UsePrologReg = true; + else if (Schedule.getNumStages() - NumUnroll + UnrollNum == StageNum) + UsePrologReg = false; + else + return; + + // Examples that show which stages are merged by phi.
+ // Meaning of the symbol following the stage number: + // a/b: Stages with the same letter are merged (UsePrologReg == true) + // +: Merged with the initial value (UsePrologReg == false) + // *: No phis required + // + // #Stages 3, #MVE 4 + // Iter 0 1 2 3 4 5 6 7 8 + // ----------------------------------------- + // Stage 0a Prolog#0 + // Stage 1a 0b Prolog#1 + // Stage 2* 1* 0* Kernel Unroll#0 + // Stage 2* 1* 0+ Kernel Unroll#1 + // Stage 2* 1+ 0a Kernel Unroll#2 + // Stage 2+ 1a 0b Kernel Unroll#3 + // + // #Stages 3, #MVE 2 + // Iter 0 1 2 3 4 5 6 7 8 + // ----------------------------------------- + // Stage 0a Prolog#0 + // Stage 1a 0b Prolog#1 + // Stage 2* 1+ 0a Kernel Unroll#0 + // Stage 2+ 1a 0b Kernel Unroll#1 + // + // #Stages 3, #MVE 1 + // Iter 0 1 2 3 4 5 6 7 8 + // ----------------------------------------- + // Stage 0* Prolog#0 + // Stage 1a 0b Prolog#1 + // Stage 2+ 1a 0b Kernel Unroll#0 + for (MachineOperand &DefMO : OrigMI->defs()) { + if (!DefMO.isReg() || DefMO.isDead()) + continue; + Register OrigReg = DefMO.getReg(); + auto NewReg = KernelVRMap[UnrollNum].find(OrigReg); + if (NewReg == KernelVRMap[UnrollNum].end()) + continue; + Register CorrespondReg; + if (UsePrologReg) { + int PrologNum = Schedule.getNumStages() - NumUnroll + UnrollNum - 1; + CorrespondReg = PrologVRMap[PrologNum][OrigReg]; + } else { + MachineInstr *Phi = getLoopPhiUser(OrigReg, OrigKernel); + if (!Phi) + continue; + CorrespondReg = getInitPhiReg(*Phi, OrigKernel); + } + + assert(CorrespondReg.isValid()); + Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg)); + BuildMI(*NewKernel, NewKernel->getFirstNonPHI(), DebugLoc(), + TII->get(TargetOpcode::PHI), PhiReg) + .addReg(NewReg->second) + .addMBB(NewKernel) + .addReg(CorrespondReg) + .addMBB(Prolog); + PhiVRMap[UnrollNum][OrigReg] = PhiReg; + } +} + +static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg, + MachineBasicBlock *NewMBB) { + for (unsigned Idx = 1; Idx < Phi.getNumOperands(); Idx += 2) { + if (Phi.getOperand(Idx).getReg() == OrigReg) { + Phi.getOperand(Idx).setReg(NewReg); + Phi.getOperand(Idx + 1).setMBB(NewMBB); + return; + } + } +} + +/// Generate phis that merge values from multiple routes. +void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(Register OrigReg, + Register NewReg) { + SmallVector<MachineOperand *> UsesAfterLoop; + SmallVector<MachineInstr *> LoopPhis; + for (MachineRegisterInfo::use_iterator I = MRI.use_begin(OrigReg), + E = MRI.use_end(); + I != E; ++I) { + MachineOperand &O = *I; + if (O.getParent()->getParent() != OrigKernel && + O.getParent()->getParent() != Prolog && + O.getParent()->getParent() != NewKernel && + O.getParent()->getParent() != Epilog) + UsesAfterLoop.push_back(&O); + if (O.getParent()->getParent() == OrigKernel && O.getParent()->isPHI()) + LoopPhis.push_back(O.getParent()); + } + + // Merge the route that only executes the pipelined loop (when there are no + // remaining iterations) with the route that executes the original loop.
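The merge the next lines perform can be pictured with a toy phi. When a value may arrive either from the fallback original loop or from the pipelined epilog, every use after the loop is redirected to a block-level join. Self-contained stand-ins (ToyPhi is not an LLVM type):

#include <string>
#include <utility>
#include <vector>

struct ToyPhi {
  int Result;                                        // the merged vreg
  std::vector<std::pair<int, std::string>> Incoming; // (vreg, pred block)
};

// Mirrors the shape the code below builds in NewExit:
//   %merged = PHI %orig, %bb.OrigKernel, %new, %bb.Epilog
ToyPhi mergeRoutes(int MergedReg, int OrigReg, int NewReg) {
  return ToyPhi{MergedReg, {{OrigReg, "OrigKernel"}, {NewReg, "Epilog"}}};
}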
+ if (!UsesAfterLoop.empty()) { + Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg)); + BuildMI(*NewExit, NewExit->getFirstNonPHI(), DebugLoc(), + TII->get(TargetOpcode::PHI), PhiReg) + .addReg(OrigReg) + .addMBB(OrigKernel) + .addReg(NewReg) + .addMBB(Epilog); + + for (MachineOperand *MO : UsesAfterLoop) + MO->setReg(PhiReg); + + if (!LIS.hasInterval(PhiReg)) + LIS.createEmptyInterval(PhiReg); + } + + // Merge routes from the pipelined loop and the bypassed route before the + // original loop + if (!LoopPhis.empty()) { + for (MachineInstr *Phi : LoopPhis) { + unsigned InitReg, LoopReg; + getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg); + Register NewInit = MRI.createVirtualRegister(MRI.getRegClass(InitReg)); + BuildMI(*NewPreheader, NewPreheader->getFirstNonPHI(), Phi->getDebugLoc(), + TII->get(TargetOpcode::PHI), NewInit) + .addReg(InitReg) + .addMBB(Check) + .addReg(NewReg) + .addMBB(Epilog); + replacePhiSrc(*Phi, InitReg, NewInit, NewPreheader); + } + } +} + +void ModuloScheduleExpanderMVE::generateProlog( + SmallVectorImpl<ValueMapTy> &PrologVRMap) { + PrologVRMap.clear(); + PrologVRMap.resize(Schedule.getNumStages() - 1); + DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap; + for (int PrologNum = 0; PrologNum < Schedule.getNumStages() - 1; + ++PrologNum) { + for (MachineInstr *MI : Schedule.getInstructions()) { + if (MI->isPHI()) + continue; + int StageNum = Schedule.getStage(MI); + if (StageNum > PrologNum) + continue; + MachineInstr *NewMI = cloneInstr(MI); + updateInstrDef(NewMI, PrologVRMap[PrologNum], false); + NewMIMap[NewMI] = {PrologNum, StageNum}; + Prolog->push_back(NewMI); + } + } + + for (auto I : NewMIMap) { + MachineInstr *MI = I.first; + int PrologNum = I.second.first; + int StageNum = I.second.second; + updateInstrUse(MI, StageNum, PrologNum, PrologVRMap, nullptr); + } + + LLVM_DEBUG({ + dbgs() << "prolog:\n"; + Prolog->dump(); + }); +} + +void ModuloScheduleExpanderMVE::generateKernel( + SmallVectorImpl<ValueMapTy> &PrologVRMap, + SmallVectorImpl<ValueMapTy> &KernelVRMap, InstrMapTy &LastStage0Insts) { + KernelVRMap.clear(); + KernelVRMap.resize(NumUnroll); + SmallVector<ValueMapTy> PhiVRMap; + PhiVRMap.resize(NumUnroll); + DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap; + DenseMap<MachineInstr *, MachineInstr *> MIMapLastStage0; + for (int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) { + for (MachineInstr *MI : Schedule.getInstructions()) { + if (MI->isPHI()) + continue; + int StageNum = Schedule.getStage(MI); + MachineInstr *NewMI = cloneInstr(MI); + if (UnrollNum == NumUnroll - 1) + LastStage0Insts[MI] = NewMI; + updateInstrDef(NewMI, KernelVRMap[UnrollNum], + (UnrollNum == NumUnroll - 1 && StageNum == 0)); + generatePhi(MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap); + NewMIMap[NewMI] = {UnrollNum, StageNum}; + NewKernel->push_back(NewMI); + } + } + + for (auto I : NewMIMap) { + MachineInstr *MI = I.first; + int UnrollNum = I.second.first; + int StageNum = I.second.second; + updateInstrUse(MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap); + } + + // If remaining trip count is greater than NumUnroll-1, loop continues + insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel, + *Epilog); + + LLVM_DEBUG({ + dbgs() << "kernel:\n"; + NewKernel->dump(); + }); +} + +void ModuloScheduleExpanderMVE::generateEpilog( + SmallVectorImpl<ValueMapTy> &KernelVRMap, + SmallVectorImpl<ValueMapTy> &EpilogVRMap, InstrMapTy &LastStage0Insts) { + EpilogVRMap.clear(); + EpilogVRMap.resize(Schedule.getNumStages() - 1); + 
+void ModuloScheduleExpanderMVE::generateEpilog(
+    SmallVectorImpl<ValueMapTy> &KernelVRMap,
+    SmallVectorImpl<ValueMapTy> &EpilogVRMap, InstrMapTy &LastStage0Insts) {
+  EpilogVRMap.clear();
+  EpilogVRMap.resize(Schedule.getNumStages() - 1);
+  DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
+  for (int EpilogNum = 0; EpilogNum < Schedule.getNumStages() - 1;
+       ++EpilogNum) {
+    for (MachineInstr *MI : Schedule.getInstructions()) {
+      if (MI->isPHI())
+        continue;
+      int StageNum = Schedule.getStage(MI);
+      if (StageNum <= EpilogNum)
+        continue;
+      MachineInstr *NewMI = cloneInstr(MI);
+      updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum);
+      NewMIMap[NewMI] = {EpilogNum, StageNum};
+      Epilog->push_back(NewMI);
+    }
+  }
+
+  for (auto I : NewMIMap) {
+    MachineInstr *MI = I.first;
+    int EpilogNum = I.second.first;
+    int StageNum = I.second.second;
+    updateInstrUse(MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap);
+  }
+
+  // If there are remaining iterations, they are executed in the original loop.
+  // Instructions related to loop control, such as the loop counter comparison,
+  // are indicated by shouldIgnoreForPipelining() and are assumed to be placed
+  // in stage 0. Thus, the map refers to their copies in the last kernel
+  // unroll.
+  insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit);
+
+  LLVM_DEBUG({
+    dbgs() << "epilog:\n";
+    Epilog->dump();
+  });
+}
+
+/// Calculate the number of unrolls required and store it in NumUnroll.
+void ModuloScheduleExpanderMVE::calcNumUnroll() {
+  DenseMap<MachineInstr *, unsigned> Inst2Idx;
+  NumUnroll = 1;
+  for (unsigned I = 0; I < Schedule.getInstructions().size(); ++I)
+    Inst2Idx[Schedule.getInstructions()[I]] = I;
+
+  for (MachineInstr *MI : Schedule.getInstructions()) {
+    if (MI->isPHI())
+      continue;
+    int StageNum = Schedule.getStage(MI);
+    for (const MachineOperand &MO : MI->uses()) {
+      if (!MO.isReg() || !MO.getReg().isVirtual())
+        continue;
+      MachineInstr *DefMI = MRI.getVRegDef(MO.getReg());
+      if (DefMI->getParent() != OrigKernel)
+        continue;
+
+      int NumUnrollLocal = 1;
+      if (DefMI->isPHI()) {
+        ++NumUnrollLocal;
+        // canApply() guarantees that DefMI is not a phi and is an instruction
+        // in the loop.
+        DefMI = MRI.getVRegDef(getLoopPhiReg(*DefMI, OrigKernel));
+      }
+      NumUnrollLocal += StageNum - Schedule.getStage(DefMI);
+      if (Inst2Idx[MI] <= Inst2Idx[DefMI])
+        --NumUnrollLocal;
+      NumUnroll = std::max(NumUnroll, NumUnrollLocal);
+    }
+  }
+  LLVM_DEBUG(dbgs() << "NumUnroll: " << NumUnroll << "\n");
+}
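The rule in calcNumUnroll above amounts to: the unroll factor demanded by a use-def pair is 1, plus 1 if the value reaches the use through a loop phi, plus the def-to-use stage distance, minus 1 if the def does not precede the use in schedule order; the kernel is unrolled by the maximum over all pairs. A toy model of just that arithmetic (standalone C++; the Dep records are illustrative stand-ins for the MI/DefMI pairs the real code walks):

    #include <algorithm>
    #include <vector>

    struct Dep {
      int UseStage, DefStage; // Schedule.getStage() of the use and the def
      bool ThroughPhi;        // value crosses an iteration via a loop phi
      bool DefAtOrAfterUse;   // Inst2Idx[MI] <= Inst2Idx[DefMI] in the kernel
    };

    static int requiredUnroll(const std::vector<Dep> &Deps) {
      int NumUnroll = 1;
      for (const Dep &D : Deps) {
        int Local = 1 + (D.ThroughPhi ? 1 : 0) + (D.UseStage - D.DefStage) -
                    (D.DefAtOrAfterUse ? 1 : 0);
        NumUnroll = std::max(NumUnroll, Local);
      }
      return NumUnroll;
    }

For example, a stage-2 use of a stage-0 value arriving through a phi needs 1 + 1 + 2 = 4 kernel copies, the #MVE 4 case in the tables above.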
+/// Create new virtual registers for definitions of NewMI and update NewMI.
+/// If the definitions are referenced after the pipelined loop, phis are
+/// created to merge with other routes.
+void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI,
+                                               ValueMapTy &VRMap,
+                                               bool LastDef) {
+  for (MachineOperand &MO : NewMI->operands()) {
+    if (!MO.isReg() || !MO.getReg().isVirtual() || !MO.isDef())
+      continue;
+    Register Reg = MO.getReg();
+    const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+    Register NewReg = MRI.createVirtualRegister(RC);
+    MO.setReg(NewReg);
+    VRMap[Reg] = NewReg;
+    if (LastDef)
+      mergeRegUsesAfterPipeline(Reg, NewReg);
+  }
+}
+
+void ModuloScheduleExpanderMVE::expand() {
+  OrigKernel = Schedule.getLoop()->getTopBlock();
+  OrigPreheader = Schedule.getLoop()->getLoopPreheader();
+  OrigExit = Schedule.getLoop()->getExitBlock();
+
+  LLVM_DEBUG(Schedule.dump());
+
+  generatePipelinedLoop();
+}
+
+/// Check if ModuloScheduleExpanderMVE can be applied to L.
+bool ModuloScheduleExpanderMVE::canApply(MachineLoop &L) {
+  if (!L.getExitBlock()) {
+    LLVM_DEBUG(
+        dbgs() << "Can not apply MVE expander: No single exit block.\n";);
+    return false;
+  }
+
+  MachineBasicBlock *BB = L.getTopBlock();
+  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+
+  // Put some constraints on the operands of the phis to simplify the
+  // transformation.
+  DenseSet<unsigned> UsedByPhi;
+  for (MachineInstr &MI : BB->phis()) {
+    // Registers defined by phis must be used only inside the loop and never
+    // used by phis.
+    for (MachineOperand &MO : MI.defs())
+      if (MO.isReg())
+        for (MachineInstr &Ref : MRI.use_instructions(MO.getReg()))
+          if (Ref.getParent() != BB || Ref.isPHI()) {
+            LLVM_DEBUG(dbgs()
+                           << "Can not apply MVE expander: A phi result is "
+                              "referenced outside of the loop or by phi.\n";);
+            return false;
+          }
+
+    // A source register from the loop block must be defined inside the loop.
+    // A register defined inside the loop must be referenced by at most one
+    // phi.
+ unsigned InitVal, LoopVal; + getPhiRegs(MI, MI.getParent(), InitVal, LoopVal); + if (!Register(LoopVal).isVirtual() || + MRI.getVRegDef(LoopVal)->getParent() != BB) { + LLVM_DEBUG( + dbgs() << "Can not apply MVE expander: A phi source value coming " + "from the loop is not defined in the loop.\n";); + return false; + } + if (UsedByPhi.count(LoopVal)) { + LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the " + "loop is referenced by two or more phis.\n";); + return false; + } + UsedByPhi.insert(LoopVal); + } + + return true; +} + //===----------------------------------------------------------------------===// // ModuloScheduleTestPass implementation //===----------------------------------------------------------------------===// @@ -2122,8 +2763,8 @@ public: void runOnLoop(MachineFunction &MF, MachineLoop &L); void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<MachineLoopInfo>(); - AU.addRequired<LiveIntervals>(); + AU.addRequired<MachineLoopInfoWrapperPass>(); + AU.addRequired<LiveIntervalsWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } }; @@ -2133,13 +2774,13 @@ char ModuloScheduleTest::ID = 0; INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test", "Modulo Schedule test pass", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test", "Modulo Schedule test pass", false, false) bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) { - MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); + MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI(); for (auto *L : MLI) { if (L->getTopBlock() != L->getBottomBlock()) continue; @@ -2169,7 +2810,7 @@ static void parseSymbolString(StringRef S, int &Cycle, int &Stage) { } void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) { - LiveIntervals &LIS = getAnalysis<LiveIntervals>(); + LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS(); MachineBasicBlock *BB = L.getTopBlock(); dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n"; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp index e8391afb8e3f..26857c6a4088 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp @@ -12,8 +12,6 @@ namespace llvm { DwarfStringPoolEntryRef NonRelocatableStringpool::getEntry(StringRef S) { - if (Translator) - S = Translator(S); auto I = Strings.insert({S, DwarfStringPoolEntry()}); auto &Entry = I.first->second; if (I.second || !Entry.isIndexed()) { @@ -28,9 +26,6 @@ DwarfStringPoolEntryRef NonRelocatableStringpool::getEntry(StringRef S) { StringRef NonRelocatableStringpool::internString(StringRef S) { DwarfStringPoolEntry Entry{nullptr, 0, DwarfStringPoolEntry::NotIndexed}; - if (Translator) - S = Translator(S); - auto InsertResult = Strings.insert({S, Entry}); return InsertResult.first->getKey(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp index 18f8c001bd78..e5f40771eda8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp @@ -12,6 +12,7 @@ 
// //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/PHIElimination.h" #include "PHIEliminationUtils.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" @@ -47,14 +48,16 @@ using namespace llvm; #define DEBUG_TYPE "phi-node-elimination" static cl::opt<bool> -DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false), - cl::Hidden, cl::desc("Disable critical edge splitting " - "during PHI elimination")); + DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false), + cl::Hidden, + cl::desc("Disable critical edge splitting " + "during PHI elimination")); static cl::opt<bool> -SplitAllCriticalEdges("phi-elim-split-all-critical-edges", cl::init(false), - cl::Hidden, cl::desc("Split all critical edges during " - "PHI elimination")); + SplitAllCriticalEdges("phi-elim-split-all-critical-edges", cl::init(false), + cl::Hidden, + cl::desc("Split all critical edges during " + "PHI elimination")); static cl::opt<bool> NoPhiElimLiveOutEarlyExit( "no-phi-elim-live-out-early-exit", cl::init(false), cl::Hidden, @@ -62,92 +65,143 @@ static cl::opt<bool> NoPhiElimLiveOutEarlyExit( namespace { - class PHIElimination : public MachineFunctionPass { - MachineRegisterInfo *MRI = nullptr; // Machine register information - LiveVariables *LV = nullptr; - LiveIntervals *LIS = nullptr; - - public: - static char ID; // Pass identification, replacement for typeid - - PHIElimination() : MachineFunctionPass(ID) { - initializePHIEliminationPass(*PassRegistry::getPassRegistry()); - } - - bool runOnMachineFunction(MachineFunction &MF) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; - - private: - /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions - /// in predecessor basic blocks. - bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB); - - void LowerPHINode(MachineBasicBlock &MBB, - MachineBasicBlock::iterator LastPHIIt); +class PHIEliminationImpl { + MachineRegisterInfo *MRI = nullptr; // Machine register information + LiveVariables *LV = nullptr; + LiveIntervals *LIS = nullptr; + MachineLoopInfo *MLI = nullptr; + MachineDominatorTree *MDT = nullptr; + + /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions + /// in predecessor basic blocks. + bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB); + + void LowerPHINode(MachineBasicBlock &MBB, + MachineBasicBlock::iterator LastPHIIt, + bool AllEdgesCritical); + + /// analyzePHINodes - Gather information about the PHI nodes in + /// here. In particular, we want to map the number of uses of a virtual + /// register which is used in a PHI node. We map that to the BB the + /// vreg is coming from. This is used later to determine when the vreg + /// is killed in the BB. + void analyzePHINodes(const MachineFunction &MF); + + /// Split critical edges where necessary for good coalescer performance. + bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, + MachineLoopInfo *MLI, + std::vector<SparseBitVector<>> *LiveInSets); + + // These functions are temporary abstractions around LiveVariables and + // LiveIntervals, so they can go away when LiveVariables does. + bool isLiveIn(Register Reg, const MachineBasicBlock *MBB); + bool isLiveOutPastPHIs(Register Reg, const MachineBasicBlock *MBB); + + using BBVRegPair = std::pair<unsigned, Register>; + using VRegPHIUse = DenseMap<BBVRegPair, unsigned>; + + // Count the number of non-undef PHI uses of each register in each BB. 
+ VRegPHIUse VRegPHIUseCount; + + // Defs of PHI sources which are implicit_def. + SmallPtrSet<MachineInstr *, 4> ImpDefs; + + // Map reusable lowered PHI node -> incoming join register. + using LoweredPHIMap = + DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>; + LoweredPHIMap LoweredPHIs; + + MachineFunctionPass *P = nullptr; + MachineFunctionAnalysisManager *MFAM = nullptr; + +public: + PHIEliminationImpl(MachineFunctionPass *P) : P(P) { + auto *LVWrapper = P->getAnalysisIfAvailable<LiveVariablesWrapperPass>(); + auto *LISWrapper = P->getAnalysisIfAvailable<LiveIntervalsWrapperPass>(); + auto *MLIWrapper = P->getAnalysisIfAvailable<MachineLoopInfoWrapperPass>(); + auto *MDTWrapper = + P->getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>(); + LV = LVWrapper ? &LVWrapper->getLV() : nullptr; + LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr; + MLI = MLIWrapper ? &MLIWrapper->getLI() : nullptr; + MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr; + } - /// analyzePHINodes - Gather information about the PHI nodes in - /// here. In particular, we want to map the number of uses of a virtual - /// register which is used in a PHI node. We map that to the BB the - /// vreg is coming from. This is used later to determine when the vreg - /// is killed in the BB. - void analyzePHINodes(const MachineFunction& MF); + PHIEliminationImpl(MachineFunction &MF, MachineFunctionAnalysisManager &AM) + : LV(AM.getCachedResult<LiveVariablesAnalysis>(MF)), + LIS(AM.getCachedResult<LiveIntervalsAnalysis>(MF)), + MLI(AM.getCachedResult<MachineLoopAnalysis>(MF)), + MDT(AM.getCachedResult<MachineDominatorTreeAnalysis>(MF)), MFAM(&AM) {} - /// Split critical edges where necessary for good coalescer performance. - bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, - MachineLoopInfo *MLI, - std::vector<SparseBitVector<>> *LiveInSets); + bool run(MachineFunction &MF); +}; - // These functions are temporary abstractions around LiveVariables and - // LiveIntervals, so they can go away when LiveVariables does. - bool isLiveIn(Register Reg, const MachineBasicBlock *MBB); - bool isLiveOutPastPHIs(Register Reg, const MachineBasicBlock *MBB); +class PHIElimination : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid - using BBVRegPair = std::pair<unsigned, Register>; - using VRegPHIUse = DenseMap<BBVRegPair, unsigned>; + PHIElimination() : MachineFunctionPass(ID) { + initializePHIEliminationPass(*PassRegistry::getPassRegistry()); + } - // Count the number of non-undef PHI uses of each register in each BB. - VRegPHIUse VRegPHIUseCount; + bool runOnMachineFunction(MachineFunction &MF) override { + PHIEliminationImpl Impl(this); + return Impl.run(MF); + } - // Defs of PHI sources which are implicit_def. - SmallPtrSet<MachineInstr*, 4> ImpDefs; + MachineFunctionProperties getSetProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoPHIs); + } - // Map reusable lowered PHI node -> incoming join register. 
- using LoweredPHIMap = - DenseMap<MachineInstr*, unsigned, MachineInstrExpressionTrait>; - LoweredPHIMap LoweredPHIs; - }; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; } // end anonymous namespace +PreservedAnalyses +PHIEliminationPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + PHIEliminationImpl Impl(MF, MFAM); + bool Changed = Impl.run(MF); + if (!Changed) + return PreservedAnalyses::all(); + auto PA = getMachineFunctionPassPreservedAnalyses(); + PA.preserve<LiveIntervalsAnalysis>(); + PA.preserve<LiveVariablesAnalysis>(); + PA.preserve<SlotIndexesAnalysis>(); + PA.preserve<MachineDominatorTreeAnalysis>(); + PA.preserve<MachineLoopAnalysis>(); + return PA; +} + STATISTIC(NumLowered, "Number of phis lowered"); STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split"); STATISTIC(NumReused, "Number of reused lowered phis"); char PHIElimination::ID = 0; -char& llvm::PHIEliminationID = PHIElimination::ID; +char &llvm::PHIEliminationID = PHIElimination::ID; INITIALIZE_PASS_BEGIN(PHIElimination, DEBUG_TYPE, - "Eliminate PHI nodes for register allocation", - false, false) -INITIALIZE_PASS_DEPENDENCY(LiveVariables) + "Eliminate PHI nodes for register allocation", false, + false) +INITIALIZE_PASS_DEPENDENCY(LiveVariablesWrapperPass) INITIALIZE_PASS_END(PHIElimination, DEBUG_TYPE, "Eliminate PHI nodes for register allocation", false, false) void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addUsedIfAvailable<LiveVariables>(); - AU.addPreserved<LiveVariables>(); - AU.addPreserved<SlotIndexes>(); - AU.addPreserved<LiveIntervals>(); - AU.addPreserved<MachineDominatorTree>(); - AU.addPreserved<MachineLoopInfo>(); + AU.addUsedIfAvailable<LiveVariablesWrapperPass>(); + AU.addPreserved<LiveVariablesWrapperPass>(); + AU.addPreserved<SlotIndexesWrapperPass>(); + AU.addPreserved<LiveIntervalsWrapperPass>(); + AU.addPreserved<MachineDominatorTreeWrapperPass>(); + AU.addPreserved<MachineLoopInfoWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } -bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { +bool PHIEliminationImpl::run(MachineFunction &MF) { MRI = &MF.getRegInfo(); - LV = getAnalysisIfAvailable<LiveVariables>(); - LIS = getAnalysisIfAvailable<LiveIntervals>(); bool Changed = false; @@ -182,7 +236,6 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { } } - MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); for (auto &MBB : MF) Changed |= SplitPHIEdges(MF, MBB, MLI, (LV ? &LiveInSets : nullptr)); } @@ -191,7 +244,8 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { MRI->leaveSSA(); // Populate VRegPHIUseCount - analyzePHINodes(MF); + if (LV || LIS) + analyzePHINodes(MF); // Eliminate PHI instructions by inserting copies into predecessor blocks. for (auto &MBB : MF) @@ -215,9 +269,8 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { } // TODO: we should use the incremental DomTree updater here. - if (Changed) - if (auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>()) - MDT->getBase().recalculate(MF); + if (Changed && MDT) + MDT->getBase().recalculate(MF); LoweredPHIs.clear(); ImpDefs.clear(); @@ -230,17 +283,29 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in /// predecessor basic blocks. 
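The split above, with one PHIEliminationImpl shared by the legacy PHIElimination pass and the new-PM PHIEliminationPass, is the usual shape for porting machine passes to the new pass manager. A stripped-down sketch of the same wiring, under illustrative Demo* names that are not part of this patch:

    #include "llvm/CodeGen/MachineFunctionPass.h"
    #include "llvm/CodeGen/MachinePassManager.h"
    using namespace llvm;

    namespace {
    struct DemoImpl {
      bool run(MachineFunction &MF) { return false; } // shared logic lives here
    };

    struct DemoLegacy : MachineFunctionPass {
      static char ID;
      DemoLegacy() : MachineFunctionPass(ID) {}
      bool runOnMachineFunction(MachineFunction &MF) override {
        return DemoImpl().run(MF); // legacy entry point
      }
    };
    char DemoLegacy::ID = 0;

    // New-PM entry point, mirroring PHIEliminationPass::run above.
    struct DemoPass : PassInfoMixin<DemoPass> {
      PreservedAnalyses run(MachineFunction &MF,
                            MachineFunctionAnalysisManager &MFAM) {
        return DemoImpl().run(MF) ? PreservedAnalyses::none()
                                  : PreservedAnalyses::all();
      }
    };
    } // namespace

The real pass pulls its analyses in the Impl constructors (getAnalysisIfAvailable in the legacy path, getCachedResult in the new-PM path), so the lowering code itself never needs to know which manager is driving it.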
-bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
-                                       MachineBasicBlock &MBB) {
+bool PHIEliminationImpl::EliminatePHINodes(MachineFunction &MF,
+                                           MachineBasicBlock &MBB) {
   if (MBB.empty() || !MBB.front().isPHI())
-    return false;   // Quick exit for basic blocks without PHIs.
+    return false; // Quick exit for basic blocks without PHIs.
 
   // Get an iterator to the last PHI node.
   MachineBasicBlock::iterator LastPHIIt =
-    std::prev(MBB.SkipPHIsAndLabels(MBB.begin()));
+      std::prev(MBB.SkipPHIsAndLabels(MBB.begin()));
+
+  // If all incoming edges are critical, we try to deduplicate identical PHIs
+  // so that we generate fewer copies. If any edge is non-critical, we either
+  // have fewer than two predecessors (=> no PHIs) or a predecessor has only
+  // us as a successor (=> an identical PHI node can't occur in a different
+  // block).
+  bool AllEdgesCritical = MBB.pred_size() >= 2;
+  for (MachineBasicBlock *Pred : MBB.predecessors()) {
+    if (Pred->succ_size() < 2) {
+      AllEdgesCritical = false;
+      break;
+    }
+  }
 
   while (MBB.front().isPHI())
-    LowerPHINode(MBB, LastPHIIt);
+    LowerPHINode(MBB, LastPHIIt, AllEdgesCritical);
 
   return true;
 }
@@ -266,8 +331,9 @@ static bool allPhiOperandsUndefined(const MachineInstr &MPhi,
   return true;
 }
 /// LowerPHINode - Lower the PHI node at the top of the specified block.
-void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
-                                  MachineBasicBlock::iterator LastPHIIt) {
+void PHIEliminationImpl::LowerPHINode(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator LastPHIIt,
+                                      bool AllEdgesCritical) {
   ++NumLowered;
 
   MachineBasicBlock::iterator AfterPHIsIt = std::next(LastPHIIt);
@@ -283,7 +349,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
   // Create a new register for the incoming PHI arguments.
   MachineFunction &MF = *MBB.getParent();
   unsigned IncomingReg = 0;
-  bool reusedIncoming = false;  // Is IncomingReg reused from an earlier PHI?
+  bool EliminateNow = true;    // delay elimination of nodes in LoweredPHIs
+  bool reusedIncoming = false; // Is IncomingReg reused from an earlier PHI?
 
   // Insert a register to register copy at the top of the current block (but
   // after any remaining phi nodes) which copies the new incoming register
@@ -294,25 +361,34 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
     // If all sources of a PHI node are implicit_def or undef uses, just emit an
     // implicit_def instead of a copy.
     PHICopy = BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
-                      TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
+                      TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
   else {
     // Can we reuse an earlier PHI node? This only happens for critical edges,
-    // typically those created by tail duplication.
-    unsigned &entry = LoweredPHIs[MPhi];
-    if (entry) {
+    // typically those created by tail duplication. Otherwise, an identical PHI
+    // node can't occur, so avoid hashing/storing such PHIs, which is somewhat
+    // expensive.
+    unsigned *Entry = nullptr;
+    if (AllEdgesCritical)
+      Entry = &LoweredPHIs[MPhi];
+    if (Entry && *Entry) {
       // An identical PHI node was already lowered. Reuse the incoming register.
-      IncomingReg = entry;
+      IncomingReg = *Entry;
       reusedIncoming = true;
       ++NumReused;
       LLVM_DEBUG(dbgs() << "Reusing " << printReg(IncomingReg) << " for "
                         << *MPhi);
     } else {
       const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
-      entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
+      IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
+      if (Entry) {
+        EliminateNow = false;
+        *Entry = IncomingReg;
+      }
     }
+
     // Give the target the possibility to handle special cases; fall through
     // otherwise.
-    PHICopy = TII->createPHIDestinationCopy(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
-                                            IncomingReg, DestReg);
+    PHICopy = TII->createPHIDestinationCopy(
+        MBB, AfterPHIsIt, MPhi->getDebugLoc(), IncomingReg, DestReg);
   }
 
   if (MPhi->peekDebugInstrNum()) {
@@ -339,8 +415,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
   // by default, so it's before the OldKill. But some Target hooks for
   // createPHIDestinationCopy() may modify the default insert position of
   // PHICopy.
-  for (auto I = MBB.SkipPHIsAndLabels(MBB.begin()), E = MBB.end();
-       I != E; ++I) {
+  for (auto I = MBB.SkipPHIsAndLabels(MBB.begin()), E = MBB.end(); I != E;
+       ++I) {
     if (I == PHICopy)
       break;
@@ -392,11 +468,10 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
       LiveInterval &IncomingLI = LIS->getOrCreateEmptyInterval(IncomingReg);
       VNInfo *IncomingVNI = IncomingLI.getVNInfoAt(MBBStartIndex);
       if (!IncomingVNI)
-        IncomingVNI = IncomingLI.getNextValue(MBBStartIndex,
-                                              LIS->getVNInfoAllocator());
-      IncomingLI.addSegment(LiveInterval::Segment(MBBStartIndex,
-                                                  DestCopyIndex.getRegSlot(),
-                                                  IncomingVNI));
+        IncomingVNI =
+            IncomingLI.getNextValue(MBBStartIndex, LIS->getVNInfoAllocator());
+      IncomingLI.addSegment(LiveInterval::Segment(
+          MBBStartIndex, DestCopyIndex.getRegSlot(), IncomingVNI));
     }
 
     LiveInterval &DestLI = LIS->getInterval(DestReg);
@@ -445,34 +520,36 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
   }
 
   // Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
-  for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
-    if (!MPhi->getOperand(i).isUndef()) {
-      --VRegPHIUseCount[BBVRegPair(
-          MPhi->getOperand(i + 1).getMBB()->getNumber(),
-          MPhi->getOperand(i).getReg())];
+  if (LV || LIS) {
+    for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+      if (!MPhi->getOperand(i).isUndef()) {
+        --VRegPHIUseCount[BBVRegPair(
+            MPhi->getOperand(i + 1).getMBB()->getNumber(),
+            MPhi->getOperand(i).getReg())];
+      }
     }
   }
 
   // Now loop over all of the incoming arguments, changing them to copy into the
   // IncomingReg register in the corresponding predecessor basic block.
-  SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
+  SmallPtrSet<MachineBasicBlock *, 8> MBBsInsertedInto;
   for (int i = NumSrcs - 1; i >= 0; --i) {
     Register SrcReg = MPhi->getOperand(i * 2 + 1).getReg();
-    unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
-    bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() ||
-      isImplicitlyDefined(SrcReg, *MRI);
+    unsigned SrcSubReg = MPhi->getOperand(i * 2 + 1).getSubReg();
+    bool SrcUndef = MPhi->getOperand(i * 2 + 1).isUndef() ||
+                    isImplicitlyDefined(SrcReg, *MRI);
     assert(SrcReg.isVirtual() &&
            "Machine PHI Operands must all be virtual registers!");
 
     // Get the MachineBasicBlock equivalent of the BasicBlock that is the
    // source path of the PHI.
-    MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
+    MachineBasicBlock &opBlock = *MPhi->getOperand(i * 2 + 2).getMBB();
 
     // Check to make sure we haven't already emitted the copy for this block.
// This can happen because PHI nodes may have multiple entries for the same // basic block. if (!MBBsInsertedInto.insert(&opBlock).second) - continue; // If the copy has already been emitted, we're done. + continue; // If the copy has already been emitted, we're done. MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg); if (SrcRegDef && TII->isUnspillableTerminator(SrcRegDef)) { @@ -499,7 +576,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // Find a safe location to insert the copy, this may be the first terminator // in the block (or end()). MachineBasicBlock::iterator InsertPos = - findPHICopyInsertPoint(&opBlock, &MBB, SrcReg); + findPHICopyInsertPoint(&opBlock, &MBB, SrcReg); // Insert the copy. MachineInstr *NewSrcInstr = nullptr; @@ -508,9 +585,9 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // The source register is undefined, so there is no need for a real // COPY, but we still need to ensure joint dominance by defs. // Insert an IMPLICIT_DEF instruction. - NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), - TII->get(TargetOpcode::IMPLICIT_DEF), - IncomingReg); + NewSrcInstr = + BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), IncomingReg); // Clean up the old implicit-def, if there even was one. if (MachineInstr *DefMI = MRI->getVRegDef(SrcReg)) @@ -549,7 +626,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, MachineBasicBlock::iterator KillInst = opBlock.end(); for (MachineBasicBlock::iterator Term = InsertPos; Term != opBlock.end(); ++Term) { - if (Term->readsRegister(SrcReg)) + if (Term->readsRegister(SrcReg, /*TRI=*/nullptr)) KillInst = Term; } @@ -563,7 +640,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, --KillInst; if (KillInst->isDebugInstr()) continue; - if (KillInst->readsRegister(SrcReg)) + if (KillInst->readsRegister(SrcReg, /*TRI=*/nullptr)) break; } } else { @@ -571,7 +648,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, KillInst = NewSrcInstr; } } - assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction"); + assert(KillInst->readsRegister(SrcReg, /*TRI=*/nullptr) && + "Cannot find kill instruction"); // Finally, mark it killed. LV->addVirtualRegisterKilled(SrcReg, *KillInst); @@ -607,7 +685,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, MachineBasicBlock::iterator KillInst = opBlock.end(); for (MachineBasicBlock::iterator Term = InsertPos; Term != opBlock.end(); ++Term) { - if (Term->readsRegister(SrcReg)) + if (Term->readsRegister(SrcReg, /*TRI=*/nullptr)) KillInst = Term; } @@ -621,7 +699,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, --KillInst; if (KillInst->isDebugInstr()) continue; - if (KillInst->readsRegister(SrcReg)) + if (KillInst->readsRegister(SrcReg, /*TRI=*/nullptr)) break; } } else { @@ -629,7 +707,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, KillInst = std::prev(InsertPos); } } - assert(KillInst->readsRegister(SrcReg) && + assert(KillInst->readsRegister(SrcReg, /*TRI=*/nullptr) && "Cannot find kill instruction"); SlotIndex LastUseIndex = LIS->getInstructionIndex(*KillInst); @@ -645,7 +723,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, } // Really delete the PHI instruction now, if it is not in the LoweredPHIs map. 
- if (reusedIncoming || !IncomingReg) { + if (EliminateNow) { if (LIS) LIS->RemoveMachineInstrFromMaps(*MPhi); MF.deleteMachineInstr(MPhi); @@ -656,7 +734,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, /// particular, we want to map the number of uses of a virtual register which is /// used in a PHI node. We map that to the BB the vreg is coming from. This is /// used later to determine when the vreg is killed in the BB. -void PHIElimination::analyzePHINodes(const MachineFunction& MF) { +void PHIEliminationImpl::analyzePHINodes(const MachineFunction &MF) { for (const auto &MBB : MF) { for (const auto &BBI : MBB) { if (!BBI.isPHI()) @@ -672,12 +750,11 @@ void PHIElimination::analyzePHINodes(const MachineFunction& MF) { } } -bool PHIElimination::SplitPHIEdges(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineLoopInfo *MLI, - std::vector<SparseBitVector<>> *LiveInSets) { +bool PHIEliminationImpl::SplitPHIEdges( + MachineFunction &MF, MachineBasicBlock &MBB, MachineLoopInfo *MLI, + std::vector<SparseBitVector<>> *LiveInSets) { if (MBB.empty() || !MBB.front().isPHI() || MBB.isEHPad()) - return false; // Quick exit for basic blocks without PHIs. + return false; // Quick exit for basic blocks without PHIs. const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : nullptr; bool IsLoopHeader = CurLoop && &MBB == CurLoop->getHeader(); @@ -687,7 +764,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, BBI != BBE && BBI->isPHI(); ++BBI) { for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { Register Reg = BBI->getOperand(i).getReg(); - MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB(); + MachineBasicBlock *PreMBB = BBI->getOperand(i + 1).getMBB(); // Is there a critical edge from PreMBB to MBB? if (PreMBB->succ_size() == 1) continue; @@ -742,7 +819,8 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, } if (!ShouldSplit && !SplitAllCriticalEdges) continue; - if (!PreMBB->SplitCriticalEdge(&MBB, *this, LiveInSets)) { + if (!(P ? PreMBB->SplitCriticalEdge(&MBB, *P, LiveInSets) + : PreMBB->SplitCriticalEdge(&MBB, *MFAM, LiveInSets))) { LLVM_DEBUG(dbgs() << "Failed to split critical edge.\n"); continue; } @@ -753,7 +831,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, return Changed; } -bool PHIElimination::isLiveIn(Register Reg, const MachineBasicBlock *MBB) { +bool PHIEliminationImpl::isLiveIn(Register Reg, const MachineBasicBlock *MBB) { assert((LV || LIS) && "isLiveIn() requires either LiveVariables or LiveIntervals"); if (LIS) @@ -762,15 +840,15 @@ bool PHIElimination::isLiveIn(Register Reg, const MachineBasicBlock *MBB) { return LV->isLiveIn(Reg, *MBB); } -bool PHIElimination::isLiveOutPastPHIs(Register Reg, - const MachineBasicBlock *MBB) { +bool PHIEliminationImpl::isLiveOutPastPHIs(Register Reg, + const MachineBasicBlock *MBB) { assert((LV || LIS) && "isLiveOutPastPHIs() requires either LiveVariables or LiveIntervals"); // LiveVariables considers uses in PHIs to be in the predecessor basic block, // so that a register used only in a PHI is not live out of the block. In - // contrast, LiveIntervals considers uses in PHIs to be on the edge rather than - // in the predecessor basic block, so that a register used only in a PHI is live - // out of the block. + // contrast, LiveIntervals considers uses in PHIs to be on the edge rather + // than in the predecessor basic block, so that a register used only in a PHI + // is live out of the block. 
if (LIS) { const LiveInterval &LI = LIS->getInterval(Reg); for (const MachineBasicBlock *SI : MBB->successors()) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp deleted file mode 100644 index 43b23368ead2..000000000000 --- a/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp +++ /dev/null @@ -1,97 +0,0 @@ -//===-- ParallelCG.cpp ----------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines functions that can be used for parallel code generation. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/ParallelCG.h" -#include "llvm/Bitcode/BitcodeReader.h" -#include "llvm/Bitcode/BitcodeWriter.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/MemoryBufferRef.h" -#include "llvm/Support/ThreadPool.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Transforms/Utils/SplitModule.h" - -using namespace llvm; - -static void codegen(Module *M, llvm::raw_pwrite_stream &OS, - function_ref<std::unique_ptr<TargetMachine>()> TMFactory, - CodeGenFileType FileType) { - std::unique_ptr<TargetMachine> TM = TMFactory(); - assert(TM && "Failed to create target machine!"); - - legacy::PassManager CodeGenPasses; - if (TM->addPassesToEmitFile(CodeGenPasses, OS, nullptr, FileType)) - report_fatal_error("Failed to setup codegen"); - CodeGenPasses.run(*M); -} - -void llvm::splitCodeGen( - Module &M, ArrayRef<llvm::raw_pwrite_stream *> OSs, - ArrayRef<llvm::raw_pwrite_stream *> BCOSs, - const std::function<std::unique_ptr<TargetMachine>()> &TMFactory, - CodeGenFileType FileType, bool PreserveLocals) { - assert(BCOSs.empty() || BCOSs.size() == OSs.size()); - - if (OSs.size() == 1) { - if (!BCOSs.empty()) - WriteBitcodeToFile(M, *BCOSs[0]); - codegen(&M, *OSs[0], TMFactory, FileType); - return; - } - - // Create ThreadPool in nested scope so that threads will be joined - // on destruction. - { - ThreadPool CodegenThreadPool(hardware_concurrency(OSs.size())); - int ThreadCount = 0; - - SplitModule( - M, OSs.size(), - [&](std::unique_ptr<Module> MPart) { - // We want to clone the module in a new context to multi-thread the - // codegen. We do it by serializing partition modules to bitcode - // (while still on the main thread, in order to avoid data races) and - // spinning up new threads which deserialize the partitions into - // separate contexts. - // FIXME: Provide a more direct way to do this in LLVM. 
- SmallString<0> BC; - raw_svector_ostream BCOS(BC); - WriteBitcodeToFile(*MPart, BCOS); - - if (!BCOSs.empty()) { - BCOSs[ThreadCount]->write(BC.begin(), BC.size()); - BCOSs[ThreadCount]->flush(); - } - - llvm::raw_pwrite_stream *ThreadOS = OSs[ThreadCount++]; - // Enqueue the task - CodegenThreadPool.async( - [TMFactory, FileType, ThreadOS](const SmallString<0> &BC) { - LLVMContext Ctx; - Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile( - MemoryBufferRef(StringRef(BC.data(), BC.size()), - "<split-module>"), - Ctx); - if (!MOrErr) - report_fatal_error("Failed to read bitcode"); - std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get()); - - codegen(MPartInCtx.get(), *ThreadOS, TMFactory, FileType); - }, - // Pass BC using std::move to ensure that it get moved rather than - // copied into the thread's context. - std::move(BC)); - }, - PreserveLocals); - } -} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp index 76b3b16af16b..746ec0fa9da0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -169,11 +169,11 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<MachineLoopInfoWrapperPass>(); + AU.addPreserved<MachineLoopInfoWrapperPass>(); if (Aggressive) { - AU.addRequired<MachineDominatorTree>(); - AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineDominatorTreeWrapperPass>(); + AU.addPreserved<MachineDominatorTreeWrapperPass>(); } } @@ -487,8 +487,8 @@ char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE, "Peephole Optimizations", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE, "Peephole Optimizations", false, false) @@ -615,8 +615,7 @@ optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB, PHIBBs.insert(UI.getParent()); const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); - for (unsigned i = 0, e = Uses.size(); i != e; ++i) { - MachineOperand *UseMO = Uses[i]; + for (MachineOperand *UseMO : Uses) { MachineInstr *UseMI = UseMO->getParent(); MachineBasicBlock *UseMBB = UseMI->getParent(); if (PHIBBs.count(UseMBB)) @@ -1428,9 +1427,9 @@ bool PeepholeOptimizer::foldImmediate( continue; DenseMap<Register, MachineInstr *>::iterator II = ImmDefMIs.find(Reg); assert(II != ImmDefMIs.end() && "couldn't find immediate definition"); - if (TII->FoldImmediate(MI, *II->second, Reg, MRI)) { + if (TII->foldImmediate(MI, *II->second, Reg, MRI)) { ++NumImmFold; - // FoldImmediate can delete ImmDefMI if MI was its only user. If ImmDefMI + // foldImmediate can delete ImmDefMI if MI was its only user. If ImmDefMI // is not deleted, and we happened to get a same MI, we can delete MI and // replace its users. 
if (MRI->getVRegDef(Reg) && @@ -1577,7 +1576,7 @@ bool PeepholeOptimizer::findTargetRecurrence( return false; MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg)); - unsigned Idx = MI.findRegisterUseOperandIdx(Reg); + unsigned Idx = MI.findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr); // Only interested in recurrences whose instructions have only one def, which // is a virtual register. @@ -1670,8 +1669,9 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); - DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr; - MLI = &getAnalysis<MachineLoopInfo>(); + DT = Aggressive ? &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree() + : nullptr; + MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); MF.setDelegate(this); bool Changed = false; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp index ffd70a29f171..2f7cfdd275b4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -85,10 +85,10 @@ namespace { AU.setPreservesCFG(); AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<TargetPassConfig>(); - AU.addRequired<MachineDominatorTree>(); - AU.addPreserved<MachineDominatorTree>(); - AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<MachineDominatorTreeWrapperPass>(); + AU.addPreserved<MachineDominatorTreeWrapperPass>(); + AU.addRequired<MachineLoopInfoWrapperPass>(); + AU.addPreserved<MachineLoopInfoWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -279,7 +279,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { return false; TII = Fn.getSubtarget().getInstrInfo(); - MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); + MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI(); AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index 0777acf63318..19950f3eb67b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -230,6 +230,21 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const { break; } + case Intrinsic::memcpy_inline: { + // Only expand llvm.memcpy.inline with non-constant length in this + // codepath, leaving the current SelectionDAG expansion for constant + // length memcpy intrinsics undisturbed. 
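Some context for the guard above: llvm.memcpy.inline guarantees that no libcall is emitted, so a length that is not a ConstantInt cannot be left to the generic lowering and must become an explicit loop via expandMemCpyAsLoop. IR that reaches this new code path can be produced with the standard IRBuilder helper; a minimal sketch (the wrapper function and its argument names are illustrative):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Emit an inline memcpy whose length is only known at run time. After this
    // change, PreISelIntrinsicLowering rewrites such a call into a load/store
    // loop instead of leaving it to SelectionDAG.
    static void emitInlineCopy(IRBuilder<> &B, Value *Dst, Value *Src,
                               Value *RuntimeLen) {
      B.CreateMemCpyInline(Dst, MaybeAlign(1), Src, MaybeAlign(1), RuntimeLen);
    }

The memset_inline case added a few hunks below applies the same constant-length test before calling expandMemSetAsLoop.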
+ auto *Memcpy = cast<MemCpyInlineInst>(Inst); + if (isa<ConstantInt>(Memcpy->getLength())) + break; + + Function *ParentFunc = Memcpy->getFunction(); + const TargetTransformInfo &TTI = LookupTTI(*ParentFunc); + expandMemCpyAsLoop(Memcpy, TTI); + Changed = true; + Memcpy->eraseFromParent(); + break; + } case Intrinsic::memmove: { auto *Memmove = cast<MemMoveInst>(Inst); Function *ParentFunc = Memmove->getFunction(); @@ -263,6 +278,19 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const { break; } + case Intrinsic::memset_inline: { + // Only expand llvm.memset.inline with non-constant length in this + // codepath, leaving the current SelectionDAG expansion for constant + // length memset intrinsics undisturbed. + auto *Memset = cast<MemSetInlineInst>(Inst); + if (isa<ConstantInt>(Memset->getLength())) + break; + + expandMemSetAsLoop(Memset); + Changed = true; + Memset->eraseFromParent(); + break; + } default: llvm_unreachable("unhandled intrinsic"); } @@ -278,8 +306,10 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const { default: break; case Intrinsic::memcpy: + case Intrinsic::memcpy_inline: case Intrinsic::memmove: case Intrinsic::memset: + case Intrinsic::memset_inline: Changed |= expandMemIntrinsicUses(F); break; case Intrinsic::load_relative: diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 8af17e63e25c..3db5e17615fd 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -150,8 +150,8 @@ char &llvm::PrologEpilogCodeInserterID = PEI::ID; INITIALIZE_PASS_BEGIN(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) INITIALIZE_PASS_END(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion & Frame Finalization", false, @@ -166,8 +166,8 @@ STATISTIC(NumBytesStackSpace, void PEI::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addPreserved<MachineLoopInfo>(); - AU.addPreserved<MachineDominatorTree>(); + AU.addPreserved<MachineLoopInfoWrapperPass>(); + AU.addPreserved<MachineDominatorTreeWrapperPass>(); AU.addRequired<MachineOptimizationRemarkEmitterPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -228,9 +228,8 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) { FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF); ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); - // Calculate the MaxCallFrameSize and AdjustsStack variables for the - // function's frame information. Also eliminates call frame pseudo - // instructions. + // Calculate the MaxCallFrameSize value for the function's frame + // information. Also eliminates call frame pseudo instructions. calculateCallFrameInfo(MF); // Determine placement of CSR spill/restore code and prolog/epilog code: @@ -350,17 +349,13 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) { return true; } -/// Calculate the MaxCallFrameSize and AdjustsStack -/// variables for the function's frame information and eliminate call frame -/// pseudo instructions. +/// Calculate the MaxCallFrameSize variable for the function's frame +/// information and eliminate call frame pseudo instructions. 
 void PEI::calculateCallFrameInfo(MachineFunction &MF) {
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
   MachineFrameInfo &MFI = MF.getFrameInfo();
 
-  unsigned MaxCallFrameSize = 0;
-  bool AdjustsStack = MFI.adjustsStack();
-
   // Get the function call frame set-up and tear-down instruction opcode
   unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
   unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
@@ -370,26 +365,15 @@ void PEI::calculateCallFrameInfo(MachineFunction &MF) {
   if (FrameSetupOpcode == ~0u && FrameDestroyOpcode == ~0u)
     return;
 
+  // (Re-)Compute the MaxCallFrameSize.
+  [[maybe_unused]] uint32_t MaxCFSIn =
+      MFI.isMaxCallFrameSizeComputed() ? MFI.getMaxCallFrameSize() : UINT32_MAX;
   std::vector<MachineBasicBlock::iterator> FrameSDOps;
-  for (MachineBasicBlock &BB : MF)
-    for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I)
-      if (TII.isFrameInstr(*I)) {
-        unsigned Size = TII.getFrameSize(*I);
-        if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
-        AdjustsStack = true;
-        FrameSDOps.push_back(I);
-      } else if (I->isInlineAsm()) {
-        // Some inline asm's need a stack frame, as indicated by operand 1.
-        unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
-        if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
-          AdjustsStack = true;
-      }
-
-  assert(!MFI.isMaxCallFrameSizeComputed() ||
-         (MFI.getMaxCallFrameSize() >= MaxCallFrameSize &&
-          !(AdjustsStack && !MFI.adjustsStack())));
-  MFI.setAdjustsStack(AdjustsStack);
-  MFI.setMaxCallFrameSize(MaxCallFrameSize);
+  MFI.computeMaxCallFrameSize(MF, &FrameSDOps);
+  assert(MFI.getMaxCallFrameSize() <= MaxCFSIn &&
+         "Recomputing MaxCFS gave a larger value.");
+  assert((FrameSDOps.empty() || MF.getFrameInfo().adjustsStack()) &&
+         "AdjustsStack not set in presence of a frame pseudo instruction.");
 
   if (TFI->canSimplifyCallFramePseudos(MF)) {
     // If call frames are not being included as part of the stack frame, and
@@ -1460,7 +1444,7 @@ bool PEI::replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
     // pointer as the base register.
     if (MI.getOpcode() == TargetOpcode::STATEPOINT) {
       assert((!MI.isDebugValue() || OpIdx == 0) &&
-             "Frame indicies can only appear as the first operand of a "
+             "Frame indices can only appear as the first operand of a "
             "DBG_VALUE machine instruction");
       Register Reg;
       MachineOperand &Offset = MI.getOperand(OpIdx + 1);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp
index 0e1a2c921c5c..d102d22e87af 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp
@@ -13,6 +13,7 @@
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/PseudoSourceValueManager.h"
+#include "llvm/IR/GlobalValue.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
@@ -122,7 +123,12 @@ const PseudoSourceValue *PseudoSourceValueManager::getJumpTable() {
 
 const PseudoSourceValue *
 PseudoSourceValueManager::getFixedStack(int FI) {
-  std::unique_ptr<FixedStackPseudoSourceValue> &V = FSValues[FI];
+  // Frame indices are usually consecutive non-negative integers, but they can
+  // be negative. Use zig-zag encoding to get a dense index into the FSValues
+  // vector.
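The zig-zag encoding referred to above maps 0, -1, 1, -2, 2, ... to 0, 1, 2, 3, 4, ..., so the occasional negative frame index of a fixed object still lands densely in the vector. A self-contained check of the exact expression used below, assuming 32-bit int frame indices:

    #include <cassert>
    #include <cstdint>

    static uint32_t zigZag(int32_t FI) {
      // Double the value, then fold the sign (an arithmetic shift yields
      // all-ones for negative FI) into the low bit.
      return (2u * uint32_t(FI)) ^ uint32_t(FI >> 31);
    }

    int main() {
      assert(zigZag(0) == 0 && zigZag(-1) == 1 && zigZag(1) == 2);
      assert(zigZag(-2) == 3 && zigZag(2) == 4);
      return 0;
    }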
+ unsigned Idx = (2 * unsigned(FI)) ^ (FI >> (sizeof(FI) * 8 - 1)); + if (FSValues.size() <= Idx) + FSValues.resize(Idx + 1); + std::unique_ptr<FixedStackPseudoSourceValue> &V = FSValues[Idx]; if (!V) V = std::make_unique<FixedStackPseudoSourceValue>(FI, TM); return V.get(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp index 6b2e69da76f2..ff0fd61078c0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp @@ -264,7 +264,7 @@ raw_ostream &operator<<(raw_ostream &OS, const Print<Block> &P) { MachineBasicBlock *BB = P.Obj.Addr->getCode(); unsigned NP = BB->pred_size(); std::vector<int> Ns; - auto PrintBBs = [&OS](std::vector<int> Ns) -> void { + auto PrintBBs = [&OS](const std::vector<int> &Ns) -> void { unsigned N = Ns.size(); for (int I : Ns) { OS << "%bb." << I; @@ -870,7 +870,7 @@ void DataFlowGraph::build(const Config &config) { std::set<RegisterId> BaseSet; if (BuildCfg.Classes.empty()) { // Insert every register. - for (unsigned R = 0, E = getPRI().getTRI().getNumRegs(); R != E; ++R) + for (unsigned R = 1, E = getPRI().getTRI().getNumRegs(); R != E; ++R) BaseSet.insert(R); } else { for (const TargetRegisterClass *RC : BuildCfg.Classes) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index 61a668907be7..07fa92889d88 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SetOperations.h" -#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/ReachingDefAnalysis.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" @@ -421,9 +421,9 @@ void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, return; VisitedBBs.insert(MBB); - LivePhysRegs LiveRegs(*TRI); + LiveRegUnits LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); - if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg)) + if (LiveRegs.available(PhysReg)) return; if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg)) @@ -469,11 +469,11 @@ MachineInstr *ReachingDefAnalysis::getMIOperand(MachineInstr *MI, bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, MCRegister PhysReg) const { MachineBasicBlock *MBB = MI->getParent(); - LivePhysRegs LiveRegs(*TRI); + LiveRegUnits LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); // Yes if the register is live out of the basic block. 
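The ReachingDefAnalysis hunks here and below all follow one rewrite: LivePhysRegs becomes LiveRegUnits, whose available() needs no MachineRegisterInfo argument because reserved registers are modeled per register unit. The recurring query, extracted as a sketch (same LLVM APIs as used above; the helper name is illustrative):

    #include "llvm/CodeGen/LiveRegUnits.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/TargetRegisterInfo.h"
    using namespace llvm;

    // True if no register unit aliasing PhysReg is live out of MBB -- the test
    // performed above before walking the block backwards with stepBackward().
    static bool isFreeAtBlockEnd(const MachineBasicBlock &MBB,
                                 MCRegister PhysReg,
                                 const TargetRegisterInfo &TRI) {
      LiveRegUnits Units(TRI);
      Units.addLiveOuts(MBB);
      return Units.available(PhysReg);
    }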
- if (!LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg)) + if (!LiveRegs.available(PhysReg)) return true; // Walk backwards through the block to see if the register is live at some @@ -481,7 +481,7 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, for (MachineInstr &Last : instructionsWithoutDebug(MBB->instr_rbegin(), MBB->instr_rend())) { LiveRegs.stepBackward(Last); - if (!LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg)) + if (!LiveRegs.available(PhysReg)) return InstIds.lookup(&Last) > InstIds.lookup(MI); } return false; @@ -504,9 +504,9 @@ bool ReachingDefAnalysis::isRegDefinedAfter(MachineInstr *MI, bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, MCRegister PhysReg) const { MachineBasicBlock *MBB = MI->getParent(); - LivePhysRegs LiveRegs(*TRI); + LiveRegUnits LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); - if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg)) + if (LiveRegs.available(PhysReg)) return false; auto Last = MBB->getLastNonDebugInstr(); @@ -525,9 +525,9 @@ bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, MachineInstr * ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB, MCRegister PhysReg) const { - LivePhysRegs LiveRegs(*TRI); + LiveRegUnits LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); - if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg)) + if (LiveRegs.available(PhysReg)) return nullptr; auto Last = MBB->getLastNonDebugInstr(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp index 900f0e9079d6..60deb62bc908 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/Spiller.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -61,7 +62,7 @@ void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis, VRM = &vrm; LIS = &lis; Matrix = &mat; - MRI->freezeReservedRegs(vrm.getMachineFunction()); + MRI->freezeReservedRegs(); RegClassInfo.runOnMachineFunction(vrm.getMachineFunction()); } @@ -115,11 +116,8 @@ void RegAllocBase::allocatePhysRegs() { // selectOrSplit failed to find a register! // Probably caused by an inline asm. 
       MachineInstr *MI = nullptr;
-      for (MachineRegisterInfo::reg_instr_iterator
-           I = MRI->reg_instr_begin(VirtReg->reg()),
-           E = MRI->reg_instr_end();
-           I != E;) {
-        MI = &*(I++);
+      for (MachineInstr &MIR : MRI->reg_instructions(VirtReg->reg())) {
+        MI = &MIR;
         if (MI->isInlineAsm())
           break;
       }
@@ -132,7 +130,7 @@ void RegAllocBase::allocatePhysRegs() {
         MI->emitError("inline assembly requires more registers than available");
       } else if (MI) {
         LLVMContext &Context =
-            MI->getParent()->getParent()->getMMI().getModule()->getContext();
+            MI->getParent()->getParent()->getFunction().getContext();
         Context.emitError("ran out of registers during register allocation");
       } else {
         report_fatal_error("ran out of registers during register allocation");
@@ -181,8 +179,7 @@ void RegAllocBase::enqueue(const LiveInterval *LI) {
   if (VRM->hasPhys(Reg))
     return;
-  const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
-  if (ShouldAllocateClass(*TRI, RC)) {
+  if (shouldAllocateRegister(Reg)) {
     LLVM_DEBUG(dbgs() << "Enqueuing " << printReg(Reg, TRI) << '\n');
     enqueueImpl(LI);
   } else {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h
index a8bf305a50c9..a1ede08a1535 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h
@@ -37,6 +37,7 @@
 #define LLVM_LIB_CODEGEN_REGALLOCBASE_H
 
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegAllocCommon.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
@@ -68,22 +69,33 @@ protected:
   LiveIntervals *LIS = nullptr;
   LiveRegMatrix *Matrix = nullptr;
   RegisterClassInfo RegClassInfo;
-  const RegClassFilterFunc ShouldAllocateClass;
 
+private:
+  /// Private, callers should go through shouldAllocateRegister.
+  const RegAllocFilterFunc shouldAllocateRegisterImpl;
+
+protected:
   /// Inst which is a def of an original reg and whose defs are already all
   /// dead after remat is saved in DeadRemats. The deletion of such inst is
   /// postponed till all the allocations are done, so its remat expr is
   /// always available for the remat of all the siblings of the original reg.
   SmallPtrSet<MachineInstr *, 32> DeadRemats;
 
-  RegAllocBase(const RegClassFilterFunc F = allocateAllRegClasses) :
-    ShouldAllocateClass(F) {}
+  RegAllocBase(const RegAllocFilterFunc F = nullptr)
+      : shouldAllocateRegisterImpl(F) {}
 
   virtual ~RegAllocBase() = default;
 
   // A RegAlloc pass should call this before allocatePhysRegs.
   void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat);
 
+  /// Get whether a given register should be allocated.
+  bool shouldAllocateRegister(Register Reg) {
+    if (!shouldAllocateRegisterImpl)
+      return true;
+    return shouldAllocateRegisterImpl(*TRI, *MRI, Reg);
+  }
+
   // The top-level driver. The output is a VirtRegMap that is updated with
   // physical register assignments.
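The RegClassFilterFunc to RegAllocFilterFunc migration running through these allocator files changes the filter's granularity from a register class to an individual virtual register (see shouldAllocateRegister above). A sketch of a filter a target could now supply, where the "GPR" class-name prefix is a made-up criterion for illustration:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    #include "llvm/CodeGen/TargetRegisterInfo.h"
    using namespace llvm;

    // Allocate only vregs whose register class name starts with "GPR"; other
    // vregs stay unassigned for a later allocator run -- the split-allocation
    // setup this hook exists for.
    static bool onlyAllocateGPRs(const TargetRegisterInfo &TRI,
                                 const MachineRegisterInfo &MRI, Register Reg) {
      StringRef ClassName(TRI.getRegClassName(MRI.getRegClass(Reg)));
      return ClassName.starts_with("GPR");
    }

Such a predicate would be passed to, e.g., createBasicRegisterAllocator(onlyAllocateGPRs), whose signature is updated to RegAllocFilterFunc in the RegAllocBasic.cpp hunks below.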
void allocatePhysRegs(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp index 666199139630..caf9c32a5a34 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -12,10 +12,10 @@ //===----------------------------------------------------------------------===// #include "AllocationOrder.h" -#include "LiveDebugVariables.h" #include "RegAllocBase.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/LiveDebugVariables.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveRegMatrix.h" @@ -74,7 +74,7 @@ class RABasic : public MachineFunctionPass, void LRE_WillShrinkVirtReg(Register) override; public: - RABasic(const RegClassFilterFunc F = allocateAllRegClasses); + RABasic(const RegAllocFilterFunc F = nullptr); /// Return the pass name. StringRef getPassName() const override { return "Basic Register Allocator"; } @@ -130,14 +130,14 @@ char &llvm::RABasicID = RABasic::ID; INITIALIZE_PASS_BEGIN(RABasic, "regallocbasic", "Basic Register Allocator", false, false) INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) INITIALIZE_PASS_DEPENDENCY(MachineScheduler) INITIALIZE_PASS_DEPENDENCY(LiveStacks) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(VirtRegMap) INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) INITIALIZE_PASS_END(RABasic, "regallocbasic", "Basic Register Allocator", false, @@ -168,28 +168,26 @@ void RABasic::LRE_WillShrinkVirtReg(Register VirtReg) { enqueue(&LI); } -RABasic::RABasic(RegClassFilterFunc F): - MachineFunctionPass(ID), - RegAllocBase(F) { -} +RABasic::RABasic(RegAllocFilterFunc F) + : MachineFunctionPass(ID), RegAllocBase(F) {} void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<AAResultsWrapperPass>(); AU.addPreserved<AAResultsWrapperPass>(); - AU.addRequired<LiveIntervals>(); - AU.addPreserved<LiveIntervals>(); - AU.addPreserved<SlotIndexes>(); + AU.addRequired<LiveIntervalsWrapperPass>(); + AU.addPreserved<LiveIntervalsWrapperPass>(); + AU.addPreserved<SlotIndexesWrapperPass>(); AU.addRequired<LiveDebugVariables>(); AU.addPreserved<LiveDebugVariables>(); AU.addRequired<LiveStacks>(); AU.addPreserved<LiveStacks>(); - AU.addRequired<MachineBlockFrequencyInfo>(); - AU.addPreserved<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); + AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>(); AU.addRequiredID(MachineDominatorsID); AU.addPreservedID(MachineDominatorsID); - AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<MachineLoopInfoWrapperPass>(); + AU.addPreserved<MachineLoopInfoWrapperPass>(); AU.addRequired<VirtRegMap>(); AU.addPreserved<VirtRegMap>(); AU.addRequired<LiveRegMatrix>(); @@ -226,19 +224,17 @@ bool RABasic::spillInterferences(const LiveInterval &VirtReg, assert(!Intfs.empty() && "expected interference"); // Spill each 
interfering vreg allocated to PhysReg or an alias. - for (unsigned i = 0, e = Intfs.size(); i != e; ++i) { - const LiveInterval &Spill = *Intfs[i]; - + for (const LiveInterval *Spill : Intfs) { // Skip duplicates. - if (!VRM->hasPhys(Spill.reg())) + if (!VRM->hasPhys(Spill->reg())) continue; // Deallocate the interfering vreg by removing it from the union. // A LiveInterval instance may not be in a union during modification! - Matrix->unassign(Spill); + Matrix->unassign(*Spill); // Spill the extracted interval. - LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, this, &DeadRemats); + LiveRangeEdit LRE(Spill, SplitVRegs, *MF, *LIS, VRM, this, &DeadRemats); spiller().spill(LRE); } return true; @@ -312,10 +308,11 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { MF = &mf; RegAllocBase::init(getAnalysis<VirtRegMap>(), - getAnalysis<LiveIntervals>(), + getAnalysis<LiveIntervalsWrapperPass>().getLIS(), getAnalysis<LiveRegMatrix>()); - VirtRegAuxInfo VRAI(*MF, *LIS, *VRM, getAnalysis<MachineLoopInfo>(), - getAnalysis<MachineBlockFrequencyInfo>()); + VirtRegAuxInfo VRAI( + *MF, *LIS, *VRM, getAnalysis<MachineLoopInfoWrapperPass>().getLI(), + getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()); VRAI.calculateSpillWeightsAndHints(); SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, VRAI)); @@ -334,6 +331,6 @@ FunctionPass* llvm::createBasicRegisterAllocator() { return new RABasic(); } -FunctionPass* llvm::createBasicRegisterAllocator(RegClassFilterFunc F) { +FunctionPass *llvm::createBasicRegisterAllocator(RegAllocFilterFunc F) { return new RABasic(F); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp index 47ad9c168b92..a1dccc4d5972 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp index e81d47930136..6e5ce72240d2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/RegAllocFast.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" @@ -174,14 +175,12 @@ private: DenseMap<const MachineInstr *, uint64_t> Instr2PosIndex; }; -class RegAllocFast : public MachineFunctionPass { +class RegAllocFastImpl { public: - static char ID; - - RegAllocFast(const RegClassFilterFunc F = allocateAllRegClasses, - bool ClearVirtRegs_ = true) - : MachineFunctionPass(ID), ShouldAllocateClass(F), - StackSlotForVirtReg(-1), ClearVirtRegs(ClearVirtRegs_) {} + RegAllocFastImpl(const RegAllocFilterFunc F = nullptr, + bool ClearVirtRegs_ = true) + : ShouldAllocateRegisterImpl(F), StackSlotForVirtReg(-1), + ClearVirtRegs(ClearVirtRegs_) {} private: MachineFrameInfo *MFI = nullptr; @@ -189,7 +188,7 @@ private: const TargetRegisterInfo *TRI = nullptr; const TargetInstrInfo *TII = nullptr; RegisterClassInfo RegClassInfo; - const RegClassFilterFunc ShouldAllocateClass; + const 
RegAllocFilterFunc ShouldAllocateRegisterImpl; /// Basic block currently being allocated. MachineBasicBlock *MBB = nullptr; @@ -197,8 +196,6 @@ private: /// Maps virtual regs to the frame index where these values are spilled. IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg; - bool ClearVirtRegs; - /// Everything we know about a live virtual register. struct LiveReg { MachineInstr *LastUse = nullptr; ///< Last instr to use reg. @@ -256,12 +253,23 @@ private: SmallVector<MachineInstr *, 32> Coalesced; - using RegUnitSet = SparseSet<uint16_t, identity<uint16_t>>; - /// Set of register units that are used in the current instruction, and so + /// Track register units that are used in the current instruction, and so /// cannot be allocated. - RegUnitSet UsedInInstr; - RegUnitSet PhysRegUses; - SmallVector<uint16_t, 8> DefOperandIndexes; + /// + /// In the first phase (tied defs/early clobber), we also consider physical + /// uses; afterwards, we don't. If the lowest bit isn't set, it's solely a + /// physical use (markPhysRegUsedInInstr); otherwise, it's a normal use. To + /// avoid resetting the entire vector after every instruction, we track the + /// instruction "generation" in the remaining 31 bits -- this means that if + /// UsedInInstr[Idx] < InstrGen, the register unit is unused. InstrGen is + /// never zero and always incremented by two. + /// + /// Don't allocate inline storage: the number of register units is typically + /// quite large (e.g., AArch64 > 100, X86 > 200, AMDGPU > 1000). + uint32_t InstrGen; + SmallVector<unsigned, 0> UsedInInstr; + + SmallVector<unsigned, 8> DefOperandIndexes; // Register masks attached to the current instruction. SmallVector<const uint32_t *> RegMasks; @@ -274,7 +282,7 @@ private: /// Mark a physreg as used in this instruction. void markRegUsedInInstr(MCPhysReg PhysReg) { for (MCRegUnit Unit : TRI->regunits(PhysReg)) - UsedInInstr.insert(Unit); + UsedInInstr[Unit] = InstrGen | 1; } // Check if physreg is clobbered by instruction's regmask(s). @@ -288,26 +296,25 @@ private: bool isRegUsedInInstr(MCPhysReg PhysReg, bool LookAtPhysRegUses) const { if (LookAtPhysRegUses && isClobberedByRegMasks(PhysReg)) return true; - for (MCRegUnit Unit : TRI->regunits(PhysReg)) { - if (UsedInInstr.count(Unit)) - return true; - if (LookAtPhysRegUses && PhysRegUses.count(Unit)) + for (MCRegUnit Unit : TRI->regunits(PhysReg)) + if (UsedInInstr[Unit] >= (InstrGen | !LookAtPhysRegUses)) return true; - } return false; } /// Mark physical register as being used in a register use operand. /// This is only used by the special livethrough handling code. void markPhysRegUsedInInstr(MCPhysReg PhysReg) { - for (MCRegUnit Unit : TRI->regunits(PhysReg)) - PhysRegUses.insert(Unit); + for (MCRegUnit Unit : TRI->regunits(PhysReg)) { + assert(UsedInInstr[Unit] <= InstrGen && "non-phys use before phys use?"); + UsedInInstr[Unit] = InstrGen; + } } /// Remove mark of physical register being used in the instruction.
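// [Editor's note -- illustrative sketch, not part of this commit. The
// UsedInInstr/InstrGen comment above describes a generation-marked set:
// bumping the generation invalidates every previous mark at once, so the
// vector never has to be cleared per instruction. A minimal standalone
// C++ model of the same trick, with invented names:]
#include <cstdint>
#include <vector>

struct GenMarkedRegUnitSet {
  uint32_t InstrGen = 0;        // always even; never zero once running
  std::vector<uint32_t> Marks;  // one entry per register unit

  void startFunction(unsigned NumRegUnits) {
    InstrGen = 0;
    Marks.assign(NumRegUnits, 0);
  }
  void nextInstruction() {
    InstrGen += 2;              // stales every mark from earlier instructions
    if (InstrGen == 0) {        // wrapped after ~2^31 instructions
      Marks.assign(Marks.size(), 0);
      InstrGen = 2;
    }
  }
  void markUsed(unsigned Unit) { Marks[Unit] = InstrGen | 1; }  // normal use
  void markPhysUsed(unsigned Unit) { Marks[Unit] = InstrGen; }  // phys-only use
  bool isUsed(unsigned Unit, bool LookAtPhysRegUses) const {
    // Threshold is InstrGen when phys-only uses count, InstrGen | 1 when
    // they don't; marks from older generations fall below either threshold.
    return Marks[Unit] >= (InstrGen | !LookAtPhysRegUses);
  }
};
// [End of editor's note.]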
void unmarkRegUsedInInstr(MCPhysReg PhysReg) { for (MCRegUnit Unit : TRI->regunits(PhysReg)) - UsedInInstr.erase(Unit); + UsedInInstr[Unit] = 0; } enum : unsigned { @@ -318,38 +325,14 @@ private: }; public: - StringRef getPassName() const override { return "Fast Register Allocator"; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoPHIs); - } - - MachineFunctionProperties getSetProperties() const override { - if (ClearVirtRegs) { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoVRegs); - } - - return MachineFunctionProperties(); - } + bool ClearVirtRegs; - MachineFunctionProperties getClearedProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::IsSSA); - } + bool runOnMachineFunction(MachineFunction &MF); private: - bool runOnMachineFunction(MachineFunction &MF) override; - void allocateBasicBlock(MachineBasicBlock &MBB); - void addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts, + void addRegClassDefCounts(MutableArrayRef<unsigned> RegClassDefCounts, Register Reg) const; void findAndSortDefOperandIndexes(const MachineInstr &MI); @@ -408,6 +391,46 @@ private: void dumpState() const; }; +class RegAllocFast : public MachineFunctionPass { + RegAllocFastImpl Impl; + +public: + static char ID; + + RegAllocFast(const RegAllocFilterFunc F = nullptr, bool ClearVirtRegs_ = true) + : MachineFunctionPass(ID), Impl(F, ClearVirtRegs_) {} + + bool runOnMachineFunction(MachineFunction &MF) override { + return Impl.runOnMachineFunction(MF); + } + + StringRef getPassName() const override { return "Fast Register Allocator"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoPHIs); + } + + MachineFunctionProperties getSetProperties() const override { + if (Impl.ClearVirtRegs) { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + return MachineFunctionProperties(); + } + + MachineFunctionProperties getClearedProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::IsSSA); + } +}; + } // end anonymous namespace char RegAllocFast::ID = 0; @@ -415,18 +438,20 @@ char RegAllocFast::ID = 0; INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false, false) -bool RegAllocFast::shouldAllocateRegister(const Register Reg) const { +bool RegAllocFastImpl::shouldAllocateRegister(const Register Reg) const { assert(Reg.isVirtual()); - const TargetRegisterClass &RC = *MRI->getRegClass(Reg); - return ShouldAllocateClass(*TRI, RC); + if (!ShouldAllocateRegisterImpl) + return true; + + return ShouldAllocateRegisterImpl(*TRI, *MRI, Reg); } -void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) { +void RegAllocFastImpl::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) { for (MCRegUnit Unit : TRI->regunits(PhysReg)) RegUnitStates[Unit] = NewState; } -bool RegAllocFast::isPhysRegFree(MCPhysReg PhysReg) const { +bool RegAllocFastImpl::isPhysRegFree(MCPhysReg PhysReg) const { for (MCRegUnit Unit : 
TRI->regunits(PhysReg)) { if (RegUnitStates[Unit] != regFree) return false; @@ -436,7 +461,7 @@ bool RegAllocFast::isPhysRegFree(MCPhysReg PhysReg) const { /// This allocates space for the specified virtual register to be held on the /// stack. -int RegAllocFast::getStackSpaceFor(Register VirtReg) { +int RegAllocFastImpl::getStackSpaceFor(Register VirtReg) { // Find the location Reg would belong... int SS = StackSlotForVirtReg[VirtReg]; // Already has space allocated? @@ -464,7 +489,7 @@ static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, } /// Returns false if \p VirtReg is known to not live out of the current block. -bool RegAllocFast::mayLiveOut(Register VirtReg) { +bool RegAllocFastImpl::mayLiveOut(Register VirtReg) { if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) { // Cannot be live-out if there are no successors. return !MBB->succ_empty(); @@ -517,7 +542,7 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) { } /// Returns false if \p VirtReg is known to not be live into the current block. -bool RegAllocFast::mayLiveIn(Register VirtReg) { +bool RegAllocFastImpl::mayLiveIn(Register VirtReg) { if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) return !MBB->pred_empty(); @@ -536,8 +561,9 @@ bool RegAllocFast::mayLiveIn(Register VirtReg) { /// Insert spill instruction for \p AssignedReg before \p Before. Update /// DBG_VALUEs with \p VirtReg operands with the stack slot. -void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg, - MCPhysReg AssignedReg, bool Kill, bool LiveOut) { +void RegAllocFastImpl::spill(MachineBasicBlock::iterator Before, + Register VirtReg, MCPhysReg AssignedReg, bool Kill, + bool LiveOut) { LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " in " << printReg(AssignedReg, TRI)); int FI = getStackSpaceFor(VirtReg); @@ -596,8 +622,8 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg, } /// Insert reload instruction for \p PhysReg before \p Before. -void RegAllocFast::reload(MachineBasicBlock::iterator Before, Register VirtReg, - MCPhysReg PhysReg) { +void RegAllocFastImpl::reload(MachineBasicBlock::iterator Before, + Register VirtReg, MCPhysReg PhysReg) { LLVM_DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into " << printReg(PhysReg, TRI) << '\n'); int FI = getStackSpaceFor(VirtReg); @@ -610,7 +636,7 @@ void RegAllocFast::reload(MachineBasicBlock::iterator Before, Register VirtReg, /// This is not just MBB.begin() because surprisingly we have EH_LABEL /// instructions marking the begin of a basic block. This means we must insert /// new instructions after such labels... -MachineBasicBlock::iterator RegAllocFast::getMBBBeginInsertionPoint( +MachineBasicBlock::iterator RegAllocFastImpl::getMBBBeginInsertionPoint( MachineBasicBlock &MBB, SmallSet<Register, 2> &PrologLiveIns) const { MachineBasicBlock::iterator I = MBB.begin(); while (I != MBB.end()) { @@ -637,7 +663,7 @@ MachineBasicBlock::iterator RegAllocFast::getMBBBeginInsertionPoint( } /// Reload all currently assigned virtual registers. -void RegAllocFast::reloadAtBegin(MachineBasicBlock &MBB) { +void RegAllocFastImpl::reloadAtBegin(MachineBasicBlock &MBB) { if (LiveVirtRegs.empty()) return; @@ -680,7 +706,7 @@ void RegAllocFast::reloadAtBegin(MachineBasicBlock &MBB) { /// Handle the direct use of a physical register. Check that the register is /// not used by a virtreg. Kill the physreg, marking it free. This may add /// implicit kills to MO->getParent() and invalidate MO. 
-bool RegAllocFast::usePhysReg(MachineInstr &MI, MCPhysReg Reg) { +bool RegAllocFastImpl::usePhysReg(MachineInstr &MI, MCPhysReg Reg) { assert(Register::isPhysicalRegister(Reg) && "expected physreg"); bool displacedAny = displacePhysReg(MI, Reg); setPhysRegState(Reg, regPreAssigned); @@ -688,7 +714,7 @@ bool RegAllocFast::usePhysReg(MachineInstr &MI, MCPhysReg Reg) { return displacedAny; } -bool RegAllocFast::definePhysReg(MachineInstr &MI, MCPhysReg Reg) { +bool RegAllocFastImpl::definePhysReg(MachineInstr &MI, MCPhysReg Reg) { bool displacedAny = displacePhysReg(MI, Reg); setPhysRegState(Reg, regPreAssigned); return displacedAny; @@ -697,7 +723,7 @@ bool RegAllocFast::definePhysReg(MachineInstr &MI, MCPhysReg Reg) { /// Mark PhysReg as reserved or free after spilling any virtregs. This is very /// similar to defineVirtReg except the physreg is reserved instead of /// allocated. -bool RegAllocFast::displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg) { +bool RegAllocFastImpl::displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg) { bool displacedAny = false; for (MCRegUnit Unit : TRI->regunits(PhysReg)) { @@ -726,7 +752,7 @@ bool RegAllocFast::displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg) { return displacedAny; } -void RegAllocFast::freePhysReg(MCPhysReg PhysReg) { +void RegAllocFastImpl::freePhysReg(MCPhysReg PhysReg) { LLVM_DEBUG(dbgs() << "Freeing " << printReg(PhysReg, TRI) << ':'); MCRegister FirstUnit = *TRI->regunits(PhysReg).begin(); @@ -753,7 +779,7 @@ void RegAllocFast::freePhysReg(MCPhysReg PhysReg) { /// for allocation. Returns 0 when PhysReg is free or disabled with all aliases /// disabled - it can be allocated directly. /// \returns spillImpossible when PhysReg or an alias can't be spilled. -unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { +unsigned RegAllocFastImpl::calcSpillCost(MCPhysReg PhysReg) const { for (MCRegUnit Unit : TRI->regunits(PhysReg)) { switch (unsigned VirtReg = RegUnitStates[Unit]) { case regFree: @@ -772,8 +798,9 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { return 0; } -void RegAllocFast::assignDanglingDebugValues(MachineInstr &Definition, - Register VirtReg, MCPhysReg Reg) { +void RegAllocFastImpl::assignDanglingDebugValues(MachineInstr &Definition, + Register VirtReg, + MCPhysReg Reg) { auto UDBGValIter = DanglingDbgValues.find(VirtReg); if (UDBGValIter == DanglingDbgValues.end()) return; @@ -809,8 +836,8 @@ void RegAllocFast::assignDanglingDebugValues(MachineInstr &Definition, /// This method updates local state so that we know that PhysReg is the /// proper container for VirtReg now. The physical register must not be used /// for anything else when this is called. -void RegAllocFast::assignVirtToPhysReg(MachineInstr &AtMI, LiveReg &LR, - MCPhysReg PhysReg) { +void RegAllocFastImpl::assignVirtToPhysReg(MachineInstr &AtMI, LiveReg &LR, + MCPhysReg PhysReg) { Register VirtReg = LR.VirtReg; LLVM_DEBUG(dbgs() << "Assigning " << printReg(VirtReg, TRI) << " to " << printReg(PhysReg, TRI) << '\n'); @@ -824,7 +851,7 @@ void RegAllocFast::assignVirtToPhysReg(MachineInstr &AtMI, LiveReg &LR, static bool isCoalescable(const MachineInstr &MI) { return MI.isFullCopy(); } -Register RegAllocFast::traceCopyChain(Register Reg) const { +Register RegAllocFastImpl::traceCopyChain(Register Reg) const { static const unsigned ChainLengthLimit = 3; unsigned C = 0; do { @@ -843,7 +870,7 @@ Register RegAllocFast::traceCopyChain(Register Reg) const { /// Check if any of \p VirtReg's definitions is a copy. 
If it is, follow the /// chain of copies to check whether we reach a physical register we can /// coalesce with. -Register RegAllocFast::traceCopies(Register VirtReg) const { +Register RegAllocFastImpl::traceCopies(Register VirtReg) const { static const unsigned DefLimit = 3; unsigned C = 0; for (const MachineInstr &MI : MRI->def_instructions(VirtReg)) { @@ -861,8 +888,8 @@ } /// Allocates a physical register for VirtReg. -void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0, - bool LookAtPhysRegUses) { +void RegAllocFastImpl::allocVirtReg(MachineInstr &MI, LiveReg &LR, + Register Hint0, bool LookAtPhysRegUses) { const Register VirtReg = LR.VirtReg; assert(LR.PhysReg == 0); @@ -950,7 +977,7 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0, assignVirtToPhysReg(MI, LR, BestReg); } -void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) { +void RegAllocFastImpl::allocVirtRegUndef(MachineOperand &MO) { assert(MO.isUndef() && "expected undef use"); Register VirtReg = MO.getReg(); assert(VirtReg.isVirtual() && "Expected virtreg"); @@ -980,8 +1007,9 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) { /// Variation of defineVirtReg() with special handling for livethrough regs /// (tied or earlyclobber) that may interfere with preassigned uses. /// \return true if MI's MachineOperands were re-arranged/invalidated. -bool RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum, - Register VirtReg) { +bool RegAllocFastImpl::defineLiveThroughVirtReg(MachineInstr &MI, + unsigned OpNum, + Register VirtReg) { if (!shouldAllocateRegister(VirtReg)) return false; LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); @@ -1016,8 +1044,8 @@ /// - The value is live out and all uses are in different basic blocks. /// /// \return true if MI's MachineOperands were re-arranged/invalidated. -bool RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, - Register VirtReg, bool LookAtPhysRegUses) { +bool RegAllocFastImpl::defineVirtReg(MachineInstr &MI, unsigned OpNum, + Register VirtReg, bool LookAtPhysRegUses) { assert(VirtReg.isVirtual() && "Not a virtual register"); if (!shouldAllocateRegister(VirtReg)) return false; @@ -1094,8 +1122,8 @@ /// Allocates a register for a VirtReg use. /// \return true if MI's MachineOperands were re-arranged/invalidated. -bool RegAllocFast::useVirtReg(MachineInstr &MI, MachineOperand &MO, - Register VirtReg) { +bool RegAllocFastImpl::useVirtReg(MachineInstr &MI, MachineOperand &MO, + Register VirtReg) { assert(VirtReg.isVirtual() && "Not a virtual register"); if (!shouldAllocateRegister(VirtReg)) return false; @@ -1150,8 +1178,8 @@ /// Changes operand OpNum in MI to refer to PhysReg, considering subregs. /// \return true if MI's MachineOperands were re-arranged/invalidated.
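// [Editor's note -- illustrative sketch, not part of this commit. The
// traceCopies/traceCopyChain pair above examines at most DefLimit defining
// instructions and, from each, follows at most ChainLengthLimit full
// copies, looking for a physical register to use as an allocation hint.
// A standalone model of the bounded chain walk, with a toy map standing in
// for MRI->getVRegDef() and MachineInstr::isFullCopy():]
#include <cstdint>
#include <unordered_map>

using ToyReg = uint32_t;  // toy register id; 0 = no register
constexpr ToyReg NoReg = 0;
inline bool isPhysical(ToyReg R) { return R != NoReg && R < 100; } // toy encoding

// Maps a register to the source of its defining full copy, if any.
using CopyDefMap = std::unordered_map<ToyReg, ToyReg>;

ToyReg traceCopyChain(const CopyDefMap &CopyDefs, ToyReg R) {
  static const unsigned ChainLengthLimit = 3;  // same bound as above
  for (unsigned C = 0; C <= ChainLengthLimit; ++C) {
    if (isPhysical(R))
      return R;                    // usable as a hint
    auto It = CopyDefs.find(R);
    if (It == CopyDefs.end())
      return NoReg;                // defining instruction is not a full copy
    R = It->second;                // follow the copy source
  }
  return NoReg;                    // chain too long; give up
}
// [End of editor's note.]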
-bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO, - MCPhysReg PhysReg) { +bool RegAllocFastImpl::setPhysReg(MachineInstr &MI, MachineOperand &MO, + MCPhysReg PhysReg) { if (!MO.getSubReg()) { MO.setReg(PhysReg); MO.setIsRenamable(true); @@ -1190,7 +1218,7 @@ bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO, #ifndef NDEBUG -void RegAllocFast::dumpState() const { +void RegAllocFastImpl::dumpState() const { for (unsigned Unit = 1, UnitE = TRI->getNumRegUnits(); Unit != UnitE; ++Unit) { switch (unsigned VirtReg = RegUnitStates[Unit]) { @@ -1235,8 +1263,8 @@ void RegAllocFast::dumpState() const { #endif /// Count number of defs consumed from each register class by \p Reg -void RegAllocFast::addRegClassDefCounts( - std::vector<unsigned> &RegClassDefCounts, Register Reg) const { +void RegAllocFastImpl::addRegClassDefCounts( + MutableArrayRef<unsigned> RegClassDefCounts, Register Reg) const { assert(RegClassDefCounts.size() == TRI->getNumRegClasses()); if (Reg.isVirtual()) { @@ -1269,13 +1297,9 @@ void RegAllocFast::addRegClassDefCounts( /// Compute \ref DefOperandIndexes so it contains the indices of "def" operands /// that are to be allocated. Those are ordered in a way that small classes, /// early clobbers and livethroughs are allocated first. -void RegAllocFast::findAndSortDefOperandIndexes(const MachineInstr &MI) { +void RegAllocFastImpl::findAndSortDefOperandIndexes(const MachineInstr &MI) { DefOperandIndexes.clear(); - // Track number of defs which may consume a register from the class. - std::vector<unsigned> RegClassDefCounts(TRI->getNumRegClasses(), 0); - assert(RegClassDefCounts[0] == 0); - LLVM_DEBUG(dbgs() << "Need to assign livethroughs\n"); for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) { const MachineOperand &MO = MI.getOperand(I); @@ -1289,15 +1313,27 @@ void RegAllocFast::findAndSortDefOperandIndexes(const MachineInstr &MI) { } } - if (MO.isDef()) { - if (Reg.isVirtual() && shouldAllocateRegister(Reg)) - DefOperandIndexes.push_back(I); - - addRegClassDefCounts(RegClassDefCounts, Reg); - } + if (MO.isDef() && Reg.isVirtual() && shouldAllocateRegister(Reg)) + DefOperandIndexes.push_back(I); } - llvm::sort(DefOperandIndexes, [&](uint16_t I0, uint16_t I1) { + // Most instructions only have one virtual def, so there's no point in + // computing the possible number of defs for every register class. + if (DefOperandIndexes.size() <= 1) + return; + + // Track number of defs which may consume a register from the class. This is + // used to assign registers for possibly-too-small classes first. Example: + // defs are eax, 3 * gr32_abcd, 2 * gr32 => we want to assign the gr32_abcd + // registers first so that the gr32 don't use the gr32_abcd registers before + // we assign these. + SmallVector<unsigned> RegClassDefCounts(TRI->getNumRegClasses(), 0); + + for (const MachineOperand &MO : MI.operands()) + if (MO.isReg() && MO.isDef()) + addRegClassDefCounts(RegClassDefCounts, MO.getReg()); + + llvm::sort(DefOperandIndexes, [&](unsigned I0, unsigned I1) { const MachineOperand &MO0 = MI.getOperand(I0); const MachineOperand &MO1 = MI.getOperand(I1); Register Reg0 = MO0.getReg(); @@ -1343,7 +1379,7 @@ static bool isTiedToNotUndef(const MachineOperand &MO) { return !TiedMO.isUndef(); } -void RegAllocFast::allocateInstruction(MachineInstr &MI) { +void RegAllocFastImpl::allocateInstruction(MachineInstr &MI) { // The basic algorithm here is: // 1. Mark registers of def operands as free // 2. 
Allocate registers to use operands and place reload instructions for @@ -1356,7 +1392,12 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { // - The "free def operands" step has to come last instead of first for tied // operands and early-clobbers. - UsedInInstr.clear(); + InstrGen += 2; + // In the event we ever get more than 2**31 instructions... + if (LLVM_UNLIKELY(InstrGen == 0)) { + UsedInInstr.assign(UsedInInstr.size(), 0); + InstrGen = 2; + } RegMasks.clear(); BundleVirtRegsMap.clear(); @@ -1417,12 +1458,10 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { // heuristic to figure out a good operand order before doing // assignments. if (NeedToAssignLiveThroughs) { - PhysRegUses.clear(); - while (ReArrangedImplicitOps) { ReArrangedImplicitOps = false; findAndSortDefOperandIndexes(MI); - for (uint16_t OpIdx : DefOperandIndexes) { + for (unsigned OpIdx : DefOperandIndexes) { MachineOperand &MO = MI.getOperand(OpIdx); LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n'); Register Reg = MO.getReg(); @@ -1605,7 +1644,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { } } -void RegAllocFast::handleDebugValue(MachineInstr &MI) { +void RegAllocFastImpl::handleDebugValue(MachineInstr &MI) { // Ignore DBG_VALUEs that aren't based on virtual registers. These are // mostly constants and frame indices. assert(MI.isDebugValue() && "not a DBG_VALUE*"); @@ -1648,7 +1687,7 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) { } } -void RegAllocFast::handleBundle(MachineInstr &MI) { +void RegAllocFastImpl::handleBundle(MachineInstr &MI) { MachineBasicBlock::instr_iterator BundledMI = MI.getIterator(); ++BundledMI; while (BundledMI->isBundledWithPred()) { @@ -1671,7 +1710,7 @@ void RegAllocFast::handleBundle(MachineInstr &MI) { } } -void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { +void RegAllocFastImpl::allocateBasicBlock(MachineBasicBlock &MBB) { this->MBB = &MBB; LLVM_DEBUG(dbgs() << "\nAllocating " << MBB); @@ -1732,7 +1771,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { LLVM_DEBUG(MBB.dump()); } -bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) { +bool RegAllocFastImpl::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n" << "********** Function: " << MF.getName() << '\n'); MRI = &MF.getRegInfo(); @@ -1740,13 +1779,11 @@ bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) { TRI = STI.getRegisterInfo(); TII = STI.getInstrInfo(); MFI = &MF.getFrameInfo(); - MRI->freezeReservedRegs(MF); + MRI->freezeReservedRegs(); RegClassInfo.runOnMachineFunction(MF); unsigned NumRegUnits = TRI->getNumRegUnits(); - UsedInInstr.clear(); - UsedInInstr.setUniverse(NumRegUnits); - PhysRegUses.clear(); - PhysRegUses.setUniverse(NumRegUnits); + InstrGen = 0; + UsedInInstr.assign(NumRegUnits, 0); // initialize the virtual->physical register map to have a 'null' // mapping for all virtual registers @@ -1771,9 +1808,40 @@ bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) { return true; } +PreservedAnalyses RegAllocFastPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &) { + MFPropsModifier _(*this, MF); + RegAllocFastImpl Impl(Opts.Filter, Opts.ClearVRegs); + bool Changed = Impl.runOnMachineFunction(MF); + if (!Changed) + return PreservedAnalyses::all(); + auto PA = getMachineFunctionPassPreservedAnalyses(); + PA.preserveSet<CFGAnalyses>(); + return PA; +} + +void RegAllocFastPass::printPipeline( + raw_ostream &OS, 
function_ref<StringRef(StringRef)> MapClassName2PassName) { + bool PrintFilterName = Opts.FilterName != "all"; + bool PrintNoClearVRegs = !Opts.ClearVRegs; + bool PrintSemicolon = PrintFilterName && PrintNoClearVRegs; + + OS << "regallocfast"; + if (PrintFilterName || PrintNoClearVRegs) { + OS << '<'; + if (PrintFilterName) + OS << "filter=" << Opts.FilterName; + if (PrintSemicolon) + OS << ';'; + if (PrintNoClearVRegs) + OS << "no-clear-vregs"; + OS << '>'; + } +} + FunctionPass *llvm::createFastRegisterAllocator() { return new RegAllocFast(); } -FunctionPass *llvm::createFastRegisterAllocator(RegClassFilterFunc Ftor, +FunctionPass *llvm::createFastRegisterAllocator(RegAllocFilterFunc Ftor, bool ClearVirtRegs) { return new RegAllocFast(Ftor, ClearVirtRegs); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp index a208bf89fadf..5001b4fec58f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -14,7 +14,6 @@ #include "RegAllocGreedy.h" #include "AllocationOrder.h" #include "InterferenceCache.h" -#include "LiveDebugVariables.h" #include "RegAllocBase.h" #include "RegAllocEvictionAdvisor.h" #include "RegAllocPriorityAdvisor.h" @@ -31,6 +30,7 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/EdgeBundles.h" +#include "llvm/CodeGen/LiveDebugVariables.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalUnion.h" #include "llvm/CodeGen/LiveIntervals.h" @@ -155,13 +155,13 @@ char &llvm::RAGreedyID = RAGreedy::ID; INITIALIZE_PASS_BEGIN(RAGreedy, "greedy", "Greedy Register Allocator", false, false) INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) INITIALIZE_PASS_DEPENDENCY(MachineScheduler) INITIALIZE_PASS_DEPENDENCY(LiveStacks) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(VirtRegMap) INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) INITIALIZE_PASS_DEPENDENCY(EdgeBundles) @@ -192,31 +192,29 @@ FunctionPass* llvm::createGreedyRegisterAllocator() { return new RAGreedy(); } -FunctionPass *llvm::createGreedyRegisterAllocator(RegClassFilterFunc Ftor) { +FunctionPass *llvm::createGreedyRegisterAllocator(RegAllocFilterFunc Ftor) { return new RAGreedy(Ftor); } -RAGreedy::RAGreedy(RegClassFilterFunc F): - MachineFunctionPass(ID), - RegAllocBase(F) { -} +RAGreedy::RAGreedy(RegAllocFilterFunc F) + : MachineFunctionPass(ID), RegAllocBase(F) {} void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequired<MachineBlockFrequencyInfo>(); - AU.addPreserved<MachineBlockFrequencyInfo>(); - AU.addRequired<LiveIntervals>(); - AU.addPreserved<LiveIntervals>(); - AU.addRequired<SlotIndexes>(); - AU.addPreserved<SlotIndexes>(); + AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); + AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>(); + AU.addRequired<LiveIntervalsWrapperPass>(); + AU.addPreserved<LiveIntervalsWrapperPass>(); + AU.addRequired<SlotIndexesWrapperPass>(); + AU.addPreserved<SlotIndexesWrapperPass>(); 
AU.addRequired<LiveDebugVariables>(); AU.addPreserved<LiveDebugVariables>(); AU.addRequired<LiveStacks>(); AU.addPreserved<LiveStacks>(); - AU.addRequired<MachineDominatorTree>(); - AU.addPreserved<MachineDominatorTree>(); - AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<MachineDominatorTreeWrapperPass>(); + AU.addPreserved<MachineDominatorTreeWrapperPass>(); + AU.addRequired<MachineLoopInfoWrapperPass>(); + AU.addPreserved<MachineLoopInfoWrapperPass>(); AU.addRequired<VirtRegMap>(); AU.addPreserved<VirtRegMap>(); AU.addRequired<LiveRegMatrix>(); @@ -1664,8 +1662,8 @@ unsigned RAGreedy::tryLocalSplit(const LiveInterval &VirtReg, // Remove any gaps with regmask clobbers. if (Matrix->checkRegMaskInterference(VirtReg, PhysReg)) - for (unsigned I = 0, E = RegMaskGaps.size(); I != E; ++I) - GapWeight[RegMaskGaps[I]] = huge_valf; + for (unsigned Gap : RegMaskGaps) + GapWeight[Gap] = huge_valf; // Try to find the best sequence of gaps to close. // The new spill weight must be larger than any gap interference. @@ -2306,9 +2304,9 @@ void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) { if (Reg.isPhysical()) continue; - // This may be a skipped class + // This may be a skipped register. if (!VRM->hasPhys(Reg)) { - assert(!ShouldAllocateClass(*TRI, *MRI->getRegClass(Reg)) && + assert(!shouldAllocateRegister(Reg) && "We have an unallocated variable which should have been handled"); continue; } @@ -2698,7 +2696,7 @@ bool RAGreedy::hasVirtRegAlloc() { const TargetRegisterClass *RC = MRI->getRegClass(Reg); if (!RC) continue; - if (ShouldAllocateClass(*TRI, *RC)) + if (shouldAllocateRegister(Reg)) return true; } @@ -2716,7 +2714,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { MF->verify(this, "Before greedy register allocator"); RegAllocBase::init(getAnalysis<VirtRegMap>(), - getAnalysis<LiveIntervals>(), + getAnalysis<LiveIntervalsWrapperPass>().getLIS(), getAnalysis<LiveRegMatrix>()); // Early return if there is no virtual register to be allocated to a @@ -2724,14 +2722,14 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { if (!hasVirtRegAlloc()) return false; - Indexes = &getAnalysis<SlotIndexes>(); + Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI(); // Renumber to get accurate and consistent results from // SlotIndexes::getApproxInstrDistance. Indexes->packIndexes(); - MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); - DomTree = &getAnalysis<MachineDominatorTree>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(); + DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); - Loops = &getAnalysis<MachineLoopInfo>(); + Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); Bundles = &getAnalysis<EdgeBundles>(); SpillPlacer = &getAnalysis<SpillPlacement>(); DebugVars = &getAnalysis<LiveDebugVariables>(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h index 1941643bba9e..2e7608a53e9c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h @@ -281,7 +281,7 @@ private: bool ReverseLocalAssignment = false; public: - RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses); + RAGreedy(const RegAllocFilterFunc F = nullptr); /// Return the pass name. 
StringRef getPassName() const override { return "Greedy Register Allocator"; } @@ -425,7 +425,7 @@ private: ZeroCostFoldedReloads || Copies); } - void add(RAGreedyStats other) { + void add(const RAGreedyStats &other) { Reloads += other.Reloads; FoldedReloads += other.FoldedReloads; ZeroCostFoldedReloads += other.ZeroCostFoldedReloads; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp index b8ee5dc0f849..e6f28d6af29f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -120,8 +120,8 @@ public: /// Construct a PBQP register allocator. RegAllocPBQP(char *cPassID = nullptr) : MachineFunctionPass(ID), customPassID(cPassID) { - initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); - initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); + initializeSlotIndexesWrapperPassPass(*PassRegistry::getPassRegistry()); + initializeLiveIntervalsWrapperPassPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); } @@ -544,21 +544,21 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { au.setPreservesCFG(); au.addRequired<AAResultsWrapperPass>(); au.addPreserved<AAResultsWrapperPass>(); - au.addRequired<SlotIndexes>(); - au.addPreserved<SlotIndexes>(); - au.addRequired<LiveIntervals>(); - au.addPreserved<LiveIntervals>(); + au.addRequired<SlotIndexesWrapperPass>(); + au.addPreserved<SlotIndexesWrapperPass>(); + au.addRequired<LiveIntervalsWrapperPass>(); + au.addPreserved<LiveIntervalsWrapperPass>(); //au.addRequiredID(SplitCriticalEdgesID); if (customPassID) au.addRequiredID(*customPassID); au.addRequired<LiveStacks>(); au.addPreserved<LiveStacks>(); - au.addRequired<MachineBlockFrequencyInfo>(); - au.addPreserved<MachineBlockFrequencyInfo>(); - au.addRequired<MachineLoopInfo>(); - au.addPreserved<MachineLoopInfo>(); - au.addRequired<MachineDominatorTree>(); - au.addPreserved<MachineDominatorTree>(); + au.addRequired<MachineBlockFrequencyInfoWrapperPass>(); + au.addPreserved<MachineBlockFrequencyInfoWrapperPass>(); + au.addRequired<MachineLoopInfoWrapperPass>(); + au.addPreserved<MachineLoopInfoWrapperPass>(); + au.addRequired<MachineDominatorTreeWrapperPass>(); + au.addPreserved<MachineDominatorTreeWrapperPass>(); au.addRequired<VirtRegMap>(); au.addPreserved<VirtRegMap>(); MachineFunctionPass::getAnalysisUsage(au); @@ -791,25 +791,26 @@ void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) { } bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { - LiveIntervals &LIS = getAnalysis<LiveIntervals>(); + LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS(); MachineBlockFrequencyInfo &MBFI = - getAnalysis<MachineBlockFrequencyInfo>(); + getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(); VirtRegMap &VRM = getAnalysis<VirtRegMap>(); - PBQPVirtRegAuxInfo VRAI(MF, LIS, VRM, getAnalysis<MachineLoopInfo>(), MBFI); + PBQPVirtRegAuxInfo VRAI( + MF, LIS, VRM, getAnalysis<MachineLoopInfoWrapperPass>().getLI(), MBFI); VRAI.calculateSpillWeightsAndHints(); // FIXME: we create DefaultVRAI here to match existing behavior pre-passing // the VRAI through the spiller to the live range editor. However, it probably // makes more sense to pass the PBQP VRAI. The existing behavior had // LiveRangeEdit make its own VirtRegAuxInfo object. 
- VirtRegAuxInfo DefaultVRAI(MF, LIS, VRM, getAnalysis<MachineLoopInfo>(), - MBFI); + VirtRegAuxInfo DefaultVRAI( + MF, LIS, VRM, getAnalysis<MachineLoopInfoWrapperPass>().getLI(), MBFI); std::unique_ptr<Spiller> VRegSpiller( createInlineSpiller(*this, MF, VRM, DefaultVRAI)); - MF.getRegInfo().freezeReservedRegs(MF); + MF.getRegInfo().freezeReservedRegs(); LLVM_DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp index e031019a4c91..0650aaff56ea 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp @@ -51,13 +51,13 @@ public: private: void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<SlotIndexes>(); + AU.addRequired<SlotIndexesWrapperPass>(); RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU); } std::unique_ptr<RegAllocPriorityAdvisor> getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { return std::make_unique<DefaultPriorityAdvisor>( - MF, RA, &getAnalysis<SlotIndexes>()); + MF, RA, &getAnalysis<SlotIndexesWrapperPass>().getSI()); } bool doInitialization(Module &M) override { if (NotAsRequested) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp index 6657cf3c1ef4..ca5e0b428c47 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp @@ -142,6 +142,9 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { RegMask[Reg / 32] &= ~(1u << Reg % 32); }; + // Don't include $noreg in any regmasks. + SetRegAsDefined(MCRegister::NoRegister); + // Some targets can clobber registers "inside" a call, typically in // linker-generated code. for (const MCPhysReg Reg : TRI->getIntraCallClobberedRegs(&MF)) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp index 5548430d1b0a..72b07eb1902d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp @@ -484,9 +484,10 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { // the storage. However, right now we don't necessarily bump all // the types to storage size. For instance, we can consider // s16 G_AND legal whereas the storage size is going to be 32. - assert(OrigTy.getSizeInBits() <= NewTy.getSizeInBits() && - "Types with difference size cannot be handled by the default " - "mapping"); + assert( + TypeSize::isKnownLE(OrigTy.getSizeInBits(), NewTy.getSizeInBits()) && + "Types with difference size cannot be handled by the default " + "mapping"); LLVM_DEBUG(dbgs() << "\nChange type of new opd from " << NewTy << " to " << OrigTy); MRI.setType(NewReg, OrigTy); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp index 17a9f55cccc0..9312bc03bc52 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp @@ -80,10 +80,10 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { LastCalleeSavedRegs.clear(); // Build a CSRAlias map. Every CSR alias saves the last // overlapping CSR. 
- CalleeSavedAliases.assign(TRI->getNumRegs(), 0); + CalleeSavedAliases.assign(TRI->getNumRegUnits(), 0); for (const MCPhysReg *I = CSR; *I; ++I) { - for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) - CalleeSavedAliases[*AI] = *I; + for (MCRegUnit U : TRI->regunits(*I)) + CalleeSavedAliases[U] = *I; LastCalleeSavedRegs.push_back(*I); } @@ -96,8 +96,7 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { for (const MCPhysReg *I = CSR; *I; ++I) for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) CSRHintsForAllocOrder[*AI] = STI.ignoreCSRForAllocationOrder(mf, *AI); - if (IgnoreCSRForAllocOrder.size() != CSRHintsForAllocOrder.size() || - IgnoreCSRForAllocOrder != CSRHintsForAllocOrder) { + if (IgnoreCSRForAllocOrder != CSRHintsForAllocOrder) { Update = true; IgnoreCSRForAllocOrder = CSRHintsForAllocOrder; } @@ -106,7 +105,7 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { // Different reserved registers? const BitVector &RR = MF->getRegInfo().getReservedRegs(); - if (Reserved.size() != RR.size() || RR != Reserved) { + if (RR != Reserved) { Update = true; Reserved = RR; } @@ -150,7 +149,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { uint8_t Cost = RegCosts[PhysReg]; MinCost = std::min(MinCost, Cost); - if (CalleeSavedAliases[PhysReg] && + if (getLastCalleeSavedAlias(PhysReg) && !STI.ignoreCSRForAllocationOrder(*MF, PhysReg)) // PhysReg aliases a CSR, save it for later. CSRAlias.push_back(PhysReg); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp index 7e9c992031f8..1c35a88b4dc4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -406,9 +406,9 @@ char &llvm::RegisterCoalescerID = RegisterCoalescer::ID; INITIALIZE_PASS_BEGIN(RegisterCoalescer, "register-coalescer", "Register Coalescer", false, false) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(RegisterCoalescer, "register-coalescer", "Register Coalescer", false, false) @@ -588,11 +588,11 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<AAResultsWrapperPass>(); - AU.addRequired<LiveIntervals>(); - AU.addPreserved<LiveIntervals>(); - AU.addPreserved<SlotIndexes>(); - AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<LiveIntervalsWrapperPass>(); + AU.addPreserved<LiveIntervalsWrapperPass>(); + AU.addPreserved<SlotIndexesWrapperPass>(); + AU.addRequired<MachineLoopInfoWrapperPass>(); + AU.addPreserved<MachineLoopInfoWrapperPass>(); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -723,7 +723,8 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, // If the source instruction was killing the source register before the // merge, unset the isKill marker given the live range has been extended. 
- int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg(), true); + int UIdx = + ValSEndInst->findRegisterUseOperandIdx(IntB.reg(), /*TRI=*/nullptr, true); if (UIdx != -1) { ValSEndInst->getOperand(UIdx).setIsKill(false); } @@ -848,7 +849,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, return { false, false }; // If DefMI is a two-address instruction then commuting it will change the // destination register. - int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg()); + int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg(), /*TRI=*/nullptr); assert(DefIdx != -1); unsigned UseOpIdx; if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) @@ -1338,14 +1339,13 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, if (SrcIdx && DstIdx) return false; - [[maybe_unused]] const unsigned DefSubIdx = DefMI->getOperand(0).getSubReg(); + const unsigned DefSubIdx = DefMI->getOperand(0).getSubReg(); const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF); if (!DefMI->isImplicitDef()) { if (DstReg.isPhysical()) { Register NewDstReg = DstReg; - unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), - DefMI->getOperand(0).getSubReg()); + unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), DefSubIdx); if (NewDstIdx) NewDstReg = TRI->getSubReg(DstReg, NewDstIdx); @@ -1855,8 +1855,8 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI)); // Replace SrcReg with DstReg in all UseMI operands. - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - MachineOperand &MO = UseMI->getOperand(Ops[i]); + for (unsigned Op : Ops) { + MachineOperand &MO = UseMI->getOperand(Op); // Adjust <undef> flags in case of sub-register joins. We don't want to // turn a full def into a read-modify-write sub-register def and vice @@ -4136,9 +4136,9 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { void RegisterCoalescer::coalesceLocals() { copyCoalesceWorkList(LocalWorkList); - for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) { - if (LocalWorkList[j]) - WorkList.push_back(LocalWorkList[j]); + for (MachineInstr *MI : LocalWorkList) { + if (MI) + WorkList.push_back(MI); } LocalWorkList.clear(); } @@ -4206,9 +4206,9 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { const TargetSubtargetInfo &STI = fn.getSubtarget(); TRI = STI.getRegisterInfo(); TII = STI.getInstrInfo(); - LIS = &getAnalysis<LiveIntervals>(); + LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); - Loops = &getAnalysis<MachineLoopInfo>(); + Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); if (EnableGlobalCopies == cl::BOU_UNSET) JoinGlobalCopies = STI.enableJoinGlobalCopies(); else @@ -4248,8 +4248,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 -> // DPR inflation. 
array_pod_sort(InflateRegs.begin(), InflateRegs.end()); - InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()), - InflateRegs.end()); + InflateRegs.erase(llvm::unique(InflateRegs), InflateRegs.end()); LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n"); for (Register Reg : InflateRegs) { @@ -4299,5 +4298,5 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { } void RegisterCoalescer::print(raw_ostream &O, const Module* m) const { - LIS->print(O, m); + LIS->print(O); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp index f86aa3a16720..59a1911555e9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp @@ -64,7 +64,7 @@ static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure, static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure, const MachineRegisterInfo &MRI, Register Reg, LaneBitmask PrevMask, LaneBitmask NewMask) { - //assert((NewMask & !PrevMask) == 0 && "Must not add bits"); + assert((NewMask & ~PrevMask).none() && "Must not add bits"); if (NewMask.any() || PrevMask.none()) return; @@ -617,17 +617,11 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, ++I; } } - for (auto *I = Uses.begin(); I != Uses.end();) { - LaneBitmask LiveBefore = getLiveLanesAt(LIS, MRI, true, I->RegUnit, - Pos.getBaseIndex()); - LaneBitmask LaneMask = I->LaneMask & LiveBefore; - if (LaneMask.none()) { - I = Uses.erase(I); - } else { - I->LaneMask = LaneMask; - ++I; - } - } + + // For uses just copy the information from LIS. + for (auto &[RegUnit, LaneMask] : Uses) + LaneMask = getLiveLanesAt(LIS, MRI, true, RegUnit, Pos.getBaseIndex()); + if (AddFlagsMI != nullptr) { for (const RegisterMaskPair &P : DeadDefs) { Register RegUnit = P.RegUnit; @@ -879,7 +873,7 @@ void RegPressureTracker::recede(SmallVectorImpl<RegisterMaskPair> *LiveUses) { const MachineInstr &MI = *CurrPos; RegisterOperands RegOpers; - RegOpers.collect(MI, *TRI, *MRI, TrackLaneMasks, false); + RegOpers.collect(MI, *TRI, *MRI, TrackLaneMasks, /*IgnoreDead=*/false); if (TrackLaneMasks) { SlotIndex SlotIdx = LIS->getInstructionIndex(*CurrPos).getRegSlot(); RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx); @@ -1047,7 +1041,7 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { // Account for register pressure similar to RegPressureTracker::recede(). RegisterOperands RegOpers; RegOpers.collect(*MI, *TRI, *MRI, TrackLaneMasks, /*IgnoreDead=*/true); - assert(RegOpers.DeadDefs.size() == 0); + assert(RegOpers.DeadDefs.empty()); if (TrackLaneMasks) RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx); else if (RequireIntervals) @@ -1060,18 +1054,27 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { // Kill liveness at live defs. for (const RegisterMaskPair &P : RegOpers.Defs) { Register Reg = P.RegUnit; - LaneBitmask LiveLanes = LiveRegs.contains(Reg); + LaneBitmask LiveAfter = LiveRegs.contains(Reg); LaneBitmask UseLanes = getRegLanes(RegOpers.Uses, Reg); LaneBitmask DefLanes = P.LaneMask; - LaneBitmask LiveAfter = (LiveLanes & ~DefLanes) | UseLanes; - decreaseRegPressure(Reg, LiveLanes, LiveAfter); + LaneBitmask LiveBefore = (LiveAfter & ~DefLanes) | UseLanes; + + // There may be parts of the register that were dead before the + // instruction, but became live afterwards. Similarly, some parts + // may have been killed in this instruction. 
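// [Editor's note -- worked example, not part of this commit. The rewritten
// bumpUpwardPressure above derives the lanes live *before* the instruction
// from the lanes live *after* it, then decreases pressure on lanes live on
// both sides and increases it on lanes live only before. Plain bitmasks
// stand in for llvm::LaneBitmask; the mask values are invented:]
#include <cassert>
#include <cstdint>

int main() {
  uint32_t LiveAfter = 0b1100;  // lanes 2,3 live after the instruction
  uint32_t DefLanes  = 0b0110;  // the instruction defines lanes 1,2
  uint32_t UseLanes  = 0b0011;  // the instruction reads lanes 0,1

  // Lanes live before = lanes that survive the defs, plus lanes read here.
  uint32_t LiveBefore = (LiveAfter & ~DefLanes) | UseLanes;
  assert(LiveBefore == 0b1011);

  // decreaseRegPressure(Reg, LiveAfter, LiveAfter & LiveBefore):
  // lane 2 is defined here, so walking upwards its pressure goes away.
  assert((LiveAfter & LiveBefore) == 0b1000);

  // increaseRegPressure(Reg, LiveAfter, ~LiveAfter & LiveBefore):
  // lanes 0,1 are read but dead afterwards, so they add pressure upwards.
  assert((~LiveAfter & LiveBefore) == 0b0011);
  return 0;
}
// [End of editor's note.]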
+ decreaseRegPressure(Reg, LiveAfter, LiveAfter & LiveBefore); + increaseRegPressure(Reg, LiveAfter, ~LiveAfter & LiveBefore); } // Generate liveness for uses. for (const RegisterMaskPair &P : RegOpers.Uses) { Register Reg = P.RegUnit; - LaneBitmask LiveLanes = LiveRegs.contains(Reg); - LaneBitmask LiveAfter = LiveLanes | P.LaneMask; - increaseRegPressure(Reg, LiveLanes, LiveAfter); + // If this register was also in a def operand, we've handled it + // with defs. + if (getRegLanes(RegOpers.Defs, Reg).any()) + continue; + LaneBitmask LiveAfter = LiveRegs.contains(Reg); + LaneBitmask LiveBefore = LiveAfter | P.LaneMask; + increaseRegPressure(Reg, LiveAfter, LiveBefore); } } @@ -1285,9 +1288,9 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { if (RequireIntervals) SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot(); - // Account for register pressure similar to RegPressureTracker::recede(). + // Account for register pressure similar to RegPressureTracker::advance(). RegisterOperands RegOpers; - RegOpers.collect(*MI, *TRI, *MRI, TrackLaneMasks, false); + RegOpers.collect(*MI, *TRI, *MRI, TrackLaneMasks, /*IgnoreDead=*/false); if (TrackLaneMasks) RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp index bc3ef1c0329a..0128f87748a7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp @@ -54,10 +54,10 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired<LiveIntervals>(); - AU.addPreserved<LiveIntervals>(); - AU.addRequired<SlotIndexes>(); - AU.addPreserved<SlotIndexes>(); + AU.addRequired<LiveIntervalsWrapperPass>(); + AU.addPreserved<LiveIntervalsWrapperPass>(); + AU.addRequired<SlotIndexesWrapperPass>(); + AU.addPreserved<SlotIndexesWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -114,8 +114,8 @@ char &llvm::RenameIndependentSubregsID = RenameIndependentSubregs::ID; INITIALIZE_PASS_BEGIN(RenameIndependentSubregs, DEBUG_TYPE, "Rename Independent Subregisters", false, false) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_END(RenameIndependentSubregs, DEBUG_TYPE, "Rename Independent Subregisters", false, false) @@ -334,10 +334,17 @@ void RenameIndependentSubregs::computeMainRangesFixFlags( DebugLoc(), MCDesc, Reg); SlotIndex DefIdx = LIS->InsertMachineInstrInMaps(*ImpDef); SlotIndex RegDefIdx = DefIdx.getRegSlot(); + LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(Reg); for (LiveInterval::SubRange &SR : LI.subranges()) { + Mask = Mask & ~SR.LaneMask; VNInfo *SRVNI = SR.getNextValue(RegDefIdx, Allocator); SR.addSegment(LiveRange::Segment(RegDefIdx, PredEnd, SRVNI)); } + + if (!Mask.none()) { + LiveInterval::SubRange *SR = LI.createSubRange(Allocator, Mask); + SR->createDeadDef(RegDefIdx, Allocator); + } } } } @@ -383,7 +390,7 @@ bool RenameIndependentSubregs::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "Renaming independent subregister live ranges in " << MF.getName() << '\n'); - LIS = &getAnalysis<LiveIntervals>(); + LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); TII = MF.getSubtarget().getInstrInfo(); // Iterate over all vregs. 
Note that we query getNumVirtRegs() the newly diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp index 432c63fb65f4..9fbb7b461364 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp @@ -6,9 +6,8 @@ // //===----------------------------------------------------------------------===// // -// Replaces LLVM IR instructions with vector operands (i.e., the frem -// instruction or calls to LLVM intrinsics) with matching calls to functions -// from a vector library (e.g libmvec, SVML) using TargetLibraryInfo interface. +// Replaces calls to LLVM Intrinsics with matching calls to functions from a +// vector library (e.g libmvec, SVML) using TargetLibraryInfo interface. // //===----------------------------------------------------------------------===// @@ -25,6 +24,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/VFABIDemangler.h" #include "llvm/Support/TypeSize.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -70,84 +70,68 @@ Function *getTLIFunction(Module *M, FunctionType *VectorFTy, return TLIFunc; } -/// Replace the instruction \p I with a call to the corresponding function from -/// the vector library (\p TLIVecFunc). -static void replaceWithTLIFunction(Instruction &I, VFInfo &Info, +/// Replace the intrinsic call \p II to \p TLIVecFunc, which is the +/// corresponding function from the vector library. +static void replaceWithTLIFunction(IntrinsicInst *II, VFInfo &Info, Function *TLIVecFunc) { - IRBuilder<> IRBuilder(&I); - auto *CI = dyn_cast<CallInst>(&I); - SmallVector<Value *> Args(CI ? CI->args() : I.operands()); + IRBuilder<> IRBuilder(II); + SmallVector<Value *> Args(II->args()); if (auto OptMaskpos = Info.getParamIndexForOptionalMask()) { auto *MaskTy = - VectorType::get(Type::getInt1Ty(I.getContext()), Info.Shape.VF); + VectorType::get(Type::getInt1Ty(II->getContext()), Info.Shape.VF); Args.insert(Args.begin() + OptMaskpos.value(), Constant::getAllOnesValue(MaskTy)); } - // If it is a call instruction, preserve the operand bundles. + // Preserve the operand bundles. SmallVector<OperandBundleDef, 1> OpBundles; - if (CI) - CI->getOperandBundlesAsDefs(OpBundles); + II->getOperandBundlesAsDefs(OpBundles); auto *Replacement = IRBuilder.CreateCall(TLIVecFunc, Args, OpBundles); - I.replaceAllUsesWith(Replacement); + II->replaceAllUsesWith(Replacement); // Preserve fast math flags for FP math. if (isa<FPMathOperator>(Replacement)) - Replacement->copyFastMathFlags(&I); + Replacement->copyFastMathFlags(II); } -/// Returns true when successfully replaced \p I with a suitable function taking -/// vector arguments, based on available mappings in the \p TLI. Currently only -/// works when \p I is a call to vectorized intrinsic or the frem instruction. +/// Returns true when successfully replaced \p II, which is a call to a +/// vectorized intrinsic, with a suitable function taking vector arguments, +/// based on available mappings in the \p TLI. static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI, - Instruction &I) { + IntrinsicInst *II) { + assert(II != nullptr && "Intrinsic cannot be null"); // At the moment VFABI assumes the return type is always widened unless it is // a void type. - auto *VTy = dyn_cast<VectorType>(I.getType()); + auto *VTy = dyn_cast<VectorType>(II->getType()); ElementCount EC(VTy ? 
VTy->getElementCount() : ElementCount::getFixed(0)); - - // Compute the argument types of the corresponding scalar call and the scalar - // function name. For calls, it additionally finds the function to replace - // and checks that all vector operands match the previously found EC. + // Compute the argument types of the corresponding scalar call and check that + // all vector operands match the previously found EC. SmallVector<Type *, 8> ScalarArgTypes; - std::string ScalarName; - Function *FuncToReplace = nullptr; - auto *CI = dyn_cast<CallInst>(&I); - if (CI) { - FuncToReplace = CI->getCalledFunction(); - Intrinsic::ID IID = FuncToReplace->getIntrinsicID(); - assert(IID != Intrinsic::not_intrinsic && "Not an intrinsic"); - for (auto Arg : enumerate(CI->args())) { - auto *ArgTy = Arg.value()->getType(); - if (isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index())) { - ScalarArgTypes.push_back(ArgTy); - } else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) { - ScalarArgTypes.push_back(VectorArgTy->getElementType()); - // When return type is void, set EC to the first vector argument, and - // disallow vector arguments with different ECs. - if (EC.isZero()) - EC = VectorArgTy->getElementCount(); - else if (EC != VectorArgTy->getElementCount()) - return false; - } else - // Exit when it is supposed to be a vector argument but it isn't. + Intrinsic::ID IID = II->getIntrinsicID(); + for (auto Arg : enumerate(II->args())) { + auto *ArgTy = Arg.value()->getType(); + if (isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index())) { + ScalarArgTypes.push_back(ArgTy); + } else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) { + ScalarArgTypes.push_back(VectorArgTy->getElementType()); + // When return type is void, set EC to the first vector argument, and + // disallow vector arguments with different ECs. + if (EC.isZero()) + EC = VectorArgTy->getElementCount(); + else if (EC != VectorArgTy->getElementCount()) return false; - } - // Try to reconstruct the name for the scalar version of the instruction, - // using scalar argument types. - ScalarName = Intrinsic::isOverloaded(IID) - ? Intrinsic::getName(IID, ScalarArgTypes, I.getModule()) - : Intrinsic::getName(IID).str(); - } else { - assert(VTy && "Return type must be a vector"); - auto *ScalarTy = VTy->getScalarType(); - LibFunc Func; - if (!TLI.getLibFunc(I.getOpcode(), ScalarTy, Func)) + } else + // Exit when it is supposed to be a vector argument but it isn't. return false; - ScalarName = TLI.getName(Func); - ScalarArgTypes = {ScalarTy, ScalarTy}; } + // Try to reconstruct the name for the scalar version of the instruction, + // using scalar argument types. + std::string ScalarName = + Intrinsic::isOverloaded(IID) + ? Intrinsic::getName(IID, ScalarArgTypes, II->getModule()) + : Intrinsic::getName(IID).str(); + // Try to find the mapping for the scalar version of this intrinsic and the // exact vector width of the call operands in the TargetLibraryInfo. First, // check with a non-masked variant, and if that fails try with a masked one. @@ -162,7 +146,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI, // Replace the call to the intrinsic with a call to the vector library // function. 
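[Editor's note on the ReplaceWithVeclib hunk above: once the frem path is gone, the argument walk is intrinsic-only, so the scalar-name reconstruction and the element-count (EC) check no longer branch on CallInst vs. frem. A minimal standalone sketch of the EC-consistency rule, assuming nothing beyond the diff; ArgType and checkArgs are illustrative names, not LLVM API:]

// Standalone sketch of the element-count consistency walk above; the
// ArgType/checkArgs names are illustrative, not LLVM API.
#include <cstdio>
#include <vector>

struct ArgType {
  bool IsVector;     // vector argument?
  unsigned EltCount; // element count if IsVector
  bool ScalarOp;     // operand the intrinsic requires to stay scalar
};

// Returns false if two vector operands disagree on element count,
// mirroring the "EC != VectorArgTy->getElementCount()" bail-out.
static bool checkArgs(const std::vector<ArgType> &Args, unsigned &EC) {
  for (const ArgType &A : Args) {
    if (A.ScalarOp || !A.IsVector)
      continue;        // scalar operands are taken as-is
    if (EC == 0)
      EC = A.EltCount; // void return: adopt the first vector EC
    else if (EC != A.EltCount)
      return false;    // mixed widths cannot map to one veclib call
  }
  return true;
}

int main() {
  unsigned EC = 0; // return type was void, so EC starts unknown
  std::vector<ArgType> Args{{true, 4, false}, {false, 0, true}, {true, 4, false}};
  std::printf("%s, EC=%u\n", checkArgs(Args, EC) ? "ok" : "reject", EC);
}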
- Type *ScalarRetTy = I.getType()->getScalarType(); + Type *ScalarRetTy = II->getType()->getScalarType(); FunctionType *ScalarFTy = FunctionType::get(ScalarRetTy, ScalarArgTypes, /*isVarArg*/ false); const std::string MangledName = VD->getVectorFunctionABIVariantString(); @@ -174,22 +158,19 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI, // specification when being created, this is why we need to add extra check to // make sure that the operands of the vector function obtained via VFABI match // the operands of the original vector instruction. - if (CI) { - for (auto VFParam : OptInfo->Shape.Parameters) { - if (VFParam.ParamKind == VFParamKind::GlobalPredicate) - continue; + for (auto &VFParam : OptInfo->Shape.Parameters) { + if (VFParam.ParamKind == VFParamKind::GlobalPredicate) + continue; - // tryDemangleForVFABI must return valid ParamPos, otherwise it could be - // a bug in the VFABI parser. - assert(VFParam.ParamPos < CI->arg_size() && - "ParamPos has invalid range."); - Type *OrigTy = CI->getArgOperand(VFParam.ParamPos)->getType(); - if (OrigTy->isVectorTy() != (VFParam.ParamKind == VFParamKind::Vector)) { - LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Will not replace: " << ScalarName - << ". Wrong type at index " << VFParam.ParamPos - << ": " << *OrigTy << "\n"); - return false; - } + // tryDemangleForVFABI must return valid ParamPos, otherwise it could be + // a bug in the VFABI parser. + assert(VFParam.ParamPos < II->arg_size() && "ParamPos has invalid range"); + Type *OrigTy = II->getArgOperand(VFParam.ParamPos)->getType(); + if (OrigTy->isVectorTy() != (VFParam.ParamKind == VFParamKind::Vector)) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Will not replace: " << ScalarName + << ". Wrong type at index " << VFParam.ParamPos << ": " + << *OrigTy << "\n"); + return false; } } @@ -197,45 +178,32 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI, if (!VectorFTy) return false; - Function *TLIFunc = getTLIFunction(I.getModule(), VectorFTy, - VD->getVectorFnName(), FuncToReplace); - - replaceWithTLIFunction(I, *OptInfo, TLIFunc); + Function *TLIFunc = + getTLIFunction(II->getModule(), VectorFTy, VD->getVectorFnName(), + II->getCalledFunction()); + replaceWithTLIFunction(II, *OptInfo, TLIFunc); LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `" << ScalarName << "` with call to `" << TLIFunc->getName() << "`.\n"); ++NumCallsReplaced; return true; } -/// Supported instruction \p I must be a vectorized frem or a call to an -/// intrinsic that returns either void or a vector. -static bool isSupportedInstruction(Instruction *I) { - Type *Ty = I->getType(); - if (auto *CI = dyn_cast<CallInst>(I)) - return (Ty->isVectorTy() || Ty->isVoidTy()) && CI->getCalledFunction() && - CI->getCalledFunction()->getIntrinsicID() != - Intrinsic::not_intrinsic; - if (I->getOpcode() == Instruction::FRem && Ty->isVectorTy()) - return true; - return false; -} - static bool runImpl(const TargetLibraryInfo &TLI, Function &F) { - bool Changed = false; SmallVector<Instruction *> ReplacedCalls; for (auto &I : instructions(F)) { - if (!isSupportedInstruction(&I)) - continue; - if (replaceWithCallToVeclib(TLI, I)) { - ReplacedCalls.push_back(&I); - Changed = true; + // Process only intrinsic calls that return void or a vector. 
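[Editor's note on the runImpl rewrite that begins above and continues below: it filters to IntrinsicInst returning void or a vector, records replaced calls, erases them only after the instruction walk (erasing mid-iteration would invalidate the iterator), and returns !ReplacedCalls.empty() instead of a separate Changed flag. A minimal sketch of that collect-then-erase idiom, with a plain container standing in for the instruction stream:]

// Minimal sketch of the collect-then-erase idiom used by runImpl above,
// over a std::list standing in for a function's instruction stream.
#include <cstdio>
#include <list>
#include <vector>

int main() {
  std::list<int> Insts{1, 2, 3, 4, 5};
  std::vector<std::list<int>::iterator> Replaced;

  // Pass 1: decide what to replace; never erase mid-iteration.
  for (auto It = Insts.begin(); It != Insts.end(); ++It)
    if (*It % 2 == 0)      // stand-in for "replaced with a veclib call"
      Replaced.push_back(It);

  // Pass 2: erase the recorded instructions (list iterators to the
  // surviving elements remain valid).
  for (auto It : Replaced)
    Insts.erase(It);

  bool Changed = !Replaced.empty(); // runImpl returns this directly
  std::printf("changed=%d, remaining=%zu\n", Changed, Insts.size());
}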
+ if (auto *II = dyn_cast<IntrinsicInst>(&I)) { + if (!II->getType()->isVectorTy() && !II->getType()->isVoidTy()) + continue; + + if (replaceWithCallToVeclib(TLI, II)) + ReplacedCalls.push_back(&I); } } - // Erase the calls to the intrinsics that have been replaced - // with calls to the vector library. - for (auto *CI : ReplacedCalls) - CI->eraseFromParent(); - return Changed; + // Erase any intrinsic calls that were replaced with vector library calls. + for (auto *I : ReplacedCalls) + I->eraseFromParent(); + return !ReplacedCalls.empty(); } //////////////////////////////////////////////////////////////////////////////// @@ -246,7 +214,7 @@ PreservedAnalyses ReplaceWithVeclib::run(Function &F, const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F); auto Changed = runImpl(TLI, F); if (Changed) { - LLVM_DEBUG(dbgs() << "Instructions replaced with vector libraries: " + LLVM_DEBUG(dbgs() << "Intrinsic calls replaced with vector libraries: " << NumCallsReplaced << "\n"); PreservedAnalyses PA; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp index 0a26247a4d16..e41d1bfb0e53 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp @@ -886,7 +886,7 @@ public: if (!TL) report_fatal_error("TargetLowering instance is required"); - auto *DL = &F.getParent()->getDataLayout(); + auto *DL = &F.getDataLayout(); auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); auto &ACT = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); @@ -946,7 +946,7 @@ PreservedAnalyses SafeStackPass::run(Function &F, if (!TL) report_fatal_error("TargetLowering instance is required"); - auto &DL = F.getParent()->getDataLayout(); + auto &DL = F.getDataLayout(); // preserve DominatorTree auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h index 6126c7a67854..8614c72f3050 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h @@ -12,6 +12,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/StackLifetime.h" +#include "llvm/Support/Alignment.h" namespace llvm { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp index de8e6f63794d..8d9a5041fc2f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -331,8 +331,10 @@ void SUnit::biasCriticalPath() { unsigned MaxDepth = BestI->getSUnit()->getDepth(); for (SUnit::pred_iterator I = std::next(BestI), E = Preds.end(); I != E; ++I) { - if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth) + if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth) { + MaxDepth = I->getSUnit()->getDepth(); BestI = I; + } } if (BestI != Preds.begin()) std::swap(*Preds.begin(), *BestI); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 0190fa345eb3..68dece6cf73e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -282,7 +282,7 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { } else { Dep.setLatency(0); } - 
ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOpIdx, Dep); + ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOpIdx, Dep, &SchedModel); UseSU->addPred(Dep); } } @@ -323,7 +323,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { Dep.setLatency( SchedModel.computeOutputLatency(MI, OperIdx, DefInstr)); } - ST.adjustSchedDependency(SU, OperIdx, DefSU, I->OpIdx, Dep); + ST.adjustSchedDependency(SU, OperIdx, DefSU, I->OpIdx, Dep, + &SchedModel); DefSU->addPred(Dep); } } @@ -453,7 +454,8 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { SDep Dep(SU, SDep::Data, Reg); Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use, I->OperandIndex)); - ST.adjustSchedDependency(SU, OperIdx, UseSU, I->OperandIndex, Dep); + ST.adjustSchedDependency(SU, OperIdx, UseSU, I->OperandIndex, Dep, + &SchedModel); UseSU->addPred(Dep); } @@ -1103,7 +1105,7 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores, dbgs() << "Loading SUnits:\n"; loads.dump()); } -static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, +static void toggleKills(const MachineRegisterInfo &MRI, LiveRegUnits &LiveRegs, MachineInstr &MI, bool addToLiveRegs) { for (MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.readsReg()) @@ -1113,8 +1115,10 @@ static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, continue; // Things that are available after the instruction are killed by it. - bool IsKill = LiveRegs.available(MRI, Reg); - MO.setIsKill(IsKill); + bool IsKill = LiveRegs.available(Reg); + + // Exception: Do not kill reserved registers + MO.setIsKill(IsKill && !MRI.isReserved(Reg)); if (addToLiveRegs) LiveRegs.addReg(Reg); } @@ -1144,7 +1148,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) { continue; LiveRegs.removeReg(Reg); } else if (MO.isRegMask()) { - LiveRegs.removeRegsInMask(MO); + LiveRegs.removeRegsNotPreserved(MO.getRegMask()); } } @@ -1202,7 +1206,7 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { oss << "<exit>"; else SU->getInstr()->print(oss, /*IsStandalone=*/true); - return oss.str(); + return s; } /// Return the basic block label. It is not necessarilly unique because a block diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp index 9c720864358e..61341e1f2d04 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp @@ -130,7 +130,11 @@ public: class SelectLike { SelectLike(Instruction *I) : I(I) {} + /// The select (/or) instruction. Instruction *I; + /// Whether this select is inverted, "not(cond), FalseVal, TrueVal", as + /// opposed to the original condition. + bool Inverted = false; public: /// Match a select or select-like instruction, returning a SelectLike. @@ -153,14 +157,22 @@ public: bool isValid() { return I; } operator bool() { return isValid(); } + /// Invert the select by inverting the condition and switching the operands. + void setInverted() { + assert(!Inverted && "Trying to invert an inverted SelectLike"); + assert(isa<Instruction>(getCondition()) && + cast<Instruction>(getCondition())->getOpcode() == + Instruction::Xor); + Inverted = true; + } + bool isInverted() const { return Inverted; } + Instruction *getI() { return I; } const Instruction *getI() const { return I; } Type *getType() const { return I->getType(); } - /// Return the condition for the SelectLike instruction. 
For example the - /// condition of a select or c in `or(zext(c), x)` - Value *getCondition() const { + Value *getNonInvertedCondition() const { if (auto *Sel = dyn_cast<SelectInst>(I)) return Sel->getCondition(); // Or(zext) case @@ -177,11 +189,24 @@ public: llvm_unreachable("Unhandled case in getCondition"); } + /// Return the condition for the SelectLike instruction. For example the + /// condition of a select or c in `or(zext(c), x)` + Value *getCondition() const { + Value *CC = getNonInvertedCondition(); + // For inverted conditions the CC is checked when created to be a not + // (xor) instruction. + if (Inverted) + return cast<Instruction>(CC)->getOperand(0); + return CC; + } + /// Return the true value for the SelectLike instruction. Note this may not /// exist for all SelectLike instructions. For example, for `or(zext(c), x)` /// the true value would be `or(x,1)`. As this value does not exist, nullptr /// is returned. - Value *getTrueValue() const { + Value *getTrueValue(bool HonorInverts = true) const { + if (Inverted && HonorInverts) + return getFalseValue(/*HonorInverts=*/false); if (auto *Sel = dyn_cast<SelectInst>(I)) return Sel->getTrueValue(); // Or(zext) case - The true value is Or(X), so return nullptr as the value @@ -195,7 +220,9 @@ public: /// Return the false value for the SelectLike instruction. For example the /// getFalseValue of a select or `x` in `or(zext(c), x)` (which is /// `select(c, x|1, x)`) - Value *getFalseValue() const { + Value *getFalseValue(bool HonorInverts = true) const { + if (Inverted && HonorInverts) + return getTrueValue(/*HonorInverts=*/false); if (auto *Sel = dyn_cast<SelectInst>(I)) return Sel->getFalseValue(); // Or(zext) case - return the operand which is not the zext. @@ -216,8 +243,8 @@ public: /// InstCostMap. This may need to be generated for select-like instructions. Scaled64 getTrueOpCost(DenseMap<const Instruction *, CostInfo> &InstCostMap, const TargetTransformInfo *TTI) { - if (auto *Sel = dyn_cast<SelectInst>(I)) - if (auto *I = dyn_cast<Instruction>(Sel->getTrueValue())) + if (isa<SelectInst>(I)) + if (auto *I = dyn_cast<Instruction>(getTrueValue())) return InstCostMap.contains(I) ? InstCostMap[I].NonPredCost : Scaled64::getZero(); @@ -242,8 +269,8 @@ public: Scaled64 getFalseOpCost(DenseMap<const Instruction *, CostInfo> &InstCostMap, const TargetTransformInfo *TTI) { - if (auto *Sel = dyn_cast<SelectInst>(I)) - if (auto *I = dyn_cast<Instruction>(Sel->getFalseValue())) + if (isa<SelectInst>(I)) + if (auto *I = dyn_cast<Instruction>(getFalseValue())) return InstCostMap.contains(I) ? InstCostMap[I].NonPredCost : Scaled64::getZero(); @@ -510,9 +537,10 @@ getTrueOrFalseValue(SelectOptimizeImpl::SelectLike SI, bool isTrue, for (SelectInst *DefSI = dyn_cast<SelectInst>(SI.getI()); DefSI != nullptr && Selects.count(DefSI); DefSI = dyn_cast<SelectInst>(V)) { - assert(DefSI->getCondition() == SI.getCondition() && - "The condition of DefSI does not match with SI"); - V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); + if (DefSI->getCondition() == SI.getCondition()) + V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); + else // Handle inverted SI + V = (!isTrue ? 
DefSI->getTrueValue() : DefSI->getFalseValue()); } if (isa<BinaryOperator>(SI.getI())) { @@ -621,31 +649,39 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { SelectLike LastSI = ASI.back(); BasicBlock *StartBlock = SI.getI()->getParent(); BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI.getI())); + // With RemoveDIs turned off, SplitPt can be a dbg.* intrinsic. With + // RemoveDIs turned on, SplitPt would instead point to the next + // instruction. To match existing dbg.* intrinsic behaviour with RemoveDIs, + // tell splitBasicBlock that we want to include any DbgVariableRecords + // attached to SplitPt in the splice. + SplitPt.setHeadBit(true); BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock)); // Delete the unconditional branch that was just created by the split. StartBlock->getTerminator()->eraseFromParent(); - // Move any debug/pseudo instructions that were in-between the select - // group to the newly-created end block. - SmallVector<Instruction *, 2> DebugPseudoINS; + // Move any debug/pseudo instructions and not's that were in-between the + // select group to the newly-created end block. + SmallVector<Instruction *, 2> SinkInstrs; auto DIt = SI.getI()->getIterator(); while (&*DIt != LastSI.getI()) { if (DIt->isDebugOrPseudoInst()) - DebugPseudoINS.push_back(&*DIt); + SinkInstrs.push_back(&*DIt); + if (match(&*DIt, m_Not(m_Specific(SI.getCondition())))) + SinkInstrs.push_back(&*DIt); DIt++; } - for (auto *DI : DebugPseudoINS) { + for (auto *DI : SinkInstrs) DI->moveBeforePreserving(&*EndBlock->getFirstInsertionPt()); - } - // Duplicate implementation for DPValues, the non-instruction debug-info - // record. Helper lambda for moving DPValues to the end block. - auto TransferDPValues = [&](Instruction &I) { - for (auto &DPValue : llvm::make_early_inc_range(I.getDbgValueRange())) { - DPValue.removeFromParent(); - EndBlock->insertDPValueBefore(&DPValue, - EndBlock->getFirstInsertionPt()); + // Duplicate implementation for DbgRecords, the non-instruction debug-info + // format. Helper lambda for moving DbgRecords to the end block. + auto TransferDbgRecords = [&](Instruction &I) { + for (auto &DbgRecord : + llvm::make_early_inc_range(I.getDbgRecordRange())) { + DbgRecord.removeFromParent(); + EndBlock->insertDbgRecordBefore(&DbgRecord, + EndBlock->getFirstInsertionPt()); } }; @@ -654,7 +690,7 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { // middle" of the select group. auto R = make_range(std::next(SI.getI()->getIterator()), std::next(LastSI.getI()->getIterator())); - llvm::for_each(R, TransferDPValues); + llvm::for_each(R, TransferDbgRecords); // These are the new basic blocks for the conditional branch. // At least one will become an actual new basic block. @@ -758,6 +794,13 @@ void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB, ++BBIt; continue; } + + // Skip not(select(..)), if the not is part of the same select group + if (match(NI, m_Not(m_Specific(SI.getCondition())))) { + ++BBIt; + continue; + } + // We only allow selects in the same group, not other select-like // instructions. 
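[Editor's note on the SelectOptimize hunks above and in the hunk that continues below: collectSelectGroups now also admits a select whose condition is not(c), marking it Inverted and sinking the intervening not with the debug instructions; honouring the inversion just swaps the true/false operands, since select(!c, T, F) == select(c, F, T). A standalone sketch of that grouping, where Sel is an illustrative stand-in for SelectLike, not LLVM API:]

// Standalone sketch of grouping consecutive selects keyed on one
// condition c (possibly as not(c)); Sel is illustrative, not LLVM API.
#include <cstdio>
#include <vector>

struct Sel {
  int CondId;    // identity of the underlying condition value c
  bool Inverted; // condition is not(c) rather than c
  int TVal, FVal;
  // select(!c, T, F) == select(c, F, T): inversion swaps the operands.
  int trueValue() const { return Inverted ? FVal : TVal; }
  int falseValue() const { return Inverted ? TVal : FVal; }
};

int main() {
  std::vector<Sel> BB{{7, false, 1, 2}, {7, true, 3, 4}, {9, false, 5, 6}};
  std::vector<std::vector<Sel>> Groups;
  for (const Sel &S : BB) {
    if (!Groups.empty() && Groups.back().front().CondId == S.CondId)
      Groups.back().push_back(S); // same (or inverted) condition: one group
    else
      Groups.push_back({S});
  }
  // Two groups; the first holds both selects on c7, one of them inverted.
  std::printf("groups=%zu, first group size=%zu\n", Groups.size(),
              Groups[0].size());
}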
if (!isa<SelectInst>(NI)) @@ -766,6 +809,10 @@ void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB, SelectLike NSI = SelectLike::match(NI); if (NSI && SI.getCondition() == NSI.getCondition()) { SIGroup.push_back(NSI); + } else if (NSI && match(NSI.getCondition(), + m_Not(m_Specific(SI.getCondition())))) { + NSI.setInverted(); + SIGroup.push_back(NSI); } else break; ++BBIt; @@ -776,6 +823,12 @@ void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB, if (!isSelectKindSupported(SI)) continue; + LLVM_DEBUG({ + dbgs() << "New Select group with\n"; + for (auto SI : SIGroup) + dbgs() << " " << *SI.getI() << "\n"; + }); + SIGroups.push_back(SIGroup); } } @@ -847,7 +900,7 @@ void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops( bool SelectOptimizeImpl::isConvertToBranchProfitableBase( const SelectGroup &ASI) { SelectLike SI = ASI.front(); - LLVM_DEBUG(dbgs() << "Analyzing select group containing " << SI.getI() + LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI.getI() << "\n"); OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI.getI()); OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI.getI()); @@ -1018,8 +1071,8 @@ void SelectOptimizeImpl::getExclBackwardsSlice(Instruction *I, Slice.push(II); // Explore all the operands of the current instruction to expand the slice. - for (unsigned k = 0; k < II->getNumOperands(); ++k) - if (auto *OpI = dyn_cast<Instruction>(II->getOperand(k))) + for (Value *Op : II->operand_values()) + if (auto *OpI = dyn_cast<Instruction>(Op)) Worklist.push(OpI); } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 5038f8a1fc15..aa9032ea2574 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -37,8 +37,8 @@ #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h" -#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/RuntimeLibcallUtil.h" +#include "llvm/CodeGen/SDPatternMatch.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -47,6 +47,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DataLayout.h" @@ -76,7 +77,10 @@ #include <utility> #include <variant> +#include "MatchContext.h" + using namespace llvm; +using namespace llvm::SDPatternMatch; #define DEBUG_TYPE "dagcombine" @@ -166,27 +170,16 @@ namespace { /// back and when processing we pop off of the back. /// /// The worklist will not contain duplicates but may contain null entries - /// due to nodes being deleted from the underlying DAG. + /// due to nodes being deleted from the underlying DAG. For fast lookup and + /// deduplication, the index of the node in this vector is stored in the + /// node in SDNode::CombinerWorklistIndex. SmallVector<SDNode *, 64> Worklist; - /// Mapping from an SDNode to its position on the worklist. - /// - /// This is used to find and remove nodes from the worklist (by nulling - /// them) when they are deleted from the underlying DAG. It relies on - /// stable indices of nodes within the worklist. 
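[Editor's note: the WorklistMap removed above is replaced, in the hunk that follows, by a signed index stored intrusively on the node itself: >= 0 is the position in the worklist vector, -1 means not queued, -2 means popped-and-combined; removal nulls the vector slot so the remaining indices stay stable. A self-contained sketch of that scheme under those assumptions (Node here is not the SDNode API):]

// Sketch of the intrusive worklist index replacing WorklistMap above.
// Node and the sentinel values mirror the scheme; not the SDNode API.
#include <cassert>
#include <cstdio>
#include <vector>

struct Node { int WorklistIndex = -1; }; // -1: not queued, -2: combined

std::vector<Node *> Worklist;

void addToWorklist(Node *N, bool SkipIfCombinedBefore = false) {
  if (SkipIfCombinedBefore && N->WorklistIndex == -2)
    return;                   // operand was already combined once; skip
  if (N->WorklistIndex < 0) { // also re-queues previously combined nodes
    N->WorklistIndex = (int)Worklist.size();
    Worklist.push_back(N);
  }
}

void removeFromWorklist(Node *N) {
  if (N->WorklistIndex < 0)
    return;                               // not queued (or combined)
  Worklist[N->WorklistIndex] = nullptr;   // null out; keep indices stable
  N->WorklistIndex = -1;
}

Node *popBack() {
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    if (!N)
      continue;              // skip nulled (deleted) entries
    assert(N->WorklistIndex >= 0 && "entry without a live index");
    N->WorklistIndex = -2;   // mark as combined
    return N;
  }
  return nullptr;
}

int main() {
  Node A, B;
  addToWorklist(&A);
  addToWorklist(&B);
  removeFromWorklist(&A);      // slot for A is nulled, not erased
  std::printf("popped B? %d\n", popBack() == &B);
}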
- DenseMap<SDNode *, unsigned> WorklistMap; - /// This records all nodes attempted to be added to the worklist since we /// considered a new worklist entry. As we keep do not add duplicate nodes /// in the worklist, this is different from the tail of the worklist. SmallSetVector<SDNode *, 32> PruningList; - /// Set of nodes which have been combined (at least once). - /// - /// This is used to allow us to reliably add any operands of a DAG node - /// which have not yet been combined to the worklist. - SmallPtrSet<SDNode *, 32> CombinedNodes; - /// Map from candidate StoreNode to the pair of RootNode and count. /// The count is used to track how many times we have seen the StoreNode /// with the same RootNode bail out in dependence check. If we have seen @@ -234,10 +227,10 @@ namespace { } if (N) { - bool GoodWorklistEntry = WorklistMap.erase(N); - (void)GoodWorklistEntry; - assert(GoodWorklistEntry && + assert(N->getCombinerWorklistIndex() >= 0 && "Found a worklist entry without a corresponding map entry!"); + // Set to -2 to indicate that we combined the node. + N->setCombinerWorklistIndex(-2); } return N; } @@ -269,7 +262,8 @@ namespace { /// Add to the worklist making sure its instance is at the back (next to be /// processed.) - void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true) { + void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true, + bool SkipIfCombinedBefore = false) { assert(N->getOpcode() != ISD::DELETED_NODE && "Deleted Node added to Worklist"); @@ -278,26 +272,33 @@ namespace { if (N->getOpcode() == ISD::HANDLENODE) return; + if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2) + return; + if (IsCandidateForPruning) ConsiderForPruning(N); - if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second) + if (N->getCombinerWorklistIndex() < 0) { + N->setCombinerWorklistIndex(Worklist.size()); Worklist.push_back(N); + } } /// Remove all instances of N from the worklist. void removeFromWorklist(SDNode *N) { - CombinedNodes.erase(N); PruningList.remove(N); StoreRootCountMap.erase(N); - auto It = WorklistMap.find(N); - if (It == WorklistMap.end()) + int WorklistIndex = N->getCombinerWorklistIndex(); + // If not in the worklist, the index might be -1 or -2 (was combined + // before). As the node gets deleted anyway, there's no need to update + // the index. + if (WorklistIndex < 0) return; // Not in the worklist. // Null out the entry rather than erasing it to avoid a linear operation. - Worklist[It->second] = nullptr; - WorklistMap.erase(It); + Worklist[WorklistIndex] = nullptr; + N->setCombinerWorklistIndex(-1); } void deleteAndRecombine(SDNode *N); @@ -334,16 +335,11 @@ namespace { } bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) { - TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); - KnownBits Known; - if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false)) - return false; - - // Revisit the node. - AddToWorklist(Op.getNode()); - - CommitTargetLoweringOpt(TLO); - return true; + EVT VT = Op.getValueType(); + APInt DemandedElts = VT.isFixedLengthVector() + ? 
APInt::getAllOnes(VT.getVectorNumElements()) + : APInt(1, 1); + return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false); } /// Check the specified vector node value to see if it can be simplified or @@ -439,7 +435,7 @@ namespace { SDValue visitSUBE(SDNode *N); SDValue visitUSUBO_CARRY(SDNode *N); SDValue visitSSUBO_CARRY(SDNode *N); - SDValue visitMUL(SDNode *N); + template <class MatchContextClass> SDValue visitMUL(SDNode *N); SDValue visitMULFIX(SDNode *N); SDValue useDivRem(SDNode *N); SDValue visitSDIV(SDNode *N); @@ -458,7 +454,7 @@ namespace { SDValue visitAND(SDNode *N); SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N); SDValue visitOR(SDNode *N); - SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N); + SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL); SDValue visitXOR(SDNode *N); SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL); SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL); @@ -478,6 +474,7 @@ namespace { SDValue visitCTPOP(SDNode *N); SDValue visitSELECT(SDNode *N); SDValue visitVSELECT(SDNode *N); + SDValue visitVP_SELECT(SDNode *N); SDValue visitSELECT_CC(SDNode *N); SDValue visitSETCC(SDNode *N); SDValue visitSETCCCARRY(SDNode *N); @@ -530,6 +527,7 @@ namespace { bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N); SDValue visitSTORE(SDNode *N); + SDValue visitATOMIC_STORE(SDNode *N); SDValue visitLIFETIME_END(SDNode *N); SDValue visitINSERT_VECTOR_ELT(SDNode *N); SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); @@ -539,6 +537,7 @@ namespace { SDValue visitVECTOR_SHUFFLE(SDNode *N); SDValue visitSCALAR_TO_VECTOR(SDNode *N); SDValue visitINSERT_SUBVECTOR(SDNode *N); + SDValue visitVECTOR_COMPRESS(SDNode *N); SDValue visitMLOAD(SDNode *N); SDValue visitMSTORE(SDNode *N); SDValue visitMGATHER(SDNode *N); @@ -597,8 +596,8 @@ namespace { SDValue foldSextSetcc(SDNode *N); SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, const SDLoc &DL); - SDValue foldSubToUSubSat(EVT DstVT, SDNode *N); - SDValue foldABSToABD(SDNode *N); + SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL); + SDValue foldABSToABD(SDNode *N, const SDLoc &DL); SDValue unfoldMaskedMerge(SDNode *N); SDValue unfoldExtremeBitClearingToShifts(SDNode *N); SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, @@ -609,6 +608,9 @@ namespace { SDValue &CC, bool MatchStrict = false) const; bool isOneUseSetCC(SDValue N) const; + SDValue foldAddToAvg(SDNode *N, const SDLoc &DL); + SDValue foldSubToAvg(SDNode *N, const SDLoc &DL); + SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); @@ -842,11 +844,9 @@ namespace { SelectionDAG &getDAG() const { return DAG; } - /// Returns a type large enough to hold any valid shift amount - before type - /// legalization these can be huge. + /// Convenience wrapper around TargetLowering::getShiftAmountTy. 
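[Editor's note on the SimplifyDemandedBits wrapper above: it now forwards an explicit demanded-elements mask, all lanes for a fixed-length vector, a single bit otherwise (scalars, and scalable vectors whose lane count is unknown). A tiny sketch of building such a mask, with uint64_t standing in for APInt; illustrative only:]

// Sketch of the demanded-elements mask built by the SimplifyDemandedBits
// wrapper above, with uint64_t standing in for APInt (illustrative only).
#include <cstdint>
#include <cstdio>

uint64_t demandedElts(bool FixedLengthVector, unsigned NumElts) {
  if (FixedLengthVector) // demand every lane: APInt::getAllOnes(NumElts)
    return NumElts >= 64 ? ~0ULL : ((1ULL << NumElts) - 1);
  return 1;              // APInt(1, 1): one notional element
}

int main() {
  std::printf("v4: %#llx, scalar: %#llx\n",
              (unsigned long long)demandedElts(true, 4),
              (unsigned long long)demandedElts(false, 0)); // 0xf, 0x1
}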
EVT getShiftAmountTy(EVT LHSTy) { - assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); - return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes); + return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout()); } /// This method returns true if we are running before type legalization or @@ -892,138 +892,6 @@ public: void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); } }; -class EmptyMatchContext { - SelectionDAG &DAG; - const TargetLowering &TLI; - -public: - EmptyMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root) - : DAG(DAG), TLI(TLI) {} - - bool match(SDValue OpN, unsigned Opcode) const { - return Opcode == OpN->getOpcode(); - } - - // Same as SelectionDAG::getNode(). - template <typename... ArgT> SDValue getNode(ArgT &&...Args) { - return DAG.getNode(std::forward<ArgT>(Args)...); - } - - bool isOperationLegalOrCustom(unsigned Op, EVT VT, - bool LegalOnly = false) const { - return TLI.isOperationLegalOrCustom(Op, VT, LegalOnly); - } -}; - -class VPMatchContext { - SelectionDAG &DAG; - const TargetLowering &TLI; - SDValue RootMaskOp; - SDValue RootVectorLenOp; - -public: - VPMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root) - : DAG(DAG), TLI(TLI), RootMaskOp(), RootVectorLenOp() { - assert(Root->isVPOpcode()); - if (auto RootMaskPos = ISD::getVPMaskIdx(Root->getOpcode())) - RootMaskOp = Root->getOperand(*RootMaskPos); - - if (auto RootVLenPos = - ISD::getVPExplicitVectorLengthIdx(Root->getOpcode())) - RootVectorLenOp = Root->getOperand(*RootVLenPos); - } - - /// whether \p OpVal is a node that is functionally compatible with the - /// NodeType \p Opc - bool match(SDValue OpVal, unsigned Opc) const { - if (!OpVal->isVPOpcode()) - return OpVal->getOpcode() == Opc; - - auto BaseOpc = ISD::getBaseOpcodeForVP(OpVal->getOpcode(), - !OpVal->getFlags().hasNoFPExcept()); - if (BaseOpc != Opc) - return false; - - // Make sure the mask of OpVal is true mask or is same as Root's. - unsigned VPOpcode = OpVal->getOpcode(); - if (auto MaskPos = ISD::getVPMaskIdx(VPOpcode)) { - SDValue MaskOp = OpVal.getOperand(*MaskPos); - if (RootMaskOp != MaskOp && - !ISD::isConstantSplatVectorAllOnes(MaskOp.getNode())) - return false; - } - - // Make sure the EVL of OpVal is same as Root's. - if (auto VLenPos = ISD::getVPExplicitVectorLengthIdx(VPOpcode)) - if (RootVectorLenOp != OpVal.getOperand(*VLenPos)) - return false; - return true; - } - - // Specialize based on number of operands. 
- // TODO emit VP intrinsics where MaskOp/VectorLenOp != null - // SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { return - // DAG.getNode(Opcode, DL, VT); } - SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand) { - unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode); - assert(ISD::getVPMaskIdx(VPOpcode) == 1 && - ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2); - return DAG.getNode(VPOpcode, DL, VT, - {Operand, RootMaskOp, RootVectorLenOp}); - } - - SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, - SDValue N2) { - unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode); - assert(ISD::getVPMaskIdx(VPOpcode) == 2 && - ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3); - return DAG.getNode(VPOpcode, DL, VT, - {N1, N2, RootMaskOp, RootVectorLenOp}); - } - - SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, - SDValue N2, SDValue N3) { - unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode); - assert(ISD::getVPMaskIdx(VPOpcode) == 3 && - ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4); - return DAG.getNode(VPOpcode, DL, VT, - {N1, N2, N3, RootMaskOp, RootVectorLenOp}); - } - - SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand, - SDNodeFlags Flags) { - unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode); - assert(ISD::getVPMaskIdx(VPOpcode) == 1 && - ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2); - return DAG.getNode(VPOpcode, DL, VT, {Operand, RootMaskOp, RootVectorLenOp}, - Flags); - } - - SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, - SDValue N2, SDNodeFlags Flags) { - unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode); - assert(ISD::getVPMaskIdx(VPOpcode) == 2 && - ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3); - return DAG.getNode(VPOpcode, DL, VT, {N1, N2, RootMaskOp, RootVectorLenOp}, - Flags); - } - - SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, - SDValue N2, SDValue N3, SDNodeFlags Flags) { - unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode); - assert(ISD::getVPMaskIdx(VPOpcode) == 3 && - ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4); - return DAG.getNode(VPOpcode, DL, VT, - {N1, N2, N3, RootMaskOp, RootVectorLenOp}, Flags); - } - - bool isOperationLegalOrCustom(unsigned Op, EVT VT, - bool LegalOnly = false) const { - unsigned VPOp = ISD::getVPForBaseOpcode(Op); - return TLI.isOperationLegalOrCustom(VPOp, VT, LegalOnly); - } -}; - } // end anonymous namespace //===----------------------------------------------------------------------===// @@ -1211,7 +1079,44 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc, // (load/store (add, (add, x, y), offset2)) -> // (load/store (add, (add, x, offset2), y)). - if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD) + if (N0.getOpcode() != ISD::ADD) + return false; + + // Check for vscale addressing modes. + // (load/store (add/sub (add x, y), vscale)) + // (load/store (add/sub (add x, y), (lsl vscale, C))) + // (load/store (add/sub (add x, y), (mul vscale, C))) + if ((N1.getOpcode() == ISD::VSCALE || + ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) && + N1.getOperand(0).getOpcode() == ISD::VSCALE && + isa<ConstantSDNode>(N1.getOperand(1)))) && + N1.getValueType().getFixedSizeInBits() <= 64) { + int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE + ? N1.getConstantOperandVal(0) + : (N1.getOperand(0).getConstantOperandVal(0) * + (N1.getOpcode() == ISD::SHL + ? 
(1LL << N1.getConstantOperandVal(1)) + : N1.getConstantOperandVal(1))); + if (Opc == ISD::SUB) + ScalableOffset = -ScalableOffset; + if (all_of(N->uses(), [&](SDNode *Node) { + if (auto *LoadStore = dyn_cast<MemSDNode>(Node); + LoadStore && LoadStore->getBasePtr().getNode() == N) { + TargetLoweringBase::AddrMode AM; + AM.HasBaseReg = true; + AM.ScalableOffset = ScalableOffset; + EVT VT = LoadStore->getMemoryVT(); + unsigned AS = LoadStore->getAddressSpace(); + Type *AccessTy = VT.getTypeForEVT(*DAG.getContext()); + return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, + AS); + } + return false; + })) + return true; + } + + if (Opc != ISD::ADD) return false; auto *C2 = dyn_cast<ConstantSDNode>(N1); @@ -1279,8 +1184,8 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc, return false; } -// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression -// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc. +/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if +/// \p N0 is the same kind of operation as \p Opc. SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1, SDNodeFlags Flags) { @@ -1293,19 +1198,20 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N01 = N0.getOperand(1); if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) { + SDNodeFlags NewFlags; + if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() && + Flags.hasNoUnsignedWrap()) + NewFlags.setNoUnsignedWrap(true); + if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) { // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2)) if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) - return DAG.getNode(Opc, DL, VT, N00, OpNode); + return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags); return SDValue(); } if (TLI.isReassocProfitable(DAG, N0, N1)) { // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1) // iff (op x, c1) has one use - SDNodeFlags NewFlags; - if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() && - Flags.hasNoUnsignedWrap()) - NewFlags.setNoUnsignedWrap(true); SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags); return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags); } @@ -1378,7 +1284,8 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, return SDValue(); } -// Try to reassociate commutative binops. +/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the +/// same kind of operation as \p Opc. SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1, SDNodeFlags Flags) { assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative."); @@ -1861,13 +1768,13 @@ void DAGCombiner::Run(CombineLevel AtLevel) { LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); // Add any operands of the new node which have not yet been combined to the - // worklist as well. Because the worklist uniques things already, this - // won't repeatedly process the same operand. + // worklist as well. getNextWorklistEntry flags nodes that have been + // combined before. Because the worklist uniques things already, this won't + // repeatedly process the same operand. 
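[Editor's note on the vscale addressing-mode hunk above: the matched offset is a plain constant multiple of vscale, C for vscale itself, the multiplier shifted by C for (shl vscale, C), multiplied for (mul vscale, C), and negated under a SUB root, which then feeds AddrMode::ScalableOffset for the isLegalAddressingMode query. A standalone sketch of just that arithmetic; Kind and the parameters are illustrative stand-ins for the SDNode patterns:]

// Sketch of extracting the constant vscale multiplier matched above;
// Kind and the C0/C1 parameters are stand-ins for the SDNode operands.
#include <cstdint>
#include <cstdio>

enum class Kind { VScale, ShlVScale, MulVScale };

// (vscale C0)            -> C0
// (shl (vscale C0), C1)  -> C0 << C1
// (mul (vscale C0), C1)  -> C0 * C1
int64_t scalableOffset(Kind K, int64_t C0, int64_t C1, bool IsSub) {
  int64_t Off = 0;
  switch (K) {
  case Kind::VScale:    Off = C0;               break;
  case Kind::ShlVScale: Off = C0 * (1LL << C1); break;
  case Kind::MulVScale: Off = C0 * C1;          break;
  }
  return IsSub ? -Off : Off; // a SUB root negates the offset
}

int main() {
  // e.g. (sub p, (shl (vscale 2), 3)) -> scalable offset -16
  std::printf("%lld\n", (long long)scalableOffset(Kind::ShlVScale, 2, 3, true));
}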
for (const SDValue &ChildN : N->op_values()) - if (!CombinedNodes.count(ChildN.getNode())) - AddToWorklist(ChildN.getNode()); + AddToWorklist(ChildN.getNode(), /*IsCandidateForPruning=*/true, + /*SkipIfCombinedBefore=*/true); - CombinedNodes.insert(N); SDValue RV = combine(N); if (!RV.getNode()) @@ -1944,7 +1851,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SMULFIXSAT: case ISD::UMULFIX: case ISD::UMULFIXSAT: return visitMULFIX(N); - case ISD::MUL: return visitMUL(N); + case ISD::MUL: return visitMUL<EmptyMatchContext>(N); case ISD::SDIV: return visitSDIV(N); case ISD::UDIV: return visitUDIV(N); case ISD::SREM: @@ -2036,6 +1943,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::BR_CC: return visitBR_CC(N); case ISD::LOAD: return visitLOAD(N); case ISD::STORE: return visitSTORE(N); + case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N); case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); @@ -2048,6 +1956,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::MLOAD: return visitMLOAD(N); case ISD::MSCATTER: return visitMSCATTER(N); case ISD::MSTORE: return visitMSTORE(N); + case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N); case ISD::LIFETIME_END: return visitLIFETIME_END(N); case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); @@ -2393,24 +2302,12 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, return true; } - if (N.getOpcode() != ISD::SETCC || - N.getValueType().getScalarType() != MVT::i1 || - cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE) - return false; - - SDValue Op0 = N->getOperand(0); - SDValue Op1 = N->getOperand(1); - assert(Op0.getValueType() == Op1.getValueType()); - - if (isNullOrNullSplat(Op0)) - Op = Op1; - else if (isNullOrNullSplat(Op1)) - Op = Op0; - else + if (N.getValueType().getScalarType() != MVT::i1 || + !sd_match( + N, m_c_SetCC(m_Value(Op), m_Zero(), m_SpecificCondCode(ISD::SETNE)))) return false; Known = DAG.computeKnownBits(Op); - return (Known.Zero | 1).isAllOnes(); } @@ -2621,7 +2518,8 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { return SelectOp; } -static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) { +static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, + SelectionDAG &DAG) { assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && "Expecting add or sub"); @@ -2636,16 +2534,12 @@ static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) { return SDValue(); // Match the zext operand as a setcc of a boolean. - if (Z.getOperand(0).getOpcode() != ISD::SETCC || - Z.getOperand(0).getValueType() != MVT::i1) + if (Z.getOperand(0).getValueType() != MVT::i1) return SDValue(); // Match the compare as: setcc (X & 1), 0, eq. - SDValue SetCC = Z.getOperand(0); - ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get(); - if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) || - SetCC.getOperand(0).getOpcode() != ISD::AND || - !isOneConstant(SetCC.getOperand(0).getOperand(1))) + if (!sd_match(Z.getOperand(0), m_SetCC(m_And(m_Value(), m_One()), m_Zero(), + m_SpecificCondCode(ISD::SETEQ)))) return SDValue(); // We are adding/subtracting a constant and an inverted low bit. 
Turn that @@ -2653,16 +2547,37 @@ static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) { // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1)) // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1)) EVT VT = C.getValueType(); - SDLoc DL(N); - SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT); - SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) : - DAG.getConstant(CN->getAPIntValue() - 1, DL, VT); + SDValue LowBit = DAG.getZExtOrTrunc(Z.getOperand(0).getOperand(0), DL, VT); + SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) + : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT); return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit); } +// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1) +SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) { + SDValue N0 = N->getOperand(0); + EVT VT = N0.getValueType(); + SDValue A, B; + + if ((!LegalOperations || hasOperation(ISD::AVGCEILU, VT)) && + sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)), + m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), + m_SpecificInt(1))))) { + return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B); + } + if ((!LegalOperations || hasOperation(ISD::AVGCEILS, VT)) && + sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)), + m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), + m_SpecificInt(1))))) { + return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B); + } + return SDValue(); +} + /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into /// a shift and add with a different constant. -static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) { +static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL, + SelectionDAG &DAG) { assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && "Expecting add or sub"); @@ -2690,7 +2605,6 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) { // Eliminate the 'not' by adjusting the shift and add/sub constant: // add (srl (not X), 31), C --> add (sra X, 31), (C + 1) // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1) - SDLoc DL(N); if (SDValue NewC = DAG.FoldConstantArithmetic( IsAdd ? 
ISD::ADD : ISD::SUB, DL, VT, {ConstantOp, DAG.getConstant(1, DL, VT)})) { @@ -2733,8 +2647,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { return DAG.getNode(ISD::ADD, DL, VT, N1, N0); if (areBitwiseNotOfEachother(N0, N1)) - return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), - SDLoc(N), VT); + return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT); // fold vector ops if (VT.isVector()) { @@ -2830,66 +2743,53 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1)) return SD; } + + SDValue A, B, C, D; + // fold ((0-A) + B) -> B-A - if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0))) - return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1)); + if (sd_match(N0, m_Neg(m_Value(A)))) + return DAG.getNode(ISD::SUB, DL, VT, N1, A); // fold (A + (0-B)) -> A-B - if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0))) - return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1)); + if (sd_match(N1, m_Neg(m_Value(B)))) + return DAG.getNode(ISD::SUB, DL, VT, N0, B); // fold (A+(B-A)) -> B - if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) - return N1.getOperand(0); + if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0)))) + return B; // fold ((B-A)+A) -> B - if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) - return N0.getOperand(0); + if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1)))) + return B; // fold ((A-B)+(C-A)) -> (C-B) - if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB && - N0.getOperand(0) == N1.getOperand(1)) - return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), - N0.getOperand(1)); + if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) && + sd_match(N1, m_Sub(m_Value(C), m_Specific(A)))) + return DAG.getNode(ISD::SUB, DL, VT, C, B); // fold ((A-B)+(B-C)) -> (A-C) - if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB && - N0.getOperand(1) == N1.getOperand(0)) - return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), - N1.getOperand(1)); + if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) && + sd_match(N1, m_Sub(m_Specific(B), m_Value(C)))) + return DAG.getNode(ISD::SUB, DL, VT, A, C); // fold (A+(B-(A+C))) to (B-C) - if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && - N0 == N1.getOperand(1).getOperand(0)) - return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), - N1.getOperand(1).getOperand(1)); - // fold (A+(B-(C+A))) to (B-C) - if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && - N0 == N1.getOperand(1).getOperand(1)) - return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), - N1.getOperand(1).getOperand(0)); + if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C))))) + return DAG.getNode(ISD::SUB, DL, VT, B, C); // fold (A+((B-A)+or-C)) to (B+or-C) - if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && - N1.getOperand(0).getOpcode() == ISD::SUB && - N0 == N1.getOperand(0).getOperand(1)) - return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0), - N1.getOperand(1)); + if (sd_match(N1, + m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)), + m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C))))) + return DAG.getNode(N1.getOpcode(), DL, VT, B, C); // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant - if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB && - N0->hasOneUse() && N1->hasOneUse()) { - SDValue N00 = N0.getOperand(0); - SDValue N01 = N0.getOperand(1); - SDValue N10 = N1.getOperand(0); - 
SDValue N11 = N1.getOperand(1); - - if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10)) - return DAG.getNode(ISD::SUB, DL, VT, - DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10), - DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11)); - } + if (sd_match(N0, m_OneUse(m_Sub(m_Value(A), m_Value(B)))) && + sd_match(N1, m_OneUse(m_Sub(m_Value(C), m_Value(D)))) && + (isConstantOrConstantVector(A) || isConstantOrConstantVector(C))) + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getNode(ISD::ADD, SDLoc(N0), VT, A, C), + DAG.getNode(ISD::ADD, SDLoc(N1), VT, B, D)); // fold (add (umax X, C), -C) --> (usubsat X, C) if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) { @@ -2937,17 +2837,76 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { // Limit this to after legalization if the add has wrap flags (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() && !N->getFlags().hasNoSignedWrap()))) { - SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), - DAG.getAllOnesConstant(DL, VT)); + SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT); return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not); } } // (x - y) + -1 -> add (xor y, -1), x if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && - isAllOnesOrAllOnesSplat(N1)) { - SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1); - return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0)); + isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) { + SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT); + return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0)); + } + + // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB). + // This can help if the inner add has multiple uses. + APInt CM, CA; + if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) { + if (VT.getScalarSizeInBits() <= 64) { + if (sd_match(N0, m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)), + m_ConstInt(CM)))) && + TLI.isLegalAddImmediate( + (CA * CM + CB->getAPIntValue()).getSExtValue())) { + SDNodeFlags Flags; + // If all the inputs are nuw, the outputs can be nuw. If all the input + // are _also_ nsw the outputs can be too. + if (N->getFlags().hasNoUnsignedWrap() && + N0->getFlags().hasNoUnsignedWrap() && + N0.getOperand(0)->getFlags().hasNoUnsignedWrap()) { + Flags.setNoUnsignedWrap(true); + if (N->getFlags().hasNoSignedWrap() && + N0->getFlags().hasNoSignedWrap() && + N0.getOperand(0)->getFlags().hasNoSignedWrap()) + Flags.setNoSignedWrap(true); + } + SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A, + DAG.getConstant(CM, DL, VT), Flags); + return DAG.getNode( + ISD::ADD, DL, VT, Mul, + DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags); + } + // Also look in case there is an intermediate add. + if (sd_match(N0, m_OneUse(m_Add( + m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)), + m_ConstInt(CM))), + m_Value(B)))) && + TLI.isLegalAddImmediate( + (CA * CM + CB->getAPIntValue()).getSExtValue())) { + SDNodeFlags Flags; + // If all the inputs are nuw, the outputs can be nuw. If all the input + // are _also_ nsw the outputs can be too. + SDValue OMul = + N0.getOperand(0) == B ? 
N0.getOperand(1) : N0.getOperand(0); + if (N->getFlags().hasNoUnsignedWrap() && + N0->getFlags().hasNoUnsignedWrap() && + OMul->getFlags().hasNoUnsignedWrap() && + OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) { + Flags.setNoUnsignedWrap(true); + if (N->getFlags().hasNoSignedWrap() && + N0->getFlags().hasNoSignedWrap() && + OMul->getFlags().hasNoSignedWrap() && + OMul.getOperand(0)->getFlags().hasNoSignedWrap()) + Flags.setNoSignedWrap(true); + } + SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A, + DAG.getConstant(CM, DL, VT), Flags); + SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags); + return DAG.getNode( + ISD::ADD, DL, VT, Add, + DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags); + } + } } if (SDValue Combined = visitADDLikeCommutative(N0, N1, N)) @@ -2959,6 +2918,28 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { return SDValue(); } +// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1) +SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) { + SDValue N0 = N->getOperand(0); + EVT VT = N0.getValueType(); + SDValue A, B; + + if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) && + sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)), + m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), + m_SpecificInt(1))))) { + return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B); + } + if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) && + sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)), + m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), + m_SpecificInt(1))))) { + return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B); + } + + return SDValue(); +} + SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2968,16 +2949,23 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (SDValue Combined = visitADDLike(N)) return Combined; - if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG)) + if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG)) + return V; + + if (SDValue V = foldAddSubOfSignBit(N, DL, DAG)) return V; - if (SDValue V = foldAddSubOfSignBit(N, DAG)) + // Try to match AVGFLOOR fixedwidth pattern + if (SDValue V = foldAddToAvg(N, DL)) return V; // fold (a+b) -> (a|b) iff a and b share no bits. if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && - DAG.haveNoCommonBitsSet(N0, N1)) - return DAG.getNode(ISD::OR, DL, VT, N0, N1); + DAG.haveNoCommonBitsSet(N0, N1)) { + SDNodeFlags Flags; + Flags.setDisjoint(true); + return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags); + } // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)). if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) { @@ -3139,17 +3127,15 @@ static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, /// Helper for doing combines based on N0 and N1 being added to each other. 
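[Editor's note: foldAddToAvg above relies on the classic overflow-free average, (A & B) + ((A ^ B) >> 1) == floor((A + B) / 2) for unsigned values (the arithmetic-shift variant covers signed), and the new disjoint-OR rewrite relies on a + b == a | b whenever a & b == 0. Both are easy to verify exhaustively at 8 bits; a runnable check:]

// Exhaustive 8-bit check of the two identities used above: the
// overflow-free floor average behind AVGFLOORU, and add -> disjoint or.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      uint8_t a = A, b = B;
      // (a & b) + ((a ^ b) >> 1) == floor((a + b) / 2), no wide add needed,
      // because a + b == 2*(a & b) + (a ^ b) and 2*(a & b) is even.
      assert(uint8_t((a & b) + ((a ^ b) >> 1)) == uint8_t((A + B) / 2));
      // a + b == a | b whenever a and b share no set bits.
      if ((a & b) == 0)
        assert(uint8_t(a + b) == uint8_t(a | b));
    }
  std::puts("avgfloor and disjoint-or identities hold for all 8-bit pairs");
}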
SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1, - SDNode *LocReference) { + SDNode *LocReference) { EVT VT = N0.getValueType(); SDLoc DL(LocReference); // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) - if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && - isNullOrNullSplat(N1.getOperand(0).getOperand(0))) + SDValue Y, N; + if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N)))) return DAG.getNode(ISD::SUB, DL, VT, N0, - DAG.getNode(ISD::SHL, DL, VT, - N1.getOperand(0).getOperand(1), - N1.getOperand(1))); + DAG.getNode(ISD::SHL, DL, VT, Y, N)); if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL)) return V; @@ -3163,8 +3149,7 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1, // Limit this to after legalization if the add has wrap flags (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() && !N0->getFlags().hasNoSignedWrap()))) { - SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), - DAG.getAllOnesConstant(DL, VT)); + SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT); return DAG.getNode(ISD::SUB, DL, VT, N1, Not); } @@ -3447,7 +3432,7 @@ SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) { } /** - * If we are facing some sort of diamond carry propapagtion pattern try to + * If we are facing some sort of diamond carry propagation pattern try to * break it up to generate something like: * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry) * @@ -3488,7 +3473,7 @@ static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner, Z = Carry0.getOperand(2); } else if (Carry0.getOpcode() == ISD::UADDO && isOneConstant(Carry0.getOperand(1))) { - EVT VT = Combiner.getSetCCResultType(Carry0.getValueType()); + EVT VT = Carry0->getValueType(1); Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT); } else { // We couldn't find a suitable Z. @@ -3608,6 +3593,8 @@ static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, return SDValue(); SDLoc DL(N); + CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1), + Carry1->getValueType(0)); SDValue Merged = DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0), Carry0.getOperand(1), CarryIn); @@ -3741,7 +3728,7 @@ static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to // usubsat(a,b), optionally as a truncated type. 
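[Editor's note on foldSubToUSubSat, whose DL-threading rewrite follows below: the patterns it matches, umax(a,b) - b and a - umin(a,b), are both exactly the saturating difference usubsat(a,b) = (a >= b ? a - b : 0). A runnable exhaustive check at 8 bits:]

// Exhaustive 8-bit check that the umax/umin patterns matched by
// foldSubToUSubSat are exactly the saturating subtraction usubsat(a, b).
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>

static uint8_t usubsat(uint8_t a, uint8_t b) { return a >= b ? a - b : 0; }

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      uint8_t a = A, b = B;
      assert(uint8_t(std::max(a, b) - b) == usubsat(a, b)); // umax(a,b) - b
      assert(uint8_t(a - std::min(a, b)) == usubsat(a, b)); // a - umin(a,b)
    }
  std::puts("usubsat patterns verified for all 8-bit pairs");
}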
-SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) { +SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) { if (N->getOpcode() != ISD::SUB || !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT))) return SDValue(); @@ -3756,18 +3743,18 @@ SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) { SDValue MaxLHS = Op0.getOperand(0); SDValue MaxRHS = Op0.getOperand(1); if (MaxLHS == Op1) - return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N)); + return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL); if (MaxRHS == Op1) - return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N)); + return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL); } if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) { SDValue MinLHS = Op1.getOperand(0); SDValue MinRHS = Op1.getOperand(1); if (MinLHS == Op0) - return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N)); + return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL); if (MinRHS == Op0) - return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N)); + return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL); } // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit))) @@ -3778,10 +3765,10 @@ SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) { SDValue MinRHS = Op1.getOperand(0).getOperand(1); if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0) return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS, - DAG, SDLoc(N)); + DAG, DL); if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0) return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS, - DAG, SDLoc(N)); + DAG, DL); } return SDValue(); @@ -3802,6 +3789,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); + unsigned BitWidth = VT.getScalarSizeInBits(); SDLoc DL(N); auto PeekThroughFreeze = [](SDValue N) { @@ -3832,16 +3820,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; - ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); - // fold (sub x, c) -> (add x, -c) - if (N1C) { + if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1)) return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); - } if (isNullOrNullSplat(N0)) { - unsigned BitWidth = VT.getScalarSizeInBits(); // Right-shifting everything out but the sign bit followed by negation is // the same as flipping arithmetic/logical shift type without the negation: // -(X >>u 31) -> (X >>s 31) @@ -3932,63 +3916,34 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1)); } - // fold ((A+(B+or-C))-B) -> A+or-C - if (N0.getOpcode() == ISD::ADD && - (N0.getOperand(1).getOpcode() == ISD::SUB || - N0.getOperand(1).getOpcode() == ISD::ADD) && - N0.getOperand(1).getOperand(0) == N1) - return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0), - N0.getOperand(1).getOperand(1)); - - // fold ((A+(C+B))-B) -> A+C - if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD && - N0.getOperand(1).getOperand(1) == N1) - return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), - N0.getOperand(1).getOperand(0)); + SDValue A, B, C; + + // fold ((A+(B+C))-B) -> A+C + if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C))))) + return DAG.getNode(ISD::ADD, DL, VT, A, C); + + // fold ((A+(B-C))-B) -> A-C + if 
(sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C))))) + return DAG.getNode(ISD::SUB, DL, VT, A, C); // fold ((A-(B-C))-C) -> A-B - if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB && - N0.getOperand(1).getOperand(1) == N1) - return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), - N0.getOperand(1).getOperand(0)); + if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1))))) + return DAG.getNode(ISD::SUB, DL, VT, A, B); // fold (A-(B-C)) -> A+(C-B) - if (N1.getOpcode() == ISD::SUB && N1.hasOneUse()) + if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C))))) return DAG.getNode(ISD::ADD, DL, VT, N0, - DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1), - N1.getOperand(0))); + DAG.getNode(ISD::SUB, DL, VT, C, B)); // A - (A & B) -> A & (~B) - if (N1.getOpcode() == ISD::AND) { - SDValue A = N1.getOperand(0); - SDValue B = N1.getOperand(1); - if (A != N0) - std::swap(A, B); - if (A == N0 && - (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) { - SDValue InvB = - DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT)); - return DAG.getNode(ISD::AND, DL, VT, A, InvB); - } - } + if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) && + (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) + return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT)); - // fold (X - (-Y * Z)) -> (X + (Y * Z)) - if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) { - if (N1.getOperand(0).getOpcode() == ISD::SUB && - isNullOrNullSplat(N1.getOperand(0).getOperand(0))) { - SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, - N1.getOperand(0).getOperand(1), - N1.getOperand(1)); - return DAG.getNode(ISD::ADD, DL, VT, N0, Mul); - } - if (N1.getOperand(1).getOpcode() == ISD::SUB && - isNullOrNullSplat(N1.getOperand(1).getOperand(0))) { - SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, - N1.getOperand(0), - N1.getOperand(1).getOperand(1)); - return DAG.getNode(ISD::ADD, DL, VT, N0, Mul); - } - } + // fold (A - (-B * C)) -> (A + (B * C)) + if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C))))) + return DAG.getNode(ISD::ADD, DL, VT, N0, + DAG.getNode(ISD::MUL, DL, VT, B, C)); // If either operand of a sub is undef, the result is undef if (N0.isUndef()) @@ -3996,24 +3951,25 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N1.isUndef()) return N1; - if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG)) + if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG)) return V; - if (SDValue V = foldAddSubOfSignBit(N, DAG)) + if (SDValue V = foldAddSubOfSignBit(N, DL, DAG)) return V; - if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N))) + // Try to match AVGCEIL fixedwidth pattern + if (SDValue V = foldSubToAvg(N, DL)) return V; - if (SDValue V = foldSubToUSubSat(VT, N)) + if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL)) return V; - // (x - y) - 1 -> add (xor y, -1), x - if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isOneOrOneSplat(N1)) { - SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), - DAG.getAllOnesConstant(DL, VT)); - return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0)); - } + if (SDValue V = foldSubToUSubSat(VT, N, DL)) + return V; + + // (A - B) - 1 -> add (xor B, -1), A + if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))), m_One()))) + return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT)); // Look for: // sub y, (xor x, -1) @@ -4026,7 +3982,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // Hoist one-use addition by non-opaque constant: // (x + C) - y -> (x - y) + C 
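The visitSUB hunks above swap hand-rolled operand walks for sd_match patterns; every fold involved, including the constant-hoisting one named in the comment immediately above, is a plain identity of wrapping two's-complement arithmetic. A standalone i8 sketch, not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b)
      for (unsigned c = 0; c < 256; c += 17) { // stride just keeps the triple loop cheap
        uint8_t A = a, B = b, C = c;
        assert((uint8_t)((A + (B + C)) - B) == (uint8_t)(A + C)); // ((A+(B+C))-B) -> A+C
        assert((uint8_t)((A + (B - C)) - B) == (uint8_t)(A - C)); // ((A+(B-C))-B) -> A-C
        assert((uint8_t)(A - (A & B)) == (uint8_t)(A & ~B));      // A - (A & B) -> A & (~B)
        assert((uint8_t)(A - (uint8_t)(-B * C)) ==
               (uint8_t)(A + B * C));                             // (A - (-B * C)) -> (A + (B * C))
        assert((uint8_t)((A - B) - 1) == (uint8_t)(A + ~B));      // (A - B) - 1 -> add (xor B, -1), A
        assert((uint8_t)((A + C) - B) == (uint8_t)((A - B) + C)); // (x + C) - y -> (x - y) + C
      }
  return 0;
}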
- if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && + if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) && + N0.getOpcode() == ISD::ADD && N0.hasOneUse() && isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1); return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1)); @@ -4062,17 +4019,11 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return DAG.getNode(ISD::ADD, DL, VT, N0, SExt); } - // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X) - if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { - if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) { - SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1); - SDValue S0 = N1.getOperand(0); - if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) - if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1))) - if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1)) - return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0); - } - } + // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A) + if ((!LegalOperations || hasOperation(ISD::ABS, VT)) && + sd_match(N1, m_Sra(m_Value(A), m_SpecificInt(BitWidth - 1))) && + sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1)))) + return DAG.getNode(ISD::ABS, DL, VT, A); // If the relocation model supports it, consider symbol offsets. if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) @@ -4112,8 +4063,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) { SDValue ShAmt = N1.getOperand(1); ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt); - if (ShAmtC && - ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) { + if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) { SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt); return DAG.getNode(ISD::ADD, DL, VT, N0, SRA); } @@ -4124,7 +4074,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // N0 - (X << BW-1) --> N0 + (X << BW-1) if (N1.getOpcode() == ISD::SHL) { ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1)); - if (ShlC && ShlC->getAPIntValue() == VT.getScalarSizeInBits() - 1) + if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1)) return DAG.getNode(ISD::ADD, DL, VT, N1, N0); } @@ -4157,23 +4107,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } } - // max(a,b) - min(a,b) --> abd(a,b) - auto MatchSubMaxMin = [&](unsigned Max, unsigned Min, unsigned Abd) { - if (N0.getOpcode() != Max || N1.getOpcode() != Min) - return SDValue(); - if ((N0.getOperand(0) != N1.getOperand(0) || - N0.getOperand(1) != N1.getOperand(1)) && - (N0.getOperand(0) != N1.getOperand(1) || - N0.getOperand(1) != N1.getOperand(0))) - return SDValue(); - if (!hasOperation(Abd, VT)) - return SDValue(); - return DAG.getNode(Abd, DL, VT, N0.getOperand(0), N0.getOperand(1)); - }; - if (SDValue R = MatchSubMaxMin(ISD::SMAX, ISD::SMIN, ISD::ABDS)) - return R; - if (SDValue R = MatchSubMaxMin(ISD::UMAX, ISD::UMIN, ISD::ABDU)) - return R; + // smax(a,b) - smin(a,b) --> abds(a,b) + if (hasOperation(ISD::ABDS, VT) && + sd_match(N0, m_SMax(m_Value(A), m_Value(B))) && + sd_match(N1, m_SMin(m_Specific(A), m_Specific(B)))) + return DAG.getNode(ISD::ABDS, DL, VT, A, B); + + // umax(a,b) - umin(a,b) --> abdu(a,b) + if (hasOperation(ISD::ABDU, VT) && + sd_match(N0, m_UMax(m_Value(A), m_Value(B))) && + sd_match(N1, m_UMin(m_Specific(A), m_Specific(B)))) + return DAG.getNode(ISD::ABDU, DL, VT, A, B); return SDValue(); } @@ -4266,13 +4210,11 @@ SDValue 
DAGCombiner::visitSUBO(SDNode *N) { return CombineTo(N, DAG.getConstant(0, DL, VT), DAG.getConstant(0, DL, CarryVT)); - ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); - // fold (subox, c) -> (addo x, -c) - if (IsSigned && N1C && !N1C->isMinSignedValue()) { - return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, - DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); - } + if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1)) + if (IsSigned && !N1C->isMinSignedValue()) + return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, + DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); // fold (subo x, 0) -> x + no borrow if (isNullOrNullSplat(N1)) @@ -4357,11 +4299,14 @@ SDValue DAGCombiner::visitMULFIX(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitMUL(SDNode *N) { +template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); + unsigned BitWidth = VT.getScalarSizeInBits(); SDLoc DL(N); + bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>; + MatchContextClass Matcher(DAG, TLI, N); // fold (mul x, undef) -> 0 if (N0.isUndef() || N1.isUndef()) @@ -4374,7 +4319,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // canonicalize constant to RHS (vector doesn't have to splat) if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::MUL, DL, VT, N1, N0); + return Matcher.getNode(ISD::MUL, DL, VT, N1, N0); bool N1IsConst = false; bool N1IsOpaqueConst = false; @@ -4382,12 +4327,13 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) - return FoldedVOp; + // TODO: Change this to use SimplifyVBinOp when it supports VP op. + if (!UseVP) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) + return FoldedVOp; N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1); - assert((!N1IsConst || - ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) && + assert((!N1IsConst || ConstValue1.getBitWidth() == BitWidth) && "Splat APInt should be element width"); } else { N1IsConst = isa<ConstantSDNode>(N1); @@ -4405,12 +4351,13 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if (N1IsConst && ConstValue1.isOne()) return N0; - if (SDValue NewSel = foldBinOpIntoSelect(N)) - return NewSel; + if (!UseVP) + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; // fold (mul x, -1) -> 0-x if (N1IsConst && ConstValue1.isAllOnes()) - return DAG.getNegative(N0, DL, VT); + return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); // fold (mul x, (1 << c)) -> x << c if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && @@ -4418,35 +4365,36 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if (SDValue LogBase2 = BuildLogBase2(N1, DL)) { EVT ShiftVT = getShiftAmountTy(N0.getValueType()); SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); - return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc); + return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc); } } // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) { unsigned Log2Val = (-ConstValue1).logBase2(); - EVT ShiftVT = getShiftAmountTy(N0.getValueType()); // FIXME: If the input is something that is easily negated (e.g. a // single-use add), we should put the negate there. 
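In the visitMUL hunk above (now templated over MatchContextClass so the same combines can serve VP_MUL), the constant special cases are straight shift and negate identities; the FIXME above is only about where the negate is placed, and the rewritten emission below keeps the sub(0, shl(x, log2)) form while building the amount with getShiftAmountConstant. The identities themselves, as a standalone i32 sketch, not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Tests[] = {0u, 1u, 7u, 0x1234u, 0x80000000u, 0xFFFFFFFFu};
  for (uint32_t X : Tests) {
    assert(X * 0xFFFFFFFFu == 0u - X);       // mul x, -1      ->  sub 0, x
    assert(X * 16u == X << 4);               // mul x, (1<<4)  ->  shl x, 4
    assert(X * (0u - 16u) == 0u - (X << 4)); // mul x, -(1<<4) ->  sub 0, (shl x, 4)
  }
  return 0;
}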
- return DAG.getNode(ISD::SUB, DL, VT, - DAG.getConstant(0, DL, VT), - DAG.getNode(ISD::SHL, DL, VT, N0, - DAG.getConstant(Log2Val, DL, ShiftVT))); + return Matcher.getNode( + ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), + Matcher.getNode(ISD::SHL, DL, VT, N0, + DAG.getShiftAmountConstant(Log2Val, VT, DL))); } // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the // hi result is in use in case we hit this mid-legalization. - for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) { - if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) { - SDVTList LoHiVT = DAG.getVTList(VT, VT); - // TODO: Can we match commutable operands with getNodeIfExists? - if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1})) - if (LoHi->hasAnyUseOfValue(1)) - return SDValue(LoHi, 0); - if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0})) - if (LoHi->hasAnyUseOfValue(1)) - return SDValue(LoHi, 0); + if (!UseVP) { + for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) { + if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) { + SDVTList LoHiVT = DAG.getVTList(VT, VT); + // TODO: Can we match commutable operands with getNodeIfExists? + if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1})) + if (LoHi->hasAnyUseOfValue(1)) + return SDValue(LoHi, 0); + if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0})) + if (LoHi->hasAnyUseOfValue(1)) + return SDValue(LoHi, 0); + } } } @@ -4465,7 +4413,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // x * 0xf800 --> (x << 16) - (x << 11) // x * -0x8800 --> -((x << 15) + (x << 11)) // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16) - if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) { + if (!UseVP && N1IsConst && + TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) { // TODO: We could handle more general decomposition of any constant by // having the target set a limit on number of ops and making a // callback to determine that sequence (similar to sqrt expansion). @@ -4483,7 +4432,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2(); ShAmt += TZeros; - assert(ShAmt < VT.getScalarSizeInBits() && + assert(ShAmt < BitWidth && "multiply-by-constant generated out of bounds shift"); SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT)); @@ -4499,7 +4448,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } // (mul (shl X, c1), c2) -> (mul X, c2 << c1) - if (N0.getOpcode() == ISD::SHL) { + if (sd_context_match(N0, Matcher, m_Opc(ISD::SHL))) { SDValue N01 = N0.getOperand(1); if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01})) return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3); @@ -4511,34 +4460,33 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue Sh, Y; // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). 
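Both operand orders are checked right below via sd_context_match; these reassociations, like the decomposeMulByConstant shift-add path above, rely on shifts distributing over multiplication modulo 2^BW. A standalone i32 sketch, not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Tests[] = {0u, 1u, 3u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t X : Tests) {
    for (uint32_t Y : Tests)
      assert((X << 5) * Y == (X * Y) << 5);       // (mul (shl X, C), Y) -> (shl (mul X, Y), C)
    assert((X << 5) * 7u == X * (7u << 5));       // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
    assert(X * 0x8800u == (X << 15) + (X << 11)); // x * 0x8800 -> (x << 15) + (x << 11)
  }
  return 0;
}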
- if (N0.getOpcode() == ISD::SHL && - isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) { + if (sd_context_match(N0, Matcher, m_OneUse(m_Opc(ISD::SHL))) && + isConstantOrConstantVector(N0.getOperand(1))) { Sh = N0; Y = N1; - } else if (N1.getOpcode() == ISD::SHL && - isConstantOrConstantVector(N1.getOperand(1)) && - N1->hasOneUse()) { + } else if (sd_context_match(N1, Matcher, m_OneUse(m_Opc(ISD::SHL))) && + isConstantOrConstantVector(N1.getOperand(1))) { Sh = N1; Y = N0; } if (Sh.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y); - return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1)); + SDValue Mul = Matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y); + return Matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1)); } } // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) - if (N0.getOpcode() == ISD::ADD && + if (sd_context_match(N0, Matcher, m_Opc(ISD::ADD)) && DAG.isConstantIntBuildVectorOrConstantInt(N1) && DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && isMulAddWithConstProfitable(N, N0, N1)) - return DAG.getNode( + return Matcher.getNode( ISD::ADD, DL, VT, - DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1), - DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1)); + Matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1), + Matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1)); // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)). ConstantSDNode *NC1 = isConstOrConstSplat(N1); - if (N0.getOpcode() == ISD::VSCALE && NC1) { + if (!UseVP && N0.getOpcode() == ISD::VSCALE && NC1) { const APInt &C0 = N0.getConstantOperandAPInt(0); const APInt &C1 = NC1->getAPIntValue(); return DAG.getVScale(DL, VT, C0 * C1); @@ -4546,13 +4494,23 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)). APInt MulVal; - if (N0.getOpcode() == ISD::STEP_VECTOR && + if (!UseVP && N0.getOpcode() == ISD::STEP_VECTOR && ISD::isConstantSplatVector(N1.getNode(), MulVal)) { const APInt &C0 = N0.getConstantOperandAPInt(0); APInt NewStep = C0 * MulVal; return DAG.getStepVector(DL, VT, NewStep); } + // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X) + SDValue X; + if (!UseVP && (!LegalOperations || hasOperation(ISD::ABS, VT)) && + sd_context_match( + N, Matcher, + m_Mul(m_Or(m_Sra(m_Value(X), m_SpecificInt(BitWidth - 1)), m_One()), + m_Deferred(X)))) { + return Matcher.getNode(ISD::ABS, DL, VT, X); + } + // Fold ((mul x, 0/undef) -> 0, // (mul x, 1) -> x) -> x) // -> and(x, mask) @@ -4584,13 +4542,17 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } // reassociate mul - if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags())) - return RMUL; + // TODO: Change reassociateOps to support vp ops. + if (!UseVP) + if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags())) + return RMUL; // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y)) - if (SDValue SD = - reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1)) - return SD; + // TODO: Change reassociateReduction to support vp ops. + if (!UseVP) + if (SDValue SD = + reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1)) + return SD; // Simplify the operands using demanded-bits information. 
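Before the demanded-bits simplification that the comment above introduces, one new fold from this hunk is worth pinning down: mul(or(sra(X, BW-1), 1), X) computes |X|, since or(X s>> BW-1, 1) is +1 for non-negative X and -1 otherwise. A standalone i32 sketch, not part of the patch, assuming arithmetic right shift on signed int (guaranteed only since C++20, though universal in practice):

#include <cassert>
#include <cstdint>

int main() {
  const int32_t Tests[] = {0, 1, -1, 42, -42, INT32_MAX, INT32_MIN};
  for (int32_t X : Tests) {
    uint32_t U = (uint32_t)X;
    uint32_t Sign = (uint32_t)(X >> 31); // sra(X, 31): 0 or all-ones (assumes arithmetic >>)
    uint32_t PM1 = Sign | 1u;            // or(Y, 1): +1 for X >= 0, -1 for X < 0
    // mul(or(Y, 1), X) == abs(X); INT32_MIN wraps to itself, as ISD::ABS does.
    assert(PM1 * U == (X < 0 ? 0u - U : U));
  }
  return 0;
}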
if (SimplifyDemandedBits(SDValue(N, 0))) @@ -5115,9 +5077,9 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { // fold (mulhs x, 1) -> (sra x, size(x)-1) if (isOneConstant(N1)) - return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, - DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL, - getShiftAmountTy(N0.getValueType()))); + return DAG.getNode( + ISD::SRA, DL, VT, N0, + DAG.getShiftAmountConstant(N0.getScalarValueSizeInBits() - 1, VT, DL)); // fold (mulhs x, undef) -> 0 if (N0.isUndef() || N1.isUndef()) @@ -5135,8 +5097,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, - DAG.getConstant(SimpleSize, DL, - getShiftAmountTy(N1.getValueType()))); + DAG.getShiftAmountConstant(SimpleSize, NewVT, DL)); return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); } } @@ -5175,7 +5136,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { // fold (mulhu x, 1) -> 0 if (isOneConstant(N1)) - return DAG.getConstant(0, DL, N0.getValueType()); + return DAG.getConstant(0, DL, VT); // fold (mulhu x, undef) -> 0 if (N0.isUndef() || N1.isUndef()) @@ -5206,8 +5167,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, - DAG.getConstant(SimpleSize, DL, - getShiftAmountTy(N1.getValueType()))); + DAG.getShiftAmountConstant(SimpleSize, NewVT, DL)); return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); } } @@ -5227,6 +5187,7 @@ SDValue DAGCombiner::visitAVG(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); SDLoc DL(N); + bool IsSigned = Opcode == ISD::AVGCEILS || Opcode == ISD::AVGFLOORS; // fold (avg c1, c2) if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1})) @@ -5237,30 +5198,60 @@ SDValue DAGCombiner::visitAVG(SDNode *N) { !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0); - if (VT.isVector()) { + if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; - // fold (avgfloor x, 0) -> x >> 1 - if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) { - if (Opcode == ISD::AVGFLOORS) - return DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(1, DL, VT)); - if (Opcode == ISD::AVGFLOORU) - return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(1, DL, VT)); - } - } - // fold (avg x, undef) -> x if (N0.isUndef()) return N1; if (N1.isUndef()) return N0; - // Fold (avg x, x) --> x + // fold (avg x, x) --> x if (N0 == N1 && Level >= AfterLegalizeTypes) return N0; - // TODO If we use avg for scalars anywhere, we can add (avgfl x, 0) -> x >> 1 + // fold (avgfloor x, 0) -> x >> 1 + SDValue X, Y; + if (sd_match(N, m_c_BinOp(ISD::AVGFLOORS, m_Value(X), m_Zero()))) + return DAG.getNode(ISD::SRA, DL, VT, X, + DAG.getShiftAmountConstant(1, VT, DL)); + if (sd_match(N, m_c_BinOp(ISD::AVGFLOORU, m_Value(X), m_Zero()))) + return DAG.getNode(ISD::SRL, DL, VT, X, + DAG.getShiftAmountConstant(1, VT, DL)); + + // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y)) + // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y)) + if (!IsSigned && + sd_match(N, m_BinOp(Opcode, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) && + X.getValueType() == Y.getValueType() && + hasOperation(Opcode, X.getValueType())) { + SDValue AvgU = DAG.getNode(Opcode, DL, X.getValueType(), X, Y); + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgU); + } + if (IsSigned && + sd_match(N, m_BinOp(Opcode, 
m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) && + X.getValueType() == Y.getValueType() && + hasOperation(Opcode, X.getValueType())) { + SDValue AvgS = DAG.getNode(Opcode, DL, X.getValueType(), X, Y); + return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgS); + } + + // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0 + // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0 + // Check if avgflooru isn't legal/custom but avgceilu is. + if (Opcode == ISD::AVGFLOORU && !hasOperation(ISD::AVGFLOORU, VT) && + (!LegalOperations || hasOperation(ISD::AVGCEILU, VT))) { + if (DAG.isKnownNeverZero(N1)) + return DAG.getNode( + ISD::AVGCEILU, DL, VT, N0, + DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getAllOnesConstant(DL, VT))); + if (DAG.isKnownNeverZero(N0)) + return DAG.getNode( + ISD::AVGCEILU, DL, VT, N1, + DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getAllOnesConstant(DL, VT))); + } return SDValue(); } @@ -5281,24 +5272,25 @@ SDValue DAGCombiner::visitABD(SDNode *N) { !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0); - if (VT.isVector()) { + if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; - // fold (abds x, 0) -> abs x - // fold (abdu x, 0) -> x - if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) { - if (Opcode == ISD::ABDS) - return DAG.getNode(ISD::ABS, DL, VT, N0); - if (Opcode == ISD::ABDU) - return N0; - } - } - // fold (abd x, undef) -> 0 if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, DL, VT); + SDValue X; + + // fold (abds x, 0) -> abs x + if (sd_match(N, m_c_BinOp(ISD::ABDS, m_Value(X), m_Zero())) && + (!LegalOperations || hasOperation(ISD::ABS, VT))) + return DAG.getNode(ISD::ABS, DL, VT, X); + + // fold (abdu x, 0) -> x + if (sd_match(N, m_c_BinOp(ISD::ABDU, m_Value(X), m_Zero()))) + return X; + // fold (abds x, y) -> (abdu x, y) iff both args are known positive if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) && DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1)) @@ -5386,8 +5378,7 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); // Compute the high part as N1. Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, - DAG.getConstant(SimpleSize, DL, - getShiftAmountTy(Lo.getValueType()))); + DAG.getShiftAmountConstant(SimpleSize, NewVT, DL)); Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); // Compute the low part as N0. Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); @@ -5440,8 +5431,7 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); // Compute the high part as N1. Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, - DAG.getConstant(SimpleSize, DL, - getShiftAmountTy(Lo.getValueType()))); + DAG.getShiftAmountConstant(SimpleSize, NewVT, DL)); Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); // Compute the low part as N0. Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); @@ -5700,10 +5690,17 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; + // reassociate minmax + if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags())) + return RMINMAX; + // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX. - // Only do this if the current op isn't legal and the flipped is. - if (!TLI.isOperationLegal(Opcode, VT) && - (N0.isUndef() || DAG.SignBitIsZero(N0)) && + // Only do this if: + // 1. The current op isn't legal and the flipped is. + // 2. The saturation pattern is broken by canonicalization in InstCombine. 
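A note on the AVG hunk above, before the UMIN/SMAX flip gated by the two conditions just listed is implemented below: avgflooru/avgceilu are the overflow-free floor and ceiling averages, and the flooru-to-ceilu rewrite is sound exactly when the decremented operand is known nonzero. A standalone i8 sketch, not part of the patch, with avgflooru8/avgceilu8 as hypothetical helpers in the usual Hacker's Delight forms:

#include <cassert>
#include <cstdint>

static uint8_t avgflooru8(uint8_t x, uint8_t y) { return (x & y) + ((x ^ y) >> 1); }
static uint8_t avgceilu8(uint8_t x, uint8_t y) { return (x | y) - ((x ^ y) >> 1); }

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y) {
      uint8_t X = x, Y = y;
      assert(avgflooru8(X, Y) == (uint8_t)((x + y) / 2));    // floor((x+y)/2), no overflow
      assert(avgceilu8(X, Y) == (uint8_t)((x + y + 1) / 2)); // ceil((x+y)/2), no overflow
      if (Y != 0) // fold avgflooru(x,y) -> avgceilu(x, y-1) iff y != 0
        assert(avgflooru8(X, Y) == avgceilu8(X, (uint8_t)(Y - 1)));
    }
  return 0;
}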
+ bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT); + bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX; + if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) && (N1.isUndef() || DAG.SignBitIsZero(N1))) { unsigned AltOpcode; switch (Opcode) { @@ -5713,7 +5710,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { case ISD::UMAX: AltOpcode = ISD::SMAX; break; default: llvm_unreachable("Unknown MINMAX opcode"); } - if (TLI.isOperationLegal(AltOpcode, VT)) + if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT)) return DAG.getNode(AltOpcode, DL, VT, N0, N1); } @@ -6377,7 +6374,7 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) { // TODO: Rewrite this to return a new 'AND' instead of using CombineTo. if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && - VT.getSizeInBits() <= 64 && N0->hasOneUse()) { + VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) { if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal @@ -6828,35 +6825,25 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) { /// For targets that support usubsat, match a bit-hack form of that operation /// that ends in 'and' and convert it. -static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) { - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - EVT VT = N1.getValueType(); - - // Canonicalize SRA as operand 1. - if (N0.getOpcode() == ISD::SRA) - std::swap(N0, N1); - - // xor/add with SMIN (signmask) are logically equivalent. - if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD) - return SDValue(); - - if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() || - N0.getOperand(0) != N1.getOperand(0)) - return SDValue(); - +static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) { + EVT VT = N->getValueType(0); unsigned BitWidth = VT.getScalarSizeInBits(); - ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true); - ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true); - if (!XorC || !XorC->getAPIntValue().isSignMask() || - !SraC || SraC->getAPIntValue() != BitWidth - 1) - return SDValue(); + APInt SignMask = APInt::getSignMask(BitWidth); // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128 - SDLoc DL(N); - SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT); - return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask); + // xor/add with SMIN (signmask) are logically equivalent. 
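The two commented forms above are what the rewritten sd_match matcher just below recognizes; the bit-hack works because X s>> 7 is an all-ones mask exactly when X has its sign bit set, which is exactly when usubsat(X, 128) is nonzero. An exhaustive i8 check as a standalone sketch, not part of the patch (again assuming arithmetic right shift of signed values):

#include <cassert>
#include <cstdint>

static uint8_t usubsat8(uint8_t a, uint8_t b) { return a > b ? a - b : 0; }

int main() {
  for (unsigned x = 0; x < 256; ++x) {
    uint8_t X = x;
    uint8_t Mask = (uint8_t)((int8_t)X >> 7); // X s>> 7: 0x00 or 0xFF
    assert((uint8_t)((X ^ 0x80) & Mask) == usubsat8(X, 0x80)); // xor-with-signmask form
    assert((uint8_t)((X + 0x80) & Mask) == usubsat8(X, 0x80)); // add-with-signmask form
  }
  return 0;
}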
+ SDValue X; + if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))), + m_OneUse(m_Sra(m_Deferred(X), + m_SpecificInt(BitWidth - 1))))) && + !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))), + m_OneUse(m_Sra(m_Deferred(X), + m_SpecificInt(BitWidth - 1)))))) + return SDValue(); + + return DAG.getNode(ISD::USUBSAT, DL, VT, X, + DAG.getConstant(SignMask, DL, VT)); } /// Given a bitwise logic operation N with a matching bitwise logic operand, @@ -6946,34 +6933,34 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N1.getValueType(); + SDLoc DL(N); // x & x --> x if (N0 == N1) return N0; // fold (and c1, c2) -> c1&c2 - if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1})) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1})) return C; // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); + return DAG.getNode(ISD::AND, DL, VT, N1, N0); if (areBitwiseNotOfEachother(N0, N1)) - return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), SDLoc(N), - VT); + return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT); // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (and x, 0) -> 0, vector edition if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) // do not return N1, because undef node may exist in N1 - return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), - SDLoc(N), N1.getValueType()); + return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL, + N1.getValueType()); // fold (and x, -1) -> x, vector edition if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) @@ -6993,8 +6980,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { uint64_t ElementSize = LoadVT.getVectorElementType().getScalarSizeInBits(); if (Splat->getAPIntValue().isMask(ElementSize)) { - auto NewLoad = DAG.getMaskedLoad( - ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(), + SDValue NewLoad = DAG.getMaskedLoad( + ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(), MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(), LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(), ISD::ZEXTLOAD, MLoad->isExpandingLoad()); @@ -7016,7 +7003,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { unsigned BitWidth = VT.getScalarSizeInBits(); ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth))) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); if (SDValue R = foldAndOrOfSETCC(N, DAG)) return R; @@ -7025,12 +7012,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return NewSel; // reassociate and - if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags())) + if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags())) return RAND; // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y)) - if (SDValue SD = reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, SDLoc(N), - VT, N0, N1)) + if (SDValue SD = + reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1)) return SD; // fold (and (or x, C), D) -> D if (C & D) == D @@ -7050,18 +7037,16 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top 
bits. if (DAG.MaskedValueIsZero(N0Op0, Mask)) - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0Op0); + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0); // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable. if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) && TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) && TLI.isTypeDesirableForOp(ISD::AND, SrcVT) && - TLI.isNarrowingProfitable(VT, SrcVT)) { - SDLoc DL(N); + TLI.isNarrowingProfitable(VT, SrcVT)) return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, DAG.getNode(ISD::AND, DL, SrcVT, N0Op0, DAG.getZExtOrTrunc(N1, DL, SrcVT))); - } } // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2))) @@ -7073,7 +7058,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) { DAG.isConstantIntBuildVectorOrConstantInt(N1) && DAG.isConstantIntBuildVectorOrConstantInt(N0Op0.getOperand(1)) && N0->hasOneUse() && N0Op0->hasOneUse()) { - SDLoc DL(N); SDValue NewMask = DAG.getNode(ISD::AND, DL, VT, N1, DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(1))); @@ -7094,8 +7078,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { N0.getOperand(0).getOpcode() == ISD::LOAD && N0.getOperand(0).getResNo() == 0) || (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) { - LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? - N0 : N0.getOperand(0) ); + auto *Load = + cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0)); // Get the constant (if applicable) the zero'th operand is being ANDed with. // This can be a pure constant or a vector splat, in which case we treat the @@ -7205,9 +7189,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // (and (extract_subvector (zext|anyext|sext v) _) iN_mask) // => (extract_subvector (iN_zeroext v)) SDValue ZeroExtExtendee = - DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), ExtVT, Extendee); + DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, ZeroExtExtendee, + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee, N0.getOperand(1)); } } @@ -7224,8 +7208,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()}; SDValue ZExtLoad = DAG.getMaskedGather( - DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops, - GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD); + DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(), + GN0->getIndexType(), ISD::ZEXTLOAD); CombineTo(N, ZExtLoad); AddToWorklist(ZExtLoad.getNode()); @@ -7277,7 +7261,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SubRHS; if (SubRHS.getOpcode() == ISD::SIGN_EXTEND && SubRHS.getOperand(0).getScalarValueSizeInBits() == 1) - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0)); + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SubRHS.getOperand(0)); } } @@ -7291,7 +7275,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (ISD::isUNINDEXEDLoad(N0.getNode()) && (ISD::isEXTLoad(N0.getNode()) || (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); + auto *LN0 = cast<LoadSDNode>(N0); EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. @@ -7346,10 +7330,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...). 
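That replacement (done just below once IsAndZeroExtMask confirms the mask) rests on a basic fact: masking a sign-extended value back down to its original width yields exactly the zero-extended value. A standalone sketch, not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  for (int v = -128; v < 128; ++v) {
    int8_t V = (int8_t)v;
    uint32_t SExtMasked = (uint32_t)(int32_t)V & 0xFFu; // (and (sign_extend V), 255)
    uint32_t ZExt = (uint8_t)V;                         // (zero_extend V)
    assert(SExtMasked == ZExt);
  }
  return 0;
}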
if (IsAndZeroExtMask(N0, N1)) - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0)); + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); if (hasOperation(ISD::USUBSAT, VT)) - if (SDValue V = foldAndToUsubsat(N, DAG)) + if (SDValue V = foldAndToUsubsat(N, DAG, DL)) return V; // Postpone until legalization completed to avoid interference with bswap @@ -7472,8 +7456,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, if (OpSizeInBits > 16) { SDLoc DL(N); Res = DAG.getNode(ISD::SRL, DL, VT, Res, - DAG.getConstant(OpSizeInBits - 16, DL, - getShiftAmountTy(VT))); + DAG.getShiftAmountConstant(OpSizeInBits - 16, VT, DL)); } return Res; } @@ -7591,7 +7574,7 @@ static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) { // (rotr (bswap A), 16) static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, - SDValue N1, EVT VT, EVT ShiftAmountTy) { + SDValue N1, EVT VT) { assert(N->getOpcode() == ISD::OR && VT == MVT::i32 && "MatchBSwapHWordOrAndAnd: expecting i32"); if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) @@ -7623,7 +7606,7 @@ static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SDLoc DL(N); SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0)); - SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy); + SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL); return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt); } @@ -7643,13 +7626,11 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)) return SDValue(); - if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT, - getShiftAmountTy(VT))) + if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT)) return BSwap; // Try again with commuted operands. - if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT, - getShiftAmountTy(VT))) + if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT)) return BSwap; @@ -7686,7 +7667,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { // Result of the bswap should be rotated by 16. If it's not legal, then // do (x << 16) | (x >> 16). - SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT)); + SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL); if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt); if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) @@ -7698,9 +7679,8 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { /// This contains all DAGCombine rules which reduce two values combined by /// an Or operation to a single value \see visitANDLike(). 
-SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) { +SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) { EVT VT = N1.getValueType(); - SDLoc DL(N); // fold (or x, undef) -> -1 if (!LegalOperations && (N0.isUndef() || N1.isUndef())) @@ -7753,6 +7733,8 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) { static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) { EVT VT = N0.getValueType(); + unsigned BW = VT.getScalarSizeInBits(); + SDLoc DL(N); auto peekThroughResize = [](SDValue V) { if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE) @@ -7775,36 +7757,30 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, if (SDValue NotOperand = getBitwiseNotOperand(N01, N00, /* AllowUndefs */ false)) { if (peekThroughResize(NotOperand) == N1Resized) - return DAG.getNode(ISD::OR, SDLoc(N), VT, - DAG.getZExtOrTrunc(N00, SDLoc(N), VT), N1); + return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT), + N1); } // fold (or (and (xor Y, -1), X), Y) -> (or X, Y) if (SDValue NotOperand = getBitwiseNotOperand(N00, N01, /* AllowUndefs */ false)) { if (peekThroughResize(NotOperand) == N1Resized) - return DAG.getNode(ISD::OR, SDLoc(N), VT, - DAG.getZExtOrTrunc(N01, SDLoc(N), VT), N1); + return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT), + N1); } } - if (N0.getOpcode() == ISD::XOR) { - // fold or (xor x, y), x --> or x, y - // or (xor x, y), (x and/or y) --> or x, y - SDValue N00 = N0.getOperand(0); - SDValue N01 = N0.getOperand(1); - if (N00 == N1) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1); - if (N01 == N1) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1); + SDValue X, Y; - if (N1.getOpcode() == ISD::AND || N1.getOpcode() == ISD::OR) { - SDValue N10 = N1.getOperand(0); - SDValue N11 = N1.getOperand(1); - if ((N00 == N10 && N01 == N11) || (N00 == N11 && N01 == N10)) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N01); - } - } + // fold or (xor X, N1), N1 --> or X, N1 + if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1)))) + return DAG.getNode(ISD::OR, DL, VT, X, N1); + + // fold or (xor x, y), (x and/or y) --> or x, y + if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) && + (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) || + sd_match(N1, m_Or(m_Specific(X), m_Specific(Y))))) + return DAG.getNode(ISD::OR, DL, VT, X, Y); if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG)) return R; @@ -7827,6 +7803,26 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1))) return N0; + // Attempt to match a legalized build_pair-esque pattern: + // or(shl(aext(Hi),BW/2),zext(Lo)) + SDValue Lo, Hi; + if (sd_match(N0, + m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) && + sd_match(N1, m_ZExt(m_Value(Lo))) && + Lo.getScalarValueSizeInBits() == (BW / 2) && + Lo.getValueType() == Hi.getValueType()) { + // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)). 
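The matcher for that build_pair fold follows below; it is valid because bitwise NOT acts independently on the two halves of a concatenation, so a pair of NOTs can be hoisted into a single NOT of the packed value. A standalone i16-into-i32 sketch, not part of the patch:

#include <cassert>
#include <cstdint>

// or(shl(anyext(Hi), 16), zext(Lo)): the legalized build_pair shape being matched.
static uint32_t pack(uint16_t Lo, uint16_t Hi) {
  return ((uint32_t)Hi << 16) | (uint32_t)Lo;
}

int main() {
  const uint16_t Tests[] = {0u, 1u, 0x00FFu, 0xABCDu, 0xFFFFu};
  for (uint16_t Lo : Tests)
    for (uint16_t Hi : Tests)
      // build_pair(not(Lo), not(Hi)) == not(build_pair(Lo, Hi))
      assert(pack((uint16_t)~Lo, (uint16_t)~Hi) == ~pack(Lo, Hi));
  return 0;
}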
+ SDValue NotLo, NotHi; + if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) && + sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) { + Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo); + Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi); + Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, + DAG.getShiftAmountConstant(BW / 2, VT, DL)); + return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT); + } + } + return SDValue(); } @@ -7834,23 +7830,24 @@ SDValue DAGCombiner::visitOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N1.getValueType(); + SDLoc DL(N); // x | x --> x if (N0 == N1) return N0; // fold (or c1, c2) -> c1|c2 - if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1})) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1})) return C; // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); + return DAG.getNode(ISD::OR, DL, VT, N1, N0); // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (or x, 0) -> x, vector edition @@ -7860,7 +7857,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // fold (or x, -1) -> -1, vector edition if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) // do not return N1, because undef node may exist in N1 - return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType()); + return DAG.getAllOnesConstant(DL, N1.getValueType()); // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask) // Do this only if the resulting type / shuffle is legal. @@ -7910,10 +7907,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (CanFold) { SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0); SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0); - SDValue LegalShuffle = - TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, - Mask, DAG); + TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG); if (LegalShuffle) return LegalShuffle; } @@ -7940,7 +7935,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (SDValue R = foldAndOrOfSETCC(N, DAG)) return R; - if (SDValue Combined = visitORLike(N0, N1, N)) + if (SDValue Combined = visitORLike(N0, N1, DL)) return Combined; if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N)) @@ -7953,12 +7948,12 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return BSwap; // reassociate or - if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags())) + if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags())) return ROR; // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y)) - if (SDValue SD = reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, SDLoc(N), - VT, N0, N1)) + if (SDValue SD = + reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1)) return SD; // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) @@ -7972,7 +7967,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { {N1, N0.getOperand(1)})) { SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1); AddToWorklist(IOR.getNode()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR); + return DAG.getNode(ISD::AND, DL, VT, COR, IOR); } } @@ -7987,7 +7982,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return V; // See if this is some rotate idiom. 
- if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N))) + if (SDValue Rot = MatchRotate(N0, N1, DL)) return Rot; if (SDValue Load = MatchLoadCombine(N)) @@ -8766,6 +8761,10 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, if (NarrowBitWidth % 8 != 0) return std::nullopt; uint64_t NarrowByteWidth = NarrowBitWidth / 8; + // EXTRACT_VECTOR_ELT can extend the element type to the width of the return + // type, leaving the high bits undefined. + if (Index >= NarrowByteWidth) + return std::nullopt; // Check to see if the position of the element in the vector corresponds // with the byte we are trying to provide for. In the case of a vector of @@ -8840,15 +8839,16 @@ static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets, return BigEndian; } +// Look through one layer of truncate or extend. static SDValue stripTruncAndExt(SDValue Value) { switch (Value.getOpcode()) { case ISD::TRUNCATE: case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND: case ISD::ANY_EXTEND: - return stripTruncAndExt(Value.getOperand(0)); + return Value.getOperand(0); } - return Value; + return SDValue(); } /// Match a pattern where a wide type scalar value is stored by several narrow @@ -8952,21 +8952,27 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { if (ShiftAmtC % NarrowNumBits != 0) return SDValue(); + // Make sure we aren't reading bits that are shifted in. + if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits) + return SDValue(); + Offset = ShiftAmtC / NarrowNumBits; WideVal = WideVal.getOperand(0); } // Stores must share the same source value with different offsets. - // Truncate and extends should be stripped to get the single source value. if (!SourceValue) SourceValue = WideVal; - else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal)) - return SDValue(); - else if (SourceValue.getValueType() != WideVT) { - if (WideVal.getValueType() == WideVT || - WideVal.getScalarValueSizeInBits() > - SourceValue.getScalarValueSizeInBits()) + else if (SourceValue != WideVal) { + // Truncate and extends can be stripped to see if the values are related. + if (stripTruncAndExt(SourceValue) != WideVal && + stripTruncAndExt(WideVal) != SourceValue) + return SDValue(); + + if (WideVal.getScalarValueSizeInBits() > + SourceValue.getScalarValueSizeInBits()) SourceValue = WideVal; + // Give up if the source value type is smaller than the store size. if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits()) return SDValue(); @@ -9264,11 +9270,10 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { return NewLoad; SDValue ShiftedLoad = - NeedsZext - ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad, - DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT, - SDLoc(N), LegalOperations)) - : NewLoad; + NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad, + DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, + VT, SDLoc(N))) + : NewLoad; return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad); } @@ -9424,8 +9429,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // fold (a^b) -> (a|b) iff a and b share no bits. 
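The implementation of that fold, just below, now also tags the resulting OR with the new disjoint flag. Both the fold and the flag come from the same fact: on operands with no common bits set, XOR, OR, and ADD coincide. A standalone sketch, not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b)
      if ((a & b) == 0) {           // a and b share no bits
        assert((a ^ b) == (a | b)); // the (a^b) -> (a|b) fold itself
        assert((a | b) == a + b);   // why a 'disjoint' OR also behaves like an ADD
      }
  return 0;
}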
if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && - DAG.haveNoCommonBitsSet(N0, N1)) - return DAG.getNode(ISD::OR, DL, VT, N0, N1); + DAG.haveNoCommonBitsSet(N0, N1)) { + SDNodeFlags Flags; + Flags.setDisjoint(true); + return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags); + } // look for 'add-like' folds: // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE) @@ -9530,7 +9538,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X) - if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { + if (!LegalOperations || hasOperation(ISD::ABS, VT)) { SDValue A = N0Opcode == ISD::ADD ? N0 : N1; SDValue S = N0Opcode == ISD::SRA ? N0 : N1; if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) { @@ -9667,7 +9675,8 @@ static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) { SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT); SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC); SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1); - return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2); + return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2, + LogicOp->getFlags()); } /// Handle transforms common to the three shifts, when the shift amount is a @@ -9848,17 +9857,18 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (SDValue V = DAG.simplifyShift(N0, N1)) return V; + SDLoc DL(N); EVT VT = N0.getValueType(); EVT ShiftVT = N1.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold (shl c1, c2) -> c1<<c2 - if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1})) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1})) return C; // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); @@ -9874,8 +9884,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { TLI.getBooleanContents(N00.getOperand(0).getValueType()) == TargetLowering::ZeroOrNegativeOneBooleanContent) { if (SDValue C = - DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1})) - return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); + DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1})) + return DAG.getNode(ISD::AND, DL, VT, N00, C); } } } @@ -9886,13 +9896,13 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // if (shl x, c) is known to be zero, return 0 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits))) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). 
if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1); + return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1); } // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) @@ -9905,7 +9915,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return (c1 + c2).uge(OpSizeInBits); }; if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, ConstantSDNode *RHS) { @@ -9915,7 +9925,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return (c1 + c2).ult(OpSizeInBits); }; if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { - SDLoc DL(N); SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum); } @@ -9946,7 +9955,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange, /*AllowUndefs*/ false, /*AllowTypeMismatch*/ true)) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS, ConstantSDNode *RHS) { @@ -9959,7 +9968,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange, /*AllowUndefs*/ false, /*AllowTypeMismatch*/ true)) { - SDLoc DL(N); SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0)); SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT); Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1); @@ -9984,7 +9992,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual, /*AllowUndefs*/ false, /*AllowTypeMismatch*/ true)) { - SDLoc DL(N); EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType(); SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT); NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL); @@ -10002,8 +10009,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { LHSC.getZExtValue() <= RHSC.getZExtValue(); }; - SDLoc DL(N); - // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2 if (N0->getFlags().hasExact()) { @@ -10057,7 +10062,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) && isConstantOrConstantVector(N1, /* No Opaques */ true)) { - SDLoc DL(N); SDValue AllBits = DAG.getAllOnesConstant(DL, VT); SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1); return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask); @@ -10078,7 +10082,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // Preserve the disjoint flag for Or. 
if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint()) Flags.setDisjoint(true); - return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1, Flags); + return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags); } } @@ -10108,7 +10112,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue N01 = N0.getOperand(1); if (SDValue Shl = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) - return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl); + return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl); } ConstantSDNode *N1C = isConstOrConstSplat(N1); @@ -10116,6 +10120,21 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (SDValue NewSHL = visitShiftByConstant(N)) return NewSHL; + // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the + // target. + if (((N1.getOpcode() == ISD::CTTZ && + VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) || + N1.getOpcode() == ISD::CTTZ_ZERO_UNDEF) && + N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, ShiftVT) && + TLI.isOperationLegalOrCustom(ISD::MUL, VT)) { + SDValue Y = N1.getOperand(0); + SDLoc DL(N); + SDValue NegY = DAG.getNegative(Y, DL, ShiftVT); + SDValue And = + DAG.getZExtOrTrunc(DAG.getNode(ISD::AND, DL, ShiftVT, Y, NegY), DL, VT); + return DAG.getNode(ISD::MUL, DL, VT, And, N0); + } + if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); @@ -10123,7 +10142,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N0.getOpcode() == ISD::VSCALE && N1C) { const APInt &C0 = N0.getConstantOperandAPInt(0); const APInt &C1 = N1C->getAPIntValue(); - return DAG.getVScale(SDLoc(N), VT, C0 << C1); + return DAG.getVScale(DL, VT, C0 << C1); } // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)). @@ -10133,7 +10152,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { const APInt &C0 = N0.getConstantOperandAPInt(0); if (ShlVal.ult(C0.getBitWidth())) { APInt NewStep = C0 << ShlVal; - return DAG.getStepVector(SDLoc(N), VT, NewStep); + return DAG.getStepVector(DL, VT, NewStep); } } @@ -10144,7 +10163,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // Examples: // (srl (mul (zext i32:$a to i64), (zext i32:$a to i64)), 32) -> (mulhu $a, $b) // (sra (mul (sext i32:$a to i64), (sext i32:$a to i64)), 32) -> (mulhs $a, $b) -static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG, +static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI) { assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && "SRL or SRA node is required here!"); @@ -10155,8 +10174,6 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG, if (!ShiftAmtSrc) return SDValue(); - SDLoc DL(N); - // The operation feeding into the shift must be a multiply. SDValue ShiftOperand = N->getOperand(0); if (ShiftOperand.getOpcode() != ISD::MUL) @@ -10298,11 +10315,12 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (SDValue V = DAG.simplifyShift(N0, N1)) return V; + SDLoc DL(N); EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold (sra c1, c2) -> (sra c1, c2) - if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1})) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1})) return C; // Arithmetic shifting an all-sign-bit value is a no-op. 
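Among the visitSHL additions above, the new cttz fold leans on the classic bit trick that Y & -Y isolates the lowest set bit of Y, which is precisely 1 << cttz(Y), so shifting X left by cttz(Y) equals multiplying X by Y & -Y. A standalone sketch, not part of the patch (cttz32 is a hypothetical portable helper, defined only for nonzero inputs):

#include <cassert>
#include <cstdint>

static unsigned cttz32(uint32_t Y) { // count trailing zeros, Y != 0
  unsigned N = 0;
  while ((Y & 1u) == 0) { Y >>= 1; ++N; }
  return N;
}

int main() {
  const uint32_t Xs[] = {1u, 3u, 0xDEADBEEFu};
  const uint32_t Ys[] = {1u, 2u, 12u, 0xA0u, 0x80000000u};
  for (uint32_t X : Xs)
    for (uint32_t Y : Ys) {
      uint32_t LowBit = Y & (0u - Y);         // Y & -Y
      assert(LowBit == 1u << cttz32(Y));      // ... == 1 << cttz(Y)
      assert((X << cttz32(Y)) == LowBit * X); // (shl X, cttz(Y)) -> (mul (Y & -Y), X)
    }
  return 0;
}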
@@ -10313,7 +10331,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -10324,7 +10342,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) // clamp (add c1, c2) to max shift. if (N0.getOpcode() == ISD::SRA) { - SDLoc DL(N); EVT ShiftVT = N1.getValueType(); EVT ShiftSVT = ShiftVT.getScalarType(); SmallVector<SDValue, 16> ShiftValues; @@ -10381,9 +10398,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) && TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && TLI.isTruncateFree(VT, TruncVT)) { - SDLoc DL(N); - SDValue Amt = DAG.getConstant(ShiftAmt, DL, - getShiftAmountTy(N0.getOperand(0).getValueType())); + SDValue Amt = DAG.getShiftAmountConstant(ShiftAmt, VT, DL); SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Amt); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, @@ -10422,7 +10437,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // that restriction may conflict with other transforms. if (TruncVT.isSimple() && isTypeLegal(TruncVT) && TLI.isTruncateFree(VT, TruncVT)) { - SDLoc DL(N); SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT); SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc( @@ -10443,7 +10457,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) - return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1); + return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1); } // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2)) @@ -10460,7 +10474,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { EVT LargeVT = N0Op0.getValueType(); unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits; if (LargeShift->getAPIntValue() == TruncBits) { - SDLoc DL(N); EVT LargeShiftVT = getShiftAmountTy(LargeVT); SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT); Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt, @@ -10478,7 +10491,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // If the sign bit is known to be zero, switch this to a SRL. if (DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::SRL, DL, VT, N0, N1); if (N1C && !N1C->isOpaque()) if (SDValue NewSRA = visitShiftByConstant(N)) @@ -10486,7 +10499,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // Try to transform this shift into a multiply-high if // it matches the appropriate pattern detected in combineShiftToMULH. - if (SDValue MULH = combineShiftToMULH(N, DAG, TLI)) + if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI)) return MULH; // Attempt to convert a sra of a load into a narrower sign-extending load. 
@@ -10502,17 +10515,18 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (SDValue V = DAG.simplifyShift(N0, N1)) return V; + SDLoc DL(N); EVT VT = N0.getValueType(); EVT ShiftVT = N1.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold (srl c1, c2) -> c1 >>u c2 - if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1})) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1})) return C; // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -10522,7 +10536,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits))) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) if (N0.getOpcode() == ISD::SRL) { @@ -10534,7 +10548,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return (c1 + c2).uge(OpSizeInBits); }; if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, ConstantSDNode *RHS) { @@ -10544,7 +10558,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return (c1 + c2).ult(OpSizeInBits); }; if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { - SDLoc DL(N); SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum); } @@ -10563,7 +10576,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2))) // This is only valid if the OpSizeInBits + c1 = size of inner shift. 
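The srl-of-srl folds above rest on the same amount-addition identity, with the extra wrinkle that a composed amount of OpSizeInBits or more yields zero, so the combine emits the constant directly instead of an out-of-range shift. A small illustrative check:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0xDEADBEEFu;
      // In range: logical shift amounts add.
      assert(((x >> 10) >> 7) == (x >> 17));
      // Out of range: 20 + 20 >= 32, so the composed result is zero; the
      // combine folds this to the constant 0 because a single shift by 40
      // would be ill-defined at this width.
      assert(((x >> 20) >> 20) == 0);
    }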
if (c1 + OpSizeInBits == InnerShiftSize) { - SDLoc DL(N); if (c1 + c2 >= InnerShiftSize) return DAG.getConstant(0, DL, VT); SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT); @@ -10575,7 +10587,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask) if (N0.hasOneUse() && InnerShift.hasOneUse() && c1 + c2 < InnerShiftSize) { - SDLoc DL(N); SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT); SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT, InnerShift.getOperand(0), NewShiftAmt); @@ -10603,7 +10614,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount, /*AllowUndefs*/ false, /*AllowTypeMismatch*/ true)) { - SDLoc DL(N); SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1); SDValue Mask = DAG.getAllOnesConstant(DL, VT); @@ -10615,7 +10625,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount, /*AllowUndefs*/ false, /*AllowTypeMismatch*/ true)) { - SDLoc DL(N); SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01); SDValue Mask = DAG.getAllOnesConstant(DL, VT); @@ -10637,13 +10646,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { uint64_t ShiftAmt = N1C->getZExtValue(); SDLoc DL0(N0); - SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT, - N0.getOperand(0), - DAG.getConstant(ShiftAmt, DL0, - getShiftAmountTy(SmallVT))); + SDValue SmallShift = + DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0), + DAG.getShiftAmountConstant(ShiftAmt, SmallVT, DL0)); AddToWorklist(SmallShift.getNode()); APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt); - SDLoc DL(N); return DAG.getNode(ISD::AND, DL, VT, DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift), DAG.getConstant(Mask, DL, VT)); @@ -10654,7 +10661,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // bit, which is unmodified by sra. if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) { if (N0.getOpcode() == ISD::SRA) - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1); + return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1); } // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power @@ -10685,14 +10692,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (ShAmt) { SDLoc DL(N0); Op = DAG.getNode(ISD::SRL, DL, VT, Op, - DAG.getConstant(ShAmt, DL, - getShiftAmountTy(Op.getValueType()))); + DAG.getShiftAmountConstant(ShAmt, VT, DL)); AddToWorklist(Op.getNode()); } - - SDLoc DL(N); - return DAG.getNode(ISD::XOR, DL, VT, - Op, DAG.getConstant(1, DL, VT)); + return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT)); } } @@ -10700,7 +10703,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1); + return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1); } // fold operands of srl based on knowledge that the low bits are not @@ -10754,7 +10757,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // Try to transform this shift into a multiply-high if // it matches the appropriate pattern detected in combineShiftToMULH. 
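For the multiply-high combine referenced above: (srl (mul (zext a), (zext b)), 32) is exactly the upper half of the full 32x32->64 product, i.e. what a MULHU instruction computes directly. An illustrative sketch:

    #include <cassert>
    #include <cstdint>

    // Upper half of the full product, the value MULHU produces directly.
    static uint32_t mulhu32(uint32_t a, uint32_t b) {
      return (uint32_t)(((uint64_t)a * (uint64_t)b) >> 32);
    }

    int main() {
      assert(mulhu32(0xFFFFFFFFu, 0xFFFFFFFFu) == 0xFFFFFFFEu);
      assert(mulhu32(0x10000u, 0x10000u) == 1u);
    }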
- if (SDValue MULH = combineShiftToMULH(N, DAG, TLI)) + if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI)) return MULH; return SDValue(); @@ -10767,6 +10770,7 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) { SDValue N2 = N->getOperand(2); bool IsFSHL = N->getOpcode() == ISD::FSHL; unsigned BitWidth = VT.getScalarSizeInBits(); + SDLoc DL(N); // fold (fshl N0, N1, 0) -> N0 // fold (fshr N0, N1, 0) -> N1 @@ -10786,8 +10790,8 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) { // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth) if (Cst->getAPIntValue().uge(BitWidth)) { uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth); - return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1, - DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy)); + return DAG.getNode(N->getOpcode(), DL, VT, N0, N1, + DAG.getConstant(RotAmt, DL, ShAmtTy)); } unsigned ShAmt = Cst->getZExtValue(); @@ -10799,13 +10803,13 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) { // fold fshl(N0, undef_or_zero, C) -> shl(N0, C) // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C) if (IsUndefOrZero(N0)) - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, - DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, - SDLoc(N), ShAmtTy)); + return DAG.getNode( + ISD::SRL, DL, VT, N1, + DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, DL, ShAmtTy)); if (IsUndefOrZero(N1)) - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, - DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, - SDLoc(N), ShAmtTy)); + return DAG.getNode( + ISD::SHL, DL, VT, N0, + DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, DL, ShAmtTy)); // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive. // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive. @@ -10854,18 +10858,19 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) { if (isPowerOf2_32(BitWidth)) { APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1); if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits)) - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2); + return DAG.getNode(ISD::SRL, DL, VT, N1, N2); if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits)) - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2); + return DAG.getNode(ISD::SHL, DL, VT, N0, N2); } // fold (fshl N0, N0, N2) -> (rotl N0, N2) // fold (fshr N0, N0, N2) -> (rotr N0, N2) - // TODO: Investigate flipping this rotate if only one is legal, if funnel shift - // is legal as well we might be better off avoiding non-constant (BW - N2). + // TODO: Investigate flipping this rotate if only one is legal. + // If funnel shift is legal as well we might be better off avoiding + // non-constant (BW - N2). unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR; if (N0 == N1 && hasOperation(RotOpc, VT)) - return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2); + return DAG.getNode(RotOpc, DL, VT, N0, N2); // Simplify, based on bits shifted out of N0/N1. 
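On the funnel-shift folds above: fshl(hi, lo, s) selects the top word of the concatenation hi:lo shifted left by s, with s taken modulo the bit width, so equal operands degenerate to a rotate and an undef-or-zero operand degenerates to a plain shift. An illustrative sketch:

    #include <cassert>
    #include <cstdint>

    // fshl with the amount pre-reduced modulo 32, as the combine above does.
    static uint32_t fshl32(uint32_t hi, uint32_t lo, unsigned s) {
      s %= 32;
      return s ? (hi << s) | (lo >> (32 - s)) : hi;
    }

    int main() {
      uint32_t x = 0x80000001u;
      // (fshl N0, N0, N2) -> (rotl N0, N2)
      assert(fshl32(x, x, 1) == 0x00000003u);
      // (fshl N0, undef_or_zero, C) -> (shl N0, C)
      assert(fshl32(x, 0, 4) == (x << 4));
    }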
if (SimplifyDemandedBits(SDValue(N, 0))) @@ -10880,11 +10885,11 @@ SDValue DAGCombiner::visitSHLSAT(SDNode *N) { if (SDValue V = DAG.simplifyShift(N0, N1)) return V; + SDLoc DL(N); EVT VT = N0.getValueType(); // fold (*shlsat c1, c2) -> c1<<c2 - if (SDValue C = - DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1})) + if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1})) return C; ConstantSDNode *N1C = isConstOrConstSplat(N1); @@ -10893,13 +10898,13 @@ SDValue DAGCombiner::visitSHLSAT(SDNode *N) { // fold (sshlsat x, c) -> (shl x, c) if (N->getOpcode() == ISD::SSHLSAT && N1C && N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0))) - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::SHL, DL, VT, N0, N1); // fold (ushlsat x, c) -> (shl x, c) if (N->getOpcode() == ISD::USHLSAT && N1C && N1C->getAPIntValue().ule( DAG.computeKnownBits(N0).countMinLeadingZeros())) - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::SHL, DL, VT, N0, N1); } return SDValue(); @@ -10909,7 +10914,7 @@ SDValue DAGCombiner::visitSHLSAT(SDNode *N) { // (ABS (SUB (EXTEND a), (EXTEND b))). // (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))). // Generates UABD/SABD instruction. -SDValue DAGCombiner::foldABSToABD(SDNode *N) { +SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) { EVT SrcVT = N->getValueType(0); if (N->getOpcode() == ISD::TRUNCATE) @@ -10921,7 +10926,6 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N) { EVT VT = N->getValueType(0); SDValue AbsOp1 = N->getOperand(0); SDValue Op0, Op1; - SDLoc DL(N); if (AbsOp1.getOpcode() != ISD::SUB) return SDValue(); @@ -10980,9 +10984,10 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N) { SDValue DAGCombiner::visitABS(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + SDLoc DL(N); // fold (abs c1) -> c2 - if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, SDLoc(N), VT, {N0})) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0})) return C; // fold (abs (abs x)) -> (abs x) if (N0.getOpcode() == ISD::ABS) @@ -10991,7 +10996,7 @@ SDValue DAGCombiner::visitABS(SDNode *N) { if (DAG.SignBitIsZero(N0)) return N0; - if (SDValue ABD = foldABSToABD(N)) + if (SDValue ABD = foldABSToABD(N, DL)) return ABD; // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x))) @@ -11001,7 +11006,6 @@ SDValue DAGCombiner::visitABS(SDNode *N) { if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) && TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) && hasOperation(ISD::ABS, ExtVT)) { - SDLoc DL(N); return DAG.getNode( ISD::ZERO_EXTEND, DL, VT, DAG.getNode(ISD::ABS, DL, ExtVT, @@ -11047,7 +11051,7 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) { SDValue Res = N0.getOperand(0); if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2))) Res = DAG.getNode(ISD::SHL, DL, VT, Res, - DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT))); + DAG.getShiftAmountConstant(NewShAmt, VT, DL)); Res = DAG.getZExtOrTrunc(Res, DL, HalfVT); Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res); return DAG.getZExtOrTrunc(Res, DL, VT); @@ -11083,9 +11087,23 @@ SDValue DAGCombiner::visitBITREVERSE(SDNode *N) { // fold (bitreverse c1) -> c2 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0})) return C; + // fold (bitreverse (bitreverse x)) -> x if (N0.getOpcode() == ISD::BITREVERSE) return N0.getOperand(0); + + SDValue X, Y; + + // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y) + if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) && + 
sd_match(N, m_BitReverse(m_Srl(m_BitReverse(m_Value(X)), m_Value(Y))))) + return DAG.getNode(ISD::SHL, DL, VT, X, Y); + + // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y) + if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) && + sd_match(N, m_BitReverse(m_Shl(m_BitReverse(m_Value(X)), m_Value(Y))))) + return DAG.getNode(ISD::SRL, DL, VT, X, Y); + return SDValue(); } @@ -11150,25 +11168,62 @@ SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { SDValue DAGCombiner::visitCTPOP(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + unsigned NumBits = VT.getScalarSizeInBits(); SDLoc DL(N); // fold (ctpop c1) -> c2 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0})) return C; + + // If the source is being shifted, but doesn't affect any active bits, + // then we can call CTPOP on the shift source directly. + if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) { + if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) { + const APInt &Amt = AmtC->getAPIntValue(); + if (Amt.ult(NumBits)) { + KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0)); + if ((N0.getOpcode() == ISD::SRL && + Amt.ule(KnownSrc.countMinTrailingZeros())) || + (N0.getOpcode() == ISD::SHL && + Amt.ule(KnownSrc.countMinLeadingZeros()))) { + return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0)); + } + } + } + } + + // If the upper bits are known to be zero, then see if its profitable to + // only count the lower bits. + if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) { + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2); + if (hasOperation(ISD::CTPOP, HalfVT) && + TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) && + TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) { + APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2); + if (DAG.MaskedValueIsZero(N0, UpperBits)) { + SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT, + DAG.getZExtOrTrunc(N0, DL, HalfVT)); + return DAG.getZExtOrTrunc(PopCnt, DL, VT); + } + } + } + return SDValue(); } -// FIXME: This should be checking for no signed zeros on individual operands, as -// well as no nans. static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, - SDValue RHS, + SDValue RHS, const SDNodeFlags Flags, const TargetLowering &TLI) { - const TargetOptions &Options = DAG.getTarget().Options; EVT VT = LHS.getValueType(); + if (!VT.isFloatingPoint()) + return false; + + const TargetOptions &Options = DAG.getTarget().Options; - return Options.NoSignedZerosFPMath && VT.isFloatingPoint() && + return (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) && TLI.isProfitableToCombineMinNumMaxNum(VT) && - DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS); + (Flags.hasNoNaNs() || + (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS))); } static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, @@ -11259,7 +11314,8 @@ SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, /// If a (v)select has a condition value that is a sign-bit test, try to smear /// the condition operand sign-bit across the value width and use it as a mask. 
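Both new CTPOP folds above are population-count invariants: a shift that only moves known-zero bits out cannot change the count, and when the upper half is known zero the count can be taken at half width and extended back. An illustrative sketch:

    #include <bit>
    #include <cassert>
    #include <cstdint>

    int main() {
      // Shifting out only known-zero bits preserves the population count.
      uint32_t x = 0xABCD0000u; // low 16 bits known zero
      assert(std::popcount(x >> 16) == std::popcount(x));
      // With the upper half known zero, count at half width instead.
      uint32_t y = 0x0000BEEFu;
      assert(std::popcount((uint16_t)y) == std::popcount(y));
    }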
-static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) { +static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL, + SelectionDAG &DAG) { SDValue Cond = N->getOperand(0); SDValue C1 = N->getOperand(1); SDValue C2 = N->getOperand(2); @@ -11279,14 +11335,12 @@ static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) { if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) && isAllOnesOrAllOnesSplat(C2)) { // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1 - SDLoc DL(N); SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT); SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC); return DAG.getNode(ISD::OR, DL, VT, Sra, C1); } if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) { // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1 - SDLoc DL(N); SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT); SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC); return DAG.getNode(ISD::AND, DL, VT, Sra, C1); @@ -11426,41 +11480,49 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { return DAG.getNode(ISD::OR, DL, VT, NotCond, N1); } - if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) + if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG)) return V; return SDValue(); } -static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) { - assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) && - "Expected a (v)select"); +template <class MatchContextClass> +static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL, + SelectionDAG &DAG) { + assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT || + N->getOpcode() == ISD::VP_SELECT) && + "Expected a (v)(vp.)select"); SDValue Cond = N->getOperand(0); SDValue T = N->getOperand(1), F = N->getOperand(2); EVT VT = N->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MatchContextClass matcher(DAG, TLI, N); + if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1) return SDValue(); - // select Cond, Cond, F --> or Cond, F - // select Cond, 1, F --> or Cond, F + // select Cond, Cond, F --> or Cond, freeze(F) + // select Cond, 1, F --> or Cond, freeze(F) if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true)) - return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F); + return matcher.getNode(ISD::OR, DL, VT, Cond, DAG.getFreeze(F)); - // select Cond, T, Cond --> and Cond, T - // select Cond, T, 0 --> and Cond, T + // select Cond, T, Cond --> and Cond, freeze(T) + // select Cond, T, 0 --> and Cond, freeze(T) if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true)) - return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T); + return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T)); - // select Cond, T, 1 --> or (not Cond), T + // select Cond, T, 1 --> or (not Cond), freeze(T) if (isOneOrOneSplat(F, /* AllowUndefs */ true)) { - SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT); - return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T); + SDValue NotCond = + matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT)); + return matcher.getNode(ISD::OR, DL, VT, NotCond, DAG.getFreeze(T)); } - // select Cond, 0, F --> and (not Cond), F + // select Cond, 0, F --> and (not Cond), freeze(F) if (isNullOrNullSplat(T, /* AllowUndefs */ true)) { - SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT); - return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F); + SDValue NotCond = + matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT)); + return 
matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F)); } return SDValue(); @@ -11471,13 +11533,12 @@ static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) { SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); EVT VT = N->getValueType(0); - if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse()) - return SDValue(); - SDValue Cond0 = N0.getOperand(0); - SDValue Cond1 = N0.getOperand(1); - ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - if (VT != Cond0.getValueType()) + SDValue Cond0, Cond1; + ISD::CondCode CC; + if (!sd_match(N0, m_OneUse(m_SetCC(m_Value(Cond0), m_Value(Cond1), + m_CondCode(CC)))) || + VT != Cond0.getValueType()) return SDValue(); // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the @@ -11489,37 +11550,37 @@ static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) { else return SDValue(); - // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1 + // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1) if (isNullOrNullSplat(N2)) { SDLoc DL(N); SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); - return DAG.getNode(ISD::AND, DL, VT, Sra, N1); + return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1)); } - // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2 + // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2) if (isAllOnesOrAllOnesSplat(N1)) { SDLoc DL(N); SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); - return DAG.getNode(ISD::OR, DL, VT, Sra, N2); + return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2)); } // If we have to invert the sign bit mask, only do that transform if the // target has a bitwise 'and not' instruction (the invert is free). - // (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2 + // (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2) const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) { SDLoc DL(N); SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); SDValue Not = DAG.getNOT(DL, Sra, VT); - return DAG.getNode(ISD::AND, DL, VT, Not, N2); + return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2)); } // TODO: There's another pattern in this family, but it may require // implementing hasOrNot() to check for profitability: - // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2 + // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2) return SDValue(); } @@ -11536,7 +11597,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (SDValue V = DAG.simplifySelect(N0, N1, N2)) return V; - if (SDValue V = foldBoolSelectToLogic(N, DAG)) + if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG)) return V; // select (not Cond), N1, N2 -> select Cond, N2, N1 @@ -11624,7 +11685,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { N2_2, Flags); } // Otherwise see if we can optimize to a better pattern. - if (SDValue Combined = visitORLike(N0, N2_0, N)) + if (SDValue Combined = visitORLike(N0, N2_0, DL)) return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1, N2_2, Flags); } @@ -11640,7 +11701,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // select (fcmp gt x, y), x, y -> fmaxnum x, y // // This is OK if we don't care what happens if either operand is a NaN. 
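The select-to-logic folds above are ordinary Boolean algebra on i1; the freeze of the merged-in arm is needed because the logic form evaluates both arms, while the original select would have discarded one of them. A truth-table check of the four identities:

    #include <cassert>

    int main() {
      for (bool c : {false, true})
        for (bool t : {false, true})
          for (bool f : {false, true}) {
            assert((c ? t : false) == (c && t));  // select c, t, 0 -> and
            assert((c ? true : f) == (c || f));   // select c, 1, f -> or
            assert((c ? t : true) == (!c || t));  // select c, t, 1 -> or not
            assert((c ? false : f) == (!c && f)); // select c, 0, f -> and not
          }
    }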
- if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI)) + if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI)) if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC)) return FMinMax; @@ -11920,8 +11981,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { !MST->isCompressingStore() && !MST->isTruncatingStore()) return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(), MST->getBasePtr(), MST->getPointerInfo(), - MST->getOriginalAlign(), MachineMemOperand::MOStore, - MST->getAAInfo()); + MST->getOriginalAlign(), + MST->getMemOperand()->getFlags(), MST->getAAInfo()); // Try transforming N to an indexed store. if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) @@ -11982,6 +12043,55 @@ SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitVECTOR_COMPRESS(SDNode *N) { + SDLoc DL(N); + SDValue Vec = N->getOperand(0); + SDValue Mask = N->getOperand(1); + SDValue Passthru = N->getOperand(2); + EVT VecVT = Vec.getValueType(); + + bool HasPassthru = !Passthru.isUndef(); + + APInt SplatVal; + if (ISD::isConstantSplatVector(Mask.getNode(), SplatVal)) + return TLI.isConstTrueVal(Mask) ? Vec : Passthru; + + if (Vec.isUndef() || Mask.isUndef()) + return Passthru; + + // No need for potentially expensive compress if the mask is constant. + if (ISD::isBuildVectorOfConstantSDNodes(Mask.getNode())) { + SmallVector<SDValue, 16> Ops; + EVT ScalarVT = VecVT.getVectorElementType(); + unsigned NumSelected = 0; + unsigned NumElmts = VecVT.getVectorNumElements(); + for (unsigned I = 0; I < NumElmts; ++I) { + SDValue MaskI = Mask.getOperand(I); + // We treat undef mask entries as "false". + if (MaskI.isUndef()) + continue; + + if (TLI.isConstTrueVal(MaskI)) { + SDValue VecI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec, + DAG.getVectorIdxConstant(I, DL)); + Ops.push_back(VecI); + NumSelected++; + } + } + for (unsigned Rest = NumSelected; Rest < NumElmts; ++Rest) { + SDValue Val = + HasPassthru + ? 
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Passthru, + DAG.getVectorIdxConstant(Rest, DL)) + : DAG.getUNDEF(ScalarVT); + Ops.push_back(Val); + } + return DAG.getBuildVector(VecVT, DL, Ops); + } + + return SDValue(); +} + SDValue DAGCombiner::visitVPGATHER(SDNode *N) { VPGatherSDNode *MGT = cast<VPGatherSDNode>(N); SDValue Mask = MGT->getMask(); @@ -12058,7 +12168,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { SDValue NewLd = DAG.getLoad( N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(), MLD->getPointerInfo(), MLD->getOriginalAlign(), - MachineMemOperand::MOLoad, MLD->getAAInfo(), MLD->getRanges()); + MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges()); return CombineTo(N, NewLd, NewLd.getValue(1)); } @@ -12109,8 +12219,11 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { SDValue N2Elt = N2.getOperand(i); if (N1Elt.isUndef() || N2Elt.isUndef()) continue; - if (N1Elt.getValueType() != N2Elt.getValueType()) - continue; + if (N1Elt.getValueType() != N2Elt.getValueType()) { + AllAddOne = false; + AllSubOne = false; + break; + } const APInt &C1 = N1Elt->getAsAPIntVal(); const APInt &C2 = N2Elt->getAsAPIntVal(); @@ -12140,7 +12253,7 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC); } - if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) + if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG)) return V; // The general case for select-of-constants: @@ -12150,6 +12263,21 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitVP_SELECT(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + SDLoc DL(N); + + if (SDValue V = DAG.simplifySelect(N0, N1, N2)) + return V; + + if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DL, DAG)) + return V; + + return SDValue(); +} + SDValue DAGCombiner::visitVSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -12160,13 +12288,24 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { if (SDValue V = DAG.simplifySelect(N0, N1, N2)) return V; - if (SDValue V = foldBoolSelectToLogic(N, DAG)) + if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG)) return V; // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) return DAG.getSelect(DL, VT, F, N2, N1); + // select (sext m), (add X, C), X --> (add X, (and C, (sext m)))) + if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() && + DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) && + N0.getScalarValueSizeInBits() == N1.getScalarValueSizeInBits() && + TLI.getBooleanContents(N0.getValueType()) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { + return DAG.getNode( + ISD::ADD, DL, N1.getValueType(), N2, + DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0)); + } + // Canonicalize integer abs. 
// vselect (setg[te] X, 0), X, -X -> // vselect (setgt X, -1), X, -X -> @@ -12190,9 +12329,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) return DAG.getNode(ISD::ABS, DL, VT, LHS); - SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS, - DAG.getConstant(VT.getScalarSizeInBits() - 1, - DL, getShiftAmountTy(VT))); + SDValue Shift = DAG.getNode( + ISD::SRA, DL, VT, LHS, + DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, DL)); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); @@ -12205,7 +12344,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { // This is OK if we don't care about what happens if either operand is a // NaN. // - if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) { + if (N0.hasOneUse() && + isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) { if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC)) return FMinMax; } @@ -12445,6 +12585,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { SDValue N3 = N->getOperand(3); SDValue N4 = N->getOperand(4); ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get(); + SDLoc DL(N); // fold select_cc lhs, rhs, x, x, cc -> x if (N2 == N3) @@ -12453,11 +12594,11 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { // select_cc bool, 0, x, y, seteq -> select bool, y, x if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 && isNullConstant(N1)) - return DAG.getSelect(SDLoc(N), N2.getValueType(), N0, N3, N2); + return DAG.getSelect(DL, N2.getValueType(), N0, N3, N2); // Determine if the condition we're dealing with is constant if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, - CC, SDLoc(N), false)) { + CC, DL, false)) { AddToWorklist(SCC.getNode()); // cond always true -> true val @@ -12472,9 +12613,9 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { // Fold to a simpler select_cc if (SCC.getOpcode() == ISD::SETCC) { - SDValue SelectOp = DAG.getNode( - ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0), - SCC.getOperand(1), N2, N3, SCC.getOperand(2)); + SDValue SelectOp = + DAG.getNode(ISD::SELECT_CC, DL, N2.getValueType(), SCC.getOperand(0), + SCC.getOperand(1), N2, N3, SCC.getOperand(2)); SelectOp->setFlags(SCC->getFlags()); return SelectOp; } @@ -12482,10 +12623,10 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { // If we can fold this based on the true/false value, do so. if (SimplifySelectOps(N, N2, N3)) - return SDValue(N, 0); // Don't revisit N. + return SDValue(N, 0); // Don't revisit N. // fold select_cc into other things, such as min/max/abs - return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC); + return SimplifySelectCC(DL, N0, N1, N2, N3, CC); } SDValue DAGCombiner::visitSETCC(SDNode *N) { @@ -12498,10 +12639,9 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get(); EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + SDLoc DL(N); - SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, SDLoc(N), !PreferSetCC); - - if (Combined) { + if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, !PreferSetCC)) { // If we prefer to have a setcc, and we don't, we'll try our best to // recreate one using rebuildSetCC. 
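The integer-abs canonicalization above relies on the sign-splat trick: with s = x >> 31 (all zeros or all ones), abs(x) == (x + s) ^ s, which is the sra/add/xor expansion the DAG builds when ISD::ABS is not available. An illustrative sketch (INT32_MIN is excluded, as its absolute value is unrepresentable):

    #include <cassert>
    #include <cstdint>
    #include <cstdlib>

    static int32_t absViaSra(int32_t x) {
      int32_t s = x >> 31; // sign splat: 0 or -1 (arithmetic shift, C++20)
      return (x + s) ^ s;
    }

    int main() {
      for (int32_t v : {0, 1, -1, 42, -42, INT32_MIN + 1})
        assert(absViaSra(v) == std::abs(v));
    }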
if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) { @@ -12597,7 +12737,6 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask); // Transform is valid and we have a new preference. if (CanTransform && NewShiftOpc != ShiftOpc) { - SDLoc DL(N); SDValue NewShiftOrRotate = DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0), ShiftOrRotate.getOperand(1)); @@ -12673,13 +12812,11 @@ static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) { /// This function is called by the DAGCombiner when visiting sext/zext/aext /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, - SelectionDAG &DAG, + SelectionDAG &DAG, const SDLoc &DL, CombineLevel Level) { unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - SDLoc DL(N); - assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!"); @@ -12720,12 +12857,12 @@ static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). /// Vector extends are not folded if operations are legal; this is to /// avoid introducing illegal build_vector dag nodes. -static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, +static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL, + const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes) { unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - SDLoc DL(N); assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) && "Expected EXTEND dag node in input!"); @@ -12952,12 +13089,12 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { SDValue BasePtr = LN0->getBasePtr(); for (unsigned Idx = 0; Idx < NumSplits; Idx++) { const unsigned Offset = Idx * Stride; - const Align Align = commonAlignment(LN0->getAlign(), Offset); - SDValue SplitLoad = DAG.getExtLoad( - ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr, - LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align, - LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); + SDValue SplitLoad = + DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), + BasePtr, LN0->getPointerInfo().getWithOffset(Offset), + SplitSrcVT, LN0->getOriginalAlign(), + LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL); @@ -13135,20 +13272,39 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x))) // Only generate vector extloads when 1) they're legal, and 2) they are -// deemed desirable by the target. +// deemed desirable by the target. NonNegZExt can be set to true if a zero +// extend has the nonneg flag to allow use of sextload if profitable. static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, - ISD::NodeType ExtOpc) { + ISD::NodeType ExtOpc, + bool NonNegZExt = false) { + if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode())) + return {}; + + // If this is zext nneg, see if it would make sense to treat it as a sext. 
+ if (NonNegZExt) { + assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND && + "Unexpected load type or opcode"); + for (SDNode *User : N0->uses()) { + if (User->getOpcode() == ISD::SETCC) { + ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get(); + if (ISD::isSignedIntSetCC(CC)) { + ExtLoadType = ISD::SEXTLOAD; + ExtOpc = ISD::SIGN_EXTEND; + break; + } + } + } + } + // TODO: isFixedLengthVector() should be removed and any negative effects on // code generation being the result of that target's implementation of // isVectorLoadExtDesirable(). - if (!ISD::isNON_EXTLoad(N0.getNode()) || - !ISD::isUNINDEXEDLoad(N0.getNode()) || - ((LegalOperations || VT.isFixedLengthVector() || - !cast<LoadSDNode>(N0)->isSimple()) && - !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))) + if ((LegalOperations || VT.isFixedLengthVector() || + !cast<LoadSDNode>(N0)->isSimple()) && + !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())) return {}; bool DoXform = true; @@ -13207,6 +13363,37 @@ tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, return NewLoad; } +// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load))) +static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG, + const TargetLowering &TLI, EVT VT, + SDValue N0, + ISD::LoadExtType ExtLoadType) { + auto *ALoad = dyn_cast<AtomicSDNode>(N0); + if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD) + return {}; + EVT MemoryVT = ALoad->getMemoryVT(); + if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT)) + return {}; + // Can't fold into ALoad if it is already extending differently. + ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType(); + if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) || + (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD)) + return {}; + + EVT OrigVT = ALoad->getValueType(0); + assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider."); + auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomic( + ISD::ATOMIC_LOAD, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(), + ALoad->getBasePtr(), ALoad->getMemOperand())); + NewALoad->setExtensionType(ExtLoadType); + DAG.ReplaceAllUsesOfValueWith( + SDValue(ALoad, 0), + DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0))); + // Update the chain uses. + DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1)); + return SDValue(NewALoad, 0); +} + static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations) { assert((N->getOpcode() == ISD::SIGN_EXTEND || @@ -13381,7 +13568,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (N0.isUndef()) return DAG.getConstant(0, DL, VT); - if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) + if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes)) return Res; // fold (sext (sext x)) -> (sext x) @@ -13478,6 +13665,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD)) return foldedExt; + // Try to simplify (sext (atomic_load x)). 
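The NonNegZExt handling above works because "zext nneg" asserts the narrow value is non-negative, and for non-negative values sign- and zero-extension coincide; the combine may therefore prefer SEXTLOAD when signed comparisons consume the result. An illustrative sketch:

    #include <cassert>
    #include <cstdint>

    int main() {
      int8_t v = 0x5A; // non-negative: zext and sext agree
      assert((uint32_t)(uint8_t)v == (uint32_t)(int32_t)v);
      int8_t n = -6;   // negative: they differ, so the nneg flag is essential
      assert((uint32_t)(uint8_t)n != (uint32_t)(int32_t)n);
    }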
+ if (SDValue foldedExt = + tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD)) + return foldedExt; + // fold (sext (and/or/xor (load x), cst)) -> // (and/or/xor (sextload x), (sext cst)) if (ISD::isBitwiseLogicOp(N0.getOpcode()) && @@ -13581,7 +13773,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT)); } - if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level)) + if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level)) return Res; return SDValue(); @@ -13590,9 +13782,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { /// Given an extending node with a pop-count operand, if the target does not /// support a pop-count in the narrow source type but does support it in the /// destination type, widen the pop-count to the destination type. -static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) { +static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL) { assert((Extend->getOpcode() == ISD::ZERO_EXTEND || - Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op"); + Extend->getOpcode() == ISD::ANY_EXTEND) && + "Expected extend op"); SDValue CtPop = Extend->getOperand(0); if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse()) @@ -13605,7 +13798,6 @@ static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) { return SDValue(); // zext (ctpop X) --> ctpop (zext X) - SDLoc DL(Extend); SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT); return DAG.getNode(ISD::CTPOP, DL, VT, NewZext); } @@ -13650,20 +13842,23 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (N0.isUndef()) return DAG.getConstant(0, DL, VT); - if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) + if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes)) return Res; // fold (zext (zext x)) -> (zext x) // fold (zext (aext x)) -> (zext x) - if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) - return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); + if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { + SDNodeFlags Flags; + if (N0.getOpcode() == ISD::ZERO_EXTEND) + Flags.setNonNeg(N0->getFlags().hasNonNeg()); + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags); + } // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x) // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x) if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG || N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) - return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(N), VT, - N0.getOperand(0)); + return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, N0.getOperand(0)); // fold (zext (truncate x)) -> (zext x) or // (zext (truncate x)) -> (truncate x) @@ -13703,6 +13898,32 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT SrcVT = N0.getOperand(0).getValueType(); EVT MinVT = N0.getValueType(); + if (N->getFlags().hasNonNeg()) { + SDValue Op = N0.getOperand(0); + unsigned OpBits = SrcVT.getScalarSizeInBits(); + unsigned MidBits = MinVT.getScalarSizeInBits(); + unsigned DestBits = VT.getScalarSizeInBits(); + unsigned NumSignBits = DAG.ComputeNumSignBits(Op); + + if (OpBits == DestBits) { + // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign + // bits, it is already ready. + if (NumSignBits > DestBits - MidBits) + return Op; + } else if (OpBits < DestBits) { + // Op is i32, Mid is i8, and Dest is i64. 
If Op has more than 24 sign + // bits, just sext from i32. + // FIXME: This can probably be ZERO_EXTEND nneg? + if (NumSignBits > OpBits - MidBits) + return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op); + } else { + // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign + // bits, just truncate to i32. + if (NumSignBits > OpBits - MidBits) + return DAG.getNode(ISD::TRUNCATE, DL, VT, Op); + } + } + // Try to mask before the extension to avoid having to generate a larger mask, // possibly over several sub-vectors. if (SrcVT.bitsLT(VT) && VT.isVector()) { @@ -13744,9 +13965,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } // Try to simplify (zext (load x)). - if (SDValue foldedExt = - tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0, - ISD::ZEXTLOAD, ISD::ZERO_EXTEND)) + if (SDValue foldedExt = tryToFoldExtOfLoad( + DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD, + ISD::ZERO_EXTEND, N->getFlags().hasNonNeg())) return foldedExt; if (SDValue foldedExt = @@ -13759,6 +13980,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (SDValue ExtLoad = CombineExtLoad(N)) return ExtLoad; + // Try to simplify (zext (atomic_load x)). + if (SDValue foldedExt = + tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD)) + return foldedExt; + // fold (zext (and/or/xor (load x), cst)) -> // (and/or/xor (zextload x), (zext cst)) // Unless (and (load x) cst) will match as a zextload already and has @@ -13883,11 +14109,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::SHL) { // If the original shl may be shifting out bits, do not perform this // transformation. - // TODO: Add MaskedValueIsZero check. unsigned KnownZeroBits = ShVal.getValueSizeInBits() - ShVal.getOperand(0).getValueSizeInBits(); - if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) - return SDValue(); + if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) { + // If the shift is too large, then see if we can deduce that the + // shift is safe anyway. + // Create a mask that has ones for the bits being shifted out. + APInt ShiftOutMask = + APInt::getHighBitsSet(ShVal.getValueSizeInBits(), + ShAmtC->getAPIntValue().getZExtValue()); + + // Check if the bits being shifted out are known to be zero. + if (!DAG.MaskedValueIsZero(ShVal, ShiftOutMask)) + return SDValue(); + } } // Ensure that the shift amount is wide enough for the shifted value. @@ -13903,36 +14138,47 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) return NewVSel; - if (SDValue NewCtPop = widenCtPop(N, DAG)) + if (SDValue NewCtPop = widenCtPop(N, DAG, DL)) return NewCtPop; if (SDValue V = widenAbs(N, DAG)) return V; - if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level)) + if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level)) return Res; + // CSE zext nneg with sext if the zext is not free. 
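The MaskedValueIsZero check added to the zext-of-shl path above captures exactly when the transform is safe: zext(shl x, c) equals shl(zext x, c) precisely when every bit the narrow shift pushes out is already known zero. An illustrative sketch:

    #include <cassert>
    #include <cstdint>

    int main() {
      unsigned c = 8;
      uint16_t x = 0x00FF; // high 8 bits known zero: nothing is lost
      assert((uint32_t)(uint16_t)(x << c) == ((uint32_t)x << c));
      uint16_t y = 0x8001; // nonzero high bits are shifted out and lost
      assert((uint32_t)(uint16_t)(y << c) != ((uint32_t)y << c));
    }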
+ if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) { + SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0); + if (CSENode) + return SDValue(CSENode, 0); + } + return SDValue(); } SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + SDLoc DL(N); // aext(undef) = undef if (N0.isUndef()) return DAG.getUNDEF(VT); - if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) + if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes)) return Res; // fold (aext (aext x)) -> (aext x) // fold (aext (zext x)) -> (zext x) // fold (aext (sext x)) -> (sext x) - if (N0.getOpcode() == ISD::ANY_EXTEND || - N0.getOpcode() == ISD::ZERO_EXTEND || - N0.getOpcode() == ISD::SIGN_EXTEND) - return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); + if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND) { + SDNodeFlags Flags; + if (N0.getOpcode() == ISD::ZERO_EXTEND) + Flags.setNonNeg(N0->getFlags().hasNonNeg()); + return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags); + } // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x) // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x) @@ -13940,7 +14186,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG || N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG || N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG) - return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); + return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0)); // fold (aext (truncate (load x))) -> (aext (smaller load x)) // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) @@ -13958,7 +14204,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // fold (aext (truncate x)) if (N0.getOpcode() == ISD::TRUNCATE) - return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT); + return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT); // Fold (aext (and (trunc x), cst)) -> (and x, cst) // if the trunc is not free. @@ -13966,7 +14212,6 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { N0.getOperand(0).getOpcode() == ISD::TRUNCATE && N0.getOperand(1).getOpcode() == ISD::Constant && !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) { - SDLoc DL(N); SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT); SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1)); assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!"); @@ -13992,9 +14237,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, - LN0->getChain(), LN0->getBasePtr(), - N0.getValueType(), LN0->getMemOperand()); + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(), + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND); // If the load value is used only by N, replace it via CombineTo N. 
bool NoReplaceTrunc = N0.hasOneUse(); @@ -14020,9 +14265,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { ISD::LoadExtType ExtType = LN0->getExtensionType(); EVT MemVT = LN0->getMemoryVT(); if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N), - VT, LN0->getChain(), LN0->getBasePtr(), - MemVT, LN0->getMemOperand()); + SDValue ExtLoad = + DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); recursivelyDeleteUnusedNodes(LN0); @@ -14050,23 +14295,20 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // we know that the element size of the sext'd result matches the // element size of the compare operands. if (VT.getSizeInBits() == N00VT.getSizeInBits()) - return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/any extend EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger(); - SDValue VsetCC = - DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT); + SDValue VsetCC = DAG.getSetCC( + DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getAnyExtOrTrunc(VsetCC, DL, VT); } // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc - SDLoc DL(N); if (SDValue SCC = SimplifySelectCC( DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), @@ -14074,10 +14316,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { return SCC; } - if (SDValue NewCtPop = widenCtPop(N, DAG)) + if (SDValue NewCtPop = widenCtPop(N, DAG, DL)) return NewCtPop; - if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level)) + if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level)) return Res; return SDValue(); @@ -14192,7 +14434,7 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { // away, but using an AND rather than a right shift. HasShiftedOffset is used // to indicate that the narrowed load should be left-shifted ShAmt bits to get // the result. - bool HasShiftedOffset = false; + unsigned ShiftedOffset = 0; // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then // extended to VT. 
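The ShiftedOffset bookkeeping introduced above serves the shifted-mask case of reduceLoadWidth: a pattern like (and (srl x, 4), 0xFF0) reads one contiguous field, so the field can be loaded narrowly and shifted back into position afterwards. An illustrative sketch of the equivalence:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0xDEADBEEFu;
      // Wide form: shift, then apply the shifted mask (bits 4..11 survive).
      uint32_t viaWide = (x >> 4) & 0xFF0u;
      // Narrow form: extract the 8 active bits (a narrower "load"), then
      // shl by the mask's trailing-zero offset (the ShiftedOffset, 4).
      uint32_t field = (x >> 8) & 0xFFu;
      assert(viaWide == (field << 4));
    }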
if (Opc == ISD::SIGN_EXTEND_INREG) { @@ -14237,7 +14479,7 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { if (Mask.isMask()) { ActiveBits = Mask.countr_one(); } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) { - HasShiftedOffset = true; + ShiftedOffset = ShAmt; } else { return SDValue(); } @@ -14301,6 +14543,7 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { SDNode *Mask = *(SRL->use_begin()); if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND && isa<ConstantSDNode>(Mask->getOperand(1))) { + unsigned Offset, ActiveBits; const APInt& ShiftMask = Mask->getConstantOperandAPInt(1); if (ShiftMask.isMask()) { EVT MaskedVT = @@ -14309,6 +14552,18 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) && TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) ExtVT = MaskedVT; + } else if (ExtType == ISD::ZEXTLOAD && + ShiftMask.isShiftedMask(Offset, ActiveBits) && + (Offset + ShAmt) < VT.getScalarSizeInBits()) { + EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); + // If the mask is shifted we can use a narrower load and a shl to insert + // the trailing zeros. + if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) && + TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) { + ExtVT = MaskedVT; + ShAmt = Offset + ShAmt; + ShiftedOffset = Offset; + } } } @@ -14353,7 +14608,6 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt; uint64_t PtrOff = PtrAdjustmentInBits / 8; - Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff); SDLoc DL(LN0); // The original load itself didn't wrap, so an offset within it doesn't. SDNodeFlags Flags; @@ -14365,13 +14619,14 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { SDValue Load; if (ExtType == ISD::NON_EXTLOAD) Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr, - LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign, + LN0->getPointerInfo().getWithOffset(PtrOff), + LN0->getOriginalAlign(), LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); else Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT, - NewAlign, LN0->getMemOperand()->getFlags(), - LN0->getAAInfo()); + LN0->getOriginalAlign(), + LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); // Replace the old load's chain with the new load's chain. WorklistRemover DeadNodes(*this); @@ -14380,9 +14635,6 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { // Shift the result left, if we've swallowed a left shift. SDValue Result = Load; if (ShLeftAmt != 0) { - EVT ShImmTy = getShiftAmountTy(Result.getValueType()); - if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt)) - ShImmTy = VT; // If the shift amount is as large as the result size (but, presumably, // no larger than the source) then the useful bits of the result are // zero; we can't simply return the shortened shift, because the result @@ -14390,16 +14642,16 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { if (ShLeftAmt >= VT.getScalarSizeInBits()) Result = DAG.getConstant(0, DL, VT); else - Result = DAG.getNode(ISD::SHL, DL, VT, - Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy)); + Result = DAG.getNode(ISD::SHL, DL, VT, Result, + DAG.getShiftAmountConstant(ShLeftAmt, VT, DL)); } - if (HasShiftedOffset) { + if (ShiftedOffset != 0) { // We're using a shifted mask, so the load now has an offset. 
This means // that data has been loaded into the lower bytes than it would have been // before, so we need to shl the loaded data into the correct position in the // register. - SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT); + SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT); Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); } @@ -14605,10 +14857,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return SDValue(); } -static SDValue -foldExtendVectorInregToExtendOfSubvector(SDNode *N, const TargetLowering &TLI, - SelectionDAG &DAG, - bool LegalOperations) { +static SDValue foldExtendVectorInregToExtendOfSubvector( + SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, + bool LegalOperations) { unsigned InregOpcode = N->getOpcode(); unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode); @@ -14635,28 +14886,29 @@ foldExtendVectorInregToExtendOfSubvector(SDNode *N, const TargetLowering &TLI, if (LegalOperations && !TLI.isOperationLegal(Opcode, VT)) return SDValue(); - return DAG.getNode(Opcode, SDLoc(N), VT, Src); + return DAG.getNode(Opcode, DL, VT, Src); } SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + SDLoc DL(N); if (N0.isUndef()) { // aext_vector_inreg(undef) = undef because the top bits are undefined. // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same. return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ? DAG.getUNDEF(VT) - : DAG.getConstant(0, SDLoc(N), VT); + : DAG.getConstant(0, DL, VT); } - if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) + if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes)) return Res; if (SimplifyDemandedVectorElts(SDValue(N, 0))) return SDValue(N, 0); - if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, TLI, DAG, + if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, DL, TLI, DAG, LegalOperations)) return R; @@ -14668,6 +14920,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { EVT VT = N->getValueType(0); EVT SrcVT = N0.getValueType(); bool isLE = DAG.getDataLayout().isLittleEndian(); + SDLoc DL(N); // trunc(undef) = undef if (N0.isUndef()) @@ -14675,10 +14928,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // fold (truncate (truncate x)) -> (truncate x) if (N0.getOpcode() == ISD::TRUNCATE) - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0)); // fold (truncate c1) -> c1 - if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, SDLoc(N), VT, {N0})) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0})) return C; // fold (truncate (ext x)) -> (ext x) or (truncate x) or x @@ -14687,10 +14940,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { N0.getOpcode() == ISD::ANY_EXTEND) { // if the source is smaller than the dest, we still need an extend. if (N0.getOperand(0).getValueType().bitsLT(VT)) - return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); + return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0)); // if the source is larger than the dest, than we just need the truncate. if (N0.getOperand(0).getValueType().bitsGT(VT)) - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0)); // if the source and dest are the same type, we can drop both the extend // and the truncate. 
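The trunc-of-extend cases above reduce to how the source, intermediate, and destination widths compare: equal widths cancel entirely, a wider destination keeps an extend, and a narrower one keeps only the truncate. An illustrative sketch:

    #include <cassert>
    #include <cstdint>

    int main() {
      int16_t x = -1234;
      assert((int16_t)(int32_t)x == x);          // same type: both ops drop
      assert((int32_t)(int64_t)x == (int32_t)x); // dest wider: extend stays
      assert((int8_t)(int32_t)x == (int8_t)x);   // dest narrower: trunc stays
    }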
return N0.getOperand(0); @@ -14704,8 +14957,8 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue ExtVal = N0.getOperand(1); EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT(); if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) { - SDValue TrX = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X); - return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, TrX, ExtVal); + SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal); } } @@ -14740,8 +14993,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { int Elt = EltNo->getAsZExtVal(); int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); - - SDLoc DL(N); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy, DAG.getBitcast(NVT, N0.getOperand(0)), DAG.getVectorIdxConstant(Index, DL)); @@ -14756,7 +15007,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue Cond = N0.getOperand(0); SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2)); - return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1); + return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1); } } @@ -14768,22 +15019,20 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { KnownBits Known = DAG.computeKnownBits(Amt); unsigned Size = VT.getScalarSizeInBits(); if (Known.countMaxActiveBits() <= Log2_32(Size)) { - SDLoc SL(N); EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0)); if (AmtVT != Amt.getValueType()) { - Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT); + Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT); AddToWorklist(Amt.getNode()); } - return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt); + return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt); } } - if (SDValue V = foldSubToUSubSat(VT, N0.getNode())) + if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL)) return V; - if (SDValue ABD = foldABSToABD(N)) + if (SDValue ABD = foldABSToABD(N, DL)) return ABD; // Attempt to pre-truncate BUILD_VECTOR sources. @@ -14792,7 +15041,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) && // Avoid creating illegal types if running after type legalizer. 
(!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) { - SDLoc DL(N); EVT SVT = VT.getScalarType(); SmallVector<SDValue, 8> TruncOps; for (const SDValue &Op : N0->op_values()) { @@ -14806,7 +15054,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (N0.getOpcode() == ISD::SPLAT_VECTOR && (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) && (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) { - SDLoc DL(N); EVT SVT = VT.getScalarType(); return DAG.getSplatVector( VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0))); @@ -14838,7 +15085,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) Opnds.push_back(BuildVect.getOperand(i)); - return DAG.getBuildVector(VT, SDLoc(N), Opnds); + return DAG.getBuildVector(VT, DL, Opnds); } } @@ -14901,7 +15148,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { AddToWorklist(NV.getNode()); Opnds.push_back(NV); } - return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds); } } @@ -14915,11 +15162,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT && (!LegalOperations || TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) { - SDLoc SL(N); - unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1; - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc, - DAG.getVectorIdxConstant(Idx, SL)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc, + DAG.getVectorIdxConstant(Idx, DL)); } } @@ -14964,7 +15209,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // we are extra cautious to not create an unsupported operation. // Target-specific changes are likely needed to avoid regressions here. if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) { - SDLoc DL(N); SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0)); SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1)); return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR); @@ -14981,7 +15225,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) || TLI.isOperationLegal(N0.getOpcode(), VT)) && N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) { - SDLoc DL(N); SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0)); SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1)); SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1)); @@ -14998,7 +15241,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { VT.getScalarSizeInBits() && hasOperation(N0.getOpcode(), VT)) { return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1), - DAG, SDLoc(N)); + DAG, DL); } break; } @@ -15391,6 +15634,12 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) { if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false)) return N0; + // We currently avoid folding freeze over SRA/SRL, due to the problems seen + // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for + // example https://reviews.llvm.org/D136529#4120959. + if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL) + return SDValue(); + // Fold freeze(op(x, ...)) -> op(freeze(x), ...). // Try to push freeze through instructions that propagate but don't produce // poison as far as possible. 
If an operand of freeze follows three @@ -15403,9 +15652,33 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) { N0->getNumValues() != 1 || !N0->hasOneUse()) return SDValue(); - bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR || - N0.getOpcode() == ISD::BUILD_PAIR || - N0.getOpcode() == ISD::CONCAT_VECTORS; + bool AllowMultipleMaybePoisonOperands = + N0.getOpcode() == ISD::SELECT_CC || + N0.getOpcode() == ISD::SETCC || + N0.getOpcode() == ISD::BUILD_VECTOR || + N0.getOpcode() == ISD::BUILD_PAIR || + N0.getOpcode() == ISD::VECTOR_SHUFFLE || + N0.getOpcode() == ISD::CONCAT_VECTORS; + + // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all + // ones" or "constant" into something that depends on FrozenUndef. We can + // instead pick undef values to keep those properties, while at the same time + // folding away the freeze. + // If we implement a more general solution for folding away freeze(undef) in + // the future, then this special handling can be removed. + if (N0.getOpcode() == ISD::BUILD_VECTOR) { + SDLoc DL(N0); + EVT VT = N0.getValueType(); + if (llvm::ISD::isBuildVectorAllOnes(N0.getNode())) + return DAG.getAllOnesConstant(DL, VT); + if (llvm::ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { + SmallVector<SDValue, 8> NewVecC; + for (const SDValue &Op : N0->op_values()) + NewVecC.push_back( + Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op); + return DAG.getBuildVector(VT, DL, NewVecC); + } + } SmallSetVector<SDValue, 8> MaybePoisonOperands; for (SDValue Op : N0->ops()) { @@ -15458,8 +15731,16 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) { if (Op.getOpcode() == ISD::UNDEF) Op = DAG.getFreeze(Op); } - // NOTE: this strips poison generating flags. - SDValue R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops); + + SDValue R; + if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0)) { + // Special case handling for ShuffleVectorSDNode nodes. + R = DAG.getVectorShuffle(N0.getValueType(), SDLoc(N0), Ops[0], Ops[1], + SVN->getMask()); + } else { + // NOTE: this strips poison generating flags. + R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops); + } assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) && "Can't create node that may be undef/poison!"); return R; @@ -15582,8 +15863,6 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (!HasFMAD && !HasFMA) return SDValue(); - bool CanReassociate = - Options.UnsafeFPMath || N->getFlags().hasAllowReassociation(); bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath || HasFMAD); // If the addition is not contractable, do not combine. @@ -15643,6 +15922,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) --> // fma A, B, (fma C, D, fma (E, F, G)). // This requires reassociation because it changes the order of operations. + bool CanReassociate = + Options.UnsafeFPMath || N->getFlags().hasAllowReassociation(); if (CanReassociate) { SDValue FMA, E; if (isFusedOp(N0) && N0.hasOneUse()) { @@ -16626,7 +16907,7 @@ SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) { // Perform actual transform. SDValue MantissaShiftCnt = - DAG.getConstant(*Mantissa, DL, getShiftAmountTy(NewIntVT)); + DAG.getShiftAmountConstant(*Mantissa, NewIntVT, DL); // TODO: Sometimes Log2 is of form `(X + C)`. `(X + C) << C1` should fold to // `(X << C1) + (C << C1)`, but that isn't always the case because of the // cast. 
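The TODO above appeals to shl distributing over add in wrapping arithmetic; the identity itself is easy to check in C++ (constants chosen for illustration only):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0x89abcdefu, C = 13, C1 = 5;
  // (X + C) << C1 == (X << C1) + (C << C1), everything mod 2^32.
  assert(((X + C) << C1) == (X << C1) + (C << C1));
  return 0;
}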
We could implement that by handle here to handle the casts. @@ -16799,9 +17080,6 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) { SelectionDAG::FlagInserter FlagsInserter(DAG, N); MatchContextClass matcher(DAG, TLI, N); - bool CanReassociate = - Options.UnsafeFPMath || N->getFlags().hasAllowReassociation(); - // Constant fold FMA. if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) && @@ -16844,6 +17122,8 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) { !DAG.isConstantFPBuildVectorOrConstantFP(N1)) return matcher.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); + bool CanReassociate = + Options.UnsafeFPMath || N->getFlags().hasAllowReassociation(); if (CanReassociate) { // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) && @@ -17035,26 +17315,29 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (SDValue V = combineRepeatedFPDivisors(N)) return V; - if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) { - // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. - if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) { - // Compute the reciprocal 1.0 / c2. - const APFloat &N1APF = N1CFP->getValueAPF(); - APFloat Recip(N1APF.getSemantics(), 1); // 1.0 - APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); - // Only do the transform if the reciprocal is a legal fp immediate that - // isn't too nasty (eg NaN, denormal, ...). - if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty - (!LegalOperations || - // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM - // backend)... we should handle this gracefully after Legalize. - // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) || - TLI.isOperationLegal(ISD::ConstantFP, VT) || - TLI.isFPImmLegal(Recip, VT, ForCodeSize))) - return DAG.getNode(ISD::FMUL, DL, VT, N0, - DAG.getConstantFP(Recip, DL, VT)); - } + // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or + // the loss is acceptable with AllowReciprocal. + if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) { + // Compute the reciprocal 1.0 / c2. + const APFloat &N1APF = N1CFP->getValueAPF(); + APFloat Recip = APFloat::getOne(N1APF.getSemantics()); + APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); + // Only do the transform if the reciprocal is a legal fp immediate that + // isn't too nasty (eg NaN, denormal, ...). + if (((st == APFloat::opOK && !Recip.isDenormal()) || + (st == APFloat::opInexact && + (Options.UnsafeFPMath || Flags.hasAllowReciprocal()))) && + (!LegalOperations || + // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM + // backend)... we should handle this gracefully after Legalize. + // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) || + TLI.isOperationLegal(ISD::ConstantFP, VT) || + TLI.isFPImmLegal(Recip, VT, ForCodeSize))) + return DAG.getNode(ISD::FMUL, DL, VT, N0, + DAG.getConstantFP(Recip, DL, VT)); + } + if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) { // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction. 
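In the reworked fdiv fold above, the opOK-and-not-denormal path corresponds to divisors whose reciprocal is exact, powers of two being the common case; a sketch with an illustrative value:

#include <cassert>

int main() {
  // 1.0/8.0 is exact, so X/8.0 and X*0.125 only differ in exponent arithmetic
  // and must compare equal bit-for-bit (absent overflow/underflow).
  double X = 3.141592653589793;
  assert(X / 8.0 == X * 0.125);
  return 0;
}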
if (N1.getOpcode() == ISD::FSQRT) { @@ -17161,17 +17444,40 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { EVT VT = N->getValueType(0); SDNodeFlags Flags = N->getFlags(); SelectionDAG::FlagInserter FlagsInserter(DAG, N); + SDLoc DL(N); if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) return R; // fold (frem c1, c2) -> fmod(c1,c2) - if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1})) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1})) return C; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; + // Lower frem N0, N1 => x - trunc(N0 / N1) * N1, providing N1 is an integer + // power of 2. + if (!TLI.isOperationLegal(ISD::FREM, VT) && + TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && + TLI.isOperationLegalOrCustom(ISD::FDIV, VT) && + TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT) && + DAG.isKnownToBeAPowerOfTwoFP(N1)) { + bool NeedsCopySign = + !Flags.hasNoSignedZeros() && !DAG.cannotBeOrderedNegativeFP(N0); + SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1); + SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div); + SDValue MLA; + if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) { + MLA = DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd), + N1, N0); + } else { + SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1); + MLA = DAG.getNode(ISD::FSUB, DL, VT, N0, Mul); + } + return NeedsCopySign ? DAG.getNode(ISD::FCOPYSIGN, DL, VT, MLA, N0) : MLA; + } + return SDValue(); } @@ -17229,10 +17535,10 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); + SDLoc DL(N); // fold (fcopysign c1, c2) -> fcopysign(c1,c2) - if (SDValue C = - DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1})) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, DL, VT, {N0, N1})) return C; if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) { @@ -17241,10 +17547,10 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) if (!V.isNegative()) { if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT)) - return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); + return DAG.getNode(ISD::FABS, DL, VT, N0); } else { if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, SDLoc(N), VT, + return DAG.getNode(ISD::FNEG, DL, VT, DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0)); } } @@ -17254,20 +17560,31 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { // copysign(copysign(x,z), y) -> copysign(x, y) if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) - return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1); + return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0.getOperand(0), N1); // copysign(x, abs(y)) -> abs(x) if (N1.getOpcode() == ISD::FABS) - return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); + return DAG.getNode(ISD::FABS, DL, VT, N0); // copysign(x, copysign(y,z)) -> copysign(x, z) if (N1.getOpcode() == ISD::FCOPYSIGN) - return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1)); + return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(1)); // copysign(x, fp_extend(y)) -> copysign(x, y) // copysign(x, fp_round(y)) -> copysign(x, y) if (CanCombineFCOPYSIGN_EXTEND_ROUND(N)) - return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0)); + return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(0)); + + // We only take the sign bit from 
the sign operand. + EVT SignVT = N1.getValueType(); + if (SimplifyDemandedBits(N1, + APInt::getSignMask(SignVT.getScalarSizeInBits()))) + return SDValue(N, 0); + + // We only take the non-sign bits from the value operand + if (SimplifyDemandedBits(N0, + APInt::getSignedMaxValue(VT.getScalarSizeInBits()))) + return SDValue(N, 0); return SDValue(); } @@ -18691,19 +19008,19 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { // Truncate Value To Stored Memory Size. do { if (!getTruncatedStoreValue(ST, Val)) - continue; + break; if (!isTypeLegal(LDMemType)) - continue; + break; if (STMemType != LDMemType) { // TODO: Support vectors? This requires extract_subvector/bitcast. if (!STMemType.isVector() && !LDMemType.isVector() && STMemType.isInteger() && LDMemType.isInteger()) Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val); else - continue; + break; } if (!extendLoadedValueToExtension(LD, Val)) - continue; + break; return ReplaceLd(LD, Val, Chain); } while (false); @@ -19503,9 +19820,9 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, // shifted by ByteShift and truncated down to NumBytes. if (ByteShift) { SDLoc DL(IVal); - IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal, - DAG.getConstant(ByteShift*8, DL, - DC->getShiftAmountTy(IVal.getValueType()))); + IVal = DAG.getNode( + ISD::SRL, DL, IVal.getValueType(), IVal, + DAG.getShiftAmountConstant(ByteShift * 8, IVal.getValueType(), DL)); } // Figure out the offset for the store and the alignment of the access. @@ -20253,8 +20570,8 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( // * (Op 3) -> Represents the pre or post-indexing offset (or undef for // non-indexed stores). Not constant on all targets (e.g. ARM) // and so can participate in a cycle. - for (unsigned j = 0; j < N->getNumOperands(); ++j) - Worklist.push_back(N->getOperand(j).getNode()); + for (const SDValue &Op : N->op_values()) + Worklist.push_back(Op.getNode()); } // Search through DAG. We can stop early if we find a store node. for (unsigned i = 0; i < NumStores; ++i) @@ -21051,6 +21368,24 @@ SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) { ST->getMemOperand()->getFlags()); } +SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) { + AtomicSDNode *ST = cast<AtomicSDNode>(N); + SDValue Val = ST->getVal(); + EVT VT = Val.getValueType(); + EVT MemVT = ST->getMemoryVT(); + + if (MemVT.bitsLT(VT)) { // Is truncating store + APInt TruncDemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(), + MemVT.getScalarSizeInBits()); + // See if we can simplify the operation with SimplifyDemandedBits, which + // only works if the value has a single use. + if (SimplifyDemandedBits(Val, TruncDemandedBits)) + return SDValue(N, 0); + } + + return SDValue(); +} + SDValue DAGCombiner::visitSTORE(SDNode *N) { StoreSDNode *ST = cast<StoreSDNode>(N); SDValue Chain = ST->getChain(); @@ -21986,7 +22321,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, /// Transform a vector binary operation into a scalar binary operation by moving /// the math/logic after an extract element of a vector. 
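Stepping back to the new FREM expansion above: for a power-of-two divisor the fdiv is exact, so X - trunc(X/Y)*Y reproduces fmod exactly, and the FCOPYSIGN tail exists for inputs like X == -0.0, where the fsub alone would produce +0.0. A runnable sketch with illustrative values:

#include <cassert>
#include <cmath>

int main() {
  double X = -7.5, Y = 2.0;               // Y is a power of two
  double MLA = X - std::trunc(X / Y) * Y; // the FDIV/FTRUNC/FSUB shape
  assert(MLA == std::fmod(X, Y));         // both are exactly -1.5
  return 0;
}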
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, - bool LegalOperations) { + const SDLoc &DL, bool LegalOperations) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Vec = ExtElt->getOperand(0); SDValue Index = ExtElt->getOperand(1); @@ -22011,7 +22346,6 @@ static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) { // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C' // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC) - SDLoc DL(ExtElt); EVT VT = ExtElt->getValueType(0); SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index); SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index); @@ -22227,12 +22561,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { IndexC->getAPIntValue().uge(VecVT.getVectorNumElements())) return DAG.getUNDEF(ScalarVT); - // extract_vector_elt(freeze(x)), idx -> freeze(extract_vector_elt(x)), idx - if (VecOp.hasOneUse() && VecOp.getOpcode() == ISD::FREEZE) { - return DAG.getFreeze(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, - VecOp.getOperand(0), Index)); - } - // extract_vector_elt (build_vector x, y), 1 -> y if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) || VecOp.getOpcode() == ISD::SPLAT_VECTOR) && @@ -22256,7 +22584,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } } - if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations)) + if (SDValue BO = scalarizeExtractedBinop(N, DAG, DL, LegalOperations)) return BO; if (VecVT.isScalableVector()) @@ -22492,17 +22820,16 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // -> extract_vector_elt b, 0 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3 // -> extract_vector_elt b, 1 - SDLoc SL(N); EVT ConcatVT = VecOp.getOperand(0).getValueType(); unsigned ConcatNumElts = ConcatVT.getVectorNumElements(); - SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL, + SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL, Index.getValueType()); SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts); - SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ConcatVT.getVectorElementType(), ConcatOp, NewIdx); - return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt); + return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt); } // Make sure we found a non-volatile load and the extractelement is @@ -23396,7 +23723,10 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // If X is a build_vector itself, the concat can become a larger build_vector. // TODO: Maybe this is useful for non-splat too? if (!LegalOperations) { - if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) { + SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue(); + // Only change build_vector to a concat_vector if the splat value type is + // same as the vector element type. 
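scalarizeExtractedBinop, whose signature is updated above, encodes the lane-wise identity that extracting a lane of a vector binop equals the binop of the extracted lanes; modelled element-wise in C++ (data illustrative):

#include <cassert>
#include <cstdint>

int main() {
  // extractelt (binop X, C), I == binop (extractelt X, I), C[I]
  uint32_t X[4] = {1, 2, 3, 4}, C[4] = {10, 20, 30, 40}, V[4];
  for (unsigned J = 0; J < 4; ++J)
    V[J] = X[J] + C[J];        // the vector binop
  unsigned I = 2;
  assert(V[I] == X[I] + C[I]); // the scalarized form
  return 0;
}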
+ if (Splat && Splat.getValueType() == VT.getVectorElementType()) { Splat = peekThroughBitcasts(Splat); EVT SrcVT = Splat.getValueType(); if (SrcVT.isVector()) { @@ -23405,8 +23735,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { SrcVT.getVectorElementType(), NumElts); if (!LegalTypes || TLI.isTypeLegal(NewVT)) { SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat); - SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), - NewVT, Ops); + SDValue Concat = + DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops); return DAG.getBitcast(VT, Concat); } } @@ -23479,9 +23809,7 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { SDLoc DL(N); EVT VT = N->getValueType(0); SmallVector<SDValue, 8> Ops; - EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); - SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); // Keep track of what we encounter. bool AnyInteger = false; @@ -23491,7 +23819,7 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { !Op.getOperand(0).getValueType().isVector()) Ops.push_back(Op.getOperand(0)); else if (ISD::UNDEF == Op.getOpcode()) - Ops.push_back(ScalarUndef); + Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT)); else return SDValue(); @@ -23511,13 +23839,12 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { // Replace UNDEFs by another scalar UNDEF node, of the final desired type. if (AnyFP) { SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits()); - ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); if (AnyInteger) { for (SDValue &Op : Ops) { if (Op.getValueType() == SVT) continue; if (Op.isUndef()) - Op = ScalarUndef; + Op = DAG.getNode(ISD::UNDEF, DL, SVT); else Op = DAG.getBitcast(SVT, Op); } @@ -23854,7 +24181,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { } // concat_vectors(scalar, undef) -> scalar_to_vector(scalar) - if (!Scalar.getValueType().isVector()) { + if (!Scalar.getValueType().isVector() && In.hasOneUse()) { // If the bitcast type isn't legal, it might be a trunc of a legal type; // look through the trunc so we can still do the transform: // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar) @@ -24230,7 +24557,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { // TODO: Use "BaseIndexOffset" to make this more effective. SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL); - uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize()); + LocationSize StoreSize = LocationSize::precise(VT.getStoreSize()); MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *MMO; if (Offset.isScalable()) { @@ -24404,6 +24731,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { EVT NVT = N->getValueType(0); SDValue V = N->getOperand(0); uint64_t ExtIdx = N->getConstantOperandVal(1); + SDLoc DL(N); // Extract from UNDEF is UNDEF. 
if (V.isUndef()) @@ -24419,7 +24747,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(), V.getConstantOperandVal(1)) && TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) { - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0), + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0), V.getOperand(1)); } } @@ -24428,7 +24756,24 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { if (V.getOpcode() == ISD::SPLAT_VECTOR) if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse()) if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT)) - return DAG.getSplatVector(NVT, SDLoc(N), V.getOperand(0)); + return DAG.getSplatVector(NVT, DL, V.getOperand(0)); + + // extract_subvector(insert_subvector(x,y,c1),c2) + // --> extract_subvector(y,c2-c1) + // iff we're just extracting from the inserted subvector. + if (V.getOpcode() == ISD::INSERT_SUBVECTOR) { + SDValue InsSub = V.getOperand(1); + EVT InsSubVT = InsSub.getValueType(); + unsigned NumInsElts = InsSubVT.getVectorMinNumElements(); + unsigned InsIdx = V.getConstantOperandVal(2); + unsigned NumSubElts = NVT.getVectorMinNumElements(); + if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) && + TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) && + InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() && + V.getValueType().isFixedLengthVector()) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub, + DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL)); + } // Try to move vector bitcast after extract_subv by scaling extraction index: // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index') @@ -24442,10 +24787,9 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { if ((SrcNumElts % DestNumElts) == 0) { unsigned SrcDestRatio = SrcNumElts / DestNumElts; ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio; - EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), - NewExtEC); + EVT NewExtVT = + EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC); if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) { - SDLoc DL(N); SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL); SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT, V.getOperand(0), NewIndex); @@ -24459,7 +24803,6 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio); EVT ScalarVT = SrcVT.getScalarType(); if ((ExtIdx % DestSrcRatio) == 0) { - SDLoc DL(N); unsigned IndexValScaled = ExtIdx / DestSrcRatio; EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC); @@ -24507,7 +24850,6 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // v2i8 extract_subvec v8i8 Y, 6 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) { - SDLoc DL(N); unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts; assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts && "Trying to extract from >1 concat operand?"); @@ -24546,13 +24888,13 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { if (NumElems == 1) { SDValue Src = V->getOperand(IdxVal); if (EltVT != Src.getValueType()) - Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src); + Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src); return DAG.getBitcast(NVT, Src); } // Extract the pieces from the original build_vector. 
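The new extract_subvector(insert_subvector(x,y,c1),c2) fold above is pure index arithmetic once the extracted window lies entirely inside the inserted subvector; replayed on arrays (sizes and indices illustrative):

#include <array>
#include <cassert>

int main() {
  std::array<int, 8> X{0, 1, 2, 3, 4, 5, 6, 7};
  std::array<int, 4> Y{40, 41, 42, 43};
  unsigned InsIdx = 2, ExtIdx = 3, NumSubElts = 2;
  std::array<int, 8> V = X;    // insert_subvector(X, Y, InsIdx)
  for (unsigned I = 0; I < Y.size(); ++I)
    V[InsIdx + I] = Y[I];
  // InsIdx <= ExtIdx and ExtIdx + NumSubElts <= InsIdx + Y.size(), so the
  // extract reads only inserted lanes, at offset ExtIdx - InsIdx into Y.
  for (unsigned I = 0; I < NumSubElts; ++I)
    assert(V[ExtIdx + I] == Y[ExtIdx - InsIdx + I]);
  return 0;
}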
- SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N), - V->ops().slice(IdxVal, NumElems)); + SDValue BuildVec = + DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems)); return DAG.getBitcast(NVT, BuildVec); } } @@ -24579,7 +24921,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { return DAG.getBitcast(NVT, V.getOperand(1)); } return DAG.getNode( - ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, + ISD::EXTRACT_SUBVECTOR, DL, NVT, DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)), N->getOperand(1)); } @@ -26139,17 +26481,25 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { return N1.getOperand(0); // TODO: To remove the zero check, need to adjust the offset to // a multiple of the new src type. - if (isNullConstant(N2) && - VT.isScalableVector() == SrcVT.isScalableVector()) { - if (VT.getVectorMinNumElements() >= SrcVT.getVectorMinNumElements()) + if (isNullConstant(N2)) { + if (VT.knownBitsGE(SrcVT) && + !(VT.isFixedLengthVector() && SrcVT.isScalableVector())) return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0, N1.getOperand(0), N2); - else + else if (VT.knownBitsLE(SrcVT) && + !(VT.isScalableVector() && SrcVT.isFixedLengthVector())) return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, N1.getOperand(0), N2); } } + // Handle case where we've ended up inserting back into the source vector + // we extracted the subvector from. + // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0 + if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 && + N1.getOperand(1) == N2) + return N0; + // Simplify scalar inserts into an undef vector: // insert_subvector undef, (splat X), N2 -> splat X if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR) @@ -26304,7 +26654,12 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { } } - return SDValue(); + // Sometimes constants manage to survive very late in the pipeline, e.g., + // because they are wrapped inside the <1 x f16> type. Try one last time to + // get rid of them. + SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), + N->getValueType(0), {N0}); + return Folded; } SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) { @@ -26413,6 +26768,12 @@ SDValue DAGCombiner::visitVPOp(SDNode *N) { return visitVP_FSUB(N); case ISD::VP_FMA: return visitFMA<VPMatchContext>(N); + case ISD::VP_SELECT: + return visitVP_SELECT(N); + case ISD::VP_MUL: + return visitMUL<VPMatchContext>(N); + default: + break; } return SDValue(); } @@ -27070,12 +27431,11 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit // constant. 
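The comment above describes foldSelectCCToShiftAnd's single-bit trick: masking the sign smear of an arithmetic shift with a one-bit constant A selects the same bit as a logical shift by size(X) - log2(A) - 1. Checked over a few values (arithmetic right shift of negative integers assumed, as on all mainstream targets):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 1u << 4;                       // single-bit constant, log2 == 4
  for (int32_t X : {-123, -1, 0, 123, INT32_MIN, INT32_MAX}) {
    uint32_t Sra = (uint32_t)(X >> 31);       // 0 or all-ones
    uint32_t Srl = (uint32_t)X >> (32 - 4 - 1); // sign bit moved to bit 4
    assert((Sra & A) == (Srl & A));
  }
  return 0;
}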
- EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType()); auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1; if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) { - SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy); + SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL); SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt); AddToWorklist(Shift.getNode()); @@ -27095,7 +27455,7 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, if (TLI.shouldAvoidTransformToShift(XType, ShCt)) return SDValue(); - SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy); + SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL); SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt); AddToWorklist(Shift.getNode()); @@ -27309,16 +27669,13 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, const APInt &AndMask = ConstAndRHS->getAPIntValue(); if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) { unsigned ShCt = AndMask.getBitWidth() - 1; - SDValue ShlAmt = - DAG.getConstant(AndMask.countl_zero(), SDLoc(AndLHS), - getShiftAmountTy(AndLHS.getValueType())); + SDValue ShlAmt = DAG.getShiftAmountConstant(AndMask.countl_zero(), VT, + SDLoc(AndLHS)); SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); // Now arithmetic right shift it all the way over, so the result is // either all-ones, or zero. - SDValue ShrAmt = - DAG.getConstant(ShCt, SDLoc(Shl), - getShiftAmountTy(Shl.getValueType())); + SDValue ShrAmt = DAG.getShiftAmountConstant(ShCt, VT, SDLoc(Shl)); SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); return DAG.getNode(ISD::AND, DL, VT, Shr, N3); @@ -27366,9 +27723,9 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, return SDValue(); // shl setcc result by log2 n2c - return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, - DAG.getConstant(ShCt, SDLoc(Temp), - getShiftAmountTy(Temp.getValueType()))); + return DAG.getNode( + ISD::SHL, DL, N2.getValueType(), Temp, + DAG.getShiftAmountConstant(ShCt, N2.getValueType(), SDLoc(Temp))); } // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X) @@ -27570,6 +27927,10 @@ static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT, if (!VT.isVector()) return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT); // We need to create a build vector + if (Op.getOpcode() == ISD::SPLAT_VECTOR) + return DAG.getSplat(VT, DL, + DAG.getConstant(Pow2Constants.back().logBase2(), DL, + VT.getScalarType())); SmallVector<SDValue> Log2Ops; for (const APInt &Pow2 : Pow2Constants) Log2Ops.emplace_back( @@ -27861,7 +28222,7 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { bool IsAtomic; SDValue BasePtr; int64_t Offset; - std::optional<int64_t> NumBytes; + LocationSize NumBytes; MachineMemOperand *MMO; }; @@ -27869,32 +28230,29 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) { int64_t Offset = 0; if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset())) - Offset = (LSN->getAddressingMode() == ISD::PRE_INC) - ? C->getSExtValue() - : (LSN->getAddressingMode() == ISD::PRE_DEC) - ? 
-1 * C->getSExtValue() - : 0; - uint64_t Size = - MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize()); - return {LSN->isVolatile(), - LSN->isAtomic(), - LSN->getBasePtr(), - Offset /*base offset*/, - std::optional<int64_t>(Size), - LSN->getMemOperand()}; + Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue() + : (LSN->getAddressingMode() == ISD::PRE_DEC) + ? -1 * C->getSExtValue() + : 0; + TypeSize Size = LSN->getMemoryVT().getStoreSize(); + return {LSN->isVolatile(), LSN->isAtomic(), + LSN->getBasePtr(), Offset /*base offset*/, + LocationSize::precise(Size), LSN->getMemOperand()}; } if (const auto *LN = cast<LifetimeSDNode>(N)) return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1), (LN->hasOffset()) ? LN->getOffset() : 0, - (LN->hasOffset()) ? std::optional<int64_t>(LN->getSize()) - : std::optional<int64_t>(), + (LN->hasOffset()) ? LocationSize::precise(LN->getSize()) + : LocationSize::beforeOrAfterPointer(), (MachineMemOperand *)nullptr}; // Default. return {false /*isvolatile*/, - /*isAtomic*/ false, SDValue(), - (int64_t)0 /*offset*/, std::optional<int64_t>() /*size*/, + /*isAtomic*/ false, + SDValue(), + (int64_t)0 /*offset*/, + LocationSize::beforeOrAfterPointer() /*size*/, (MachineMemOperand *)nullptr}; }; @@ -27921,6 +28279,13 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { return false; } + // If NumBytes is scalable and offset is not 0, conservatively return may + // alias + if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() && + MUC0.Offset != 0) || + (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() && + MUC1.Offset != 0)) + return true; // Try to prove that there is aliasing, or that there is no aliasing. Either // way, we can return now. If nothing can be proved, proceed with more tests. bool IsAlias; @@ -27949,18 +28314,24 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { int64_t SrcValOffset1 = MUC1.MMO->getOffset(); Align OrigAlignment0 = MUC0.MMO->getBaseAlign(); Align OrigAlignment1 = MUC1.MMO->getBaseAlign(); - auto &Size0 = MUC0.NumBytes; - auto &Size1 = MUC1.NumBytes; + LocationSize Size0 = MUC0.NumBytes; + LocationSize Size1 = MUC1.NumBytes; + if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 && - Size0.has_value() && Size1.has_value() && *Size0 == *Size1 && - OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 && - SrcValOffset1 % *Size1 == 0) { + Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() && + !Size1.isScalable() && Size0 == Size1 && + OrigAlignment0 > Size0.getValue().getKnownMinValue() && + SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 && + SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) { int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value(); int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value(); // There is no overlap between these relatively aligned accesses of // similar size. Return no alias. 
- if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0) + if ((OffAlign0 + static_cast<int64_t>( + Size0.getValue().getKnownMinValue())) <= OffAlign1 || + (OffAlign1 + static_cast<int64_t>( + Size1.getValue().getKnownMinValue())) <= OffAlign0) return false; } @@ -27973,16 +28344,25 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { UseAA = false; #endif - if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && Size0 && - Size1) { + if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && + Size0.hasValue() && Size1.hasValue() && + // Can't represent a scalable size + fixed offset in LocationSize + (!Size0.isScalable() || SrcValOffset0 == 0) && + (!Size1.isScalable() || SrcValOffset1 == 0)) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); - int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset; - int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset; + int64_t Overlap0 = + Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset; + int64_t Overlap1 = + Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset; + LocationSize Loc0 = + Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0); + LocationSize Loc1 = + Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1); if (AA->isNoAlias( - MemoryLocation(MUC0.MMO->getValue(), Overlap0, + MemoryLocation(MUC0.MMO->getValue(), Loc0, UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()), - MemoryLocation(MUC1.MMO->getValue(), Overlap1, + MemoryLocation(MUC1.MMO->getValue(), Loc1, UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()))) return false; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index d213ea89de13..ef9f78335519 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -59,12 +59,12 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -752,17 +752,25 @@ FastISel::CallLoweringInfo &FastISel::CallLoweringInfo::setCallee( } bool FastISel::selectPatchpoint(const CallInst *I) { - // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, - // i32 <numBytes>, - // i8* <target>, - // i32 <numArgs>, - // [Args...], - // [live variables...]) + // <ty> @llvm.experimental.patchpoint.<ty>(i64 <id>, + // i32 <numBytes>, + // i8* <target>, + // i32 <numArgs>, + // [Args...], + // [live variables...]) CallingConv::ID CC = I->getCallingConv(); bool IsAnyRegCC = CC == CallingConv::AnyReg; bool HasDef = !I->getType()->isVoidTy(); Value *Callee = I->getOperand(PatchPointOpers::TargetPos)->stripPointerCasts(); + // Check if we can lower the return type when using anyregcc. 
+ MVT ValueType; + if (IsAnyRegCC && HasDef) { + ValueType = TLI.getSimpleValueType(DL, I->getType(), /*AllowUnknown=*/true); + if (ValueType == MVT::Other) + return false; + } + // Get the real number of arguments participating in the call <numArgs> assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)) && "Expected a constant integer."); @@ -790,7 +798,8 @@ bool FastISel::selectPatchpoint(const CallInst *I) { // Add an explicit result reg if we use the anyreg calling convention. if (IsAnyRegCC && HasDef) { assert(CLI.NumResultRegs == 0 && "Unexpected result register."); - CLI.ResultReg = createResultReg(TLI.getRegClassFor(MVT::i64)); + assert(ValueType.isValid()); + CLI.ResultReg = createResultReg(TLI.getRegClassFor(ValueType)); CLI.NumResultRegs = 1; Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*isDef=*/true)); } @@ -1181,35 +1190,51 @@ bool FastISel::selectCall(const User *I) { } void FastISel::handleDbgInfo(const Instruction *II) { - if (!II->hasDbgValues()) + if (!II->hasDbgRecords()) return; // Clear any metadata. MIMD = MIMetadata(); // Reverse order of debug records, because fast-isel walks through backwards. - for (DPValue &DPV : llvm::reverse(II->getDbgValueRange())) { + for (DbgRecord &DR : llvm::reverse(II->getDbgRecordRange())) { flushLocalValueMap(); recomputeInsertPt(); + if (DbgLabelRecord *DLR = dyn_cast<DbgLabelRecord>(&DR)) { + assert(DLR->getLabel() && "Missing label"); + if (!FuncInfo.MF->getMMI().hasDebugInfo()) { + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DLR << "\n"); + continue; + } + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DLR->getDebugLoc(), + TII.get(TargetOpcode::DBG_LABEL)) + .addMetadata(DLR->getLabel()); + continue; + } + + DbgVariableRecord &DVR = cast<DbgVariableRecord>(DR); + Value *V = nullptr; - if (!DPV.hasArgList()) - V = DPV.getVariableLocationOp(0); + if (!DVR.hasArgList()) + V = DVR.getVariableLocationOp(0); bool Res = false; - if (DPV.getType() == DPValue::LocationType::Value) { - Res = lowerDbgValue(V, DPV.getExpression(), DPV.getVariable(), - DPV.getDebugLoc()); + if (DVR.getType() == DbgVariableRecord::LocationType::Value || + DVR.getType() == DbgVariableRecord::LocationType::Assign) { + Res = lowerDbgValue(V, DVR.getExpression(), DVR.getVariable(), + DVR.getDebugLoc()); } else { - assert(DPV.getType() == DPValue::LocationType::Declare); - if (FuncInfo.PreprocessedDPVDeclares.contains(&DPV)) + assert(DVR.getType() == DbgVariableRecord::LocationType::Declare); + if (FuncInfo.PreprocessedDVRDeclares.contains(&DVR)) continue; - Res = lowerDbgDeclare(V, DPV.getExpression(), DPV.getVariable(), - DPV.getDebugLoc()); + Res = lowerDbgDeclare(V, DVR.getExpression(), DVR.getVariable(), + DVR.getDebugLoc()); } if (!Res) - LLVM_DEBUG(dbgs() << "Dropping debug-info for " << DPV << "\n";); + LLVM_DEBUG(dbgs() << "Dropping debug-info for " << DVR << "\n";); } } @@ -1393,6 +1418,13 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { return true; } + case Intrinsic::dbg_assign: + // A dbg.assign is a dbg.value with more information, typically produced + // during optimisation. If one reaches fastisel then something odd has + // happened (such as an optimised function being always-inlined into an + // optnone function). We will not be using the extra information in the + // dbg.assign in that case, just use its dbg.value fields. + [[fallthrough]]; case Intrinsic::dbg_value: { // This form of DBG_VALUE is target-independent. 
const DbgValueInst *DI = cast<DbgValueInst>(II); @@ -1429,6 +1461,15 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { case Intrinsic::is_constant: llvm_unreachable("llvm.is.constant.* should have been lowered already"); + case Intrinsic::allow_runtime_check: + case Intrinsic::allow_ubsan_check: { + Register ResultReg = getRegForValue(ConstantInt::getTrue(II->getType())); + if (!ResultReg) + return false; + updateValueMap(II, ResultReg); + return true; + } + case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: case Intrinsic::expect: { @@ -1441,7 +1482,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { case Intrinsic::experimental_stackmap: return selectStackmap(II); case Intrinsic::experimental_patchpoint_void: - case Intrinsic::experimental_patchpoint_i64: + case Intrinsic::experimental_patchpoint: return selectPatchpoint(II); case Intrinsic::xray_customevent: diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 5926a6058111..8f5b05b662b3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -186,7 +186,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, Register SP = TLI->getStackPointerRegisterToSaveRestore(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); std::vector<TargetLowering::AsmOperandInfo> Ops = - TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI, + TLI->ParseConstraints(Fn->getDataLayout(), TRI, *Call); for (TargetLowering::AsmOperandInfo &Op : Ops) { if (Op.Type == InlineAsm::isClobber) { @@ -214,6 +214,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, if (CI->isMustTailCall() && Fn->isVarArg()) MF->getFrameInfo().setHasMustTailInVarArgFunc(true); } + + // Determine if there is a call to setjmp in the machine function. + if (Call->hasFnAttr(Attribute::ReturnsTwice)) + MF->setExposesReturnsTwice(true); } // Mark values used outside their block as exported, by allocating @@ -222,8 +226,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(&I))) InitializeRegForValue(&I); - // Decide the preferred extend type for a value. - PreferredExtendType[&I] = getPreferredExtendForValue(&I); + // Decide the preferred extend type for a value. This iterates over all + // users and therefore isn't cheap, so don't do this at O0. + if (DAG->getOptLevel() != CodeGenOptLevel::None) + PreferredExtendType[&I] = getPreferredExtendForValue(&I); } } @@ -249,7 +255,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, "WinEHPrepare failed to remove PHIs from imaginary BBs"); continue; } - if (isa<FuncletPadInst>(PadInst)) + if (isa<FuncletPadInst>(PadInst) && + Personality != EHPersonality::Wasm_CXX) assert(&*BB.begin() == PadInst && "WinEHPrepare failed to demote PHIs"); } @@ -357,7 +364,7 @@ void FunctionLoweringInfo::clear() { StatepointRelocationMaps.clear(); PreferredExtendType.clear(); PreprocessedDbgDeclares.clear(); - PreprocessedDPVDeclares.clear(); + PreprocessedDVRDeclares.clear(); } /// CreateReg - Allocate a single virtual register for the given type. 
@@ -394,6 +401,16 @@ Register FunctionLoweringInfo::CreateRegs(const Value *V) { !TLI->requiresUniformRegister(*MF, V)); } +Register FunctionLoweringInfo::InitializeRegForValue(const Value *V) { + // Tokens live in vregs only when used for convergence control. + if (V->getType()->isTokenTy() && !isa<ConvergenceControlInst>(V)) + return 0; + Register &R = ValueMap[V]; + assert(R == Register() && "Already initialized this value register!"); + assert(VirtReg2Value.empty()); + return R = CreateRegs(V); +} + /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the /// register is a PHI destination and the PHI's LiveOutInfo is not valid. If /// the register's LiveOutInfo is for a smaller bit width, it is extended to @@ -431,7 +448,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { if (TLI->getNumRegisters(PN->getContext(), IntVT) != 1) return; - IntVT = TLI->getTypeToTransformTo(PN->getContext(), IntVT); + IntVT = TLI->getRegisterType(PN->getContext(), IntVT); unsigned BitWidth = IntVT.getSizeInBits(); auto It = ValueMap.find(PN); @@ -553,7 +570,7 @@ FunctionLoweringInfo::getValueFromVirtualReg(Register Vreg) { SmallVector<EVT, 4> ValueVTs; for (auto &P : ValueMap) { ValueVTs.clear(); - ComputeValueVTs(*TLI, Fn->getParent()->getDataLayout(), + ComputeValueVTs(*TLI, Fn->getDataLayout(), P.first->getType(), ValueVTs); unsigned Reg = P.second; for (EVT VT : ValueVTs) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 032cff416cda..4ce92e156cf8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -285,6 +285,30 @@ Register InstrEmitter::getVR(SDValue Op, return I->second; } +static bool isConvergenceCtrlMachineOp(SDValue Op) { + if (Op->isMachineOpcode()) { + switch (Op->getMachineOpcode()) { + case TargetOpcode::CONVERGENCECTRL_ANCHOR: + case TargetOpcode::CONVERGENCECTRL_ENTRY: + case TargetOpcode::CONVERGENCECTRL_LOOP: + case TargetOpcode::CONVERGENCECTRL_GLUE: + return true; + } + return false; + } + + // We can reach here when CopyFromReg is encountered. But rather than making a + // special case for that, we just make sure we don't reach here in some + // surprising way. + switch (Op->getOpcode()) { + case ISD::CONVERGENCECTRL_ANCHOR: + case ISD::CONVERGENCECTRL_ENTRY: + case ISD::CONVERGENCECTRL_LOOP: + case ISD::CONVERGENCECTRL_GLUE: + llvm_unreachable("Convergence control should have been selected by now."); + } + return false; +} /// AddRegisterOperand - Add the specified register as an operand to the /// specified machine instr. Insert register copies if the register is @@ -346,9 +370,12 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, // multiple uses. // Tied operands are never killed, so we need to check that. And that // means we need to determine the index of the operand. - bool isKill = Op.hasOneUse() && - Op.getNode()->getOpcode() != ISD::CopyFromReg && - !IsDebug && + // Don't kill convergence control tokens. Initially they are only used in glue + // nodes, and the InstrEmitter later adds implicit uses on the users of the + // glue node. This can sometimes make it seem like there is only one use, + // which is the glue node itself. 
+ bool isKill = Op.hasOneUse() && !isConvergenceCtrlMachineOp(Op) && + Op.getNode()->getOpcode() != ISD::CopyFromReg && !IsDebug && !(IsClone || IsCloned); if (isKill) { unsigned Idx = MIB->getNumOperands(); @@ -1155,8 +1182,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, append_range(UsedRegs, MCID.implicit_uses()); // In addition to declared implicit uses, we must also check for // direct RegisterSDNode operands. - for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) - if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) { + for (const SDValue &Op : F->op_values()) + if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { Register Reg = R->getReg(); if (Reg.isPhysical()) UsedRegs.push_back(Reg); @@ -1191,6 +1218,17 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, } } + if (SDNode *GluedNode = Node->getGluedNode()) { + // FIXME: Possibly iterate over multiple glue nodes? + if (GluedNode->getOpcode() == + ~(unsigned)TargetOpcode::CONVERGENCECTRL_GLUE) { + Register VReg = getVR(GluedNode->getOperand(0), VRBaseMap); + MachineOperand MO = MachineOperand::CreateReg(VReg, /*isDef=*/false, + /*isImp=*/true); + MIB->addOperand(MO); + } + } + // Run post-isel target hook to adjust this instruction if needed. if (II.hasPostISelHook()) TLI->AdjustInstrPostInstrSelection(*MIB, Node); @@ -1374,6 +1412,13 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, } } + // Add rounding control registers as implicit def for inline asm. + if (MF->getFunction().hasFnAttribute(Attribute::StrictFP)) { + ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters(); + for (MCPhysReg Reg : RCRegs) + MIB.addReg(Reg, RegState::ImplicitDefine); + } + // GCC inline assembly allows input operands to also be early-clobber // output operands (so long as the operand is written only after it's // used), but this does not match the semantics of our early-clobber flag. 
@@ -1382,7 +1427,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, for (unsigned Reg : ECRegs) { if (MIB->readsRegister(Reg, TRI)) { MachineOperand *MO = - MIB->findRegisterDefOperand(Reg, false, false, TRI); + MIB->findRegisterDefOperand(Reg, TRI, false, false); assert(MO && "No def operand for clobbered register?"); MO->setIsEarlyClobber(false); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index adfeea073bff..7f5b46af01c6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -21,17 +21,18 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h" -#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/RuntimeLibcallUtil.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -117,14 +118,7 @@ private: void LegalizeLoadOps(SDNode *Node); void LegalizeStoreOps(SDNode *Node); - /// Some targets cannot handle a variable - /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it - /// is necessary to spill the vector being inserted into to memory, perform - /// the insert there, and then read the result back. - SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, - const SDLoc &dl); - SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, - const SDLoc &dl); + SDValue ExpandINSERT_VECTOR_ELT(SDValue Op); /// Return a vector shuffle operation which /// performs the same shuffe in terms of order or result bytes, but on a type @@ -186,6 +180,13 @@ private: SmallVectorImpl<SDValue> &Results); SDValue PromoteLegalFP_TO_INT_SAT(SDNode *Node, const SDLoc &dl); + /// Implements vector reduce operation promotion. + /// + /// All vector operands are promoted to a vector type with larger element + /// type, and the start value is promoted to a larger scalar type. Then the + /// result is truncated back to the original scalar type. + SDValue PromoteReduction(SDNode *Node); + SDValue ExpandPARITY(SDValue Op, const SDLoc &dl); SDValue ExpandExtractFromVectorThroughStack(SDValue Op); @@ -258,6 +259,21 @@ public: } // end anonymous namespace +// Helper function that generates an MMO that considers the alignment of the +// stack, and the size of the stack object +static MachineMemOperand *getStackAlignedMMO(SDValue StackPtr, + MachineFunction &MF, + bool isObjectScalable) { + auto &MFI = MF.getFrameInfo(); + int FI = cast<FrameIndexSDNode>(StackPtr)->getIndex(); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); + LocationSize ObjectSize = isObjectScalable + ? 
LocationSize::beforeOrAfterPointer() + : LocationSize::precise(MFI.getObjectSize(FI)); + return MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, + ObjectSize, MFI.getObjectAlign(FI)); +} + /// Return a vector shuffle operation which /// performs the same shuffle in terms of order or result bytes, but on a type /// whose vector element type is narrower than the original shuffle type. @@ -362,49 +378,12 @@ SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) { return Result; } -/// Some target cannot handle a variable insertion index for the -/// INSERT_VECTOR_ELT instruction. In this case, it -/// is necessary to spill the vector being inserted into to memory, perform -/// the insert there, and then read the result back. -SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec, - SDValue Val, - SDValue Idx, - const SDLoc &dl) { - SDValue Tmp1 = Vec; - SDValue Tmp2 = Val; - SDValue Tmp3 = Idx; - - // If the target doesn't support this, we have to spill the input vector - // to a temporary stack slot, update the element, then reload it. This is - // badness. We could also load the value into a vector register (either - // with a "move to register" or "extload into register" instruction, then - // permute it into place, if the idx is a constant and if the idx is - // supported by the target. - EVT VT = Tmp1.getValueType(); - EVT EltVT = VT.getVectorElementType(); - SDValue StackPtr = DAG.CreateStackTemporary(VT); - - int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); - - // Store the vector. - SDValue Ch = DAG.getStore( - DAG.getEntryNode(), dl, Tmp1, StackPtr, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI)); - - SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, Tmp3); - - // Store the scalar value. - Ch = DAG.getTruncStore( - Ch, dl, Tmp2, StackPtr2, - MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT); - // Load the updated vector. - return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack( - DAG.getMachineFunction(), SPFI)); -} +SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Op) { + SDValue Vec = Op.getOperand(0); + SDValue Val = Op.getOperand(1); + SDValue Idx = Op.getOperand(2); + SDLoc dl(Op); -SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, - SDValue Idx, - const SDLoc &dl) { if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) { // SCALAR_TO_VECTOR requires that the type of the value being inserted // match the element type of the vector being created, except for @@ -426,7 +405,7 @@ SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec, ShufOps); } } - return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl); + return ExpandInsertToVectorThroughStack(Op); } SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { @@ -1033,6 +1012,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Node->getOperand(0).getValueType()); break; case ISD::STRICT_FP_TO_FP16: + case ISD::STRICT_FP_TO_BF16: case ISD::STRICT_SINT_TO_FP: case ISD::STRICT_UINT_TO_FP: case ISD::STRICT_LRINT: @@ -1125,9 +1105,15 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; + case ISD::CLEAR_CACHE: + // This operation is typically going to be LibCall unless the target wants + // something differrent. 
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + break; case ISD::READCYCLECOUNTER: - // READCYCLECOUNTER returns an i64, even if type legalization might have - // expanded that to several smaller types. + case ISD::READSTEADYCOUNTER: + // READCYCLECOUNTER and READSTEADYCOUNTER return an i64, even if type + // legalization might have expanded that to several smaller types. Action = TLI.getOperationAction(Node->getOpcode(), MVT::i64); break; case ISD::READ_REGISTER: @@ -1167,6 +1153,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::USUBSAT: case ISD::SSHLSAT: case ISD::USHLSAT: + case ISD::SCMP: + case ISD::UCMP: case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -1241,11 +1229,18 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::VP_REDUCE_UMIN: case ISD::VP_REDUCE_FMAX: case ISD::VP_REDUCE_FMIN: + case ISD::VP_REDUCE_FMAXIMUM: + case ISD::VP_REDUCE_FMINIMUM: case ISD::VP_REDUCE_SEQ_FADD: case ISD::VP_REDUCE_SEQ_FMUL: Action = TLI.getOperationAction( Node->getOpcode(), Node->getOperand(1).getValueType()); break; + case ISD::VP_CTTZ_ELTS: + case ISD::VP_CTTZ_ELTS_ZERO_UNDEF: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(0).getValueType()); + break; default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { Action = TLI.getCustomOperationAction(*Node); @@ -1426,8 +1421,9 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { if (!Ch.getNode()) { // Store the value to a temporary stack slot, then LOAD the returned part. StackPtr = DAG.CreateStackTemporary(VecVT); - Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, - MachinePointerInfo()); + MachineMemOperand *StoreMMO = getStackAlignedMMO( + StackPtr, DAG.getMachineFunction(), VecVT.isScalableVector()); + Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, StoreMMO); } SDValue NewLoad; @@ -1471,7 +1467,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { // Store the value to a temporary stack slot, then LOAD the returned part. EVT VecVT = Vec.getValueType(); - EVT SubVecVT = Part.getValueType(); + EVT PartVT = Part.getValueType(); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); MachinePointerInfo PtrInfo = @@ -1480,14 +1476,28 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { // First store the whole vector. SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); + // Freeze the index so we don't poison the clamping code we're about to emit. + Idx = DAG.getFreeze(Idx); + // Then store the inserted part. - SDValue SubStackPtr = - TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVecVT, Idx); + if (PartVT.isVector()) { + SDValue SubStackPtr = + TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, PartVT, Idx); + + // Store the subvector. + Ch = DAG.getStore( + Ch, dl, Part, SubStackPtr, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); + } else { + SDValue SubStackPtr = + TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); - // Store the subvector. - Ch = DAG.getStore( - Ch, dl, Part, SubStackPtr, - MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); + // Store the scalar value. 
+ Ch = DAG.getTruncStore( + Ch, dl, Part, SubStackPtr, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), + VecVT.getVectorElementType()); + } // Finally, load the updated vector. return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo); @@ -1671,8 +1681,13 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const { SignBit = DAG.getNode(ISD::TRUNCATE, DL, MagVT, SignBit); } + SDNodeFlags Flags; + Flags.setDisjoint(true); + // Store the part with the modified sign and convert back to float. - SDValue CopiedSign = DAG.getNode(ISD::OR, DL, MagVT, ClearedSign, SignBit); + SDValue CopiedSign = + DAG.getNode(ISD::OR, DL, MagVT, ClearedSign, SignBit, Flags); + return modifySignAsInt(MagAsInt, DL, CopiedSign); } @@ -2047,8 +2062,15 @@ SDValue SelectionDAGLegalize::ExpandSPLAT_VECTOR(SDNode *Node) { std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, TargetLowering::ArgListTy &&Args, bool isSigned) { - SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy(DAG.getDataLayout())); + EVT CodePtrTy = TLI.getPointerTy(DAG.getDataLayout()); + SDValue Callee; + if (const char *LibcallName = TLI.getLibcallName(LC)) + Callee = DAG.getExternalSymbol(LibcallName, CodePtrTy); + else { + Callee = DAG.getUNDEF(CodePtrTy); + DAG.getContext()->emitError(Twine("no libcall available for ") + + Node->getOperationName(&DAG)); + } EVT RetVT = Node->getValueType(0); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); @@ -2990,6 +3012,44 @@ SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) { return DAG.getNode(ISD::AND, dl, VT, Result, DAG.getConstant(1, dl, VT)); } +SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) { + MVT VecVT = Node->getOperand(1).getSimpleValueType(); + MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT); + MVT ScalarVT = Node->getSimpleValueType(0); + MVT NewScalarVT = NewVecVT.getVectorElementType(); + + SDLoc DL(Node); + SmallVector<SDValue, 4> Operands(Node->getNumOperands()); + + // promote the initial value. + // FIXME: Support integer. + assert(Node->getOperand(0).getValueType().isFloatingPoint() && + "Only FP promotion is supported"); + Operands[0] = + DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(0)); + + for (unsigned j = 1; j != Node->getNumOperands(); ++j) + if (Node->getOperand(j).getValueType().isVector() && + !(ISD::isVPOpcode(Node->getOpcode()) && + ISD::getVPMaskIdx(Node->getOpcode()) == j)) { // Skip mask operand. + // promote the vector operand. + // FIXME: Support integer. + assert(Node->getOperand(j).getValueType().isFloatingPoint() && + "Only FP promotion is supported"); + Operands[j] = + DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j)); + } else { + Operands[j] = Node->getOperand(j); // Skip VL operand. 
+ } + + SDValue Res = DAG.getNode(Node->getOpcode(), DL, NewScalarVT, Operands, + Node->getFlags()); + + assert(ScalarVT.isFloatingPoint() && "Only FP promotion is supported"); + return DAG.getNode(ISD::FP_ROUND, DL, ScalarVT, Res, + DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); +} + bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { LLVM_DEBUG(dbgs() << "Trying to expand node\n"); SmallVector<SDValue, 8> Results; @@ -3006,6 +3066,13 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { if ((Tmp1 = TLI.expandABD(Node, DAG))) Results.push_back(Tmp1); break; + case ISD::AVGCEILS: + case ISD::AVGCEILU: + case ISD::AVGFLOORS: + case ISD::AVGFLOORU: + if ((Tmp1 = TLI.expandAVG(Node, DAG))) + Results.push_back(Tmp1); + break; case ISD::CTPOP: if ((Tmp1 = TLI.expandCTPOP(Node, DAG))) Results.push_back(Tmp1); @@ -3065,6 +3132,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Node->getOperand(0)); break; case ISD::READCYCLECOUNTER: + case ISD::READSTEADYCOUNTER: // If the target didn't expand this, just return 'zero' and preserve the // chain. Results.append(Node->getNumValues() - 1, @@ -3200,14 +3268,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } break; case ISD::FP_ROUND: { - EVT VT = Node->getValueType(0); - if (VT.getScalarType() == MVT::bf16) { - Results.push_back( - DAG.getNode(ISD::FP_TO_BF16, SDLoc(Node), VT, Node->getOperand(0))); + if ((Tmp1 = TLI.expandFP_ROUND(Node, DAG))) { + Results.push_back(Tmp1); break; } - LLVM_FALLTHROUGH; + [[fallthrough]]; } case ISD::BITCAST: if ((Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), @@ -3276,6 +3342,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (Op.getValueType() != MVT::f32) Op = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)); + // Certain SNaNs will turn into infinities if we do a simple shift right. + if (!DAG.isKnownNeverSNaN(Op)) { + Op = DAG.getNode(ISD::FCANONICALIZE, dl, MVT::f32, Op, Node->getFlags()); + } Op = DAG.getNode( ISD::SRL, dl, MVT::i32, DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op), DAG.getConstant(16, dl, @@ -3398,9 +3468,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(ExpandSCALAR_TO_VECTOR(Node)); break; case ISD::INSERT_VECTOR_ELT: - Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0), - Node->getOperand(1), - Node->getOperand(2), dl)); + Results.push_back(ExpandINSERT_VECTOR_ELT(SDValue(Node, 0))); break; case ISD::VECTOR_SHUFFLE: { SmallVector<int, 32> NewMask; @@ -3566,6 +3634,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Expanded); break; } + case ISD::FMINIMUM: + case ISD::FMAXIMUM: { + if (SDValue Expanded = TLI.expandFMINIMUM_FMAXIMUM(Node, DAG)) + Results.push_back(Expanded); + break; + } case ISD::FSIN: case ISD::FCOS: { EVT VT = Node->getValueType(0); @@ -3626,14 +3700,14 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res)); } break; + case ISD::STRICT_BF16_TO_FP: case ISD::STRICT_FP16_TO_FP: if (Node->getValueType(0) != MVT::f32) { // We can extend to types bigger than f32 in two steps without changing // the result. Since "f16 -> f32" is much more commonly available, give // CodeGen the option of emitting that before resorting to a libcall. 
- SDValue Res = - DAG.getNode(ISD::STRICT_FP16_TO_FP, dl, {MVT::f32, MVT::Other}, - {Node->getOperand(0), Node->getOperand(1)}); + SDValue Res = DAG.getNode(Node->getOpcode(), dl, {MVT::f32, MVT::Other}, + {Node->getOperand(0), Node->getOperand(1)}); Res = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {Node->getValueType(0), MVT::Other}, {Res.getValue(1), Res}); @@ -3818,6 +3892,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::USUBSAT: Results.push_back(TLI.expandAddSubSat(Node, DAG)); break; + case ISD::SCMP: + case ISD::UCMP: + Results.push_back(TLI.expandCMP(Node, DAG)); + break; case ISD::SSHLSAT: case ISD::USHLSAT: Results.push_back(TLI.expandShlSat(Node, DAG)); @@ -4104,7 +4182,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { "expanded."); EVT CCVT = getSetCCResultType(CmpVT); SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC, Node->getFlags()); - Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4)); + Results.push_back( + DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4, Node->getFlags())); break; } @@ -4238,6 +4317,15 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::VECREDUCE_FMINIMUM: Results.push_back(TLI.expandVecReduce(Node, DAG)); break; + case ISD::VP_CTTZ_ELTS: + case ISD::VP_CTTZ_ELTS_ZERO_UNDEF: + Results.push_back(TLI.expandVPCTTZElements(Node, DAG)); + break; + case ISD::CLEAR_CACHE: + // The default expansion of llvm.clear_cache is simply a no-op for those + // targets where it is not needed. + Results.push_back(Node->getOperand(0)); + break; case ISD::GLOBAL_OFFSET_TABLE: case ISD::GlobalAddress: case ISD::GlobalTLSAddress: @@ -4395,6 +4483,17 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { Results.push_back(CallResult.second); break; } + case ISD::CLEAR_CACHE: { + TargetLowering::MakeLibCallOptions CallOptions; + SDValue InputChain = Node->getOperand(0); + SDValue StartVal = Node->getOperand(1); + SDValue EndVal = Node->getOperand(2); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall( + DAG, RTLIB::CLEAR_CACHE, MVT::isVoid, {StartVal, EndVal}, CallOptions, + SDLoc(Node), InputChain); + Results.push_back(Tmp.second); + break; + } case ISD::FMINNUM: case ISD::STRICT_FMINNUM: ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, @@ -4433,6 +4532,41 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::COS_F80, RTLIB::COS_F128, RTLIB::COS_PPCF128, Results); break; + case ISD::FTAN: + case ISD::STRICT_FTAN: + ExpandFPLibCall(Node, RTLIB::TAN_F32, RTLIB::TAN_F64, RTLIB::TAN_F80, + RTLIB::TAN_F128, RTLIB::TAN_PPCF128, Results); + break; + case ISD::FASIN: + case ISD::STRICT_FASIN: + ExpandFPLibCall(Node, RTLIB::ASIN_F32, RTLIB::ASIN_F64, RTLIB::ASIN_F80, + RTLIB::ASIN_F128, RTLIB::ASIN_PPCF128, Results); + break; + case ISD::FACOS: + case ISD::STRICT_FACOS: + ExpandFPLibCall(Node, RTLIB::ACOS_F32, RTLIB::ACOS_F64, RTLIB::ACOS_F80, + RTLIB::ACOS_F128, RTLIB::ACOS_PPCF128, Results); + break; + case ISD::FATAN: + case ISD::STRICT_FATAN: + ExpandFPLibCall(Node, RTLIB::ATAN_F32, RTLIB::ATAN_F64, RTLIB::ATAN_F80, + RTLIB::ATAN_F128, RTLIB::ATAN_PPCF128, Results); + break; + case ISD::FSINH: + case ISD::STRICT_FSINH: + ExpandFPLibCall(Node, RTLIB::SINH_F32, RTLIB::SINH_F64, RTLIB::SINH_F80, + RTLIB::SINH_F128, RTLIB::SINH_PPCF128, Results); + break; + case ISD::FCOSH: + case ISD::STRICT_FCOSH: + ExpandFPLibCall(Node, RTLIB::COSH_F32, RTLIB::COSH_F64, RTLIB::COSH_F80, + RTLIB::COSH_F128, RTLIB::COSH_PPCF128, Results); + break; + case ISD::FTANH: + case ISD::STRICT_FTANH: + 
ExpandFPLibCall(Node, RTLIB::TANH_F32, RTLIB::TANH_F64, RTLIB::TANH_F80, + RTLIB::TANH_F128, RTLIB::TANH_PPCF128, Results); + break; case ISD::FSINCOS: // Expand into sincos libcall. ExpandSinCosLibCall(Node, Results); @@ -4632,6 +4766,16 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false).first); } break; + case ISD::STRICT_BF16_TO_FP: + if (Node->getValueType(0) == MVT::f32) { + TargetLowering::MakeLibCallOptions CallOptions; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall( + DAG, RTLIB::FPEXT_BF16_F32, MVT::f32, Node->getOperand(1), + CallOptions, SDLoc(Node), Node->getOperand(0)); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + } + break; case ISD::STRICT_FP16_TO_FP: { if (Node->getValueType(0) == MVT::f32) { TargetLowering::MakeLibCallOptions CallOptions; @@ -4773,12 +4917,17 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { break; } case ISD::STRICT_FP_EXTEND: - case ISD::STRICT_FP_TO_FP16: { - RTLIB::Libcall LC = - Node->getOpcode() == ISD::STRICT_FP_TO_FP16 - ? RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16) - : RTLIB::getFPEXT(Node->getOperand(1).getValueType(), - Node->getValueType(0)); + case ISD::STRICT_FP_TO_FP16: + case ISD::STRICT_FP_TO_BF16: { + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (Node->getOpcode() == ISD::STRICT_FP_TO_FP16) + LC = RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16); + else if (Node->getOpcode() == ISD::STRICT_FP_TO_BF16) + LC = RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::bf16); + else + LC = RTLIB::getFPEXT(Node->getOperand(1).getValueType(), + Node->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to legalize as libcall"); TargetLowering::MakeLibCallOptions CallOptions; @@ -4941,10 +5090,18 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Node->getOpcode() == ISD::INSERT_VECTOR_ELT) { OVT = Node->getOperand(0).getSimpleValueType(); } - if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP || + if (Node->getOpcode() == ISD::ATOMIC_STORE || + Node->getOpcode() == ISD::STRICT_UINT_TO_FP || Node->getOpcode() == ISD::STRICT_SINT_TO_FP || Node->getOpcode() == ISD::STRICT_FSETCC || - Node->getOpcode() == ISD::STRICT_FSETCCS) + Node->getOpcode() == ISD::STRICT_FSETCCS || + Node->getOpcode() == ISD::VP_REDUCE_FADD || + Node->getOpcode() == ISD::VP_REDUCE_FMUL || + Node->getOpcode() == ISD::VP_REDUCE_FMAX || + Node->getOpcode() == ISD::VP_REDUCE_FMIN || + Node->getOpcode() == ISD::VP_REDUCE_FMAXIMUM || + Node->getOpcode() == ISD::VP_REDUCE_FMINIMUM || + Node->getOpcode() == ISD::VP_REDUCE_SEQ_FADD) OVT = Node->getOperand(1).getSimpleValueType(); if (Node->getOpcode() == ISD::BR_CC || Node->getOpcode() == ISD::SELECT_CC) @@ -4956,8 +5113,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: case ISD::CTLZ: - case ISD::CTLZ_ZERO_UNDEF: - case ISD::CTPOP: + case ISD::CTPOP: { // Zero extend the argument unless its cttz, then use any_extend. if (Node->getOpcode() == ISD::CTTZ || Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF) @@ -4965,7 +5121,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { else Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); - if (Node->getOpcode() == ISD::CTTZ) { + unsigned NewOpc = Node->getOpcode(); + if (NewOpc == ISD::CTTZ) { // The count is the same in the promoted type except if the original // value was zero. 
This can be handled by setting the bit just off // the top of the original type. @@ -4973,12 +5130,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { OVT.getSizeInBits()); Tmp1 = DAG.getNode(ISD::OR, dl, NVT, Tmp1, DAG.getConstant(TopBit, dl, NVT)); + NewOpc = ISD::CTTZ_ZERO_UNDEF; } // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is // already the correct result. - Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); - if (Node->getOpcode() == ISD::CTLZ || - Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { + Tmp1 = DAG.getNode(NewOpc, dl, NVT, Tmp1); + if (NewOpc == ISD::CTLZ) { // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1, DAG.getConstant(NVT.getSizeInBits() - @@ -4986,6 +5143,26 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { } Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); break; + } + case ISD::CTLZ_ZERO_UNDEF: { + // We know that the argument is unlikely to be zero, hence we can take a + // different approach as compared to ISD::CTLZ + + // Any Extend the argument + auto AnyExtendedNode = + DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Node->getOperand(0)); + + // Tmp1 = Tmp1 << (sizeinbits(NVT) - sizeinbits(Old VT)) + auto ShiftConstant = DAG.getShiftAmountConstant( + NVT.getSizeInBits() - OVT.getSizeInBits(), NVT, dl); + auto LeftShiftResult = + DAG.getNode(ISD::SHL, dl, NVT, AnyExtendedNode, ShiftConstant); + + // Perform the larger operation + auto CTLZResult = DAG.getNode(Node->getOpcode(), dl, NVT, LeftShiftResult); + Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, CTLZResult)); + break; + } case ISD::BITREVERSE: case ISD::BSWAP: { unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); @@ -5362,6 +5539,13 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FSQRT: case ISD::FSIN: case ISD::FCOS: + case ISD::FTAN: + case ISD::FASIN: + case ISD::FACOS: + case ISD::FATAN: + case ISD::FSINH: + case ISD::FCOSH: + case ISD::FTANH: case ISD::FLOG: case ISD::FLOG2: case ISD::FLOG10: @@ -5386,6 +5570,13 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::STRICT_FSQRT: case ISD::STRICT_FSIN: case ISD::STRICT_FCOS: + case ISD::STRICT_FTAN: + case ISD::STRICT_FASIN: + case ISD::STRICT_FACOS: + case ISD::STRICT_FATAN: + case ISD::STRICT_FSINH: + case ISD::STRICT_FCOSH: + case ISD::STRICT_FTANH: case ISD::STRICT_FLOG: case ISD::STRICT_FLOG2: case ISD::STRICT_FLOG10: @@ -5417,10 +5608,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); SmallVector<SDValue, 8> NewOps; - for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) { - SDValue Op = Node->getOperand(I); + for (const SDValue &Op : Node->op_values()) NewOps.push_back(DAG.getNode(ISD::BITCAST, SDLoc(Op), MidVT, Op)); - } SDLoc SL(Node); SDValue Concat = @@ -5552,7 +5741,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(CvtVec); break; } - case ISD::ATOMIC_SWAP: { + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_STORE: { AtomicSDNode *AM = cast<AtomicSDNode>(Node); SDLoc SL(Node); SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NVT, AM->getVal()); @@ -5561,11 +5751,35 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { assert(AM->getMemoryVT().getSizeInBits() == NVT.getSizeInBits() && "unexpected atomic_swap with illegal type"); - SDValue NewAtomic - = DAG.getAtomic(ISD::ATOMIC_SWAP, SL, NVT, - DAG.getVTList(NVT, MVT::Other), - { AM->getChain(), AM->getBasePtr(), CastVal }, - 
AM->getMemOperand()); + SDValue Op0 = AM->getBasePtr(); + SDValue Op1 = CastVal; + + // ATOMIC_STORE uses a swapped operand order from every other AtomicSDNode, + // but really it should merge with ISD::STORE. + if (AM->getOpcode() == ISD::ATOMIC_STORE) + std::swap(Op0, Op1); + + SDValue NewAtomic = DAG.getAtomic(AM->getOpcode(), SL, NVT, AM->getChain(), + Op0, Op1, AM->getMemOperand()); + + if (AM->getOpcode() != ISD::ATOMIC_STORE) { + Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewAtomic)); + Results.push_back(NewAtomic.getValue(1)); + } else + Results.push_back(NewAtomic); + break; + } + case ISD::ATOMIC_LOAD: { + AtomicSDNode *AM = cast<AtomicSDNode>(Node); + SDLoc SL(Node); + assert(NVT.getSizeInBits() == OVT.getSizeInBits() && + "unexpected promotion type"); + assert(AM->getMemoryVT().getSizeInBits() == NVT.getSizeInBits() && + "unexpected atomic_load with illegal type"); + + SDValue NewAtomic = + DAG.getAtomic(ISD::ATOMIC_LOAD, SL, NVT, DAG.getVTList(NVT, MVT::Other), + {AM->getChain(), AM->getBasePtr()}, AM->getMemOperand()); Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewAtomic)); Results.push_back(NewAtomic.getValue(1)); break; @@ -5587,6 +5801,15 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { DAG.getIntPtrConstant(0, dl, /*isTarget=*/true))); break; } + case ISD::VP_REDUCE_FADD: + case ISD::VP_REDUCE_FMUL: + case ISD::VP_REDUCE_FMAX: + case ISD::VP_REDUCE_FMIN: + case ISD::VP_REDUCE_FMAXIMUM: + case ISD::VP_REDUCE_FMINIMUM: + case ISD::VP_REDUCE_SEQ_FADD: + Results.push_back(PromoteReduction(Node)); + break; } // Replace the original node with the legalized result. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index f0a04589fbfd..41fcc9afe4e9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -53,6 +53,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { SDValue R = SDValue(); switch (N->getOpcode()) { + // clang-format off default: #ifndef NDEBUG dbgs() << "SoftenFloatResult #" << ResNo << ": "; @@ -60,7 +61,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { #endif report_fatal_error("Do not know how to soften the result of this " "operator!"); - + case ISD::EXTRACT_ELEMENT: R = SoftenFloatRes_EXTRACT_ELEMENT(N); break; case ISD::ARITH_FENCE: R = SoftenFloatRes_ARITH_FENCE(N); break; case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break; case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; @@ -75,12 +76,20 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; case ISD::STRICT_FADD: case ISD::FADD: R = SoftenFloatRes_FADD(N); break; + case ISD::STRICT_FACOS: + case ISD::FACOS: R = SoftenFloatRes_FACOS(N); break; + case ISD::STRICT_FASIN: + case ISD::FASIN: R = SoftenFloatRes_FASIN(N); break; + case ISD::STRICT_FATAN: + case ISD::FATAN: R = SoftenFloatRes_FATAN(N); break; case ISD::FCBRT: R = SoftenFloatRes_FCBRT(N); break; case ISD::STRICT_FCEIL: case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; case ISD::STRICT_FCOS: case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; + case ISD::STRICT_FCOSH: + case ISD::FCOSH: R = SoftenFloatRes_FCOSH(N); break; case ISD::STRICT_FDIV: case ISD::FDIV: R = SoftenFloatRes_FDIV(N); 
break; case ISD::STRICT_FEXP: @@ -115,9 +124,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FPOWI: case ISD::FLDEXP: case ISD::STRICT_FLDEXP: R = SoftenFloatRes_ExpOp(N); break; - case ISD::FFREXP: - R = SoftenFloatRes_FFREXP(N); - break; + case ISD::FFREXP: R = SoftenFloatRes_FFREXP(N); break; case ISD::STRICT_FREM: case ISD::FREM: R = SoftenFloatRes_FREM(N); break; case ISD::STRICT_FRINT: @@ -128,13 +135,20 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FROUNDEVEN: R = SoftenFloatRes_FROUNDEVEN(N); break; case ISD::STRICT_FSIN: case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; + case ISD::STRICT_FSINH: + case ISD::FSINH: R = SoftenFloatRes_FSINH(N); break; case ISD::STRICT_FSQRT: case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::STRICT_FSUB: case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; + case ISD::STRICT_FTAN: + case ISD::FTAN: R = SoftenFloatRes_FTAN(N); break; + case ISD::STRICT_FTANH: + case ISD::FTANH: R = SoftenFloatRes_FTANH(N); break; case ISD::STRICT_FTRUNC: case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; + case ISD::ATOMIC_LOAD: R = SoftenFloatRes_ATOMIC_LOAD(N); break; case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; @@ -150,14 +164,11 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::VECREDUCE_FMIN: case ISD::VECREDUCE_FMAX: case ISD::VECREDUCE_FMAXIMUM: - case ISD::VECREDUCE_FMINIMUM: - R = SoftenFloatRes_VECREDUCE(N); - break; + case ISD::VECREDUCE_FMINIMUM: R = SoftenFloatRes_VECREDUCE(N); break; case ISD::VECREDUCE_SEQ_FADD: - case ISD::VECREDUCE_SEQ_FMUL: - R = SoftenFloatRes_VECREDUCE_SEQ(N); - break; - } + case ISD::VECREDUCE_SEQ_FMUL: R = SoftenFloatRes_VECREDUCE_SEQ(N); break; + // clang-format on + } // If R is null, the sub-method took care of registering the result. 
if (R.getNode()) { @@ -262,6 +273,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N) { } } +SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_ELEMENT(SDNode *N) { + SDValue Src = N->getOperand(0); + assert(Src.getValueType() == MVT::ppcf128 && + "In floats only ppcf128 can be extracted by element!"); + return DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(N), + N->getValueType(0).changeTypeToInteger(), + DAG.getBitcast(MVT::i128, Src), N->getOperand(1)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo) { SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), @@ -312,6 +332,24 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { RTLIB::ADD_PPCF128)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FACOS(SDNode *N) { + return SoftenFloatRes_Unary( + N, GetFPLibCall(N->getValueType(0), RTLIB::ACOS_F32, RTLIB::ACOS_F64, + RTLIB::ACOS_F80, RTLIB::ACOS_F128, RTLIB::ACOS_PPCF128)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FASIN(SDNode *N) { + return SoftenFloatRes_Unary( + N, GetFPLibCall(N->getValueType(0), RTLIB::ASIN_F32, RTLIB::ASIN_F64, + RTLIB::ASIN_F80, RTLIB::ASIN_F128, RTLIB::ASIN_PPCF128)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FATAN(SDNode *N) { + return SoftenFloatRes_Unary( + N, GetFPLibCall(N->getValueType(0), RTLIB::ATAN_F32, RTLIB::ATAN_F64, + RTLIB::ATAN_F80, RTLIB::ATAN_F128, RTLIB::ATAN_PPCF128)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FCBRT(SDNode *N) { return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32, @@ -387,6 +425,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { RTLIB::COS_PPCF128)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOSH(SDNode *N) { + return SoftenFloatRes_Unary( + N, GetFPLibCall(N->getValueType(0), RTLIB::COSH_F32, RTLIB::COSH_F64, + RTLIB::COSH_F80, RTLIB::COSH_F128, RTLIB::COSH_PPCF128)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), RTLIB::DIV_F32, @@ -750,6 +794,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { RTLIB::SIN_PPCF128)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FSINH(SDNode *N) { + return SoftenFloatRes_Unary( + N, GetFPLibCall(N->getValueType(0), RTLIB::SINH_F32, RTLIB::SINH_F64, + RTLIB::SINH_F80, RTLIB::SINH_F128, RTLIB::SINH_PPCF128)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::SQRT_F32, @@ -768,6 +818,18 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { RTLIB::SUB_PPCF128)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FTAN(SDNode *N) { + return SoftenFloatRes_Unary( + N, GetFPLibCall(N->getValueType(0), RTLIB::TAN_F32, RTLIB::TAN_F64, + RTLIB::TAN_F80, RTLIB::TAN_F128, RTLIB::TAN_PPCF128)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FTANH(SDNode *N) { + return SoftenFloatRes_Unary( + N, GetFPLibCall(N->getValueType(0), RTLIB::TANH_F32, RTLIB::TANH_F64, + RTLIB::TANH_F80, RTLIB::TANH_F128, RTLIB::TANH_PPCF128)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::TRUNC_F32, @@ -810,6 +872,26 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { return BitConvertToInteger(ExtendNode); } +SDValue DAGTypeLegalizer::SoftenFloatRes_ATOMIC_LOAD(SDNode *N) { + AtomicSDNode *L = cast<AtomicSDNode>(N); + EVT VT = N->getValueType(0); + EVT NVT = 
TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDLoc dl(N); + + if (L->getExtensionType() == ISD::NON_EXTLOAD) { + SDValue NewL = + DAG.getAtomic(ISD::ATOMIC_LOAD, dl, NVT, DAG.getVTList(NVT, MVT::Other), + {L->getChain(), L->getBasePtr()}, L->getMemOperand()); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + return NewL; + } + + report_fatal_error("softening fp extending atomic load not handled"); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(1)); SDValue RHS = GetSoftenedFloat(N->getOperand(2)); @@ -918,6 +1000,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::STRICT_FP_TO_FP16: case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes case ISD::FP_TO_BF16: + case ISD::STRICT_FP_TO_BF16: case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; case ISD::STRICT_FP_TO_SINT: @@ -940,6 +1023,9 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::STRICT_FSETCCS: case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; + case ISD::ATOMIC_STORE: + Res = SoftenFloatOp_ATOMIC_STORE(N, OpNo); + break; case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break; } @@ -970,6 +1056,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 || N->getOpcode() == ISD::STRICT_FP_TO_FP16 || N->getOpcode() == ISD::FP_TO_BF16 || + N->getOpcode() == ISD::STRICT_FP_TO_BF16 || N->getOpcode() == ISD::STRICT_FP_ROUND); bool IsStrict = N->isStrictFPOpcode(); @@ -980,7 +1067,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { if (N->getOpcode() == ISD::FP_TO_FP16 || N->getOpcode() == ISD::STRICT_FP_TO_FP16) FloatRVT = MVT::f16; - else if (N->getOpcode() == ISD::FP_TO_BF16) + else if (N->getOpcode() == ISD::FP_TO_BF16 || + N->getOpcode() == ISD::STRICT_FP_TO_BF16) FloatRVT = MVT::bf16; RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT); @@ -1164,6 +1252,20 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { ST->getMemOperand()); } +SDValue DAGTypeLegalizer::SoftenFloatOp_ATOMIC_STORE(SDNode *N, unsigned OpNo) { + assert(OpNo == 1 && "Can only soften the stored value!"); + AtomicSDNode *ST = cast<AtomicSDNode>(N); + SDValue Val = ST->getVal(); + EVT VT = Val.getValueType(); + SDLoc dl(N); + + assert(ST->getMemoryVT() == VT && "truncating atomic store not handled"); + + SDValue NewVal = GetSoftenedFloat(Val); + return DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT, ST->getChain(), NewVal, + ST->getBasePtr(), ST->getMemOperand()); +} + SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = BitConvertToInteger(N->getOperand(1)); @@ -1284,7 +1386,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { #endif report_fatal_error("Do not know how to expand the result of this " "operator!"); - + // clang-format off case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; case ISD::SELECT: SplitRes_Select(N, Lo, Hi); break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; @@ -1304,12 +1406,20 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMAXNUM: ExpandFloatRes_FMAXNUM(N, Lo, Hi); break; case ISD::STRICT_FADD: case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break; + 
case ISD::STRICT_FACOS: + case ISD::FACOS: ExpandFloatRes_FACOS(N, Lo, Hi); break; + case ISD::STRICT_FASIN: + case ISD::FASIN: ExpandFloatRes_FASIN(N, Lo, Hi); break; + case ISD::STRICT_FATAN: + case ISD::FATAN: ExpandFloatRes_FATAN(N, Lo, Hi); break; case ISD::FCBRT: ExpandFloatRes_FCBRT(N, Lo, Hi); break; case ISD::STRICT_FCEIL: case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break; case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break; case ISD::STRICT_FCOS: case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break; + case ISD::STRICT_FCOSH: + case ISD::FCOSH: ExpandFloatRes_FCOSH(N, Lo, Hi); break; case ISD::STRICT_FDIV: case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break; case ISD::STRICT_FEXP: @@ -1349,10 +1459,16 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FROUNDEVEN: ExpandFloatRes_FROUNDEVEN(N, Lo, Hi); break; case ISD::STRICT_FSIN: case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; + case ISD::STRICT_FSINH: + case ISD::FSINH: ExpandFloatRes_FSINH(N, Lo, Hi); break; case ISD::STRICT_FSQRT: case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break; case ISD::STRICT_FSUB: case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break; + case ISD::STRICT_FTAN: + case ISD::FTAN: ExpandFloatRes_FTAN(N, Lo, Hi); break; + case ISD::STRICT_FTANH: + case ISD::FTANH: ExpandFloatRes_FTANH(N, Lo, Hi); break; case ISD::STRICT_FTRUNC: case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break; case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break; @@ -1362,6 +1478,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break; case ISD::STRICT_FREM: case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break; + // clang-format on } // If Lo/Hi is null, the sub-method took care of registering results etc. 
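[Editorial aside, not part of the commit: every trig/hyperbolic opcode added in the surrounding hunks is lowered the same way, by selecting one of five per-type RTLIB entries and handing it to a shared Unary expansion helper. GetFPLibCall itself is outside this diff, so the sketch below is an assumption reconstructed from its call sites here, not the verbatim upstream body.]

static RTLIB::Libcall GetFPLibCall(EVT VT, RTLIB::Libcall Call_F32,
                                   RTLIB::Libcall Call_F64,
                                   RTLIB::Libcall Call_F80,
                                   RTLIB::Libcall Call_F128,
                                   RTLIB::Libcall Call_PPCF128) {
  // One libcall variant per soft-float type; ppcf128 is the final case,
  // which is why each ExpandFloatRes_* / SoftenFloatRes_* call site threads
  // all five RTLIB constants through.
  return VT == MVT::f32     ? Call_F32
       : VT == MVT::f64     ? Call_F64
       : VT == MVT::f80     ? Call_F80
       : VT == MVT::f128    ? Call_F128
       : VT == MVT::ppcf128 ? Call_PPCF128
                            : RTLIB::UNKNOWN_LIBCALL;
}

With a helper of this shape, adding a new opcode such as FACOS reduces to one dispatch entry plus one ExpandFloatRes_FACOS wrapper, which is exactly the pattern the hunks below repeat for acos, asin, atan, cosh, sinh, tan, and tanh.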
@@ -1452,6 +1569,33 @@ void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, RTLIB::ADD_PPCF128), Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FACOS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, + GetFPLibCall(N->getValueType(0), RTLIB::ACOS_F32, + RTLIB::ACOS_F64, RTLIB::ACOS_F80, + RTLIB::ACOS_F128, RTLIB::ACOS_PPCF128), + Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FASIN(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, + GetFPLibCall(N->getValueType(0), RTLIB::ASIN_F32, + RTLIB::ASIN_F64, RTLIB::ASIN_F80, + RTLIB::ASIN_F128, RTLIB::ASIN_PPCF128), + Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FATAN(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, + GetFPLibCall(N->getValueType(0), RTLIB::ATAN_F32, + RTLIB::ATAN_F64, RTLIB::ATAN_F80, + RTLIB::ATAN_F128, RTLIB::ATAN_PPCF128), + Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FCBRT(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32, @@ -1486,6 +1630,15 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, RTLIB::COS_PPCF128), Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FCOSH(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, + GetFPLibCall(N->getValueType(0), RTLIB::COSH_F32, + RTLIB::COSH_F64, RTLIB::COSH_F80, + RTLIB::COSH_F128, RTLIB::COSH_PPCF128), + Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), @@ -1704,6 +1857,15 @@ void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, RTLIB::SIN_PPCF128), Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FSINH(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, + GetFPLibCall(N->getValueType(0), RTLIB::SINH_F32, + RTLIB::SINH_F64, RTLIB::SINH_F80, + RTLIB::SINH_F128, RTLIB::SINH_PPCF128), + Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), @@ -1722,6 +1884,24 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, RTLIB::SUB_PPCF128), Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FTAN(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, + GetFPLibCall(N->getValueType(0), RTLIB::TAN_F32, + RTLIB::TAN_F64, RTLIB::TAN_F80, + RTLIB::TAN_F128, RTLIB::TAN_PPCF128), + Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FTANH(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, + GetFPLibCall(N->getValueType(0), RTLIB::TANH_F32, + RTLIB::TANH_F64, RTLIB::TANH_F80, + RTLIB::TANH_F128, RTLIB::TANH_PPCF128), + Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), @@ -2193,13 +2373,11 @@ static ISD::NodeType GetPromotionOpcodeStrict(EVT OpVT, EVT RetVT) { if (RetVT == MVT::f16) return ISD::STRICT_FP_TO_FP16; - if (OpVT == MVT::bf16) { - // TODO: return ISD::STRICT_BF16_TO_FP; - } + if (OpVT == MVT::bf16) + return ISD::STRICT_BF16_TO_FP; - if (RetVT == MVT::bf16) { - // TODO: return ISD::STRICT_FP_TO_BF16; - } + if (RetVT == MVT::bf16) + return ISD::STRICT_FP_TO_BF16; report_fatal_error("Attempt at an invalid promotion-related conversion"); } @@ -2243,6 +2421,7 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { case ISD::SELECT_CC: R = PromoteFloatOp_SELECT_CC(N, OpNo); break; case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break; case 
ISD::STORE: R = PromoteFloatOp_STORE(N, OpNo); break; + case ISD::ATOMIC_STORE: R = PromoteFloatOp_ATOMIC_STORE(N, OpNo); break; } // clang-format on @@ -2365,6 +2544,23 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_STORE(SDNode *N, unsigned OpNo) { ST->getMemOperand()); } +SDValue DAGTypeLegalizer::PromoteFloatOp_ATOMIC_STORE(SDNode *N, + unsigned OpNo) { + AtomicSDNode *ST = cast<AtomicSDNode>(N); + SDValue Val = ST->getVal(); + SDLoc DL(N); + + SDValue Promoted = GetPromotedFloat(Val); + EVT VT = ST->getOperand(1).getValueType(); + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + + SDValue NewVal = DAG.getNode(GetPromotionOpcode(Promoted.getValueType(), VT), + DL, IVT, Promoted); + + return DAG.getAtomic(ISD::ATOMIC_STORE, DL, IVT, ST->getChain(), NewVal, + ST->getBasePtr(), ST->getMemOperand()); +} + //===----------------------------------------------------------------------===// // Float Result Promotion //===----------------------------------------------------------------------===// @@ -2399,9 +2595,13 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { // Unary FP Operations case ISD::FABS: + case ISD::FACOS: + case ISD::FASIN: + case ISD::FATAN: case ISD::FCBRT: case ISD::FCEIL: case ISD::FCOS: + case ISD::FCOSH: case ISD::FEXP: case ISD::FEXP2: case ISD::FEXP10: @@ -2415,8 +2615,11 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FSIN: + case ISD::FSINH: case ISD::FSQRT: case ISD::FTRUNC: + case ISD::FTAN: + case ISD::FTANH: case ISD::FCANONICALIZE: R = PromoteFloatRes_UnaryOp(N); break; // Binary FP Operations @@ -2426,6 +2629,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMINIMUM: case ISD::FMAXNUM: case ISD::FMINNUM: + case ISD::FMAXNUM_IEEE: + case ISD::FMINNUM_IEEE: case ISD::FMUL: case ISD::FPOW: case ISD::FREM: @@ -2443,6 +2648,9 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { R = PromoteFloatRes_STRICT_FP_ROUND(N); break; case ISD::LOAD: R = PromoteFloatRes_LOAD(N); break; + case ISD::ATOMIC_LOAD: + R = PromoteFloatRes_ATOMIC_LOAD(N); + break; case ISD::SELECT: R = PromoteFloatRes_SELECT(N); break; case ISD::SELECT_CC: R = PromoteFloatRes_SELECT_CC(N); break; @@ -2689,6 +2897,25 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) { return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, newL); } +SDValue DAGTypeLegalizer::PromoteFloatRes_ATOMIC_LOAD(SDNode *N) { + AtomicSDNode *AM = cast<AtomicSDNode>(N); + EVT VT = AM->getValueType(0); + + // Load the value as an integer value with the same number of bits. + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + SDValue newL = DAG.getAtomic( + ISD::ATOMIC_LOAD, SDLoc(N), IVT, DAG.getVTList(IVT, MVT::Other), + {AM->getChain(), AM->getBasePtr()}, AM->getMemOperand()); + + // Legalize the chain result by replacing uses of the old value chain with the + // new one + ReplaceValueWith(SDValue(N, 1), newL.getValue(1)); + + // Convert the integer value to the desired FP type + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(GetPromotionOpcode(VT, IVT), SDLoc(N), NVT, newL); +} + // Construct a new SELECT node with the promoted true- and false- values. 
SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT(SDNode *N) { SDValue TrueVal = GetPromotedFloat(N->getOperand(1)); @@ -2797,6 +3024,8 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { report_fatal_error("Do not know how to soft promote this operator's " "result!"); + case ISD::ARITH_FENCE: + R = SoftPromoteHalfRes_ARITH_FENCE(N); break; case ISD::BITCAST: R = SoftPromoteHalfRes_BITCAST(N); break; case ISD::ConstantFP: R = SoftPromoteHalfRes_ConstantFP(N); break; case ISD::EXTRACT_VECTOR_ELT: @@ -2807,9 +3036,13 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { // Unary FP Operations case ISD::FABS: + case ISD::FACOS: + case ISD::FASIN: + case ISD::FATAN: case ISD::FCBRT: case ISD::FCEIL: case ISD::FCOS: + case ISD::FCOSH: case ISD::FEXP: case ISD::FEXP2: case ISD::FEXP10: @@ -2824,8 +3057,11 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FSIN: + case ISD::FSINH: case ISD::FSQRT: case ISD::FTRUNC: + case ISD::FTAN: + case ISD::FTANH: case ISD::FCANONICALIZE: R = SoftPromoteHalfRes_UnaryOp(N); break; // Binary FP Operations @@ -2849,6 +3085,9 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FFREXP: R = SoftPromoteHalfRes_FFREXP(N); break; case ISD::LOAD: R = SoftPromoteHalfRes_LOAD(N); break; + case ISD::ATOMIC_LOAD: + R = SoftPromoteHalfRes_ATOMIC_LOAD(N); + break; case ISD::SELECT: R = SoftPromoteHalfRes_SELECT(N); break; case ISD::SELECT_CC: R = SoftPromoteHalfRes_SELECT_CC(N); break; case ISD::SINT_TO_FP: @@ -2873,6 +3112,11 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { SetSoftPromotedHalf(SDValue(N, ResNo), R); } +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_ARITH_FENCE(SDNode *N) { + return DAG.getNode(ISD::ARITH_FENCE, SDLoc(N), MVT::i16, + BitConvertToInteger(N->getOperand(0))); +} + SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BITCAST(SDNode *N) { return BitConvertToInteger(N->getOperand(0)); } @@ -2999,10 +3243,16 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) { EVT SVT = N->getOperand(0).getValueType(); if (N->isStrictFPOpcode()) { - assert(RVT == MVT::f16); - SDValue Res = - DAG.getNode(ISD::STRICT_FP_TO_FP16, SDLoc(N), {MVT::i16, MVT::Other}, - {N->getOperand(0), N->getOperand(1)}); + // FIXME: assume we only have two f16 variants for now. + unsigned Opcode; + if (RVT == MVT::f16) + Opcode = ISD::STRICT_FP_TO_FP16; + else if (RVT == MVT::bf16) + Opcode = ISD::STRICT_FP_TO_BF16; + else + llvm_unreachable("unknown half type"); + SDValue Res = DAG.getNode(Opcode, SDLoc(N), {MVT::i16, MVT::Other}, + {N->getOperand(0), N->getOperand(1)}); ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; } @@ -3027,6 +3277,20 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_LOAD(SDNode *N) { return NewL; } +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_ATOMIC_LOAD(SDNode *N) { + AtomicSDNode *AM = cast<AtomicSDNode>(N); + + // Load the value as an integer value with the same number of bits. 
+ SDValue NewL = DAG.getAtomic( + ISD::ATOMIC_LOAD, SDLoc(N), MVT::i16, DAG.getVTList(MVT::i16, MVT::Other), + {AM->getChain(), AM->getBasePtr()}, AM->getMemOperand()); + + // Legalize the chain result by replacing uses of the old value chain with the + // new one + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + return NewL; +} + SDValue DAGTypeLegalizer::SoftPromoteHalfRes_SELECT(SDNode *N) { SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1)); SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2)); @@ -3142,6 +3406,9 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) { case ISD::SELECT_CC: Res = SoftPromoteHalfOp_SELECT_CC(N, OpNo); break; case ISD::SETCC: Res = SoftPromoteHalfOp_SETCC(N); break; case ISD::STORE: Res = SoftPromoteHalfOp_STORE(N, OpNo); break; + case ISD::ATOMIC_STORE: + Res = SoftPromoteHalfOp_ATOMIC_STORE(N, OpNo); + break; case ISD::STACKMAP: Res = SoftPromoteHalfOp_STACKMAP(N, OpNo); break; @@ -3192,10 +3459,16 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) { Op = GetSoftPromotedHalf(N->getOperand(IsStrict ? 1 : 0)); if (IsStrict) { - assert(SVT == MVT::f16); + unsigned Opcode; + if (SVT == MVT::f16) + Opcode = ISD::STRICT_FP16_TO_FP; + else if (SVT == MVT::bf16) + Opcode = ISD::STRICT_BF16_TO_FP; + else + llvm_unreachable("unknown half type"); SDValue Res = - DAG.getNode(ISD::STRICT_FP16_TO_FP, SDLoc(N), - {N->getValueType(0), MVT::Other}, {N->getOperand(0), Op}); + DAG.getNode(Opcode, SDLoc(N), {N->getValueType(0), MVT::Other}, + {N->getOperand(0), Op}); ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); ReplaceValueWith(SDValue(N, 0), Res); return SDValue(); @@ -3289,6 +3562,19 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo) { ST->getMemOperand()); } +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_ATOMIC_STORE(SDNode *N, + unsigned OpNo) { + assert(OpNo == 1 && "Can only soften the stored value!"); + AtomicSDNode *ST = cast<AtomicSDNode>(N); + SDValue Val = ST->getVal(); + SDLoc dl(N); + + SDValue Promoted = GetSoftPromotedHalf(Val); + return DAG.getAtomic(ISD::ATOMIC_STORE, dl, Promoted.getValueType(), + ST->getChain(), Promoted, ST->getBasePtr(), + ST->getMemOperand()); +} + SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo) { assert(OpNo > 1); // Because the first two arguments are guaranteed legal. 
SmallVector<SDValue> NewOps(N->ops().begin(), N->ops().end()); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 814f746f5a4d..af77b0070df0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -76,6 +76,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::VP_CTTZ: case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break; + case ISD::VP_CTTZ_ELTS_ZERO_UNDEF: + case ISD::VP_CTTZ_ELTS: + Res = PromoteIntRes_VP_CttzElements(N); + break; case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break; case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break; @@ -83,6 +87,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { break; case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N)); break; + case ISD::VECTOR_COMPRESS: + Res = PromoteIntRes_VECTOR_COMPRESS(N); + break; case ISD::SELECT: case ISD::VSELECT: case ISD::VP_SELECT: @@ -103,9 +110,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SIGN_EXTEND_INREG: Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break; case ISD::SRA: - case ISD::VP_ASHR: Res = PromoteIntRes_SRA(N); break; + case ISD::VP_SRA: Res = PromoteIntRes_SRA(N); break; case ISD::SRL: - case ISD::VP_LSHR: Res = PromoteIntRes_SRL(N); break; + case ISD::VP_SRL: Res = PromoteIntRes_SRL(N); break; case ISD::VP_TRUNCATE: case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break; case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break; @@ -133,6 +140,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { break; case ISD::SPLAT_VECTOR: case ISD::SCALAR_TO_VECTOR: + case ISD::EXPERIMENTAL_VP_SPLAT: Res = PromoteIntRes_ScalarOp(N); break; case ISD::STEP_VECTOR: Res = PromoteIntRes_STEP_VECTOR(N); break; @@ -165,6 +173,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16_BF16(N); break; + case ISD::STRICT_FP_TO_BF16: case ISD::STRICT_FP_TO_FP16: Res = PromoteIntRes_STRICT_FP_TO_FP16_BF16(N); break; @@ -183,6 +192,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::VP_SUB: case ISD::VP_MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break; + case ISD::AVGCEILS: + case ISD::AVGFLOORS: case ISD::VP_SMIN: case ISD::VP_SMAX: case ISD::SDIV: @@ -190,6 +201,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::VP_SDIV: case ISD::VP_SREM: Res = PromoteIntRes_SExtIntBinOp(N); break; + case ISD::AVGCEILU: + case ISD::AVGFLOORU: case ISD::VP_UMIN: case ISD::VP_UMAX: case ISD::UDIV: @@ -217,7 +230,20 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SSUBSAT: case ISD::USUBSAT: case ISD::SSHLSAT: - case ISD::USHLSAT: Res = PromoteIntRes_ADDSUBSHLSAT(N); break; + case ISD::USHLSAT: + Res = PromoteIntRes_ADDSUBSHLSAT<EmptyMatchContext>(N); + break; + case ISD::VP_SADDSAT: + case ISD::VP_UADDSAT: + case ISD::VP_SSUBSAT: + case ISD::VP_USUBSAT: + Res = PromoteIntRes_ADDSUBSHLSAT<VPMatchContext>(N); + break; + + case ISD::SCMP: + case ISD::UCMP: + Res = PromoteIntRes_CMP(N); + break; case ISD::SMULFIX: case ISD::SMULFIXSAT: @@ -307,6 +333,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned 
ResNo) { case ISD::LLRINT: Res = PromoteIntRes_XRINT(N); break; + + case ISD::PATCHPOINT: + Res = PromoteIntRes_PATCHPOINT(N); + break; } // If the result is null then the sub-method took care of registering it. @@ -340,6 +370,26 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) { N->getMemoryVT(), ResVT, N->getChain(), N->getBasePtr(), N->getMemOperand()); + if (N->getOpcode() == ISD::ATOMIC_LOAD) { + ISD::LoadExtType ETy = cast<AtomicSDNode>(N)->getExtensionType(); + if (ETy == ISD::NON_EXTLOAD) { + switch (TLI.getExtendForAtomicOps()) { + case ISD::SIGN_EXTEND: + ETy = ISD::SEXTLOAD; + break; + case ISD::ZERO_EXTEND: + ETy = ISD::ZEXTLOAD; + break; + case ISD::ANY_EXTEND: + ETy = ISD::EXTLOAD; + break; + default: + llvm_unreachable("Invalid atomic op extension"); + } + } + cast<AtomicSDNode>(Res)->setExtensionType(ETy); + } + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -540,7 +590,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { ShAmt); SDValue Mask = N->getOperand(1); SDValue EVL = N->getOperand(2); - return DAG.getNode(ISD::VP_LSHR, dl, NVT, + return DAG.getNode(ISD::VP_SRL, dl, NVT, DAG.getNode(ISD::VP_BSWAP, dl, NVT, Op, Mask, EVL), ShAmt, Mask, EVL); } @@ -568,7 +618,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), ShAmt); SDValue Mask = N->getOperand(1); SDValue EVL = N->getOperand(2); - return DAG.getNode(ISD::VP_LSHR, dl, NVT, + return DAG.getNode(ISD::VP_SRL, dl, NVT, DAG.getNode(ISD::VP_BITREVERSE, dl, NVT, Op, Mask, EVL), ShAmt, Mask, EVL); } @@ -613,21 +663,46 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { } } - // Zero extend to the promoted type and do the count there. - SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + unsigned CtlzOpcode = N->getOpcode(); + if (CtlzOpcode == ISD::CTLZ || CtlzOpcode == ISD::VP_CTLZ) { + // Subtract off the extra leading bits in the bigger type. + SDValue ExtractLeadingBits = DAG.getConstant( + NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl, NVT); - // Subtract off the extra leading bits in the bigger type. - SDValue ExtractLeadingBits = DAG.getConstant( - NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl, NVT); - if (!N->isVPOpcode()) - return DAG.getNode(ISD::SUB, dl, NVT, - DAG.getNode(N->getOpcode(), dl, NVT, Op), - ExtractLeadingBits); - SDValue Mask = N->getOperand(1); - SDValue EVL = N->getOperand(2); - return DAG.getNode(ISD::VP_SUB, dl, NVT, - DAG.getNode(N->getOpcode(), dl, NVT, Op, Mask, EVL), - ExtractLeadingBits, Mask, EVL); + if (!N->isVPOpcode()) { + // Zero extend to the promoted type and do the count there. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::SUB, dl, NVT, + DAG.getNode(N->getOpcode(), dl, NVT, Op), + ExtractLeadingBits); + } + SDValue Mask = N->getOperand(1); + SDValue EVL = N->getOperand(2); + // Zero extend to the promoted type and do the count there. 
+ SDValue Op = VPZExtPromotedInteger(N->getOperand(0), Mask, EVL); + return DAG.getNode(ISD::VP_SUB, dl, NVT, + DAG.getNode(N->getOpcode(), dl, NVT, Op, Mask, EVL), + ExtractLeadingBits, Mask, EVL); + } + if (CtlzOpcode == ISD::CTLZ_ZERO_UNDEF || + CtlzOpcode == ISD::VP_CTLZ_ZERO_UNDEF) { + // Any Extend the argument + SDValue Op = GetPromotedInteger(N->getOperand(0)); + // Op = Op << (sizeinbits(NVT) - sizeinbits(Old VT)) + unsigned SHLAmount = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); + auto ShiftConst = + DAG.getShiftAmountConstant(SHLAmount, Op.getValueType(), dl); + if (!N->isVPOpcode()) { + Op = DAG.getNode(ISD::SHL, dl, NVT, Op, ShiftConst); + return DAG.getNode(CtlzOpcode, dl, NVT, Op); + } + + SDValue Mask = N->getOperand(1); + SDValue EVL = N->getOperand(2); + Op = DAG.getNode(ISD::VP_SHL, dl, NVT, Op, ShiftConst, Mask, EVL); + return DAG.getNode(CtlzOpcode, dl, NVT, Op, Mask, EVL); + } + llvm_unreachable("Invalid CTLZ Opcode"); } SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) { @@ -648,11 +723,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) { } // Zero extend to the promoted type and do the count or parity there. - SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - if (!N->isVPOpcode()) + if (!N->isVPOpcode()) { + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op); - return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op, - N->getOperand(1), N->getOperand(2)); + } + + SDValue Mask = N->getOperand(1); + SDValue EVL = N->getOperand(2); + SDValue Op = VPZExtPromotedInteger(N->getOperand(0), Mask, EVL); + return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op, Mask, + EVL); } SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { @@ -676,23 +756,32 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { } } - if (N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::VP_CTTZ) { + unsigned NewOpc = N->getOpcode(); + if (NewOpc == ISD::CTTZ || NewOpc == ISD::VP_CTTZ) { // The count is the same in the promoted type except if the original // value was zero. This can be handled by setting the bit just off // the top of the original type. 
auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(), OVT.getScalarSizeInBits()); - if (N->getOpcode() == ISD::CTTZ) + if (NewOpc == ISD::CTTZ) { Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT)); - else + NewOpc = ISD::CTTZ_ZERO_UNDEF; + } else { Op = DAG.getNode(ISD::VP_OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT), N->getOperand(1), N->getOperand(2)); + NewOpc = ISD::VP_CTTZ_ZERO_UNDEF; + } } if (!N->isVPOpcode()) - return DAG.getNode(N->getOpcode(), dl, NVT, Op); - return DAG.getNode(N->getOpcode(), dl, NVT, Op, N->getOperand(1), - N->getOperand(2)); + return DAG.getNode(NewOpc, dl, NVT, Op); + return DAG.getNode(NewOpc, dl, NVT, Op, N->getOperand(1), N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_VP_CttzElements(SDNode *N) { + SDLoc DL(N); + EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getNode(N->getOpcode(), DL, NewVT, N->ops()); } SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -909,6 +998,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { return Res; } +SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_COMPRESS(SDNode *N) { + SDValue Vec = GetPromotedInteger(N->getOperand(0)); + SDValue Passthru = GetPromotedInteger(N->getOperand(2)); + return DAG.getNode(ISD::VECTOR_COMPRESS, SDLoc(N), Vec.getValueType(), Vec, + N->getOperand(1), Passthru); +} + /// Promote the overflow flag of an overflowing arithmetic node. SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { // Change the return type of the boolean result while obeying @@ -920,7 +1016,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { unsigned NumOps = N->getNumOperands(); assert(NumOps <= 3 && "Too many operands"); if (NumOps == 3) - Ops[2] = N->getOperand(2); + Ops[2] = PromoteTargetBoolean(N->getOperand(2), VT); SDLoc dl(N); SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(VT, SVT), @@ -934,6 +1030,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { return DAG.getBoolExtOrTrunc(Res.getValue(1), dl, NVT, VT); } +template <class MatchContextClass> SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) { // If the promoted type is legal, we can convert this to: // 1. ANY_EXTEND iN to iM @@ -945,11 +1042,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) { SDLoc dl(N); SDValue Op1 = N->getOperand(0); SDValue Op2 = N->getOperand(1); + MatchContextClass matcher(DAG, TLI, N); unsigned OldBits = Op1.getScalarValueSizeInBits(); - unsigned Opcode = N->getOpcode(); + unsigned Opcode = matcher.getRootBaseOpcode(); bool IsShift = Opcode == ISD::USHLSAT || Opcode == ISD::SSHLSAT; + // FIXME: We need vp-aware PromotedInteger functions. SDValue Op1Promoted, Op2Promoted; if (IsShift) { Op1Promoted = GetPromotedInteger(Op1); @@ -968,18 +1067,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) { APInt MaxVal = APInt::getAllOnes(OldBits).zext(NewBits); SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType); SDValue Add = - DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted); - return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax); + matcher.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted); + return matcher.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax); } // USUBSAT can always be promoted as long as we have zero-extended the args. 
if (Opcode == ISD::USUBSAT) - return DAG.getNode(ISD::USUBSAT, dl, PromotedType, Op1Promoted, - Op2Promoted); + return matcher.getNode(ISD::USUBSAT, dl, PromotedType, Op1Promoted, + Op2Promoted); // Shift cannot use a min/max expansion, we can't detect overflow if all of // the bits have been shifted out. - if (IsShift || TLI.isOperationLegal(Opcode, PromotedType)) { + if (IsShift || matcher.isOperationLegal(Opcode, PromotedType)) { unsigned ShiftOp; switch (Opcode) { case ISD::SADDSAT: @@ -1002,11 +1101,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) { DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount); if (!IsShift) Op2Promoted = - DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount); + matcher.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount); SDValue Result = - DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted); - return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount); + matcher.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted); + return matcher.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount); } unsigned AddOp = Opcode == ISD::SADDSAT ? ISD::ADD : ISD::SUB; @@ -1015,9 +1114,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) { SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType); SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType); SDValue Result = - DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted); - Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax); - Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin); + matcher.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted); + Result = matcher.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax); + Result = matcher.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin); return Result; } @@ -1204,6 +1303,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { return Res; } +SDValue DAGTypeLegalizer::PromoteIntRes_CMP(SDNode *N) { + EVT PromotedResultTy = + TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getNode(N->getOpcode(), SDLoc(N), PromotedResultTy, + N->getOperand(0), N->getOperand(1)); +} + SDValue DAGTypeLegalizer::PromoteIntRes_Select(SDNode *N) { SDValue Mask = N->getOperand(0); @@ -1290,12 +1396,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FFREXP(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); - if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) - RHS = ZExtPromotedInteger(RHS); - if (N->getOpcode() != ISD::VP_SHL) + if (N->getOpcode() != ISD::VP_SHL) { + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = ZExtPromotedInteger(RHS); + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + } + + SDValue Mask = N->getOperand(2); + SDValue EVL = N->getOperand(3); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = VPZExtPromotedInteger(RHS, Mask, EVL); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, - N->getOperand(2), N->getOperand(3)); + Mask, EVL); } SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { @@ -1319,60 +1432,91 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) { - // Sign extend the input. 
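The two clamping strategies used by PromoteIntRes_ADDSUBSHLSAT above have direct scalar analogues: do the arithmetic in the wider type, then pin the result to the old type's range, UMIN against the unsigned maximum or an SMIN/SMAX pair against the signed bounds. A plain C++ sketch for i8 promoted to i32 (illustration only, not the DAG form):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    uint8_t uaddsat8(uint8_t a, uint8_t b) {
      uint32_t Add = (uint32_t)a + b;                // cannot wrap in 32 bits
      return (uint8_t)std::min<uint32_t>(Add, 0xFF); // UMIN with SatMax
    }

    int8_t saddsat8(int8_t a, int8_t b) {
      int32_t Add = (int32_t)a + b;
      Add = std::min(Add, 127);  // SMIN with SatMax
      Add = std::max(Add, -128); // SMAX with SatMin
      return (int8_t)Add;
    }

    int main() {
      assert(uaddsat8(200, 100) == 255);
      assert(saddsat8(100, 100) == 127);
      assert(saddsat8(-100, -100) == -128);
    }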
- SDValue LHS = SExtPromotedInteger(N->getOperand(0)); - SDValue RHS = SExtPromotedInteger(N->getOperand(1)); - if (N->getNumOperands() == 2) + if (N->getNumOperands() == 2) { + // Sign extend the input. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + } assert(N->getNumOperands() == 4 && "Unexpected number of operands!"); assert(N->isVPOpcode() && "Expected VP opcode"); + SDValue Mask = N->getOperand(2); + SDValue EVL = N->getOperand(3); + // Sign extend the input. + SDValue LHS = VPSExtPromotedInteger(N->getOperand(0), Mask, EVL); + SDValue RHS = VPSExtPromotedInteger(N->getOperand(1), Mask, EVL); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, - N->getOperand(2), N->getOperand(3)); + Mask, EVL); } SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) { - // Zero extend the input. - SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); - SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); - if (N->getNumOperands() == 2) + if (N->getNumOperands() == 2) { + // Zero extend the input. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + } assert(N->getNumOperands() == 4 && "Unexpected number of operands!"); assert(N->isVPOpcode() && "Expected VP opcode"); + // Zero extend the input. + SDValue Mask = N->getOperand(2); + SDValue EVL = N->getOperand(3); + SDValue LHS = VPZExtPromotedInteger(N->getOperand(0), Mask, EVL); + SDValue RHS = VPZExtPromotedInteger(N->getOperand(1), Mask, EVL); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, - N->getOperand(2), N->getOperand(3)); + Mask, EVL); } SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + // It doesn't matter if we sign extend or zero extend in the inputs. So do - // whatever is best for the target. - SDValue LHS = SExtOrZExtPromotedInteger(N->getOperand(0)); - SDValue RHS = SExtOrZExtPromotedInteger(N->getOperand(1)); + // whatever is best for the target and the promoted operands. + SExtOrZExtPromotedOperands(LHS, RHS); + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); } SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { - // The input value must be properly sign extended. - SDValue LHS = SExtPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); - if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) - RHS = ZExtPromotedInteger(RHS); - if (N->getOpcode() != ISD::VP_ASHR) + if (N->getOpcode() != ISD::VP_SRA) { + // The input value must be properly sign extended. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = ZExtPromotedInteger(RHS); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + } + + SDValue Mask = N->getOperand(2); + SDValue EVL = N->getOperand(3); + // The input value must be properly sign extended. 
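That comment is load-bearing, and it is the reason SRA promotes its value operand with SExtPromotedInteger while SRL uses ZExtPromotedInteger: the promoted shift drags the wide register's high bits down into the narrow lane, so those bits must already be the correct extension of the narrow value. A scalar counterexample showing what the wrong choice does:

    #include <cassert>
    #include <cstdint>

    int main() {
      int8_t v = -2; // 0b11111110
      // SRA after sign extension: high bits replicate the sign, result is -1.
      assert((int8_t)((int32_t)v >> 1) == -1);
      // The same shift after a (wrong) zero extension pulls a 0 into bit 7.
      assert((int8_t)((uint32_t)(uint8_t)v >> 1) == 127);
    }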
+ SDValue LHS = VPSExtPromotedInteger(N->getOperand(0), Mask, EVL); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = VPZExtPromotedInteger(RHS, Mask, EVL); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, - N->getOperand(2), N->getOperand(3)); + Mask, EVL); } SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { - // The input value must be properly zero extended. - SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); - if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) - RHS = ZExtPromotedInteger(RHS); - if (N->getOpcode() != ISD::VP_LSHR) + if (N->getOpcode() != ISD::VP_SRL) { + // The input value must be properly zero extended. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = ZExtPromotedInteger(RHS); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + } + + SDValue Mask = N->getOperand(2); + SDValue EVL = N->getOperand(3); + // The input value must be properly zero extended. + SDValue LHS = VPZExtPromotedInteger(N->getOperand(0), Mask, EVL); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = VPZExtPromotedInteger(RHS, Mask, EVL); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, - N->getOperand(2), N->getOperand(3)); + Mask, EVL); } SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) { @@ -1439,7 +1583,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VPFunnelShift(SDNode *N) { SDValue Mask = N->getOperand(3); SDValue EVL = N->getOperand(4); if (getTypeAction(Amt.getValueType()) == TargetLowering::TypePromoteInteger) - Amt = ZExtPromotedInteger(Amt); + Amt = VPZExtPromotedInteger(Amt, Mask, EVL); EVT AmtVT = Amt.getValueType(); SDLoc DL(N); @@ -1463,13 +1607,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VPFunnelShift(SDNode *N) { !TLI.isOperationLegalOrCustom(Opcode, VT)) { SDValue HiShift = DAG.getConstant(OldBits, DL, VT); Hi = DAG.getNode(ISD::VP_SHL, DL, VT, Hi, HiShift, Mask, EVL); - // FIXME: Replace it by vp operations. - Lo = DAG.getZeroExtendInReg(Lo, DL, OldVT); + Lo = DAG.getVPZeroExtendInReg(Lo, Mask, EVL, DL, OldVT); SDValue Res = DAG.getNode(ISD::VP_OR, DL, VT, Hi, Lo, Mask, EVL); - Res = DAG.getNode(IsFSHR ? ISD::VP_LSHR : ISD::VP_SHL, DL, VT, Res, Amt, + Res = DAG.getNode(IsFSHR ? 
ISD::VP_SRL : ISD::VP_SHL, DL, VT, Res, Amt, Mask, EVL); if (!IsFSHR) - Res = DAG.getNode(ISD::VP_LSHR, DL, VT, Res, HiShift, Mask, EVL); + Res = DAG.getNode(ISD::VP_SRL, DL, VT, Res, HiShift, Mask, EVL); return Res; } @@ -1788,6 +1931,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { break; case ISD::SPLAT_VECTOR: case ISD::SCALAR_TO_VECTOR: + case ISD::EXPERIMENTAL_VP_SPLAT: Res = PromoteIntOp_ScalarOp(N); break; case ISD::VSELECT: @@ -1810,6 +1954,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { OpNo); break; case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo); break; + case ISD::VECTOR_COMPRESS: + Res = PromoteIntOp_VECTOR_COMPRESS(N, OpNo); + break; case ISD::VP_TRUNCATE: case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; case ISD::BF16_TO_FP: @@ -1829,14 +1976,12 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::ROTL: case ISD::ROTR: Res = PromoteIntOp_Shift(N); break; + case ISD::SCMP: + case ISD::UCMP: Res = PromoteIntOp_CMP(N); break; + case ISD::FSHL: case ISD::FSHR: Res = PromoteIntOp_FunnelShift(N); break; - case ISD::SADDO_CARRY: - case ISD::SSUBO_CARRY: - case ISD::UADDO_CARRY: - case ISD::USUBO_CARRY: Res = PromoteIntOp_ADDSUBO_CARRY(N, OpNo); break; - case ISD::FRAMEADDR: case ISD::RETURNADDR: Res = PromoteIntOp_FRAMERETURNADDR(N); break; @@ -1911,25 +2056,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { return false; } -/// PromoteSetCCOperands - Promote the operands of a comparison. This code is -/// shared among BR_CC, SELECT_CC, and SETCC handlers. -void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &LHS, SDValue &RHS, - ISD::CondCode CCCode) { - // We have to insert explicit sign or zero extends. Note that we could - // insert sign extends for ALL conditions. For those operations where either - // zero or sign extension would be valid, we ask the target which extension - // it would prefer. - - // Signed comparisons always require sign extension. - if (ISD::isSignedIntSetCC(CCCode)) { - LHS = SExtPromotedInteger(LHS); - RHS = SExtPromotedInteger(RHS); - return; - } - - assert((ISD::isUnsignedIntSetCC(CCCode) || ISD::isIntEqualitySetCC(CCCode)) && - "Unknown integer comparison!"); - +// These operands can be either sign extended or zero extended as long as we +// treat them the same. If an extension is free, choose that. Otherwise, follow +// target preference. +void DAGTypeLegalizer::SExtOrZExtPromotedOperands(SDValue &LHS, SDValue &RHS) { SDValue OpL = GetPromotedInteger(LHS); SDValue OpR = GetPromotedInteger(RHS); @@ -1973,6 +2103,28 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &LHS, SDValue &RHS, RHS = ZExtPromotedInteger(RHS); } +/// PromoteSetCCOperands - Promote the operands of a comparison. This code is +/// shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &LHS, SDValue &RHS, + ISD::CondCode CCCode) { + // We have to insert explicit sign or zero extends. Note that we could + // insert sign extends for ALL conditions. For those operations where either + // zero or sign extension would be valid, we ask the target which extension + // it would prefer. + + // Signed comparisons always require sign extension. 
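A scalar demonstration of why, and of why the unsigned and equality cases handled by SExtOrZExtPromotedOperands tolerate either extension so long as both operands receive the same one:

    #include <cassert>
    #include <cstdint>

    int main() {
      int8_t a = -1, b = 1;
      // Signed compare: sign extension preserves the answer...
      assert((int32_t)a < (int32_t)b);
      // ...zero extension does not: -1 becomes 255 and compares high.
      assert(!((uint32_t)(uint8_t)a < (uint32_t)(uint8_t)b));
      // Equality survives either extension, applied consistently.
      int8_t c = -1;
      assert((int32_t)a == (int32_t)c);
      assert((uint32_t)(uint8_t)a == (uint32_t)(uint8_t)c);
    }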
+ if (ISD::isSignedIntSetCC(CCCode)) { + LHS = SExtPromotedInteger(LHS); + RHS = SExtPromotedInteger(RHS); + return; + } + + assert((ISD::isUnsignedIntSetCC(CCCode) || ISD::isIntEqualitySetCC(CCCode)) && + "Unknown integer comparison!"); + + SExtOrZExtPromotedOperands(LHS, RHS); +} + SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Op); @@ -2078,10 +2230,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, } SDValue DAGTypeLegalizer::PromoteIntOp_ScalarOp(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + if (N->getOpcode() == ISD::EXPERIMENTAL_VP_SPLAT) + return SDValue( + DAG.UpdateNodeOperands(N, Op, N->getOperand(1), N->getOperand(2)), 0); + // Integer SPLAT_VECTOR/SCALAR_TO_VECTOR operands are implicitly truncated, // so just promote the operand in place. - return SDValue(DAG.UpdateNodeOperands(N, - GetPromotedInteger(N->getOperand(0))), 0); + return SDValue(DAG.UpdateNodeOperands(N, Op), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { @@ -2137,6 +2293,17 @@ SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) { ZExtPromotedInteger(N->getOperand(1))), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_CMP(SDNode *N) { + SDValue LHS = N->getOpcode() == ISD::UCMP + ? ZExtPromotedInteger(N->getOperand(0)) + : SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = N->getOpcode() == ISD::UCMP + ? ZExtPromotedInteger(N->getOperand(1)) + : SExtPromotedInteger(N->getOperand(1)); + + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_FunnelShift(SDNode *N) { return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), ZExtPromotedInteger(N->getOperand(2))), 0); @@ -2163,7 +2330,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VP_SIGN_EXTEND(SDNode *N) { // FIXME: There is no VP_SIGN_EXTEND_INREG so use a pair of shifts. SDValue Shl = DAG.getNode(ISD::VP_SHL, dl, VT, Op, ShAmt, N->getOperand(1), N->getOperand(2)); - return DAG.getNode(ISD::VP_ASHR, dl, VT, Shl, ShAmt, N->getOperand(1), + return DAG.getNode(ISD::VP_SRA, dl, VT, Shl, ShAmt, N->getOperand(1), N->getOperand(2)); } @@ -2288,6 +2455,16 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, N->getIndexType(), TruncateStore); } +SDValue DAGTypeLegalizer::PromoteIntOp_VECTOR_COMPRESS(SDNode *N, + unsigned OpNo) { + assert(OpNo == 1 && "Can only promote VECTOR_COMPRESS mask."); + SDValue Vec = N->getOperand(0); + EVT VT = Vec.getValueType(); + SDValue Passthru = N->getOperand(2); + SDValue Mask = PromoteTargetBoolean(N->getOperand(1), VT); + return DAG.getNode(ISD::VECTOR_COMPRESS, SDLoc(N), VT, Vec, Mask, Passthru); +} + SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); if (N->getOpcode() == ISD::VP_TRUNCATE) @@ -2325,23 +2502,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VP_ZERO_EXTEND(SDNode *N) { // FIXME: There is no VP_ANY_EXTEND yet. 
Op = DAG.getNode(ISD::VP_ZERO_EXTEND, dl, VT, Op, N->getOperand(1), N->getOperand(2)); - APInt Imm = APInt::getLowBitsSet(VT.getScalarSizeInBits(), - N->getOperand(0).getScalarValueSizeInBits()); - return DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(Imm, dl, VT), - N->getOperand(1), N->getOperand(2)); -} - -SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBO_CARRY(SDNode *N, unsigned OpNo) { - assert(OpNo == 2 && "Don't know how to promote this operand!"); - - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - SDValue Carry = N->getOperand(2); - SDLoc DL(N); - - Carry = PromoteTargetBoolean(Carry, LHS.getValueType()); - - return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0); + return DAG.getVPZeroExtendInReg(Op, N->getOperand(1), N->getOperand(2), dl, + N->getOperand(0).getValueType()); } SDValue DAGTypeLegalizer::PromoteIntOp_FIX(SDNode *N) { @@ -2648,7 +2810,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::LLRINT: ExpandIntRes_XROUND_XRINT(N, Lo, Hi); break; case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break; - case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break; + case ISD::READCYCLECOUNTER: + case ISD::READSTEADYCOUNTER: ExpandIntRes_READCOUNTER(N, Lo, Hi); break; case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break; case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break; case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break; @@ -2706,6 +2869,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UMIN: case ISD::SMIN: ExpandIntRes_MINMAX(N, Lo, Hi); break; + case ISD::SCMP: + case ISD::UCMP: ExpandIntRes_CMP(N, Lo, Hi); break; + case ISD::ADD: case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break; @@ -2740,6 +2906,11 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SSHLSAT: case ISD::USHLSAT: ExpandIntRes_SHLSAT(N, Lo, Hi); break; + case ISD::AVGCEILS: + case ISD::AVGCEILU: + case ISD::AVGFLOORS: + case ISD::AVGFLOORU: ExpandIntRes_AVG(N, Lo, Hi); break; + case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: @@ -2824,25 +2995,26 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt, EVT NVT = InL.getValueType(); unsigned VTBits = N->getValueType(0).getSizeInBits(); unsigned NVTBits = NVT.getSizeInBits(); - EVT ShTy = N->getOperand(1).getValueType(); if (N->getOpcode() == ISD::SHL) { if (Amt.uge(VTBits)) { Lo = Hi = DAG.getConstant(0, DL, NVT); } else if (Amt.ugt(NVTBits)) { Lo = DAG.getConstant(0, DL, NVT); - Hi = DAG.getNode(ISD::SHL, DL, - NVT, InL, DAG.getConstant(Amt - NVTBits, DL, ShTy)); + Hi = DAG.getNode(ISD::SHL, DL, NVT, InL, + DAG.getShiftAmountConstant(Amt - NVTBits, NVT, DL)); } else if (Amt == NVTBits) { Lo = DAG.getConstant(0, DL, NVT); Hi = InL; } else { - Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, DL, ShTy)); - Hi = DAG.getNode(ISD::OR, DL, NVT, - DAG.getNode(ISD::SHL, DL, NVT, InH, - DAG.getConstant(Amt, DL, ShTy)), - DAG.getNode(ISD::SRL, DL, NVT, InL, - DAG.getConstant(-Amt + NVTBits, DL, ShTy))); + Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, + DAG.getShiftAmountConstant(Amt, NVT, DL)); + Hi = DAG.getNode( + ISD::OR, DL, NVT, + DAG.getNode(ISD::SHL, DL, NVT, InH, + DAG.getShiftAmountConstant(Amt, NVT, DL)), + DAG.getNode(ISD::SRL, DL, NVT, InL, + DAG.getShiftAmountConstant(-Amt + NVTBits, NVT, DL))); } return; } @@ -2851,19 +3023,21 @@ void 
DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt, if (Amt.uge(VTBits)) { Lo = Hi = DAG.getConstant(0, DL, NVT); } else if (Amt.ugt(NVTBits)) { - Lo = DAG.getNode(ISD::SRL, DL, - NVT, InH, DAG.getConstant(Amt - NVTBits, DL, ShTy)); + Lo = DAG.getNode(ISD::SRL, DL, NVT, InH, + DAG.getShiftAmountConstant(Amt - NVTBits, NVT, DL)); Hi = DAG.getConstant(0, DL, NVT); } else if (Amt == NVTBits) { Lo = InH; Hi = DAG.getConstant(0, DL, NVT); } else { - Lo = DAG.getNode(ISD::OR, DL, NVT, - DAG.getNode(ISD::SRL, DL, NVT, InL, - DAG.getConstant(Amt, DL, ShTy)), - DAG.getNode(ISD::SHL, DL, NVT, InH, - DAG.getConstant(-Amt + NVTBits, DL, ShTy))); - Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy)); + Lo = DAG.getNode( + ISD::OR, DL, NVT, + DAG.getNode(ISD::SRL, DL, NVT, InL, + DAG.getShiftAmountConstant(Amt, NVT, DL)), + DAG.getNode(ISD::SHL, DL, NVT, InH, + DAG.getShiftAmountConstant(-Amt + NVTBits, NVT, DL))); + Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, + DAG.getShiftAmountConstant(Amt, NVT, DL)); } return; } @@ -2871,23 +3045,25 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt, assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); if (Amt.uge(VTBits)) { Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, - DAG.getConstant(NVTBits - 1, DL, ShTy)); + DAG.getShiftAmountConstant(NVTBits - 1, NVT, DL)); } else if (Amt.ugt(NVTBits)) { Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, - DAG.getConstant(Amt - NVTBits, DL, ShTy)); + DAG.getShiftAmountConstant(Amt - NVTBits, NVT, DL)); Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, - DAG.getConstant(NVTBits - 1, DL, ShTy)); + DAG.getShiftAmountConstant(NVTBits - 1, NVT, DL)); } else if (Amt == NVTBits) { Lo = InH; Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, - DAG.getConstant(NVTBits - 1, DL, ShTy)); + DAG.getShiftAmountConstant(NVTBits - 1, NVT, DL)); } else { - Lo = DAG.getNode(ISD::OR, DL, NVT, - DAG.getNode(ISD::SRL, DL, NVT, InL, - DAG.getConstant(Amt, DL, ShTy)), - DAG.getNode(ISD::SHL, DL, NVT, InH, - DAG.getConstant(-Amt + NVTBits, DL, ShTy))); - Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy)); + Lo = DAG.getNode( + ISD::OR, DL, NVT, + DAG.getNode(ISD::SRL, DL, NVT, InL, + DAG.getShiftAmountConstant(Amt, NVT, DL)), + DAG.getNode(ISD::SHL, DL, NVT, InH, + DAG.getShiftAmountConstant(-Amt + NVTBits, NVT, DL))); + Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, + DAG.getShiftAmountConstant(Amt, NVT, DL)); } } @@ -2897,6 +3073,8 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt, /// shift amount. bool DAGTypeLegalizer:: ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { + unsigned Opc = N->getOpcode(); + SDValue In = N->getOperand(0); SDValue Amt = N->getOperand(1); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); EVT ShTy = Amt.getValueType(); @@ -2907,15 +3085,15 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); - KnownBits Known = DAG.computeKnownBits(N->getOperand(1)); + KnownBits Known = DAG.computeKnownBits(Amt); // If we don't know anything about the high bits, exit. - if (((Known.Zero|Known.One) & HighBitMask) == 0) + if (((Known.Zero | Known.One) & HighBitMask) == 0) return false; // Get the incoming operand to be shifted. 
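The general case of ExpandShiftByConstant above is the standard two-half decomposition: each half takes the shift, plus an OR of the bits that cross the halfway boundary, shifted by NVTBits - Amt (spelled -Amt + NVTBits in the APInt arithmetic). Written out for an i128 SHL over two i64 parts with a constant 0 < Amt < 64:

    #include <cassert>
    #include <cstdint>

    void shl128(uint64_t InL, uint64_t InH, unsigned Amt,
                uint64_t &Lo, uint64_t &Hi) {
      Lo = InL << Amt;
      Hi = (InH << Amt) | (InL >> (64 - Amt)); // bits crossing into the high half
    }

    int main() {
      uint64_t Lo, Hi;
      shl128(0x8000000000000001ULL, 0, 1, Lo, Hi);
      assert(Lo == 2 && Hi == 1); // InL's top bit moved into the high word
    }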
SDValue InL, InH; - GetExpandedInteger(N->getOperand(0), InL, InH); + GetExpandedInteger(In, InL, InH); // If we know that any of the high bits of the shift amount are one, then we // can do this as a couple of simple shifts. @@ -2924,7 +3102,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt, DAG.getConstant(~HighBitMask, dl, ShTy)); - switch (N->getOpcode()) { + switch (Opc) { default: llvm_unreachable("Unknown shift"); case ISD::SHL: Lo = DAG.getConstant(0, dl, NVT); // Low part is zero. @@ -2952,7 +3130,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { DAG.getConstant(NVTBits - 1, dl, ShTy)); unsigned Op1, Op2; - switch (N->getOpcode()) { + switch (Opc) { default: llvm_unreachable("Unknown shift"); case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break; case ISD::SRL: @@ -2960,7 +3138,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { } // When shifting right the arithmetic for Lo and Hi is swapped. - if (N->getOpcode() != ISD::SHL) + if (Opc != ISD::SHL) std::swap(InL, InH); // Use a little trick to get the bits that move from Lo to Hi. First @@ -2969,10 +3147,10 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { // Then compute the remaining shift with amount-1. SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2); - Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt); + Lo = DAG.getNode(Opc, dl, NVT, InL, Amt); Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2); - if (N->getOpcode() != ISD::SHL) + if (Opc != ISD::SHL) std::swap(Hi, Lo); return true; } @@ -3191,6 +3369,11 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N, SplitInteger(Result, Lo, Hi); } +void DAGTypeLegalizer::ExpandIntRes_CMP(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue ExpandedCMP = TLI.expandCMP(N, DAG); + SplitInteger(ExpandedCMP, Lo, Hi); +} + void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); @@ -4008,47 +4191,15 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, LC = RTLIB::MUL_I128; if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) { - // We'll expand the multiplication by brute force because we have no other - // options. This is a trivially-generalized version of the code from - // Hacker's Delight (itself derived from Knuth's Algorithm M from section - // 4.3.1). 
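The deleted block below and the TLI.forceExpandWideMUL call that replaces it implement the same schoolbook expansion that comment describes: four half-width partial products combined with carries. Its scalar shape, as an unsigned 64x64 -> 128 multiply built from 32-bit halves (variable names chosen to echo the T/U/V terms in the removed code):

    #include <cassert>
    #include <cstdint>

    void umul64wide(uint64_t a, uint64_t b, uint64_t &Lo, uint64_t &Hi) {
      uint64_t AL = a & 0xFFFFFFFF, AH = a >> 32;
      uint64_t BL = b & 0xFFFFFFFF, BH = b >> 32;
      uint64_t T = AL * BL;                    // low x low
      uint64_t U = AH * BL + (T >> 32);        // first cross term plus carry
      uint64_t V = AL * BH + (U & 0xFFFFFFFF); // second cross term plus carry
      Lo = (T & 0xFFFFFFFF) | (V << 32);
      Hi = AH * BH + (U >> 32) + (V >> 32);
    }

    int main() {
      uint64_t Lo, Hi;
      umul64wide(UINT64_MAX, UINT64_MAX, Lo, Hi);
      assert(Lo == 1 && Hi == UINT64_MAX - 1); // (2^64-1)^2 = 2^128 - 2^65 + 1
    }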
- unsigned Bits = NVT.getSizeInBits(); - unsigned HalfBits = Bits >> 1; - SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, - NVT); - SDValue LLL = DAG.getNode(ISD::AND, dl, NVT, LL, Mask); - SDValue RLL = DAG.getNode(ISD::AND, dl, NVT, RL, Mask); - - SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL); - SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask); - - SDValue Shift = DAG.getShiftAmountConstant(HalfBits, NVT, dl); - SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift); - SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift); - SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift); - - SDValue U = DAG.getNode(ISD::ADD, dl, NVT, - DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TH); - SDValue UL = DAG.getNode(ISD::AND, dl, NVT, U, Mask); - SDValue UH = DAG.getNode(ISD::SRL, dl, NVT, U, Shift); - - SDValue V = DAG.getNode(ISD::ADD, dl, NVT, - DAG.getNode(ISD::MUL, dl, NVT, LLL, RLH), UL); - SDValue VH = DAG.getNode(ISD::SRL, dl, NVT, V, Shift); - - SDValue W = DAG.getNode(ISD::ADD, dl, NVT, - DAG.getNode(ISD::MUL, dl, NVT, LLH, RLH), - DAG.getNode(ISD::ADD, dl, NVT, UH, VH)); - Lo = DAG.getNode(ISD::ADD, dl, NVT, TL, - DAG.getNode(ISD::SHL, dl, NVT, V, Shift)); - - Hi = DAG.getNode(ISD::ADD, dl, NVT, W, - DAG.getNode(ISD::ADD, dl, NVT, - DAG.getNode(ISD::MUL, dl, NVT, RH, LL), - DAG.getNode(ISD::MUL, dl, NVT, RL, LH))); + // Perform a wide multiplication where the wide type is the original VT and + // the 4 parts are the split arguments. + TLI.forceExpandWideMUL(DAG, dl, /*Signed=*/true, VT, LL, LH, RL, RH, Lo, + Hi); return; } + // Note that we don't need to do a wide MUL here since we don't care about the + // upper half of the result if it exceeds VT. SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); @@ -4056,8 +4207,8 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, Lo, Hi); } -void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo, - SDValue &Hi) { +void DAGTypeLegalizer::ExpandIntRes_READCOUNTER(SDNode *N, SDValue &Lo, + SDValue &Hi) { SDLoc DL(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDVTList VTs = DAG.getVTList(NVT, NVT, MVT::Other); @@ -4067,6 +4218,11 @@ void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo, ReplaceValueWith(SDValue(N, 1), R.getValue(2)); } +void DAGTypeLegalizer::ExpandIntRes_AVG(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue Result = TLI.expandAVG(N, DAG); + SplitInteger(Result, Lo, Hi); +} + void DAGTypeLegalizer::ExpandIntRes_ADDSUBSAT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Result = TLI.expandAddSubSat(N, DAG); @@ -4146,9 +4302,15 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG, TargetLowering::MulExpansionKind::OnlyLegalOrCustom, LL, LH, RL, RH)) { - report_fatal_error("Unable to expand MUL_FIX using MUL_LOHI."); - return; + Result.clear(); + Result.resize(4); + + SDValue LoTmp, HiTmp; + TLI.forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, LoTmp, HiTmp); + SplitInteger(LoTmp, Result[0], Result[1]); + SplitInteger(HiTmp, Result[2], Result[3]); } + assert(Result.size() == 4 && "Unexpected number of partlets in the result"); unsigned NVTSize = NVT.getScalarSizeInBits(); assert((VTSize == NVTSize * 2) && "Expected the new value type to be half " @@ -4523,6 +4685,7 @@ void DAGTypeLegalizer::ExpandIntRes_ShiftThroughStack(SDNode *N, SDValue &Lo, void 
DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); + unsigned Opc = N->getOpcode(); SDLoc dl(N); // If we can emit an efficient shift operation, do so now. Check to see if @@ -4537,12 +4700,12 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, // If this target supports shift_PARTS, use it. First, map to the _PARTS opc. unsigned PartsOpc; - if (N->getOpcode() == ISD::SHL) { + if (Opc == ISD::SHL) { PartsOpc = ISD::SHL_PARTS; - } else if (N->getOpcode() == ISD::SRL) { + } else if (Opc == ISD::SRL) { PartsOpc = ISD::SRL_PARTS; } else { - assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + assert(Opc == ISD::SRA && "Unknown shift!"); PartsOpc = ISD::SRA_PARTS; } @@ -4595,7 +4758,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, // Otherwise, emit a libcall. RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; bool isSigned; - if (N->getOpcode() == ISD::SHL) { + if (Opc == ISD::SHL) { isSigned = false; /*sign irrelevant*/ if (VT == MVT::i16) LC = RTLIB::SHL_I16; @@ -4605,7 +4768,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, LC = RTLIB::SHL_I64; else if (VT == MVT::i128) LC = RTLIB::SHL_I128; - } else if (N->getOpcode() == ISD::SRL) { + } else if (Opc == ISD::SRL) { isSigned = false; if (VT == MVT::i16) LC = RTLIB::SRL_I16; @@ -4616,7 +4779,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, else if (VT == MVT::i128) LC = RTLIB::SRL_I128; } else { - assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + assert(Opc == ISD::SRA && "Unknown shift!"); isSigned = true; if (VT == MVT::i16) LC = RTLIB::SRA_I16; @@ -5101,6 +5264,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break; case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break; + case ISD::EXPERIMENTAL_VP_SPLAT: case ISD::SPLAT_VECTOR: Res = ExpandIntOp_SPLAT_VECTOR(N); break; case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; @@ -5120,6 +5284,9 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::RETURNADDR: case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break; + case ISD::SCMP: + case ISD::UCMP: Res = ExpandIntOp_CMP(N); break; + case ISD::ATOMIC_STORE: Res = ExpandIntOp_ATOMIC_STORE(N); break; case ISD::STACKMAP: Res = ExpandIntOp_STACKMAP(N, OpNo); @@ -5381,6 +5548,10 @@ SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Lo), 0); } +SDValue DAGTypeLegalizer::ExpandIntOp_CMP(SDNode *N) { + return TLI.expandCMP(N, DAG); +} + SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) { // The argument of RETURNADDR / FRAMEADDR builtin is 32 bit contant. This // surely makes pretty nice problems on 8/16 bit targets. Just truncate this @@ -5609,21 +5780,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue InOp0 = N->getOperand(0); if (getTypeAction(InOp0.getValueType()) == TargetLowering::TypePromoteInteger) - InOp0 = GetPromotedInteger(N->getOperand(0)); + InOp0 = GetPromotedInteger(InOp0); EVT InVT = InOp0.getValueType(); + EVT InSVT = InVT.getVectorElementType(); unsigned OutNumElems = OutVT.getVectorNumElements(); SmallVector<SDValue, 8> Ops; Ops.reserve(OutNumElems); for (unsigned i = 0; i != OutNumElems; ++i) { - // Extract the element from the original vector. 
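Among the new cases above, SCMP/UCMP (routed through TLI.expandCMP in ExpandIntOp_CMP) are LLVM's three-way compares, returning -1, 0, or 1 for less, equal, and greater. Their scalar meaning in two lines:

    #include <cassert>
    #include <cstdint>

    int8_t scmp32(int32_t a, int32_t b) { return (a > b) - (a < b); }
    int8_t ucmp32(uint32_t a, uint32_t b) { return (a > b) - (a < b); }

    int main() {
      assert(scmp32(-5, 3) == -1 && ucmp32(5, 3) == 1);
      assert(scmp32(3, 3) == 0);
    }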
- SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(), - BaseIdx, DAG.getConstant(i, dl, BaseIdx.getValueType())); - SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - InVT.getVectorElementType(), N->getOperand(0), Index); - + SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(), BaseIdx, + DAG.getConstant(i, dl, BaseIdx.getValueType())); + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InSVT, + N->getOperand(0), Index); SDValue Op = DAG.getAnyExtOrTrunc(Ext, dl, NOutVTElem); // Insert the converted element to the new vector. Ops.push_back(Op); @@ -5723,6 +5893,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ScalarOp(SDNode *N) { EVT NOutElemVT = NOutVT.getVectorElementType(); SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, N->getOperand(0)); + if (N->isVPOpcode()) + return DAG.getNode(N->getOpcode(), dl, NOutVT, Op, N->getOperand(1), + N->getOperand(2)); return DAG.getNode(N->getOpcode(), dl, NOutVT, Op); } @@ -5877,6 +6050,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VP_REDUCE(SDNode *N) { N->getOperand(1), N->getOperand(2), N->getOperand(3)); } +SDValue DAGTypeLegalizer::PromoteIntRes_PATCHPOINT(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + + assert(N->getNumValues() == 3 && "Expected 3 values for PATCHPOINT"); + SDVTList VTList = DAG.getVTList({NVT, MVT::Other, MVT::Glue}); + + SmallVector<SDValue> Ops(N->ops()); + SDValue Res = DAG.getNode(ISD::PATCHPOINT, dl, VTList, Ops); + + // Replace chain and glue uses with the new patchpoint. + SDValue From[] = {SDValue(N, 1), SDValue(N, 2)}; + SDValue To[] = {Res.getValue(1), Res.getValue(2)}; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + + return Res.getValue(0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDLoc dl(N); SDValue V0 = GetPromotedInteger(N->getOperand(0)); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 8a93433c5e04..cb6d3fe4db8a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -188,8 +188,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { #ifndef NDEBUG // Checked that NewNodes are only used by other NewNodes. - for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) { - SDNode *N = NewNodes[i]; + for (SDNode *N : NewNodes) { for (SDNode *U : N->uses()) assert(U->getNodeId() == NewNode && "NewNode used by non-NewNode!"); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 09f0bca8b861..d4e61c858890 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -15,6 +15,7 @@ #ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H #define LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H +#include "MatchContext.h" #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLowering.h" @@ -167,8 +168,6 @@ public: explicit DAGTypeLegalizer(SelectionDAG &dag) : TLI(dag.getTargetLoweringInfo()), DAG(dag), ValueTypeActions(TLI.getValueTypeActions()) { - static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE, - "Too many value types for ValueTypeActions to hold!"); } /// This is the main entry point for the type legalizer. 
This does a @@ -274,18 +273,25 @@ private: return DAG.getZeroExtendInReg(Op, dl, OldVT); } - // Get a promoted operand and sign or zero extend it to the final size - // (depending on TargetLoweringInfo::isSExtCheaperThanZExt). For a given - // subtarget and type, the choice of sign or zero-extension will be - // consistent. - SDValue SExtOrZExtPromotedInteger(SDValue Op) { + /// Get a promoted operand and zero extend it to the final size. + SDValue VPSExtPromotedInteger(SDValue Op, SDValue Mask, SDValue EVL) { EVT OldVT = Op.getValueType(); - SDLoc DL(Op); + SDLoc dl(Op); Op = GetPromotedInteger(Op); - if (TLI.isSExtCheaperThanZExt(OldVT, Op.getValueType())) - return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), Op, - DAG.getValueType(OldVT)); - return DAG.getZeroExtendInReg(Op, DL, OldVT); + // FIXME: Add VP_SIGN_EXTEND_INREG. + EVT VT = Op.getValueType(); + unsigned BitsDiff = VT.getScalarSizeInBits() - OldVT.getScalarSizeInBits(); + SDValue ShiftCst = DAG.getShiftAmountConstant(BitsDiff, VT, dl); + SDValue Shl = DAG.getNode(ISD::VP_SHL, dl, VT, Op, ShiftCst, Mask, EVL); + return DAG.getNode(ISD::VP_SRA, dl, VT, Shl, ShiftCst, Mask, EVL); + } + + /// Get a promoted operand and zero extend it to the final size. + SDValue VPZExtPromotedInteger(SDValue Op, SDValue Mask, SDValue EVL) { + EVT OldVT = Op.getValueType(); + SDLoc dl(Op); + Op = GetPromotedInteger(Op); + return DAG.getVPZeroExtendInReg(Op, Mask, EVL, dl, OldVT); } // Promote the given operand V (vector or scalar) according to N's specific @@ -322,6 +328,7 @@ private: SDValue PromoteIntRes_CTLZ(SDNode *N); SDValue PromoteIntRes_CTPOP_PARITY(SDNode *N); SDValue PromoteIntRes_CTTZ(SDNode *N); + SDValue PromoteIntRes_VP_CttzElements(SDNode *N); SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); SDValue PromoteIntRes_FP_TO_XINT_SAT(SDNode *N); @@ -333,9 +340,11 @@ private: SDValue PromoteIntRes_LOAD(LoadSDNode *N); SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N); SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N); + SDValue PromoteIntRes_VECTOR_COMPRESS(SDNode *N); SDValue PromoteIntRes_Overflow(SDNode *N); SDValue PromoteIntRes_FFREXP(SDNode *N); SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_CMP(SDNode *N); SDValue PromoteIntRes_Select(SDNode *N); SDValue PromoteIntRes_SELECT_CC(SDNode *N); SDValue PromoteIntRes_SETCC(SDNode *N); @@ -355,6 +364,7 @@ private: SDValue PromoteIntRes_VAARG(SDNode *N); SDValue PromoteIntRes_VSCALE(SDNode *N); SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); + template <class MatchContextClass> SDValue PromoteIntRes_ADDSUBSHLSAT(SDNode *N); SDValue PromoteIntRes_MULFIX(SDNode *N); SDValue PromoteIntRes_DIVFIX(SDNode *N); @@ -366,6 +376,7 @@ private: SDValue PromoteIntRes_FunnelShift(SDNode *N); SDValue PromoteIntRes_VPFunnelShift(SDNode *N); SDValue PromoteIntRes_IS_FPCLASS(SDNode *N); + SDValue PromoteIntRes_PATCHPOINT(SDNode *N); // Integer Operand Promotion. 
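The new VPSExtPromotedInteger helper above builds sign-extend-in-reg out of a VP_SHL/VP_SRA pair because no VP_SIGN_EXTEND_INREG node exists yet, while the zero-extending sibling defers to getVPZeroExtendInReg, which amounts to an AND with a low-bits mask. The scalar identity behind the shift pair (well defined under C++20 shift semantics):

    #include <cassert>
    #include <cstdint>

    int32_t sext_in_reg_i8(uint32_t x) {
      // Shift the i8 payload to the top, then arithmetic-shift it back down.
      return (int32_t)(x << 24) >> 24;
    }

    int main() {
      assert(sext_in_reg_i8(0x000000FF) == -1);   // bit 7 set -> negative
      assert(sext_in_reg_i8(0xABCD0041) == 0x41); // high garbage discarded
    }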
bool PromoteIntegerOperand(SDNode *N, unsigned OpNo); @@ -386,6 +397,7 @@ private: SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_Shift(SDNode *N); + SDValue PromoteIntOp_CMP(SDNode *N); SDValue PromoteIntOp_FunnelShift(SDNode *N); SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N); SDValue PromoteIntOp_VP_SIGN_EXTEND(SDNode *N); @@ -401,7 +413,7 @@ private: SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo); SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo); SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo); - SDValue PromoteIntOp_ADDSUBO_CARRY(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_VECTOR_COMPRESS(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N); SDValue PromoteIntOp_FIX(SDNode *N); SDValue PromoteIntOp_ExpOp(SDNode *N); @@ -413,6 +425,7 @@ private: SDValue PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_VP_SPLICE(SDNode *N, unsigned OpNo); + void SExtOrZExtPromotedOperands(SDValue &LHS, SDValue &RHS); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); //===--------------------------------------------------------------------===// @@ -439,7 +452,7 @@ private: void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandIntRes_READCYCLECOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_READCOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -468,9 +481,12 @@ private: void ExpandIntRes_MINMAX (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CMP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_AVG (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SHLSAT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_MULFIX (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -496,6 +512,7 @@ private: SDValue ExpandIntOp_SETCC(SDNode *N); SDValue ExpandIntOp_SETCCCARRY(SDNode *N); SDValue ExpandIntOp_Shift(SDNode *N); + SDValue ExpandIntOp_CMP(SDNode *N); SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue ExpandIntOp_TRUNCATE(SDNode *N); SDValue ExpandIntOp_XINT_TO_FP(SDNode *N); @@ -541,8 +558,12 @@ private: SDValue SoftenFloatRes_BITCAST(SDNode *N); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); SDValue SoftenFloatRes_ConstantFP(SDNode *N); + SDValue SoftenFloatRes_EXTRACT_ELEMENT(SDNode *N); SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FABS(SDNode *N); + SDValue SoftenFloatRes_FACOS(SDNode *N); + SDValue SoftenFloatRes_FASIN(SDNode *N); + SDValue SoftenFloatRes_FATAN(SDNode *N); SDValue SoftenFloatRes_FMINNUM(SDNode *N); SDValue SoftenFloatRes_FMAXNUM(SDNode *N); SDValue SoftenFloatRes_FADD(SDNode *N); @@ -550,6 +571,7 @@ private: SDValue SoftenFloatRes_FCEIL(SDNode *N); SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); SDValue SoftenFloatRes_FCOS(SDNode *N); + SDValue SoftenFloatRes_FCOSH(SDNode 
*N); SDValue SoftenFloatRes_FDIV(SDNode *N); SDValue SoftenFloatRes_FEXP(SDNode *N); SDValue SoftenFloatRes_FEXP2(SDNode *N); @@ -575,10 +597,14 @@ private: SDValue SoftenFloatRes_FROUND(SDNode *N); SDValue SoftenFloatRes_FROUNDEVEN(SDNode *N); SDValue SoftenFloatRes_FSIN(SDNode *N); + SDValue SoftenFloatRes_FSINH(SDNode *N); SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); + SDValue SoftenFloatRes_FTAN(SDNode *N); + SDValue SoftenFloatRes_FTANH(SDNode *N); SDValue SoftenFloatRes_FTRUNC(SDNode *N); SDValue SoftenFloatRes_LOAD(SDNode *N); + SDValue SoftenFloatRes_ATOMIC_LOAD(SDNode *N); SDValue SoftenFloatRes_SELECT(SDNode *N); SDValue SoftenFloatRes_SELECT_CC(SDNode *N); SDValue SoftenFloatRes_UNDEF(SDNode *N); @@ -602,6 +628,7 @@ private: SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); + SDValue SoftenFloatOp_ATOMIC_STORE(SDNode *N, unsigned OpNo); SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N); //===--------------------------------------------------------------------===// @@ -624,7 +651,11 @@ private: SDValue &Lo, SDValue &Hi); void ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC, SDValue &Lo, SDValue &Hi); + // clang-format off void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FACOS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FASIN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FATAN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -632,6 +663,7 @@ private: void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCOSH (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -654,11 +686,15 @@ private: void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FROUNDEVEN(SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSINH (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FTAN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FTANH (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi); + // clang-format on // Float Operand Expansion. 
bool ExpandFloatOperand(SDNode *N, unsigned OpNo); @@ -702,6 +738,7 @@ private: SDValue PromoteFloatRes_FP_ROUND(SDNode *N); SDValue PromoteFloatRes_STRICT_FP_ROUND(SDNode *N); SDValue PromoteFloatRes_LOAD(SDNode *N); + SDValue PromoteFloatRes_ATOMIC_LOAD(SDNode *N); SDValue PromoteFloatRes_SELECT(SDNode *N); SDValue PromoteFloatRes_SELECT_CC(SDNode *N); SDValue PromoteFloatRes_UnaryOp(SDNode *N); @@ -719,6 +756,7 @@ private: SDValue PromoteFloatOp_UnaryOp(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_ATOMIC_STORE(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo); @@ -735,6 +773,7 @@ private: void SetSoftPromotedHalf(SDValue Op, SDValue Result); void SoftPromoteHalfResult(SDNode *N, unsigned ResNo); + SDValue SoftPromoteHalfRes_ARITH_FENCE(SDNode *N); SDValue SoftPromoteHalfRes_BinOp(SDNode *N); SDValue SoftPromoteHalfRes_BITCAST(SDNode *N); SDValue SoftPromoteHalfRes_ConstantFP(SDNode *N); @@ -745,6 +784,7 @@ private: SDValue SoftPromoteHalfRes_FFREXP(SDNode *N); SDValue SoftPromoteHalfRes_FP_ROUND(SDNode *N); SDValue SoftPromoteHalfRes_LOAD(SDNode *N); + SDValue SoftPromoteHalfRes_ATOMIC_LOAD(SDNode *N); SDValue SoftPromoteHalfRes_SELECT(SDNode *N); SDValue SoftPromoteHalfRes_SELECT_CC(SDNode *N); SDValue SoftPromoteHalfRes_UnaryOp(SDNode *N); @@ -762,6 +802,7 @@ private: SDValue SoftPromoteHalfOp_SETCC(SDNode *N); SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo); + SDValue SoftPromoteHalfOp_ATOMIC_STORE(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_PATCHPOINT(SDNode *N, unsigned OpNo); @@ -784,6 +825,7 @@ private: void ScalarizeVectorResult(SDNode *N, unsigned ResNo); SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo); SDValue ScalarizeVecRes_BinOp(SDNode *N); + SDValue ScalarizeVecRes_CMP(SDNode *N); SDValue ScalarizeVecRes_TernaryOp(SDNode *N); SDValue ScalarizeVecRes_UnaryOp(SDNode *N); SDValue ScalarizeVecRes_StrictFPOp(SDNode *N); @@ -791,6 +833,7 @@ private: SDValue ScalarizeVecRes_InregOp(SDNode *N); SDValue ScalarizeVecRes_VecInregOp(SDNode *N); + SDValue ScalarizeVecRes_ADDRSPACECAST(SDNode *N); SDValue ScalarizeVecRes_BITCAST(SDNode *N); SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N); SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); @@ -827,6 +870,7 @@ private: SDValue ScalarizeVecOp_STRICT_FP_EXTEND(SDNode *N); SDValue ScalarizeVecOp_VECREDUCE(SDNode *N); SDValue ScalarizeVecOp_VECREDUCE_SEQ(SDNode *N); + SDValue ScalarizeVecOp_CMP(SDNode *N); //===--------------------------------------------------------------------===// // Vector Splitting Support: LegalizeVectorTypes.cpp @@ -857,7 +901,9 @@ private: void SplitVectorResult(SDNode *N, unsigned ResNo); void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_CMP(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ADDRSPACECAST(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FFREXP(SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi); void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -883,7 +929,9 @@ private: void 
SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi); void SplitVecRes_Gather(MemSDNode *VPGT, SDValue &Lo, SDValue &Hi, bool SplitSETCC = false); + void SplitVecRes_VECTOR_COMPRESS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_VP_SPLAT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -920,7 +968,9 @@ private: SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); SDValue SplitVecOp_FPOpDifferentTypes(SDNode *N); + SDValue SplitVecOp_CMP(SDNode *N); SDValue SplitVecOp_FP_TO_XINT_SAT(SDNode *N); + SDValue SplitVecOp_VP_CttzElements(SDNode *N); //===--------------------------------------------------------------------===// // Vector Widening Support: LegalizeVectorTypes.cpp @@ -960,6 +1010,7 @@ private: // Widen Vector Result Promotion. void WidenVectorResult(SDNode *N, unsigned ResNo); SDValue WidenVecRes_MERGE_VALUES(SDNode* N, unsigned ResNo); + SDValue WidenVecRes_ADDRSPACECAST(SDNode *N); SDValue WidenVecRes_AssertZext(SDNode* N); SDValue WidenVecRes_BITCAST(SDNode* N); SDValue WidenVecRes_BUILD_VECTOR(SDNode* N); @@ -971,6 +1022,7 @@ private: SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); + SDValue WidenVecRes_VECTOR_COMPRESS(SDNode *N); SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N); SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N); SDValue WidenVecRes_VP_GATHER(VPGatherSDNode* N); @@ -986,6 +1038,7 @@ private: SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); + SDValue WidenVecRes_CMP(SDNode *N); SDValue WidenVecRes_BinaryCanTrap(SDNode *N); SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N); SDValue WidenVecRes_StrictFP(SDNode *N); @@ -995,7 +1048,7 @@ private: SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N); SDValue WidenVecRes_XRINT(SDNode *N); SDValue WidenVecRes_FCOPYSIGN(SDNode *N); - SDValue WidenVecRes_IS_FPCLASS(SDNode *N); + SDValue WidenVecRes_UnarySameEltsWithScalarArg(SDNode *N); SDValue WidenVecRes_ExpOp(SDNode *N); SDValue WidenVecRes_Unary(SDNode *N); SDValue WidenVecRes_InregOp(SDNode *N); @@ -1005,6 +1058,7 @@ private: SDValue WidenVecOp_BITCAST(SDNode *N); SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N); SDValue WidenVecOp_EXTEND(SDNode *N); + SDValue WidenVecOp_CMP(SDNode *N); SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue WidenVecOp_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); @@ -1016,6 +1070,7 @@ private: SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_VP_SCATTER(SDNode* N, unsigned OpNo); + SDValue WidenVecOp_VP_SPLAT(SDNode *N, unsigned OpNo); SDValue WidenVecOp_SETCC(SDNode* N); SDValue WidenVecOp_STRICT_FSETCC(SDNode* N); SDValue WidenVecOp_VSELECT(SDNode *N); @@ -1028,6 +1083,7 @@ private: SDValue WidenVecOp_VECREDUCE_SEQ(SDNode *N); SDValue WidenVecOp_VP_REDUCE(SDNode *N); SDValue WidenVecOp_ExpOp(SDNode *N); + SDValue WidenVecOp_VP_CttzElements(SDNode *N); /// Helper function to generate a set of operations to perform /// a vector operation for a wider type. 
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 1fbd6322f9ed..57843f0959ac 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -28,12 +28,14 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" @@ -147,6 +149,14 @@ class VectorLegalizer { void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results); void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results); + bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC, + SmallVectorImpl<SDValue> &Results); + bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128, + SmallVectorImpl<SDValue> &Results); + void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results); /// Implements vector promotion. @@ -166,13 +176,6 @@ class VectorLegalizer { /// truncated back to the original type. void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results); - /// Implements vector reduce operation promotion. - /// - /// All vector operands are promoted to a vector type with larger element - /// type, and the start value is promoted to a larger scalar type. Then the - /// result is truncated back to the original scalar type. - void PromoteReduction(SDNode *Node, SmallVectorImpl<SDValue> &Results); - /// Implements vector setcc operation promotion. 
/// /// All vector operands are promoted to a vector type with larger element @@ -364,6 +367,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::ROTL: case ISD::ROTR: case ISD::ABS: + case ISD::ABDS: + case ISD::ABDU: + case ISD::AVGCEILS: + case ISD::AVGCEILU: + case ISD::AVGFLOORS: + case ISD::AVGFLOORU: case ISD::BSWAP: case ISD::BITREVERSE: case ISD::CTLZ: @@ -392,6 +401,13 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FSQRT: case ISD::FSIN: case ISD::FCOS: + case ISD::FTAN: + case ISD::FASIN: + case ISD::FACOS: + case ISD::FATAN: + case ISD::FSINH: + case ISD::FCOSH: + case ISD::FTANH: case ISD::FLDEXP: case ISD::FPOWI: case ISD::FPOW: @@ -404,14 +420,13 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FCEIL: case ISD::FTRUNC: case ISD::FRINT: - case ISD::LRINT: - case ISD::LLRINT: case ISD::FNEARBYINT: case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FFLOOR: case ISD::FP_ROUND: case ISD::FP_EXTEND: + case ISD::FPTRUNC_ROUND: case ISD::FMA: case ISD::SIGN_EXTEND_INREG: case ISD::ANY_EXTEND_VECTOR_INREG: @@ -440,6 +455,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: case ISD::MGATHER: + case ISD::VECTOR_COMPRESS: + case ISD::SCMP: + case ISD::UCMP: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); break; case ISD::SMULFIX: @@ -455,6 +473,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Node->getValueType(0), Scale); break; } + case ISD::LRINT: + case ISD::LLRINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::VECREDUCE_ADD: @@ -499,6 +519,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { if (Action != TargetLowering::Legal) \ break; \ } \ + /* Defer non-vector results to LegalizeDAG. */ \ + if (!Node->getValueType(0).isVector() && \ + Node->getValueType(0) != MVT::Other) { \ + Action = TargetLowering::Legal; \ + break; \ + } \ Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \ } break; #include "llvm/IR/VPIntrinsics.def" @@ -569,50 +595,6 @@ bool VectorLegalizer::LowerOperationWrapper(SDNode *Node, return true; } -void VectorLegalizer::PromoteReduction(SDNode *Node, - SmallVectorImpl<SDValue> &Results) { - MVT VecVT = Node->getOperand(1).getSimpleValueType(); - MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT); - MVT ScalarVT = Node->getSimpleValueType(0); - MVT NewScalarVT = NewVecVT.getVectorElementType(); - - SDLoc DL(Node); - SmallVector<SDValue, 4> Operands(Node->getNumOperands()); - - // promote the initial value. - if (Node->getOperand(0).getValueType().isFloatingPoint()) - Operands[0] = - DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(0)); - else - Operands[0] = - DAG.getNode(ISD::ANY_EXTEND, DL, NewScalarVT, Node->getOperand(0)); - - for (unsigned j = 1; j != Node->getNumOperands(); ++j) - if (Node->getOperand(j).getValueType().isVector() && - !(ISD::isVPOpcode(Node->getOpcode()) && - ISD::getVPMaskIdx(Node->getOpcode()) == j)) // Skip mask operand. - // promote the vector operand. - if (Node->getOperand(j).getValueType().isFloatingPoint()) - Operands[j] = - DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j)); - else - Operands[j] = - DAG.getNode(ISD::ANY_EXTEND, DL, NewVecVT, Node->getOperand(j)); - else - Operands[j] = Node->getOperand(j); // Skip VL operand. 
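A few hunks below, the AVGCEIL/AVGFLOOR cases hand off to TLI.expandAVG. The classic overflow-free identities such an expansion builds on, sketched on scalars (the exact node sequence expandAVG emits depends on the target and legality, and is not reproduced here):

    #include <cassert>
    #include <cstdint>

    uint8_t avgflooru8(uint8_t a, uint8_t b) {
      return (a & b) + ((a ^ b) >> 1); // stays in the original width
    }
    uint8_t avgceilu8(uint8_t a, uint8_t b) {
      return (a | b) - ((a ^ b) >> 1);
    }

    int main() {
      assert(avgflooru8(255, 254) == 254); // naive (a+b)/2 would overflow i8
      assert(avgceilu8(255, 254) == 255);
      assert(avgflooru8(3, 4) == 3 && avgceilu8(3, 4) == 4);
    }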
- - SDValue Res = DAG.getNode(Node->getOpcode(), DL, NewScalarVT, Operands, - Node->getFlags()); - - if (ScalarVT.isFloatingPoint()) - Res = DAG.getNode(ISD::FP_ROUND, DL, ScalarVT, Res, - DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); - else - Res = DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, Res); - - Results.push_back(Res); -} - void VectorLegalizer::PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results) { MVT VecVT = Node->getOperand(0).getSimpleValueType(); @@ -697,23 +679,6 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) { // Promote the operation by extending the operand. PromoteFP_TO_INT(Node, Results); return; - case ISD::VP_REDUCE_ADD: - case ISD::VP_REDUCE_MUL: - case ISD::VP_REDUCE_AND: - case ISD::VP_REDUCE_OR: - case ISD::VP_REDUCE_XOR: - case ISD::VP_REDUCE_SMAX: - case ISD::VP_REDUCE_SMIN: - case ISD::VP_REDUCE_UMAX: - case ISD::VP_REDUCE_UMIN: - case ISD::VP_REDUCE_FADD: - case ISD::VP_REDUCE_FMUL: - case ISD::VP_REDUCE_FMAX: - case ISD::VP_REDUCE_FMIN: - case ISD::VP_REDUCE_SEQ_FADD: - // Promote the operation by extending the operand. - PromoteReduction(Node, Results); - return; case ISD::VP_SETCC: case ISD::SETCC: // Promote the operation by extending the operand. @@ -966,6 +931,15 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { return; } break; + case ISD::AVGCEILS: + case ISD::AVGCEILU: + case ISD::AVGFLOORS: + case ISD::AVGFLOORU: + if (SDValue Expanded = TLI.expandAVG(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::BITREVERSE: ExpandBITREVERSE(Node, Results); return; @@ -1038,6 +1012,10 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { return; } break; + case ISD::FMINIMUM: + case ISD::FMAXIMUM: + Results.push_back(TLI.expandFMINIMUM_FMAXIMUM(Node, DAG)); + return; case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: @@ -1139,11 +1117,27 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { case ISD::VP_MERGE: Results.push_back(ExpandVP_MERGE(Node)); return; + case ISD::FREM: + if (tryExpandVecMathCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128, Results)) + return; + + break; + case ISD::VECTOR_COMPRESS: + Results.push_back(TLI.expandVECTOR_COMPRESS(Node, DAG)); + return; } SDValue Unrolled = DAG.UnrollVectorOp(Node); - for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I) - Results.push_back(Unrolled.getValue(I)); + if (Node->getNumValues() == 1) { + Results.push_back(Unrolled); + } else { + assert(Node->getNumValues() == Unrolled->getNumValues() && + "VectorLegalizer Expand returned wrong number of results!"); + for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I) + Results.push_back(Unrolled.getValue(I)); + } } SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) { @@ -1842,6 +1836,117 @@ void VectorLegalizer::ExpandREM(SDNode *Node, Results.push_back(Result); } +// Try to expand libm nodes into vector math routine calls. Callers provide the +// LibFunc equivalent of the passed in Node, which is used to lookup mappings +// within TargetLibraryInfo. The only mappings considered are those where the +// result and all operands are the same vector type. While predicated nodes are +// not supported, we will emit calls to masked routines by passing in an all +// true mask. 
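// Editor's sketch, not part of the patch: a standalone C++ model of the
// masked-fallback semantics described in the comment above. When only a
// masked variant of a vector math routine is mapped, the expansion passes an
// all-true mask, so the masked call must agree with the unmasked one on every
// lane. All routine names below are hypothetical, not libm or LLVM symbols.
#include <array>
#include <cmath>

// Hypothetical masked 4-lane frem: inactive lanes pass operand A through.
static std::array<float, 4> vfrem4_mask(std::array<float, 4> A,
                                        std::array<float, 4> B,
                                        std::array<bool, 4> M) {
  std::array<float, 4> R{};
  for (int I = 0; I < 4; ++I)
    R[I] = M[I] ? std::fmod(A[I], B[I]) : A[I];
  return R;
}

// With every lane active, the masked call reduces to plain lane-wise fmod,
// which is what the legalizer relies on when no unmasked mapping exists.
static std::array<float, 4> vfrem4(std::array<float, 4> A,
                                   std::array<float, 4> B) {
  return vfrem4_mask(A, B, {true, true, true, true});
}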
+bool VectorLegalizer::tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC, + SmallVectorImpl<SDValue> &Results) { + // Chain must be propagated but currently strict fp operations are down + // converted to their none strict counterpart. + assert(!Node->isStrictFPOpcode() && "Unexpected strict fp operation!"); + + const char *LCName = TLI.getLibcallName(LC); + if (!LCName) + return false; + LLVM_DEBUG(dbgs() << "Looking for vector variant of " << LCName << "\n"); + + EVT VT = Node->getValueType(0); + ElementCount VL = VT.getVectorElementCount(); + + // Lookup a vector function equivalent to the specified libcall. Prefer + // unmasked variants but we will generate a mask if need be. + const TargetLibraryInfo &TLibInfo = DAG.getLibInfo(); + const VecDesc *VD = TLibInfo.getVectorMappingInfo(LCName, VL, false); + if (!VD) + VD = TLibInfo.getVectorMappingInfo(LCName, VL, /*Masked=*/true); + if (!VD) + return false; + + LLVMContext *Ctx = DAG.getContext(); + Type *Ty = VT.getTypeForEVT(*Ctx); + Type *ScalarTy = Ty->getScalarType(); + + // Construct a scalar function type based on Node's operands. + SmallVector<Type *, 8> ArgTys; + for (unsigned i = 0; i < Node->getNumOperands(); ++i) { + assert(Node->getOperand(i).getValueType() == VT && + "Expected matching vector types!"); + ArgTys.push_back(ScalarTy); + } + FunctionType *ScalarFTy = FunctionType::get(ScalarTy, ArgTys, false); + + // Generate call information for the vector function. + const std::string MangledName = VD->getVectorFunctionABIVariantString(); + auto OptVFInfo = VFABI::tryDemangleForVFABI(MangledName, ScalarFTy); + if (!OptVFInfo) + return false; + + LLVM_DEBUG(dbgs() << "Found vector variant " << VD->getVectorFnName() + << "\n"); + + // Sanity check just in case OptVFInfo has unexpected parameters. + if (OptVFInfo->Shape.Parameters.size() != + Node->getNumOperands() + VD->isMasked()) + return false; + + // Collect vector call operands. + + SDLoc DL(Node); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.IsSExt = false; + Entry.IsZExt = false; + + unsigned OpNum = 0; + for (auto &VFParam : OptVFInfo->Shape.Parameters) { + if (VFParam.ParamKind == VFParamKind::GlobalPredicate) { + EVT MaskVT = TLI.getSetCCResultType(DAG.getDataLayout(), *Ctx, VT); + Entry.Node = DAG.getBoolConstant(true, DL, MaskVT, VT); + Entry.Ty = MaskVT.getTypeForEVT(*Ctx); + Args.push_back(Entry); + continue; + } + + // Only vector operands are supported. + if (VFParam.ParamKind != VFParamKind::Vector) + return false; + + Entry.Node = Node->getOperand(OpNum++); + Entry.Ty = Ty; + Args.push_back(Entry); + } + + // Emit a call to the vector function. + SDValue Callee = DAG.getExternalSymbol(VD->getVectorFnName().data(), + TLI.getPointerTy(DAG.getDataLayout())); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(DL) + .setChain(DAG.getEntryNode()) + .setLibCallee(CallingConv::C, Ty, Callee, std::move(Args)); + + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + Results.push_back(CallResult.first); + return true; +} + +/// Try to expand the node to a vector libcall based on the result type. 
+bool VectorLegalizer::tryExpandVecMathCall( + SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128, SmallVectorImpl<SDValue> &Results) { + RTLIB::Libcall LC = RTLIB::getFPLibCall( + Node->getValueType(0).getVectorElementType(), Call_F32, Call_F64, + Call_F80, Call_F128, Call_PPCF128); + + if (LC == RTLIB::UNKNOWN_LIBCALL) + return false; + + return tryExpandVecMathCall(Node, LC, Results); +} + void VectorLegalizer::UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results) { EVT VT = Node->getValueType(0); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 7fc252600534..92b62ccdc275 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TypeSize.h" @@ -84,8 +85,12 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: case ISD::FABS: + case ISD::FACOS: + case ISD::FASIN: + case ISD::FATAN: case ISD::FCEIL: case ISD::FCOS: + case ISD::FCOSH: case ISD::FEXP: case ISD::FEXP2: case ISD::FEXP10: @@ -106,7 +111,10 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FSIN: + case ISD::FSINH: case ISD::FSQRT: + case ISD::FTAN: + case ISD::FTANH: case ISD::FTRUNC: case ISD::SIGN_EXTEND: case ISD::SINT_TO_FP: @@ -116,11 +124,18 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FCANONICALIZE: R = ScalarizeVecRes_UnaryOp(N); break; + case ISD::ADDRSPACECAST: + R = ScalarizeVecRes_ADDRSPACECAST(N); + break; case ISD::FFREXP: R = ScalarizeVecRes_FFREXP(N, ResNo); break; case ISD::ADD: case ISD::AND: + case ISD::AVGCEILS: + case ISD::AVGCEILU: + case ISD::AVGFLOORS: + case ISD::AVGFLOORU: case ISD::FADD: case ISD::FCOPYSIGN: case ISD::FDIV: @@ -164,6 +179,12 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::ROTR: R = ScalarizeVecRes_BinOp(N); break; + + case ISD::SCMP: + case ISD::UCMP: + R = ScalarizeVecRes_CMP(N); + break; + case ISD::FMA: case ISD::FSHL: case ISD::FSHR: @@ -213,6 +234,27 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { LHS.getValueType(), LHS, RHS, N->getFlags()); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_CMP(SDNode *N) { + SDLoc DL(N); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + if (getTypeAction(LHS.getValueType()) == + TargetLowering::TypeScalarizeVector) { + LHS = GetScalarizedVector(LHS); + RHS = GetScalarizedVector(RHS); + } else { + EVT VT = LHS.getValueType().getVectorElementType(); + LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS, + DAG.getVectorIdxConstant(0, DL)); + RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS, + DAG.getVectorIdxConstant(0, DL)); + } + + return DAG.getNode(N->getOpcode(), SDLoc(N), + N->getValueType(0).getVectorElementType(), LHS, RHS); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { SDValue Op0 = GetScalarizedVector(N->getOperand(0)); SDValue Op1 = 
GetScalarizedVector(N->getOperand(1)); @@ -475,6 +517,31 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VecInregOp(SDNode *N) { llvm_unreachable("Illegal extend_vector_inreg opcode"); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ADDRSPACECAST(SDNode *N) { + EVT DestVT = N->getValueType(0).getVectorElementType(); + SDValue Op = N->getOperand(0); + EVT OpVT = Op.getValueType(); + SDLoc DL(N); + // The result needs scalarizing, but it's not a given that the source does. + // This is a workaround for targets where it's impossible to scalarize the + // result of a conversion, because the source type is legal. + // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32} + // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is + // legal and was not scalarized. + // See the similar logic in ScalarizeVecRes_SETCC + if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { + Op = GetScalarizedVector(Op); + } else { + EVT VT = OpVT.getVectorElementType(); + Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, + DAG.getVectorIdxConstant(0, DL)); + } + auto *AddrSpaceCastN = cast<AddrSpaceCastSDNode>(N); + unsigned SrcAS = AddrSpaceCastN->getSrcAddressSpace(); + unsigned DestAS = AddrSpaceCastN->getDestAddressSpace(); + return DAG.getAddrSpaceCast(DL, DestVT, Op, SrcAS, DestAS); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { // If the operand is wider than the vector element type then it is implicitly // truncated. Make that explicit here. @@ -741,6 +808,10 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::VECREDUCE_SEQ_FMUL: Res = ScalarizeVecOp_VECREDUCE_SEQ(N); break; + case ISD::SCMP: + case ISD::UCMP: + Res = ScalarizeVecOp_CMP(N); + break; } // If the result is null, the sub-method took care of registering results etc. 
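The ISD::SCMP/ISD::UCMP scalarization added above (with the operand-side ScalarizeVecOp_CMP in the following hunk) only rearranges one-element vectors; the three-way compare itself happens on a single lane. As a point of reference, here is a minimal standalone sketch of those lane semantics in plain C++ rather than SelectionDAG; the helper names are illustrative only:

#include <cstdint>

// ISD::SCMP lane semantics: -1 if A < B, 0 if A == B, +1 if A > B, under a
// signed comparison. (A > B) - (A < B) encodes exactly that three-way result.
static int8_t scmpLane(int32_t A, int32_t B) {
  return static_cast<int8_t>((A > B) - (A < B));
}

// ISD::UCMP returns the same -1/0/+1 result under unsigned ordering.
static int8_t ucmpLane(uint32_t A, uint32_t B) {
  return static_cast<int8_t>((A > B) - (A < B));
}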
@@ -961,6 +1032,15 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE_SEQ(SDNode *N) { AccOp, Op, N->getFlags()); } +SDValue DAGTypeLegalizer::ScalarizeVecOp_CMP(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + + EVT ResVT = N->getValueType(0).getVectorElementType(); + SDValue Cmp = DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, LHS, RHS); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Cmp); +} + //===----------------------------------------------------------------------===// // Result Vector Splitting //===----------------------------------------------------------------------===// @@ -1005,6 +1085,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FCOPYSIGN: SplitVecRes_FPOp_MultiType(N, Lo, Hi); break; case ISD::IS_FPCLASS: SplitVecRes_IS_FPCLASS(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; + case ISD::EXPERIMENTAL_VP_SPLAT: SplitVecRes_VP_SPLAT(N, Lo, Hi); break; case ISD::SPLAT_VECTOR: case ISD::SCALAR_TO_VECTOR: SplitVecRes_ScalarOp(N, Lo, Hi); @@ -1029,6 +1110,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::VP_GATHER: SplitVecRes_Gather(cast<MemSDNode>(N), Lo, Hi, /*SplitSETCC*/ true); break; + case ISD::VECTOR_COMPRESS: + SplitVecRes_VECTOR_COMPRESS(N, Lo, Hi); + break; case ISD::SETCC: case ISD::VP_SETCC: SplitVecRes_SETCC(N, Lo, Hi); @@ -1075,9 +1159,13 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::CTPOP: case ISD::VP_CTPOP: case ISD::FABS: case ISD::VP_FABS: + case ISD::FACOS: + case ISD::FASIN: + case ISD::FATAN: case ISD::FCEIL: case ISD::VP_FCEIL: case ISD::FCOS: + case ISD::FCOSH: case ISD::FEXP: case ISD::FEXP2: case ISD::FEXP10: @@ -1102,13 +1190,18 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FRINT: case ISD::VP_FRINT: case ISD::LRINT: + case ISD::VP_LRINT: case ISD::LLRINT: + case ISD::VP_LLRINT: case ISD::FROUND: case ISD::VP_FROUND: case ISD::FROUNDEVEN: case ISD::VP_FROUNDEVEN: case ISD::FSIN: + case ISD::FSINH: case ISD::FSQRT: case ISD::VP_SQRT: + case ISD::FTAN: + case ISD::FTANH: case ISD::FTRUNC: case ISD::VP_FROUNDTOZERO: case ISD::SINT_TO_FP: @@ -1120,6 +1213,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FCANONICALIZE: SplitVecRes_UnaryOp(N, Lo, Hi); break; + case ISD::ADDRSPACECAST: + SplitVecRes_ADDRSPACECAST(N, Lo, Hi); + break; case ISD::FFREXP: SplitVecRes_FFREXP(N, ResNo, Lo, Hi); break; @@ -1137,11 +1233,19 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::MUL: case ISD::VP_MUL: case ISD::MULHS: case ISD::MULHU: + case ISD::AVGCEILS: + case ISD::AVGCEILU: + case ISD::AVGFLOORS: + case ISD::AVGFLOORU: case ISD::FADD: case ISD::VP_FADD: case ISD::FSUB: case ISD::VP_FSUB: case ISD::FMUL: case ISD::VP_FMUL: - case ISD::FMINNUM: case ISD::VP_FMINNUM: - case ISD::FMAXNUM: case ISD::VP_FMAXNUM: + case ISD::FMINNUM: + case ISD::FMINNUM_IEEE: + case ISD::VP_FMINNUM: + case ISD::FMAXNUM: + case ISD::FMAXNUM_IEEE: + case ISD::VP_FMAXNUM: case ISD::FMINIMUM: case ISD::VP_FMINIMUM: case ISD::FMAXIMUM: @@ -1154,8 +1258,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::OR: case ISD::VP_OR: case ISD::XOR: case ISD::VP_XOR: case ISD::SHL: case ISD::VP_SHL: - case ISD::SRA: case ISD::VP_ASHR: - case ISD::SRL: case ISD::VP_LSHR: + case ISD::SRA: case ISD::VP_SRA: + case 
ISD::SRL: case ISD::VP_SRL: case ISD::UREM: case ISD::VP_UREM: case ISD::SREM: case ISD::VP_SREM: case ISD::FREM: case ISD::VP_FREM: @@ -1163,10 +1267,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SMAX: case ISD::VP_SMAX: case ISD::UMIN: case ISD::VP_UMIN: case ISD::UMAX: case ISD::VP_UMAX: - case ISD::SADDSAT: - case ISD::UADDSAT: - case ISD::SSUBSAT: - case ISD::USUBSAT: + case ISD::SADDSAT: case ISD::VP_SADDSAT: + case ISD::UADDSAT: case ISD::VP_UADDSAT: + case ISD::SSUBSAT: case ISD::VP_SSUBSAT: + case ISD::USUBSAT: case ISD::VP_USUBSAT: case ISD::SSHLSAT: case ISD::USHLSAT: case ISD::ROTL: @@ -1182,6 +1286,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_TernaryOp(N, Lo, Hi); break; + case ISD::SCMP: case ISD::UCMP: + SplitVecRes_CMP(N, Lo, Hi); + break; + #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: #include "llvm/IR/ConstrainedOps.def" @@ -1325,6 +1433,27 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, {Op0Hi, Op1Hi, Op2Hi, MaskHi, EVLHi}, Flags); } +void DAGTypeLegalizer::SplitVecRes_CMP(SDNode *N, SDValue &Lo, SDValue &Hi) { + LLVMContext &Ctxt = *DAG.getContext(); + SDLoc dl(N); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + if (getTypeAction(LHS.getValueType()) == TargetLowering::TypeSplitVector) { + GetSplitVector(LHS, LHSLo, LHSHi); + GetSplitVector(RHS, RHSLo, RHSHi); + } else { + std::tie(LHSLo, LHSHi) = DAG.SplitVector(LHS, dl); + std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, dl); + } + + EVT SplitResVT = N->getValueType(0).getHalfNumVectorElementsVT(Ctxt); + Lo = DAG.getNode(N->getOpcode(), dl, SplitResVT, LHSLo, RHSLo); + Hi = DAG.getNode(N->getOpcode(), dl, SplitResVT, LHSHi, RHSHi); +} + void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHSLo, LHSHi; GetSplitVector(N->getOperand(0), LHSLo, LHSHi); @@ -1386,6 +1515,13 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, report_fatal_error("Scalarization of scalable vectors is not supported."); } + if (LoVT.isScalableVector()) { + auto [InLo, InHi] = DAG.SplitVectorOperand(N, 0); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, InLo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, InHi); + return; + } + // In the general case, convert the input to an integer and split it by hand. EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); @@ -1784,17 +1920,12 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, } } - // See if the target wants to custom expand this node. - if (CustomLowerNode(N, N->getValueType(0), true)) - return; - // Make the vector elements byte-addressable if they aren't already. EVT VecVT = Vec.getValueType(); EVT EltVT = VecVT.getVectorElementType(); - if (VecVT.getScalarSizeInBits() < 8) { - EltVT = MVT::i8; - VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, - VecVT.getVectorElementCount()); + if (!EltVT.isByteSized()) { + EltVT = EltVT.changeTypeToInteger().getRoundIntegerType(*DAG.getContext()); + VecVT = VecVT.changeElementType(EltVT); Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec); // Extend the element type to match if needed. 
if (EltVT.bitsGT(Elt.getValueType())) @@ -1880,6 +2011,16 @@ void DAGTypeLegalizer::SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, } } +void DAGTypeLegalizer::SplitVecRes_VP_SPLAT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0)); + auto [MaskLo, MaskHi] = SplitMask(N->getOperand(1)); + auto [EVLLo, EVLHi] = DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl); + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, N->getOperand(0), MaskLo, EVLLo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, N->getOperand(0), MaskHi, EVLHi); +} + void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi) { assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); @@ -1965,7 +2106,8 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( LD->getPointerInfo(), MachineMemOperand::MOLoad, - MemoryLocation::UnknownSize, Alignment, LD->getAAInfo(), LD->getRanges()); + LocationSize::beforeOrAfterPointer(), Alignment, LD->getAAInfo(), + LD->getRanges()); Lo = DAG.getLoadVP(LD->getAddressingMode(), ExtType, LoVT, dl, Ch, Ptr, Offset, @@ -1988,8 +2130,8 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, LoMemVT.getStoreSize().getFixedValue()); MMO = DAG.getMachineFunction().getMachineMemOperand( - MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment, - LD->getAAInfo(), LD->getRanges()); + MPI, MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(), + Alignment, LD->getAAInfo(), LD->getRanges()); Hi = DAG.getLoadVP(LD->getAddressingMode(), ExtType, HiVT, dl, Ch, Ptr, Offset, MaskHi, EVLHi, HiMemVT, MMO, @@ -2068,8 +2210,8 @@ void DAGTypeLegalizer::SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(SLD->getPointerInfo().getAddrSpace()), - MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment, - SLD->getAAInfo(), SLD->getRanges()); + MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(), + Alignment, SLD->getAAInfo(), SLD->getRanges()); Hi = DAG.getStridedLoadVP(SLD->getAddressingMode(), SLD->getExtensionType(), HiVT, DL, SLD->getChain(), Ptr, SLD->getOffset(), @@ -2128,7 +2270,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MLD->getPointerInfo(), MachineMemOperand::MOLoad, - MemoryLocation::UnknownSize, Alignment, MLD->getAAInfo(), + LocationSize::beforeOrAfterPointer(), Alignment, MLD->getAAInfo(), MLD->getRanges()); Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT, @@ -2152,8 +2294,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, LoMemVT.getStoreSize().getFixedValue()); MMO = DAG.getMachineFunction().getMachineMemOperand( - MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment, - MLD->getAAInfo(), MLD->getRanges()); + MPI, MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(), + Alignment, MLD->getAAInfo(), MLD->getRanges()); Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi, HiMemVT, MMO, MLD->getAddressingMode(), ExtType, @@ -2215,7 +2357,8 @@ void DAGTypeLegalizer::SplitVecRes_Gather(MemSDNode *N, SDValue &Lo, MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( N->getPointerInfo(), MachineMemOperand::MOLoad, - MemoryLocation::UnknownSize, Alignment, 
N->getAAInfo(), N->getRanges()); + LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(), + N->getRanges()); if (auto *MGT = dyn_cast<MaskedGatherSDNode>(N)) { SDValue PassThru = MGT->getPassThru(); @@ -2261,6 +2404,17 @@ void DAGTypeLegalizer::SplitVecRes_Gather(MemSDNode *N, SDValue &Lo, ReplaceValueWith(SDValue(N, 1), Ch); } +void DAGTypeLegalizer::SplitVecRes_VECTOR_COMPRESS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // This is not "trivial", as there is a dependency between the two subvectors. + // Depending on the number of 1s in the mask, the elements from the Hi vector + // need to be moved to the Lo vector. So we just perform this as one "big" + // operation and then extract the Lo and Hi vectors from that. This gets rid + // of VECTOR_COMPRESS and all other operands can be legalized later. + SDValue Compressed = TLI.expandVECTOR_COMPRESS(N, DAG); + std::tie(Lo, Hi) = DAG.SplitVector(Compressed, SDLoc(N)); +} + void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && @@ -2342,6 +2496,26 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, Hi = DAG.getNode(Opcode, dl, HiVT, {Hi, MaskHi, EVLHi}, Flags); } +void DAGTypeLegalizer::SplitVecRes_ADDRSPACECAST(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0)); + + // If the input also splits, handle it directly for a compile time speedup. + // Otherwise split it by hand. + EVT InVT = N->getOperand(0).getValueType(); + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) + GetSplitVector(N->getOperand(0), Lo, Hi); + else + std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); + + auto *AddrSpaceCastN = cast<AddrSpaceCastSDNode>(N); + unsigned SrcAS = AddrSpaceCastN->getSrcAddressSpace(); + unsigned DestAS = AddrSpaceCastN->getDestAddressSpace(); + Lo = DAG.getAddrSpaceCast(dl, LoVT, Lo, SrcAS, DestAS); + Hi = DAG.getAddrSpaceCast(dl, HiVT, Hi, SrcAS, DestAS); +} + void DAGTypeLegalizer::SplitVecRes_FFREXP(SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); @@ -2848,18 +3022,10 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo, SDValue &Hi) { - EVT VT = N->getValueType(0); SDLoc DL(N); - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); - SDValue Expanded = TLI.expandVectorSplice(N, DAG); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Expanded, - DAG.getVectorIdxConstant(0, DL)); - Hi = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Expanded, - DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL)); + std::tie(Lo, Hi) = DAG.SplitVector(Expanded, DL); } void DAGTypeLegalizer::SplitVecRes_VP_REVERSE(SDNode *N, SDValue &Lo, @@ -2882,10 +3048,10 @@ void DAGTypeLegalizer::SplitVecRes_VP_REVERSE(SDNode *N, SDValue &Lo, auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand( - PtrInfo, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, + PtrInfo, MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer(), Alignment); MachineMemOperand *LoadMMO = DAG.getMachineFunction().getMachineMemOperand( - PtrInfo, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, + PtrInfo, MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(), Alignment); unsigned EltWidth = VT.getScalarSizeInBits() / 8; @@ -2904,12 
+3070,7 @@ void DAGTypeLegalizer::SplitVecRes_VP_REVERSE(SDNode *N, SDValue &Lo, SDValue Load = DAG.getLoadVP(VT, DL, Store, StackPtr, Mask, EVL, LoadMMO); - auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Load, - DAG.getVectorIdxConstant(0, DL)); - Hi = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Load, - DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL)); + std::tie(Lo, Hi) = DAG.SplitVector(Load, DL); } void DAGTypeLegalizer::SplitVecRes_VECTOR_DEINTERLEAVE(SDNode *N) { @@ -2970,6 +3131,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { "operand!\n"); case ISD::VP_SETCC: + case ISD::STRICT_FSETCC: case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break; case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break; case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; @@ -3043,6 +3205,11 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { Res = SplitVecOp_FPOpDifferentTypes(N); break; + case ISD::SCMP: + case ISD::UCMP: + Res = SplitVecOp_CMP(N); + break; + case ISD::ANY_EXTEND_VECTOR_INREG: case ISD::SIGN_EXTEND_VECTOR_INREG: case ISD::ZERO_EXTEND_VECTOR_INREG: @@ -3085,8 +3252,14 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::VP_REDUCE_UMIN: case ISD::VP_REDUCE_FMAX: case ISD::VP_REDUCE_FMIN: + case ISD::VP_REDUCE_FMAXIMUM: + case ISD::VP_REDUCE_FMINIMUM: Res = SplitVecOp_VP_REDUCE(N, OpNo); break; + case ISD::VP_CTTZ_ELTS: + case ISD::VP_CTTZ_ELTS_ZERO_UNDEF: + Res = SplitVecOp_VP_CttzElements(N); + break; } // If the result is null, the sub-method took care of registering results etc. @@ -3255,16 +3428,25 @@ SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) { // For example, i64 = BITCAST v4i16 on alpha. Typically the vector will // end up being split all the way down to individual components. Convert the // split pieces into integers and reassemble. + EVT ResVT = N->getValueType(0); SDValue Lo, Hi; GetSplitVector(N->getOperand(0), Lo, Hi); + SDLoc dl(N); + + if (ResVT.isScalableVector()) { + auto [LoVT, HiVT] = DAG.GetSplitDestVTs(ResVT); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); + } + Lo = BitConvertToInteger(Lo); Hi = BitConvertToInteger(Hi); if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); - return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), - JoinIntegers(Lo, Hi)); + return DAG.getNode(ISD::BITCAST, dl, ResVT, JoinIntegers(Lo, Hi)); } SDValue DAGTypeLegalizer::SplitVecOp_INSERT_SUBVECTOR(SDNode *N, @@ -3377,11 +3559,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { // Make the vector elements byte-addressable if they aren't already. SDLoc dl(N); EVT EltVT = VecVT.getVectorElementType(); - if (VecVT.getScalarSizeInBits() < 8) { - EltVT = MVT::i8; - VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, - VecVT.getVectorElementCount()); + if (!EltVT.isByteSized()) { + EltVT = EltVT.changeTypeToInteger().getRoundIntegerType(*DAG.getContext()); + VecVT = VecVT.changeElementType(EltVT); Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec); + SDValue NewExtract = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec, Idx); + return DAG.getAnyExtOrTrunc(NewExtract, dl, N->getValueType(0)); } // Store the vector to the stack. @@ -3399,13 +3583,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { // Load back the required element. 
StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); - // FIXME: This is to handle i1 vectors with elements promoted to i8. - // i1 vector handling needs general improvement. - if (N->getValueType(0).bitsLT(EltVT)) { - SDValue Load = DAG.getLoad(EltVT, dl, Store, StackPtr, - MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); - return DAG.getZExtOrTrunc(Load, dl, N->getValueType(0)); - } + // EXTRACT_VECTOR_ELT can extend the element type to the width of the return + // type, leaving the high bits undefined. But it can't truncate. + assert(N->getValueType(0).bitsGE(EltVT) && "Illegal EXTRACT_VECTOR_ELT."); return DAG.getExtLoad( ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, @@ -3476,7 +3656,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo) { SDValue Lo, Hi; MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( N->getPointerInfo(), MachineMemOperand::MOStore, - MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges()); + LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(), + N->getRanges()); Lo = DAG.getStoreVP(Ch, DL, DataLo, Ptr, Offset, MaskLo, EVLLo, LoMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(), @@ -3499,8 +3680,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo) { LoMemVT.getStoreSize().getFixedValue()); MMO = DAG.getMachineFunction().getMachineMemOperand( - MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment, - N->getAAInfo(), N->getRanges()); + MPI, MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer(), + Alignment, N->getAAInfo(), N->getRanges()); Hi = DAG.getStoreVP(Ch, DL, DataHi, Ptr, Offset, MaskHi, EVLHi, HiMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(), @@ -3572,8 +3753,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_VP_STRIDED_STORE(VPStridedStoreSDNode *N, MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(N->getPointerInfo().getAddrSpace()), - MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment, - N->getAAInfo(), N->getRanges()); + MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer(), + Alignment, N->getAAInfo(), N->getRanges()); SDValue Hi = DAG.getStridedStoreVP( N->getChain(), DL, HiData, Ptr, N->getOffset(), N->getStride(), HiMask, @@ -3624,7 +3805,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, SDValue Lo, Hi, Res; MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( N->getPointerInfo(), MachineMemOperand::MOStore, - MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges()); + LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(), + N->getRanges()); Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(), @@ -3649,8 +3831,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, LoMemVT.getStoreSize().getFixedValue()); MMO = DAG.getMachineFunction().getMachineMemOperand( - MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment, - N->getAAInfo(), N->getRanges()); + MPI, MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer(), + Alignment, N->getAAInfo(), N->getRanges()); Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(), @@ -3714,7 +3896,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo) { SDValue Lo; MachineMemOperand *MMO = 
DAG.getMachineFunction().getMachineMemOperand( N->getPointerInfo(), MachineMemOperand::MOStore, - MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges()); + LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(), + N->getRanges()); if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) { SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Ops.Scale}; @@ -3916,14 +4099,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { } SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { + bool isStrict = N->getOpcode() == ISD::STRICT_FSETCC; assert(N->getValueType(0).isVector() && - N->getOperand(0).getValueType().isVector() && + N->getOperand(isStrict ? 1 : 0).getValueType().isVector() && "Operand types must be vectors"); // The result has a legal vector type, but the input needs splitting. SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes; SDLoc DL(N); - GetSplitVector(N->getOperand(0), Lo0, Hi0); - GetSplitVector(N->getOperand(1), Lo1, Hi1); + GetSplitVector(N->getOperand(isStrict ? 1 : 0), Lo0, Hi0); + GetSplitVector(N->getOperand(isStrict ? 2 : 1), Lo1, Hi1); + auto PartEltCnt = Lo0.getValueType().getVectorElementCount(); LLVMContext &Context = *DAG.getContext(); @@ -3933,6 +4118,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { if (N->getOpcode() == ISD::SETCC) { LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2)); HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2)); + } else if (N->getOpcode() == ISD::STRICT_FSETCC) { + LoRes = DAG.getNode(ISD::STRICT_FSETCC, DL, + DAG.getVTList(PartResVT, N->getValueType(1)), + N->getOperand(0), Lo0, Lo1, N->getOperand(3)); + HiRes = DAG.getNode(ISD::STRICT_FSETCC, DL, + DAG.getVTList(PartResVT, N->getValueType(1)), + N->getOperand(0), Hi0, Hi1, N->getOperand(3)); + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + LoRes.getValue(1), HiRes.getValue(1)); + ReplaceValueWith(SDValue(N, 1), NewChain); } else { assert(N->getOpcode() == ISD::VP_SETCC && "Expected VP_SETCC opcode"); SDValue MaskLo, MaskHi, EVLLo, EVLHi; @@ -4016,6 +4211,25 @@ SDValue DAGTypeLegalizer::SplitVecOp_FPOpDifferentTypes(SDNode *N) { return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0), Lo, Hi); } +SDValue DAGTypeLegalizer::SplitVecOp_CMP(SDNode *N) { + LLVMContext &Ctxt = *DAG.getContext(); + SDLoc dl(N); + + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + GetSplitVector(N->getOperand(1), RHSLo, RHSHi); + + EVT ResVT = N->getValueType(0); + ElementCount SplitOpEC = LHSLo.getValueType().getVectorElementCount(); + EVT NewResVT = + EVT::getVectorVT(Ctxt, ResVT.getVectorElementType(), SplitOpEC); + + SDValue Lo = DAG.getNode(N->getOpcode(), dl, NewResVT, LHSLo, RHSLo); + SDValue Hi = DAG.getNode(N->getOpcode(), dl, NewResVT, LHSHi, RHSHi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); +} + SDValue DAGTypeLegalizer::SplitVecOp_FP_TO_XINT_SAT(SDNode *N) { EVT ResVT = N->getValueType(0); SDValue Lo, Hi; @@ -4033,6 +4247,29 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_TO_XINT_SAT(SDNode *N) { return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } +SDValue DAGTypeLegalizer::SplitVecOp_VP_CttzElements(SDNode *N) { + SDLoc DL(N); + EVT ResVT = N->getValueType(0); + + SDValue Lo, Hi; + SDValue VecOp = N->getOperand(0); + GetSplitVector(VecOp, Lo, Hi); + + auto [MaskLo, MaskHi] = SplitMask(N->getOperand(1)); + auto [EVLLo, EVLHi] = + DAG.SplitEVL(N->getOperand(2), VecOp.getValueType(), DL); + SDValue VLo = 
DAG.getZExtOrTrunc(EVLLo, DL, ResVT); + + // if VP_CTTZ_ELTS(Lo) != EVLLo => VP_CTTZ_ELTS(Lo). + // else => EVLLo + (VP_CTTZ_ELTS(Hi) or VP_CTTZ_ELTS_ZERO_UNDEF(Hi)). + SDValue ResLo = DAG.getNode(ISD::VP_CTTZ_ELTS, DL, ResVT, Lo, MaskLo, EVLLo); + SDValue ResLoNotEVL = + DAG.getSetCC(DL, getSetCCResultType(ResVT), ResLo, VLo, ISD::SETNE); + SDValue ResHi = DAG.getNode(N->getOpcode(), DL, ResVT, Hi, MaskHi, EVLHi); + return DAG.getSelect(DL, ResVT, ResLoNotEVL, ResLo, + DAG.getNode(ISD::ADD, DL, ResVT, VLo, ResHi)); +} + //===----------------------------------------------------------------------===// // Result Vector Widening //===----------------------------------------------------------------------===// @@ -4071,6 +4308,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { report_fatal_error("Do not know how to widen the result of this operator!"); case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break; + case ISD::ADDRSPACECAST: + Res = WidenVecRes_ADDRSPACECAST(N); + break; case ISD::AssertZext: Res = WidenVecRes_AssertZext(N); break; case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; @@ -4084,6 +4324,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: case ISD::SCALAR_TO_VECTOR: + case ISD::EXPERIMENTAL_VP_SPLAT: Res = WidenVecRes_ScalarOp(N); break; case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break; @@ -4106,6 +4347,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: Res = WidenVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N)); break; + case ISD::VECTOR_COMPRESS: + Res = WidenVecRes_VECTOR_COMPRESS(N); + break; case ISD::MLOAD: Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N)); break; @@ -4128,10 +4372,14 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::SUB: case ISD::VP_SUB: case ISD::XOR: case ISD::VP_XOR: case ISD::SHL: case ISD::VP_SHL: - case ISD::SRA: case ISD::VP_ASHR: - case ISD::SRL: case ISD::VP_LSHR: - case ISD::FMINNUM: case ISD::VP_FMINNUM: - case ISD::FMAXNUM: case ISD::VP_FMAXNUM: + case ISD::SRA: case ISD::VP_SRA: + case ISD::SRL: case ISD::VP_SRL: + case ISD::FMINNUM: + case ISD::FMINNUM_IEEE: + case ISD::VP_FMINNUM: + case ISD::FMAXNUM: + case ISD::FMAXNUM_IEEE: + case ISD::VP_FMAXNUM: case ISD::FMINIMUM: case ISD::VP_FMINIMUM: case ISD::FMAXIMUM: @@ -4140,10 +4388,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::SMAX: case ISD::VP_SMAX: case ISD::UMIN: case ISD::VP_UMIN: case ISD::UMAX: case ISD::VP_UMAX: - case ISD::UADDSAT: - case ISD::SADDSAT: - case ISD::USUBSAT: - case ISD::SSUBSAT: + case ISD::UADDSAT: case ISD::VP_UADDSAT: + case ISD::SADDSAT: case ISD::VP_SADDSAT: + case ISD::USUBSAT: case ISD::VP_USUBSAT: + case ISD::SSUBSAT: case ISD::VP_SSUBSAT: case ISD::SSHLSAT: case ISD::USHLSAT: case ISD::ROTL: @@ -4170,6 +4418,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Binary(N); break; + case ISD::SCMP: + case ISD::UCMP: + Res = WidenVecRes_CMP(N); + break; + case ISD::FPOW: case ISD::FREM: if (unrollExpandedOp()) @@ -4219,7 +4472,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::IS_FPCLASS: - Res = WidenVecRes_IS_FPCLASS(N); + case ISD::FPTRUNC_ROUND: + Res = WidenVecRes_UnarySameEltsWithScalarArg(N); break; case ISD::FLDEXP: @@ -4263,12 +4517,18 
@@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::LRINT: case ISD::LLRINT: + case ISD::VP_LRINT: + case ISD::VP_LLRINT: Res = WidenVecRes_XRINT(N); break; case ISD::FABS: + case ISD::FACOS: + case ISD::FASIN: + case ISD::FATAN: case ISD::FCEIL: case ISD::FCOS: + case ISD::FCOSH: case ISD::FEXP: case ISD::FEXP2: case ISD::FEXP10: @@ -4281,7 +4541,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FSIN: + case ISD::FSINH: case ISD::FSQRT: + case ISD::FTAN: + case ISD::FTANH: case ISD::FTRUNC: if (unrollExpandedOp()) break; @@ -4373,6 +4636,28 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { {InOp1, InOp2, Mask, N->getOperand(3)}, N->getFlags()); } +SDValue DAGTypeLegalizer::WidenVecRes_CMP(SDNode *N) { + LLVMContext &Ctxt = *DAG.getContext(); + SDLoc dl(N); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + EVT OpVT = LHS.getValueType(); + if (getTypeAction(OpVT) == TargetLowering::TypeWidenVector) { + LHS = GetWidenedVector(LHS); + RHS = GetWidenedVector(RHS); + OpVT = LHS.getValueType(); + } + + EVT WidenResVT = TLI.getTypeToTransformTo(Ctxt, N->getValueType(0)); + ElementCount WidenResEC = WidenResVT.getVectorElementCount(); + if (WidenResEC == OpVT.getVectorElementCount()) { + return DAG.getNode(N->getOpcode(), dl, WidenResVT, LHS, RHS); + } + + return DAG.UnrollVectorOp(N, WidenResVT.getVectorNumElements()); +} + SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) { // Binary op widening, but with an extra operand that shouldn't be widened. SDLoc dl(N); @@ -4869,7 +5154,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_XRINT(SDNode *N) { if (WidenNumElts != SrcVT.getVectorElementCount()) return DAG.UnrollVectorOp(N, WidenNumElts.getKnownMinValue()); - return DAG.getNode(N->getOpcode(), dl, WidenVT, Src); + if (N->getNumOperands() == 1) + return DAG.getNode(N->getOpcode(), dl, WidenVT, Src); + + assert(N->getNumOperands() == 3 && "Unexpected number of operands!"); + assert(N->isVPOpcode() && "Expected VP opcode"); + + SDValue Mask = + GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount()); + return DAG.getNode(N->getOpcode(), dl, WidenVT, Src, Mask, N->getOperand(2)); } SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) { @@ -4971,7 +5264,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) { return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); } -SDValue DAGTypeLegalizer::WidenVecRes_IS_FPCLASS(SDNode *N) { +/// Result and first source operand are different scalar types, but must have +/// the same number of elements. There is an additional control argument which +/// should be passed through unchanged. +SDValue DAGTypeLegalizer::WidenVecRes_UnarySameEltsWithScalarArg(SDNode *N) { SDValue FpValue = N->getOperand(0); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); if (getTypeAction(FpValue.getValueType()) != TargetLowering::TypeWidenVector) @@ -4985,7 +5281,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_ExpOp(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); SDValue RHS = N->getOperand(1); - SDValue ExpOp = RHS.getValueType().isVector() ? 
GetWidenedVector(RHS) : RHS; + EVT ExpVT = RHS.getValueType(); + SDValue ExpOp = RHS; + if (ExpVT.isVector()) { + EVT WideExpVT = + WidenVT.changeVectorElementType(ExpVT.getVectorElementType()); + ExpOp = ModifyToType(RHS, WideExpVT); + } return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ExpOp); } @@ -5022,6 +5324,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) { return GetWidenedVector(WidenVec); } +SDValue DAGTypeLegalizer::WidenVecRes_ADDRSPACECAST(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + auto *AddrSpaceCastN = cast<AddrSpaceCastSDNode>(N); + + return DAG.getAddrSpaceCast(SDLoc(N), WidenVT, InOp, + AddrSpaceCastN->getSrcAddressSpace(), + AddrSpaceCastN->getDestAddressSpace()); +} + SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { SDValue InOp = N->getOperand(0); EVT InVT = InOp.getValueType(); @@ -5464,6 +5776,23 @@ SDValue DAGTypeLegalizer::WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N) { return Res; } +SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_COMPRESS(SDNode *N) { + SDValue Vec = N->getOperand(0); + SDValue Mask = N->getOperand(1); + SDValue Passthru = N->getOperand(2); + EVT WideVecVT = + TLI.getTypeToTransformTo(*DAG.getContext(), Vec.getValueType()); + EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), + Mask.getValueType().getVectorElementType(), + WideVecVT.getVectorNumElements()); + + SDValue WideVec = ModifyToType(Vec, WideVecVT); + SDValue WideMask = ModifyToType(Mask, WideMaskVT, /*FillWithZeroes=*/true); + SDValue WidePassthru = ModifyToType(Passthru, WideVecVT); + return DAG.getNode(ISD::VECTOR_COMPRESS, SDLoc(N), WideVecVT, WideVec, + WideMask, WidePassthru); +} + SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0)); @@ -5552,6 +5881,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_VP_GATHER(VPGatherSDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_ScalarOp(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + if (N->isVPOpcode()) + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0), + N->getOperand(1), N->getOperand(2)); return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0)); } @@ -6065,6 +6397,11 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { Res = WidenVecOp_EXTEND(N); break; + case ISD::SCMP: + case ISD::UCMP: + Res = WidenVecOp_CMP(N); + break; + case ISD::FP_EXTEND: case ISD::STRICT_FP_EXTEND: case ISD::FP_ROUND: @@ -6086,6 +6423,10 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { Res = WidenVecOp_FP_TO_XINT_SAT(N); break; + case ISD::EXPERIMENTAL_VP_SPLAT: + Res = WidenVecOp_VP_SPLAT(N, OpNo); + break; + case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: case ISD::VECREDUCE_ADD: @@ -6122,8 +6463,14 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::VP_REDUCE_UMIN: case ISD::VP_REDUCE_FMAX: case ISD::VP_REDUCE_FMIN: + case ISD::VP_REDUCE_FMAXIMUM: + case ISD::VP_REDUCE_FMINIMUM: Res = WidenVecOp_VP_REDUCE(N); break; + case ISD::VP_CTTZ_ELTS: + case ISD::VP_CTTZ_ELTS_ZERO_UNDEF: + Res = WidenVecOp_VP_CttzElements(N); + break; } // If Res is null, the sub-method took care of registering the result. 
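Among the operand cases registered above, VP_CTTZ_ELTS also has a splitting path (SplitVecOp_VP_CttzElements, shown in an earlier hunk) whose recombination rule is easy to mis-read in DAG form. A standalone scalar sketch of that rule, in plain C++ with boolean lanes and assuming the EVL covers each whole half; names are illustrative only:

#include <cstddef>
#include <vector>

// VP_CTTZ_ELTS counts the false lanes preceding the first true lane
// ("trailing zeros" in bit order, starting from lane 0).
static std::size_t cttzElts(const std::vector<bool> &Mask) {
  std::size_t N = 0;
  while (N < Mask.size() && !Mask[N])
    ++N;
  return N;
}

// The split recombination: if Lo holds a true lane, its count stands;
// otherwise every Lo lane was false, so add Hi's count to Lo's length
// (the EVLLo offset in the DAG version).
static std::size_t cttzEltsSplit(const std::vector<bool> &Lo,
                                 const std::vector<bool> &Hi) {
  std::size_t ResLo = cttzElts(Lo);
  if (ResLo != Lo.size())
    return ResLo;
  return Lo.size() + cttzElts(Hi);
}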
@@ -6205,6 +6552,32 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { } } +SDValue DAGTypeLegalizer::WidenVecOp_CMP(SDNode *N) { + SDLoc dl(N); + + EVT OpVT = N->getOperand(0).getValueType(); + EVT ResVT = N->getValueType(0); + SDValue LHS = GetWidenedVector(N->getOperand(0)); + SDValue RHS = GetWidenedVector(N->getOperand(1)); + + // 1. EXTRACT_SUBVECTOR + // 2. SIGN_EXTEND/ZERO_EXTEND + // 3. CMP + LHS = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, LHS, + DAG.getVectorIdxConstant(0, dl)); + RHS = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, RHS, + DAG.getVectorIdxConstant(0, dl)); + + // At this point the result type is guaranteed to be valid, so we can use it + // as the operand type by extending it appropriately + ISD::NodeType ExtendOpcode = + N->getOpcode() == ISD::SCMP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + LHS = DAG.getNode(ExtendOpcode, dl, ResVT, LHS); + RHS = DAG.getNode(ExtendOpcode, dl, ResVT, RHS); + + return DAG.getNode(N->getOpcode(), dl, ResVT, LHS, RHS); +} + SDValue DAGTypeLegalizer::WidenVecOp_UnrollVectorOp(SDNode *N) { // The result (and first input) is legal, but the second input is illegal. // We can't do much to fix that, so just unroll and let the extracts off of @@ -6514,6 +6887,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { report_fatal_error("Unable to widen vector store"); } +SDValue DAGTypeLegalizer::WidenVecOp_VP_SPLAT(SDNode *N, unsigned OpNo) { + assert(OpNo == 1 && "Can widen only mask operand of vp_splat"); + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), + N->getOperand(0), GetWidenedVector(N->getOperand(1)), + N->getOperand(2)); +} + SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) { assert((OpNo == 1 || OpNo == 3) && "Can widen only data or mask operand of vp_store"); @@ -6887,6 +7267,17 @@ SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) { DAG.getVectorIdxConstant(0, DL)); } +SDValue DAGTypeLegalizer::WidenVecOp_VP_CttzElements(SDNode *N) { + SDLoc DL(N); + SDValue Source = GetWidenedVector(N->getOperand(0)); + EVT SrcVT = Source.getValueType(); + SDValue Mask = + GetWidenedMask(N->getOperand(1), SrcVT.getVectorElementCount()); + + return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), + {Source, Mask, N->getOperand(2)}, N->getFlags()); +} + //===----------------------------------------------------------------------===// // Vector Widening Utilities //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/MatchContext.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/MatchContext.h new file mode 100644 index 000000000000..f965cb952f97 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/MatchContext.h @@ -0,0 +1,175 @@ +//===---------------- llvm/CodeGen/MatchContext.h --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the EmptyMatchContext class and VPMatchContext class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_MATCHCONTEXT_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_MATCHCONTEXT_H + +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetLowering.h" + +using namespace llvm; + +namespace { +class EmptyMatchContext { + SelectionDAG &DAG; + const TargetLowering &TLI; + SDNode *Root; + +public: + EmptyMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root) + : DAG(DAG), TLI(TLI), Root(Root) {} + + unsigned getRootBaseOpcode() { return Root->getOpcode(); } + bool match(SDValue OpN, unsigned Opcode) const { + return Opcode == OpN->getOpcode(); + } + + // Same as SelectionDAG::getNode(). + template <typename... ArgT> SDValue getNode(ArgT &&...Args) { + return DAG.getNode(std::forward<ArgT>(Args)...); + } + + bool isOperationLegal(unsigned Op, EVT VT) const { + return TLI.isOperationLegal(Op, VT); + } + + bool isOperationLegalOrCustom(unsigned Op, EVT VT, + bool LegalOnly = false) const { + return TLI.isOperationLegalOrCustom(Op, VT, LegalOnly); + } +}; + +class VPMatchContext { + SelectionDAG &DAG; + const TargetLowering &TLI; + SDValue RootMaskOp; + SDValue RootVectorLenOp; + SDNode *Root; + +public: + VPMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *_Root) + : DAG(DAG), TLI(TLI), RootMaskOp(), RootVectorLenOp() { + Root = _Root; + assert(Root->isVPOpcode()); + if (auto RootMaskPos = ISD::getVPMaskIdx(Root->getOpcode())) + RootMaskOp = Root->getOperand(*RootMaskPos); + else if (Root->getOpcode() == ISD::VP_SELECT) + RootMaskOp = DAG.getAllOnesConstant(SDLoc(Root), + Root->getOperand(0).getValueType()); + + if (auto RootVLenPos = ISD::getVPExplicitVectorLengthIdx(Root->getOpcode())) + RootVectorLenOp = Root->getOperand(*RootVLenPos); + } + + unsigned getRootBaseOpcode() { + std::optional<unsigned> Opcode = ISD::getBaseOpcodeForVP( + Root->getOpcode(), !Root->getFlags().hasNoFPExcept()); + assert(Opcode.has_value()); + return *Opcode; + } + + /// whether \p OpVal is a node that is functionally compatible with the + /// NodeType \p Opc + bool match(SDValue OpVal, unsigned Opc) const { + if (!OpVal->isVPOpcode()) + return OpVal->getOpcode() == Opc; + + auto BaseOpc = ISD::getBaseOpcodeForVP(OpVal->getOpcode(), + !OpVal->getFlags().hasNoFPExcept()); + if (BaseOpc != Opc) + return false; + + // Make sure the mask of OpVal is true mask or is same as Root's. + unsigned VPOpcode = OpVal->getOpcode(); + if (auto MaskPos = ISD::getVPMaskIdx(VPOpcode)) { + SDValue MaskOp = OpVal.getOperand(*MaskPos); + if (RootMaskOp != MaskOp && + !ISD::isConstantSplatVectorAllOnes(MaskOp.getNode())) + return false; + } + + // Make sure the EVL of OpVal is same as Root's. + if (auto VLenPos = ISD::getVPExplicitVectorLengthIdx(VPOpcode)) + if (RootVectorLenOp != OpVal.getOperand(*VLenPos)) + return false; + return true; + } + + // Specialize based on number of operands. 
+ // TODO emit VP intrinsics where MaskOp/VectorLenOp != null + // SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { return + // DAG.getNode(Opcode, DL, VT); } + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand) { + unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode); + assert(ISD::getVPMaskIdx(VPOpcode) == 1 && + ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2); + return DAG.getNode(VPOpcode, DL, VT, + {Operand, RootMaskOp, RootVectorLenOp}); + } + + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, + SDValue N2) { + unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode); + assert(ISD::getVPMaskIdx(VPOpcode) == 2 && + ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3); + return DAG.getNode(VPOpcode, DL, VT, {N1, N2, RootMaskOp, RootVectorLenOp}); + } + + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, + SDValue N2, SDValue N3) { + unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode); + assert(ISD::getVPMaskIdx(VPOpcode) == 3 && + ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4); + return DAG.getNode(VPOpcode, DL, VT, + {N1, N2, N3, RootMaskOp, RootVectorLenOp}); + } + + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand, + SDNodeFlags Flags) { + unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode); + assert(ISD::getVPMaskIdx(VPOpcode) == 1 && + ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2); + return DAG.getNode(VPOpcode, DL, VT, {Operand, RootMaskOp, RootVectorLenOp}, + Flags); + } + + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, + SDValue N2, SDNodeFlags Flags) { + unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode); + assert(ISD::getVPMaskIdx(VPOpcode) == 2 && + ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3); + return DAG.getNode(VPOpcode, DL, VT, {N1, N2, RootMaskOp, RootVectorLenOp}, + Flags); + } + + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, + SDValue N2, SDValue N3, SDNodeFlags Flags) { + unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode); + assert(ISD::getVPMaskIdx(VPOpcode) == 3 && + ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4); + return DAG.getNode(VPOpcode, DL, VT, + {N1, N2, N3, RootMaskOp, RootVectorLenOp}, Flags); + } + + bool isOperationLegal(unsigned Op, EVT VT) const { + unsigned VPOp = ISD::getVPForBaseOpcode(Op); + return TLI.isOperationLegal(VPOp, VT); + } + + bool isOperationLegalOrCustom(unsigned Op, EVT VT, + bool LegalOnly = false) const { + unsigned VPOp = ISD::getVPForBaseOpcode(Op); + return TLI.isOperationLegalOrCustom(VPOp, VT, LegalOnly); + } +}; +} // end anonymous namespace +#endif diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index e3acb58327a8..de4a1ac2a3ba 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -622,11 +622,11 @@ void ScheduleDAGFast::ListScheduleBottomUp() { } // Add the nodes that aren't ready back onto the available list. - for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { - NotReady[i]->isPending = false; + for (SUnit *SU : NotReady) { + SU->isPending = false; // May no longer be available due to backtracking. 
- if (NotReady[i]->isAvailable) - AvailableQueue.push(NotReady[i]); + if (SU->isAvailable) + AvailableQueue.push(SU); } NotReady.clear(); @@ -748,8 +748,7 @@ void ScheduleDAGLinearize::Schedule() { ++DAGSize; } - for (unsigned i = 0, e = Glues.size(); i != e; ++i) { - SDNode *Glue = Glues[i]; + for (SDNode *Glue : Glues) { SDNode *GUser = GluedMap[Glue]; unsigned Degree = Glue->getNodeId(); unsigned UDegree = GUser->getNodeId(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index dcecb2e0e7fa..e4ee3fd99f16 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -24,7 +24,6 @@ #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" @@ -36,6 +35,7 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/InlineAsm.h" #include "llvm/MC/MCInstrDesc.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index c9e2745f00c9..f44added89a7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" +#include "llvm/IR/MemoryModelRelaxationAnnotations.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -512,7 +513,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { Dep.setLatency(OpLatency); if (!isChain && !UnitLatencies) { computeOperandLatency(OpN, N, i, Dep); - ST.adjustSchedDependency(OpSU, DefIdx, &SU, i, Dep); + ST.adjustSchedDependency(OpSU, DefIdx, &SU, i, Dep, nullptr); } if (!SU.addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) { @@ -888,8 +889,9 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { } if (MI->isCandidateForCallSiteEntry() && - DAG->getTarget().Options.EmitCallSiteInfo) - MF.addCallArgsForwardingRegs(MI, DAG->getCallSiteInfo(Node)); + DAG->getTarget().Options.EmitCallSiteInfo) { + MF.addCallSiteInfo(MI, DAG->getCallSiteInfo(Node)); + } if (DAG->getNoMergeSiteInfo(Node)) { MI->setFlag(MachineInstr::MIFlag::NoMerge); @@ -898,6 +900,14 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { if (MDNode *MD = DAG->getPCSections(Node)) MI->setPCSections(MF, MD); + // Set MMRAs on _all_ added instructions. 
+ if (MDNode *MMRA = DAG->getMMRAMetadata(Node)) { + for (MachineBasicBlock::iterator It = MI->getIterator(), + End = std::next(After); + It != End; ++It) + It->setMMRAMetadata(MF, MMRA); + } + return MI; }; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 439ccfdc3275..446df640821d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -16,9 +16,9 @@ #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/Support/Casting.h" #include <cassert> #include <string> diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 9a22088d2c62..02d44cd36ae5 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -36,8 +36,8 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h" -#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/RuntimeLibcallUtil.h" +#include "llvm/CodeGen/SDPatternMatch.h" #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" @@ -46,8 +46,8 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/IR/Constant.h" -#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -75,12 +75,14 @@ #include <cstdint> #include <cstdlib> #include <limits> +#include <optional> #include <set> #include <string> #include <utility> #include <vector> using namespace llvm; +using namespace llvm::SDPatternMatch; /// makeVTList - Return an instance of the SDVTList struct initialized with the /// specified members. @@ -468,8 +470,10 @@ ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) { case ISD::VP_REDUCE_FMIN: return ISD::FMINNUM; case ISD::VECREDUCE_FMAXIMUM: + case ISD::VP_REDUCE_FMAXIMUM: return ISD::FMAXIMUM; case ISD::VECREDUCE_FMINIMUM: + case ISD::VP_REDUCE_FMINIMUM: return ISD::FMINIMUM; } } @@ -909,10 +913,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { break; } case ISD::VECTOR_SHUFFLE: { - const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); - for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements(); - i != e; ++i) - ID.AddInteger(SVN->getMaskElt(i)); + ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask(); + for (int M : Mask) + ID.AddInteger(M); break; } case ISD::TargetBlockAddress: @@ -1110,9 +1113,11 @@ void SelectionDAG::DeallocateNode(SDNode *N) { #ifndef NDEBUG /// VerifySDNode - Check the given SDNode. Aborts if it is invalid. 
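The hunk below threads the TargetLowering into this verifier so that target-specific opcodes (anything above ISD::BUILTIN_OP_END) can be checked as well. As a rough sketch of how a backend might use the new hook — the target name, opcode, and checks here are hypothetical, not part of this commit:

// Hypothetical backend override of the new TargetLowering hook.
void MyTargetLowering::verifyTargetSDNode(const SDNode *N) const {
  switch (N->getOpcode()) {
  default:
    break;
  case MyTargetISD::DUP_LANE: // made-up target opcode
    assert(N->getNumOperands() == 2 && "DUP_LANE expects a vector and a lane");
    assert(N->getOperand(0).getValueType().isVector() &&
           "DUP_LANE source must be a vector");
    break;
  }
}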
-static void VerifySDNode(SDNode *N) { +static void VerifySDNode(SDNode *N, const TargetLowering *TLI) { switch (N->getOpcode()) { default: + if (N->getOpcode() > ISD::BUILTIN_OP_END) + TLI->verifyTargetSDNode(N); break; case ISD::BUILD_PAIR: { EVT VT = N->getValueType(0); @@ -1156,7 +1161,7 @@ void SelectionDAG::InsertNode(SDNode *N) { AllNodes.push_back(N); #ifndef NDEBUG N->PersistentId = NextPersistentId++; - VerifySDNode(N); + VerifySDNode(N, TLI); #endif for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) DUL->NodeInserted(N); @@ -1235,6 +1240,7 @@ SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) { // If there was already an existing matching node, use ReplaceAllUsesWith // to replace the dead one with the existing one. This can cause // recursive merging of other unrelated nodes down the line. + Existing->intersectFlagsWith(N->getFlags()); ReplaceAllUsesWith(N, Existing); // N is now dead. Inform the listeners and delete it. @@ -1415,10 +1421,8 @@ void SelectionDAG::clear() { TargetExternalSymbols.clear(); MCSymbols.clear(); SDEI.clear(); - std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), - static_cast<CondCodeSDNode*>(nullptr)); - std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), - static_cast<SDNode*>(nullptr)); + std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), nullptr); + std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), nullptr); EntryNode.UseList = nullptr; InsertNode(&EntryNode); @@ -1466,14 +1470,14 @@ SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { } SDValue SelectionDAG::getBitcastedAnyExtOrTrunc(SDValue Op, const SDLoc &DL, - EVT VT) { + EVT VT) { assert(!VT.isVector()); auto Type = Op.getValueType(); SDValue DestOp; if (Type == VT) return Op; auto Size = Op.getValueSizeInBits(); - DestOp = getBitcast(MVT::getIntegerVT(Size), Op); + DestOp = getBitcast(EVT::getIntegerVT(*Context, Size), Op); if (DestOp.getValueType() == VT) return DestOp; @@ -1537,6 +1541,25 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { return getNode(ISD::AND, DL, OpVT, Op, getConstant(Imm, DL, OpVT)); } +SDValue SelectionDAG::getVPZeroExtendInReg(SDValue Op, SDValue Mask, + SDValue EVL, const SDLoc &DL, + EVT VT) { + EVT OpVT = Op.getValueType(); + assert(VT.isInteger() && OpVT.isInteger() && + "Cannot getVPZeroExtendInReg FP types"); + assert(VT.isVector() && OpVT.isVector() && + "getVPZeroExtendInReg type and operand type should be vector!"); + assert(VT.getVectorElementCount() == OpVT.getVectorElementCount() && + "Vector element counts must match in getZeroExtendInReg"); + assert(VT.bitsLE(OpVT) && "Not extending!"); + if (OpVT == VT) + return Op; + APInt Imm = APInt::getLowBitsSet(OpVT.getScalarSizeInBits(), + VT.getScalarSizeInBits()); + return getNode(ISD::VP_AND, DL, OpVT, Op, getConstant(Imm, DL, OpVT), Mask, + EVL); +} + SDValue SelectionDAG::getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { // Only unsigned pointer semantics are supported right now. In the future this // might delegate to TLI to check pointer signedness. @@ -1700,8 +1723,9 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, assert(Elt->getBitWidth() == EltVT.getSizeInBits() && "APInt size does not match type size!"); unsigned Opc = isT ? 
ISD::TargetConstant : ISD::Constant; + SDVTList VTs = getVTList(EltVT); FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(EltVT), std::nullopt); + AddNodeIDNode(ID, Opc, VTs, std::nullopt); ID.AddPointer(Elt); ID.AddBoolean(isO); void *IP = nullptr; @@ -1711,7 +1735,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, return SDValue(N, 0); if (!N) { - N = newSDNode<ConstantSDNode>(isT, isO, Elt, EltVT); + N = newSDNode<ConstantSDNode>(isT, isO, Elt, VTs); CSEMap.InsertNode(N, IP); InsertNode(N); NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this); @@ -1729,12 +1753,18 @@ SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL, } SDValue SelectionDAG::getShiftAmountConstant(uint64_t Val, EVT VT, - const SDLoc &DL, bool LegalTypes) { + const SDLoc &DL) { assert(VT.isInteger() && "Shift amount is not an integer type!"); - EVT ShiftVT = TLI->getShiftAmountTy(VT, getDataLayout(), LegalTypes); + EVT ShiftVT = TLI->getShiftAmountTy(VT, getDataLayout()); return getConstant(Val, DL, ShiftVT); } +SDValue SelectionDAG::getShiftAmountConstant(const APInt &Val, EVT VT, + const SDLoc &DL) { + assert(Val.ult(VT.getScalarSizeInBits()) && "Out of range shift"); + return getShiftAmountConstant(Val.getZExtValue(), VT, DL); +} + SDValue SelectionDAG::getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget) { return getConstant(Val, DL, TLI->getVectorIdxTy(getDataLayout()), isTarget); @@ -1755,8 +1785,9 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL, // value, so that we don't have problems with 0.0 comparing equal to -0.0, and // we don't have issues with SNANs. unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP; + SDVTList VTs = getVTList(EltVT); FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(EltVT), std::nullopt); + AddNodeIDNode(ID, Opc, VTs, std::nullopt); ID.AddPointer(&V); void *IP = nullptr; SDNode *N = nullptr; @@ -1765,7 +1796,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL, return SDValue(N, 0); if (!N) { - N = newSDNode<ConstantFPSDNode>(isTarget, &V, EltVT); + N = newSDNode<ConstantFPSDNode>(isTarget, &V, VTs); CSEMap.InsertNode(N, IP); InsertNode(N); } @@ -1812,8 +1843,9 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, else Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress; + SDVTList VTs = getVTList(VT); FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); + AddNodeIDNode(ID, Opc, VTs, std::nullopt); ID.AddPointer(GV); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); @@ -1822,7 +1854,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, return SDValue(E, 0); auto *N = newSDNode<GlobalAddressSDNode>( - Opc, DL.getIROrder(), DL.getDebugLoc(), GV, VT, Offset, TargetFlags); + Opc, DL.getIROrder(), DL.getDebugLoc(), GV, VTs, Offset, TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1830,14 +1862,15 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { unsigned Opc = isTarget ? 
ISD::TargetFrameIndex : ISD::FrameIndex; + SDVTList VTs = getVTList(VT); FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); + AddNodeIDNode(ID, Opc, VTs, std::nullopt); ID.AddInteger(FI); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - auto *N = newSDNode<FrameIndexSDNode>(FI, VT, isTarget); + auto *N = newSDNode<FrameIndexSDNode>(FI, VTs, isTarget); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1848,15 +1881,16 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent jump tables"); unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable; + SDVTList VTs = getVTList(VT); FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); + AddNodeIDNode(ID, Opc, VTs, std::nullopt); ID.AddInteger(JTI); ID.AddInteger(TargetFlags); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - auto *N = newSDNode<JumpTableSDNode>(JTI, VT, isTarget, TargetFlags); + auto *N = newSDNode<JumpTableSDNode>(JTI, VTs, isTarget, TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1879,8 +1913,9 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, ? getDataLayout().getABITypeAlign(C->getType()) : getDataLayout().getPrefTypeAlign(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; + SDVTList VTs = getVTList(VT); FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); + AddNodeIDNode(ID, Opc, VTs, std::nullopt); ID.AddInteger(Alignment->value()); ID.AddInteger(Offset); ID.AddPointer(C); @@ -1889,7 +1924,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, *Alignment, + auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VTs, Offset, *Alignment, TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); @@ -1906,8 +1941,9 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, if (!Alignment) Alignment = getDataLayout().getPrefTypeAlign(C->getType()); unsigned Opc = isTarget ? 
ISD::TargetConstantPool : ISD::ConstantPool; + SDVTList VTs = getVTList(VT); FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); + AddNodeIDNode(ID, Opc, VTs, std::nullopt); ID.AddInteger(Alignment->value()); ID.AddInteger(Offset); C->addSelectionDAGCSEId(ID); @@ -1916,7 +1952,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, *Alignment, + auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VTs, Offset, *Alignment, TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); @@ -1954,7 +1990,7 @@ SDValue SelectionDAG::getValueType(EVT VT) { SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) { SDNode *&N = ExternalSymbols[Sym]; if (N) return SDValue(N, 0); - N = newSDNode<ExternalSymbolSDNode>(false, Sym, 0, VT); + N = newSDNode<ExternalSymbolSDNode>(false, Sym, 0, getVTList(VT)); InsertNode(N); return SDValue(N, 0); } @@ -1963,7 +1999,7 @@ SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) { SDNode *&N = MCSymbols[Sym]; if (N) return SDValue(N, 0); - N = newSDNode<MCSymbolSDNode>(Sym, VT); + N = newSDNode<MCSymbolSDNode>(Sym, getVTList(VT)); InsertNode(N); return SDValue(N, 0); } @@ -1973,7 +2009,7 @@ SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, SDNode *&N = TargetExternalSymbols[std::pair<std::string, unsigned>(Sym, TargetFlags)]; if (N) return SDValue(N, 0); - N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, VT); + N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, getVTList(VT)); InsertNode(N); return SDValue(N, 0); } @@ -2024,7 +2060,8 @@ SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT) { return getStepVector(DL, ResVT, One); } -SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT, APInt StepVal) { +SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT, + const APInt &StepVal) { assert(ResVT.getScalarSizeInBits() == StepVal.getBitWidth()); if (ResVT.isScalableVector()) return getNode( @@ -2188,9 +2225,10 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, } } + SDVTList VTs = getVTList(VT); FoldingSetNodeID ID; SDValue Ops[2] = { N1, N2 }; - AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops); + AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, VTs, Ops); for (int i = 0; i != NElts; ++i) ID.AddInteger(MaskVec[i]); @@ -2204,7 +2242,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, int *MaskAlloc = OperandAllocator.Allocate<int>(NElts); llvm::copy(MaskVec, MaskAlloc); - auto *N = newSDNode<ShuffleVectorSDNode>(VT, dl.getIROrder(), + auto *N = newSDNode<ShuffleVectorSDNode>(VTs, dl.getIROrder(), dl.getDebugLoc(), MaskAlloc); createOperands(N, Ops); @@ -2226,14 +2264,15 @@ SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { } SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { + SDVTList VTs = getVTList(VT); FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::Register, getVTList(VT), std::nullopt); + AddNodeIDNode(ID, ISD::Register, VTs, std::nullopt); ID.AddInteger(RegNo); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - auto *N = newSDNode<RegisterSDNode>(RegNo, VT); + auto *N = newSDNode<RegisterSDNode>(RegNo, VTs); N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, UA); CSEMap.InsertNode(N, IP); InsertNode(N); @@ -2282,9 +2321,10 @@ SDValue 
SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset, bool isTarget, unsigned TargetFlags) { unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress; + SDVTList VTs = getVTList(VT); FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); + AddNodeIDNode(ID, Opc, VTs, std::nullopt); ID.AddPointer(BA); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); @@ -2292,7 +2332,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - auto *N = newSDNode<BlockAddressSDNode>(Opc, VT, BA, Offset, TargetFlags); + auto *N = newSDNode<BlockAddressSDNode>(Opc, VTs, BA, Offset, TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -2337,9 +2377,10 @@ SDValue SelectionDAG::getBitcast(EVT VT, SDValue V) { SDValue SelectionDAG::getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS) { + SDVTList VTs = getVTList(VT); SDValue Ops[] = {Ptr}; FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), Ops); + AddNodeIDNode(ID, ISD::ADDRSPACECAST, VTs, Ops); ID.AddInteger(SrcAS); ID.AddInteger(DestAS); @@ -2348,7 +2389,7 @@ SDValue SelectionDAG::getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, return SDValue(E, 0); auto *N = newSDNode<AddrSpaceCastSDNode>(dl.getIROrder(), dl.getDebugLoc(), - VT, SrcAS, DestAS); + VTs, SrcAS, DestAS); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -2969,78 +3010,117 @@ SDValue SelectionDAG::getSplatValue(SDValue V, bool LegalTypes) { return SDValue(); } -const APInt * -SelectionDAG::getValidShiftAmountConstant(SDValue V, - const APInt &DemandedElts) const { +std::optional<ConstantRange> +SelectionDAG::getValidShiftAmountRange(SDValue V, const APInt &DemandedElts, + unsigned Depth) const { assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || V.getOpcode() == ISD::SRA) && "Unknown shift node"); + // Shifting more than the bitwidth is not valid. unsigned BitWidth = V.getScalarValueSizeInBits(); - if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1), DemandedElts)) { - // Shifting more than the bitwidth is not valid. - const APInt &ShAmt = SA->getAPIntValue(); - if (ShAmt.ult(BitWidth)) - return &ShAmt; + + if (auto *Cst = dyn_cast<ConstantSDNode>(V.getOperand(1))) { + const APInt &ShAmt = Cst->getAPIntValue(); + if (ShAmt.uge(BitWidth)) + return std::nullopt; + return ConstantRange(ShAmt); } - return nullptr; + + if (auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1))) { + const APInt *MinAmt = nullptr, *MaxAmt = nullptr; + for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + if (!DemandedElts[i]) + continue; + auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i)); + if (!SA) { + MinAmt = MaxAmt = nullptr; + break; + } + const APInt &ShAmt = SA->getAPIntValue(); + if (ShAmt.uge(BitWidth)) + return std::nullopt; + if (!MinAmt || MinAmt->ugt(ShAmt)) + MinAmt = &ShAmt; + if (!MaxAmt || MaxAmt->ult(ShAmt)) + MaxAmt = &ShAmt; + } + assert(((!MinAmt && !MaxAmt) || (MinAmt && MaxAmt)) && + "Failed to find matching min/max shift amounts"); + if (MinAmt && MaxAmt) + return ConstantRange(*MinAmt, *MaxAmt + 1); + } + + // Use computeKnownBits to find a hidden constant/knownbits (usually type + // legalized). e.g. Hidden behind multiple bitcasts/build_vector/casts etc. 
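In other words, when the amount is neither a plain constant nor a constant build_vector, any known-bits information that bounds it below the bit width is converted into a conservative unsigned range. A self-contained sketch of that last step, with numbers of my choosing:

#include "llvm/IR/ConstantRange.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

// If an i32 shift amount is (and X, 7), computeKnownBits reports bits
// [3,32) zero, so getMaxValue() == 7 < 32 and the fallback produces the
// unsigned range [0, 8).
ConstantRange rangeFromMaskedAmount() {
  KnownBits Known(32);
  Known.Zero.setBitsFrom(3); // bits [3,32) known zero
  assert(Known.getMaxValue().ult(32) && "cannot over-shift");
  return ConstantRange::fromKnownBits(Known, /*IsSigned=*/false);
}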
+ KnownBits KnownAmt = computeKnownBits(V.getOperand(1), DemandedElts, Depth); + if (KnownAmt.getMaxValue().ult(BitWidth)) + return ConstantRange::fromKnownBits(KnownAmt, /*IsSigned=*/false); + + return std::nullopt; } -const APInt *SelectionDAG::getValidMinimumShiftAmountConstant( - SDValue V, const APInt &DemandedElts) const { +std::optional<uint64_t> +SelectionDAG::getValidShiftAmount(SDValue V, const APInt &DemandedElts, + unsigned Depth) const { assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || V.getOpcode() == ISD::SRA) && "Unknown shift node"); - if (const APInt *ValidAmt = getValidShiftAmountConstant(V, DemandedElts)) - return ValidAmt; - unsigned BitWidth = V.getScalarValueSizeInBits(); - auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1)); - if (!BV) - return nullptr; - const APInt *MinShAmt = nullptr; - for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { - if (!DemandedElts[i]) - continue; - auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i)); - if (!SA) - return nullptr; - // Shifting more than the bitwidth is not valid. - const APInt &ShAmt = SA->getAPIntValue(); - if (ShAmt.uge(BitWidth)) - return nullptr; - if (MinShAmt && MinShAmt->ule(ShAmt)) - continue; - MinShAmt = &ShAmt; - } - return MinShAmt; + if (std::optional<ConstantRange> AmtRange = + getValidShiftAmountRange(V, DemandedElts, Depth)) + if (const APInt *ShAmt = AmtRange->getSingleElement()) + return ShAmt->getZExtValue(); + return std::nullopt; +} + +std::optional<uint64_t> +SelectionDAG::getValidShiftAmount(SDValue V, unsigned Depth) const { + EVT VT = V.getValueType(); + APInt DemandedElts = VT.isFixedLengthVector() + ? APInt::getAllOnes(VT.getVectorNumElements()) + : APInt(1, 1); + return getValidShiftAmount(V, DemandedElts, Depth); } -const APInt *SelectionDAG::getValidMaximumShiftAmountConstant( - SDValue V, const APInt &DemandedElts) const { +std::optional<uint64_t> +SelectionDAG::getValidMinimumShiftAmount(SDValue V, const APInt &DemandedElts, + unsigned Depth) const { assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || V.getOpcode() == ISD::SRA) && "Unknown shift node"); - if (const APInt *ValidAmt = getValidShiftAmountConstant(V, DemandedElts)) - return ValidAmt; - unsigned BitWidth = V.getScalarValueSizeInBits(); - auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1)); - if (!BV) - return nullptr; - const APInt *MaxShAmt = nullptr; - for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { - if (!DemandedElts[i]) - continue; - auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i)); - if (!SA) - return nullptr; - // Shifting more than the bitwidth is not valid. - const APInt &ShAmt = SA->getAPIntValue(); - if (ShAmt.uge(BitWidth)) - return nullptr; - if (MaxShAmt && MaxShAmt->uge(ShAmt)) - continue; - MaxShAmt = &ShAmt; - } - return MaxShAmt; + if (std::optional<ConstantRange> AmtRange = + getValidShiftAmountRange(V, DemandedElts, Depth)) + return AmtRange->getUnsignedMin().getZExtValue(); + return std::nullopt; +} + +std::optional<uint64_t> +SelectionDAG::getValidMinimumShiftAmount(SDValue V, unsigned Depth) const { + EVT VT = V.getValueType(); + APInt DemandedElts = VT.isFixedLengthVector() + ? 
APInt::getAllOnes(VT.getVectorNumElements()) + : APInt(1, 1); + return getValidMinimumShiftAmount(V, DemandedElts, Depth); +} + +std::optional<uint64_t> +SelectionDAG::getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, + unsigned Depth) const { + assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || + V.getOpcode() == ISD::SRA) && + "Unknown shift node"); + if (std::optional<ConstantRange> AmtRange = + getValidShiftAmountRange(V, DemandedElts, Depth)) + return AmtRange->getUnsignedMax().getZExtValue(); + return std::nullopt; +} + +std::optional<uint64_t> +SelectionDAG::getValidMaximumShiftAmount(SDValue V, unsigned Depth) const { + EVT VT = V.getValueType(); + APInt DemandedElts = VT.isFixedLengthVector() + ? APInt::getAllOnes(VT.getVectorNumElements()) + : APInt(1, 1); + return getValidMaximumShiftAmount(V, DemandedElts, Depth); } /// Determine which bits of Op are known to be either zero or one and return @@ -3111,6 +3191,33 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } break; } + case ISD::STEP_VECTOR: { + const APInt &Step = Op.getConstantOperandAPInt(0); + + if (Step.isPowerOf2()) + Known.Zero.setLowBits(Step.logBase2()); + + const Function &F = getMachineFunction().getFunction(); + + if (!isUIntN(BitWidth, Op.getValueType().getVectorMinNumElements())) + break; + const APInt MinNumElts = + APInt(BitWidth, Op.getValueType().getVectorMinNumElements()); + + bool Overflow; + const APInt MaxNumElts = getVScaleRange(&F, BitWidth) + .getUnsignedMax() + .umul_ov(MinNumElts, Overflow); + if (Overflow) + break; + + const APInt MaxValue = (MaxNumElts - 1).umul_ov(Step, Overflow); + if (Overflow) + break; + + Known.Zero.setHighBits(MaxValue.countl_zero()); + break; + } case ISD::BUILD_VECTOR: assert(!Op.getValueType().isScalableVector()); // Collect the known bits that are shared by every demanded vector element. 
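Between the two bounds in the STEP_VECTOR case above — trailing zeros from a power-of-two step, leading zeros from the vscale_range upper bound — quite a few bits can be pinned down. A worked instance with numbers I picked, not taken from the commit:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

// step_vector<i32> with Step = 4 over <vscale x 4 x i32>, in a function
// carrying vscale_range(1, 4): at most 4 * 4 = 16 lanes.
KnownBits stepVectorKnownBits() {
  APInt Step(32, 4), MaxNumElts(32, 16);
  bool Overflow;
  APInt MaxValue = (MaxNumElts - 1).umul_ov(Step, Overflow); // 15 * 4 = 60
  KnownBits Known(32);
  Known.Zero.setLowBits(Step.logBase2());         // step 4 => bits [0,2) zero
  Known.Zero.setHighBits(MaxValue.countl_zero()); // max 60 => bits [6,32) zero
  return Known;
}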
@@ -3362,6 +3469,25 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = KnownBits::mulhs(Known, Known2); break; } + case ISD::ABDU: { + Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known = KnownBits::abdu(Known, Known2); + break; + } + case ISD::ABDS: { + Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known = KnownBits::abds(Known, Known2); + unsigned SignBits1 = + ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); + if (SignBits1 == 1) + break; + unsigned SignBits0 = + ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known.Zero.setHighBits(std::min(SignBits0, SignBits1) - 1); + break; + } case ISD::UMUL_LOHI: { assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result"); Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); @@ -3384,14 +3510,28 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = KnownBits::mulhs(Known, Known2); break; } + case ISD::AVGFLOORU: { + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = KnownBits::avgFloorU(Known, Known2); + break; + } case ISD::AVGCEILU: { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - Known = Known.zext(BitWidth + 1); - Known2 = Known2.zext(BitWidth + 1); - KnownBits One = KnownBits::makeConstant(APInt(1, 1)); - Known = KnownBits::computeForAddCarry(Known, Known2, One); - Known = Known.extractBits(BitWidth, 1); + Known = KnownBits::avgCeilU(Known, Known2); + break; + } + case ISD::AVGFLOORS: { + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = KnownBits::avgFloorS(Known, Known2); + break; + } + case ISD::AVGCEILS: { + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = KnownBits::avgCeilS(Known, Known2); break; } case ISD::SELECT: @@ -3440,30 +3580,39 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.Zero.setBitsFrom(1); break; } - case ISD::SHL: + case ISD::SHL: { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - Known = KnownBits::shl(Known, Known2); + + bool NUW = Op->getFlags().hasNoUnsignedWrap(); + bool NSW = Op->getFlags().hasNoSignedWrap(); + + bool ShAmtNonZero = Known2.isNonZero(); + + Known = KnownBits::shl(Known, Known2, NUW, NSW, ShAmtNonZero); // Minimum shift low bits are known zero. - if (const APInt *ShMinAmt = - getValidMinimumShiftAmountConstant(Op, DemandedElts)) - Known.Zero.setLowBits(ShMinAmt->getZExtValue()); + if (std::optional<uint64_t> ShMinAmt = + getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1)) + Known.Zero.setLowBits(*ShMinAmt); break; + } case ISD::SRL: Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - Known = KnownBits::lshr(Known, Known2); + Known = KnownBits::lshr(Known, Known2, /*ShAmtNonZero=*/false, + Op->getFlags().hasExact()); // Minimum shift high bits are known zero. 
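That is, a right shift by at least k clears the top k bits no matter what the value operand holds. A minimal sketch of the rule — the (or %y, 8) amount is my example:

#include "llvm/Support/KnownBits.h"
using namespace llvm;

// For srl i32 %x, (or %y, 8): bit 3 of the amount is known one, so every
// feasible amount is >= 8 and the top 8 result bits are known zero even
// though nothing is known about %x.
KnownBits srlHighBits() {
  KnownBits Val(32); // %x: nothing known
  KnownBits Amt(32);
  Amt.One.setBit(3); // amount has bit 3 set => amount >= 8
  return KnownBits::lshr(Val, Amt); // result: Zero covers bits [24,32)
}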
- if (const APInt *ShMinAmt = - getValidMinimumShiftAmountConstant(Op, DemandedElts)) - Known.Zero.setHighBits(ShMinAmt->getZExtValue()); + if (std::optional<uint64_t> ShMinAmt = + getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1)) + Known.Zero.setHighBits(*ShMinAmt); break; case ISD::SRA: Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - Known = KnownBits::ashr(Known, Known2); + Known = KnownBits::ashr(Known, Known2, /*ShAmtNonZero=*/false, + Op->getFlags().hasExact()); break; case ISD::FSHL: case ISD::FSHR: @@ -3613,32 +3762,42 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } } } - } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { - // If this is a ZEXTLoad and we are looking at the loaded value. - EVT VT = LD->getMemoryVT(); - unsigned MemBits = VT.getScalarSizeInBits(); - Known.Zero.setBitsFrom(MemBits); - } else if (const MDNode *Ranges = LD->getRanges()) { - EVT VT = LD->getValueType(0); - - // TODO: Handle for extending loads - if (LD->getExtensionType() == ISD::NON_EXTLOAD) { + } else if (Op.getResNo() == 0) { + KnownBits Known0(!LD->getMemoryVT().isScalableVT() + ? LD->getMemoryVT().getFixedSizeInBits() + : BitWidth); + EVT VT = Op.getValueType(); + // Fill in any known bits from range information. There are 3 types being + // used: the result VT (same vector elt size as BitWidth), the loaded + // MemoryVT (which may or may not be a vector) and the range metadata's + // original VT. The range metadata needs the full range (i.e. + // MemoryVT().getSizeInBits()), which is truncated to the correct elt size + // if it is known. These are then extended to the original VT sizes below. + if (const MDNode *MD = LD->getRanges()) { + computeKnownBitsFromRangeMetadata(*MD, Known0); if (VT.isVector()) { // Handle truncation to the first demanded element. // TODO: Figure out which demanded elements are covered if (DemandedElts != 1 || !getDataLayout().isLittleEndian()) break; + Known0 = Known0.trunc(BitWidth); + } + } - // Handle the case where a load has a vector type, but scalar memory - // with an attached range. - EVT MemVT = LD->getMemoryVT(); - KnownBits KnownFull(MemVT.getSizeInBits()); + if (LD->getMemoryVT().isVector()) + Known0 = Known0.trunc(LD->getMemoryVT().getScalarSizeInBits()); - computeKnownBitsFromRangeMetadata(*Ranges, KnownFull); - Known = KnownFull.trunc(BitWidth); - } else - computeKnownBitsFromRangeMetadata(*Ranges, Known); - } + // Extend the Known bits from memory to the size of the result.
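For reference, the three extension flavours chosen below behave like this on a concrete i8 load whose range metadata proves a value in [0, 32) — the example values are mine:

#include "llvm/Support/KnownBits.h"
using namespace llvm;

void extendLoadKnownBits() {
  KnownBits K8(8);
  K8.Zero.setBitsFrom(5);      // range [0,32): bits [5,8) known zero
  KnownBits Z = K8.zext(32);   // zextload: bits [5,32) known zero
  KnownBits S = K8.sext(32);   // sextload: the sign bit is known zero here,
                               // so its copies are known zero as well
  KnownBits A = K8.anyext(32); // extload: bits [8,32) stay unknown
  (void)Z; (void)S; (void)A;
}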
+ if (ISD::isZEXTLoad(Op.getNode())) + Known = Known0.zext(BitWidth); + else if (ISD::isSEXTLoad(Op.getNode())) + Known = Known0.sext(BitWidth); + else if (ISD::isEXTLoad(Op.getNode())) + Known = Known0.anyext(BitWidth); + else + Known = Known0; + assert(Known.getBitWidth() == BitWidth); + return Known; } break; } @@ -3721,8 +3880,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, SDNodeFlags Flags = Op.getNode()->getFlags(); Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - Known = KnownBits::computeForAddSub(Op.getOpcode() == ISD::ADD, - Flags.hasNoSignedWrap(), Known, Known2); + Known = KnownBits::computeForAddSub( + Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(), + Flags.hasNoUnsignedWrap(), Known, Known2); break; } case ISD::USUBO: @@ -3900,12 +4060,14 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, case ISD::ABS: { Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known = Known2.abs(); + Known.Zero.setHighBits( + ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1) - 1); break; } case ISD::USUBSAT: { - // The result of usubsat will never be larger than the LHS. - Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - Known.Zero.setHighBits(Known2.countMinLeadingZeros()); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = KnownBits::usub_sat(Known, Known2); break; } case ISD::UMIN: { @@ -3972,6 +4134,18 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; } + case ISD::UINT_TO_FP: { + Known.makeNonNegative(); + break; + } + case ISD::SINT_TO_FP: { + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + if (Known2.isNonNegative()) + Known.makeNonNegative(); + else if (Known2.isNegative()) + Known.makeNegative(); + break; + } case ISD::FP_TO_UINT_SAT: { // FP_TO_UINT_SAT produces an unsigned value that fits in the saturating VT. EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); @@ -4011,6 +4185,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, if (Op.getResNo() == 0) { if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND) Known.Zero.setBitsFrom(MemBits); + else if (Op->getOpcode() == ISD::ATOMIC_LOAD && + cast<AtomicSDNode>(Op)->getExtensionType() == ISD::ZEXTLOAD) + Known.Zero.setBitsFrom(MemBits); } break; } @@ -4037,7 +4214,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; } - assert(!Known.hasConflict() && "Bits known to be one AND zero?"); return Known; } @@ -4233,21 +4409,15 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const { return isKnownToBeAPowerOfTwo(Val.getOperand(2), Depth + 1) && isKnownToBeAPowerOfTwo(Val.getOperand(1), Depth + 1); - if (Val.getOpcode() == ISD::AND) { - // Looking for `x & -x` pattern: - // If x == 0: - // x & -x -> 0 - // If x != 0: - // x & -x -> non-zero pow2 - // so if we find the pattern return whether we know `x` is non-zero. 
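The replacement below is one of the first uses of the SDPatternMatch utilities newly pulled in at the top of this file; the matcher reads like the IR-level PatternMatch idiom. Roughly, as a standalone helper (the function name is mine):

#include "llvm/CodeGen/SDPatternMatch.h"
using namespace llvm;
using namespace llvm::SDPatternMatch;

// m_Value(X) binds the first AND operand; m_Deferred(X) then requires the
// negated operand to be that same value, and m_And tries both operand
// orders, so (and X, (sub 0, X)) matches in either commutation.
static bool isMaskOfNegatedSelf(SDValue Val, SDValue &X) {
  return sd_match(Val, m_And(m_Value(X), m_Neg(m_Deferred(X))));
}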
- for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) { - SDValue NegOp = Val.getOperand(OpIdx); - if (NegOp.getOpcode() == ISD::SUB && - NegOp.getOperand(1) == Val.getOperand(1 - OpIdx) && - isNullOrNullSplat(NegOp.getOperand(0))) - return isKnownNeverZero(Val.getOperand(1 - OpIdx), Depth); - } - } + // Looking for `x & -x` pattern: + // If x == 0: + // x & -x -> 0 + // If x != 0: + // x & -x -> non-zero pow2 + // so if we find the pattern return whether we know `x` is non-zero. + SDValue X; + if (sd_match(Val, m_And(m_Value(X), m_Neg(m_Deferred(X))))) + return isKnownNeverZero(X, Depth); if (Val.getOpcode() == ISD::ZERO_EXTEND) return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1); @@ -4257,6 +4427,16 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const { return false; } +bool SelectionDAG::isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth) const { + if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Val, true)) + return C1->getValueAPF().getExactLog2Abs() >= 0; + + if (Val.getOpcode() == ISD::UINT_TO_FP || Val.getOpcode() == ISD::SINT_TO_FP) + return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1); + + return false; +} + unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { EVT VT = Op.getValueType(); @@ -4431,17 +4611,38 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::SRA: Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); // SRA X, C -> adds C sign bits. - if (const APInt *ShAmt = - getValidMinimumShiftAmountConstant(Op, DemandedElts)) - Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits); + if (std::optional<uint64_t> ShAmt = + getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1)) + Tmp = std::min<uint64_t>(Tmp + *ShAmt, VTBits); return Tmp; case ISD::SHL: - if (const APInt *ShAmt = - getValidMaximumShiftAmountConstant(Op, DemandedElts)) { + if (std::optional<ConstantRange> ShAmtRange = + getValidShiftAmountRange(Op, DemandedElts, Depth + 1)) { + uint64_t MaxShAmt = ShAmtRange->getUnsignedMax().getZExtValue(); + uint64_t MinShAmt = ShAmtRange->getUnsignedMin().getZExtValue(); + // Try to look through ZERO/SIGN/ANY_EXTEND. If all extended bits are + // shifted out, then we can compute the number of sign bits for the + // operand being extended. A future improvement could be to pass along the + // "shifted left by" information in the recursive calls to + // ComputeKnownSignBits. Allowing us to handle this more generically. + if (ISD::isExtOpcode(Op.getOperand(0).getOpcode())) { + SDValue Ext = Op.getOperand(0); + EVT ExtVT = Ext.getValueType(); + SDValue Extendee = Ext.getOperand(0); + EVT ExtendeeVT = Extendee.getValueType(); + uint64_t SizeDifference = + ExtVT.getScalarSizeInBits() - ExtendeeVT.getScalarSizeInBits(); + if (SizeDifference <= MinShAmt) { + Tmp = SizeDifference + + ComputeNumSignBits(Extendee, DemandedElts, Depth + 1); + if (MaxShAmt < Tmp) + return Tmp - MaxShAmt; + } + } // shl destroys sign bits, ensure it doesn't shift out all sign bits. Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); - if (ShAmt->ult(Tmp)) - return Tmp - ShAmt->getZExtValue(); + if (MaxShAmt < Tmp) + return Tmp - MaxShAmt; } break; case ISD::AND: @@ -4624,6 +4825,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, (VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1); return OutValidBits > VTBits ? 
1 : VTBits - OutValidBits + 1; } + case ISD::AVGCEILS: + case ISD::AVGFLOORS: + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + if (Tmp == 1) + return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); + return std::min(Tmp, Tmp2); case ISD::SREM: // The sign bit is the LHS's sign bit, except when the result of the // remainder is zero. The magnitude of the result should be less than or @@ -4822,6 +5030,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return VTBits - Tmp + 1; if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND) return VTBits - Tmp; + if (Op->getOpcode() == ISD::ATOMIC_LOAD) { + ISD::LoadExtType ETy = cast<AtomicSDNode>(Op)->getExtensionType(); + if (ETy == ISD::SEXTLOAD) + return VTBits - Tmp + 1; + if (ETy == ISD::ZEXTLOAD) + return VTBits - Tmp; + } } break; } @@ -4943,9 +5158,11 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, return true; switch (Opcode) { + case ISD::CONDCODE: case ISD::VALUETYPE: case ISD::FrameIndex: case ISD::TargetFrameIndex: + case ISD::CopyFromReg: return true; case ISD::UNDEF: @@ -4963,6 +5180,24 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, } return true; + case ISD::VECTOR_SHUFFLE: { + APInt DemandedLHS, DemandedRHS; + auto *SVN = cast<ShuffleVectorSDNode>(Op); + if (!getShuffleDemandedElts(DemandedElts.getBitWidth(), SVN->getMask(), + DemandedElts, DemandedLHS, DemandedRHS, + /*AllowUndefElts=*/false)) + return false; + if (!DemandedLHS.isZero() && + !isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), DemandedLHS, + PoisonOnly, Depth + 1)) + return false; + if (!DemandedRHS.isZero() && + !isGuaranteedNotToBeUndefOrPoison(Op.getOperand(1), DemandedRHS, + PoisonOnly, Depth + 1)) + return false; + return true; + } + // TODO: Search for noundef attributes from library functions. // TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef. @@ -4978,8 +5213,9 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, // If Op can't create undef/poison and none of its operands are undef/poison // then Op is never undef/poison. - // NOTE: TargetNodes should handle this in themselves in - // isGuaranteedNotToBeUndefOrPoisonForTargetNode. + // NOTE: TargetNodes can handle this in themselves in + // isGuaranteedNotToBeUndefOrPoisonForTargetNode or let + // TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode handle it. return !canCreateUndefOrPoison(Op, PoisonOnly, /*ConsiderFlags*/ true, Depth) && all_of(Op->ops(), [&](SDValue V) { @@ -5010,11 +5246,24 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, if (VT.isScalableVector()) return true; + if (ConsiderFlags && Op->hasPoisonGeneratingFlags()) + return true; + unsigned Opcode = Op.getOpcode(); switch (Opcode) { case ISD::FREEZE: case ISD::CONCAT_VECTORS: case ISD::INSERT_SUBVECTOR: + case ISD::SADDSAT: + case ISD::UADDSAT: + case ISD::SSUBSAT: + case ISD::USUBSAT: + case ISD::MULHU: + case ISD::MULHS: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: case ISD::AND: case ISD::XOR: case ISD::ROTL: @@ -5035,35 +5284,66 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::BUILD_PAIR: return false; - // Matches hasPoisonGeneratingFlags(). - case ISD::ZERO_EXTEND: - return ConsiderFlags && Op->getFlags().hasNonNeg(); + case ISD::SELECT_CC: + case ISD::SETCC: { + // Integer setcc cannot create undef or poison. 
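One piece of context the new code leans on but does not restate: in the ISD::CondCode enum the ordered/unordered FP predicates (SETOEQ ... SETTRUE) occupy values 0x0-0xF, while the "don't care about NaN" forms (SETEQ, SETLT, ...) all have bit 4 set, which is what the 0x10 test below keys on. As a named helper — the name and framing are mine:

#include "llvm/CodeGen/ISDOpcodes.h"
using namespace llvm;

// True for SETEQ, SETNE, SETLT, ... - the forms that assume a NaN operand
// cannot occur. If such a code survives on an FP compare, a NaN input can
// produce poison, which is why the switch answers conservatively.
static bool isNoNaNCondCode(ISD::CondCode CC) {
  return ((unsigned)CC & 0x10U) != 0;
}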
+ if (Op.getOperand(0).getValueType().isInteger()) + return false; + // FP compares are more complicated. They can create poison for nan/infinity + // based on options and flags. The options and flags also cause special + // nonan condition codes to be used. Those condition codes may be preserved + // even if the nonan flag is dropped somewhere. + unsigned CCOp = Opcode == ISD::SETCC ? 2 : 4; + ISD::CondCode CCCode = cast<CondCodeSDNode>(Op.getOperand(CCOp))->get(); + if (((unsigned)CCCode & 0x10U)) + return true; + + const TargetOptions &Options = getTarget().Options; + return Options.NoNaNsFPMath || Options.NoInfsFPMath; + } + + case ISD::OR: + case ISD::ZERO_EXTEND: case ISD::ADD: case ISD::SUB: case ISD::MUL: - // Matches hasPoisonGeneratingFlags(). - return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() || - Op->getFlags().hasNoUnsignedWrap()); + // No poison except from flags (which is handled above) + return false; case ISD::SHL: - // If the max shift amount isn't in range, then the shift can create poison. - if (!getValidMaximumShiftAmountConstant(Op, DemandedElts)) - return true; - - // Matches hasPoisonGeneratingFlags(). - return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() || - Op->getFlags().hasNoUnsignedWrap()); + case ISD::SRL: + case ISD::SRA: + // If the max shift amount isn't in range, then the shift can + // create poison. + return !isGuaranteedNotToBeUndefOrPoison(Op.getOperand(1), DemandedElts, + PoisonOnly, Depth + 1) || + !getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1); - // Matches hasPoisonGeneratingFlags(). - case ISD::OR: - return ConsiderFlags && Op->getFlags().hasDisjoint(); + case ISD::SCALAR_TO_VECTOR: + // Check if we demand any upper (undef) elements. + return !PoisonOnly && DemandedElts.ugt(1); - case ISD::INSERT_VECTOR_ELT:{ + case ISD::INSERT_VECTOR_ELT: + case ISD::EXTRACT_VECTOR_ELT: { // Ensure that the element index is in bounds. EVT VecVT = Op.getOperand(0).getValueType(); - KnownBits KnownIdx = computeKnownBits(Op.getOperand(2), Depth + 1); - return KnownIdx.getMaxValue().uge(VecVT.getVectorMinNumElements()); + SDValue Idx = Op.getOperand(Opcode == ISD::INSERT_VECTOR_ELT ? 2 : 1); + if (isGuaranteedNotToBeUndefOrPoison(Idx, DemandedElts, PoisonOnly, + Depth + 1)) { + KnownBits KnownIdx = computeKnownBits(Idx, Depth + 1); + return KnownIdx.getMaxValue().uge(VecVT.getVectorMinNumElements()); + } + return true; + } + + case ISD::VECTOR_SHUFFLE: { + // Check for any demanded shuffle element that is undef. 
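Concretely (lane values are my own): a shuffle with mask {0, -1, 5, 7} uses -1 as the "no source lane" sentinel, so lane 1 of the result may be undef whenever it is demanded. A scalar model of the loop that follows:

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;

static bool shuffleMayCreateUndef(ArrayRef<int> Mask, const APInt &Demanded) {
  for (auto [Idx, Elt] : enumerate(Mask))
    if (Elt < 0 && Demanded[Idx]) // demanded lane with no source: undef
      return true;
  return false;
}
// shuffleMayCreateUndef({0, -1, 5, 7}, APInt::getAllOnes(4)) == true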
+ auto *SVN = cast<ShuffleVectorSDNode>(Op); + for (auto [Idx, Elt] : enumerate(SVN->getMask())) + if (Elt < 0 && DemandedElts[Idx]) + return true; + return false; } default: @@ -5079,26 +5359,19 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, return true; } -bool SelectionDAG::isADDLike(SDValue Op) const { +bool SelectionDAG::isADDLike(SDValue Op, bool NoWrap) const { unsigned Opcode = Op.getOpcode(); if (Opcode == ISD::OR) return Op->getFlags().hasDisjoint() || haveNoCommonBitsSet(Op.getOperand(0), Op.getOperand(1)); if (Opcode == ISD::XOR) - return isMinSignedConstant(Op.getOperand(1)); + return !NoWrap && isMinSignedConstant(Op.getOperand(1)); return false; } bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { - if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) || - !isa<ConstantSDNode>(Op.getOperand(1))) - return false; - - if (Op.getOpcode() == ISD::OR && - !MaskedValueIsZero(Op.getOperand(0), Op.getConstantOperandAPInt(1))) - return false; - - return true; + return Op.getNumOperands() == 2 && isa<ConstantSDNode>(Op.getOperand(1)) && + (Op.getOpcode() == ISD::ADD || isADDLike(Op)); } bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const { @@ -5124,6 +5397,13 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const case ISD::FREM: case ISD::FSIN: case ISD::FCOS: + case ISD::FTAN: + case ISD::FASIN: + case ISD::FACOS: + case ISD::FATAN: + case ISD::FSINH: + case ISD::FCOSH: + case ISD::FTANH: case ISD::FMA: case ISD::FMAD: { if (SNaN) @@ -5277,10 +5557,38 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const { return isKnownNeverZero(Op.getOperand(1), Depth + 1) || isKnownNeverZero(Op.getOperand(0), Depth + 1); - // TODO for smin/smax: If either operand is known negative/positive + // For smin/smax: If either operand is known negative/positive // respectively we don't need the other to be known at all. 
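The rewrite below implements that observation: smax can only move the result upward, so one strictly positive operand already forces a nonzero result, and dually smin with one known-negative operand. The first rule in isolation, as a sketch:

#include "llvm/Support/KnownBits.h"
using namespace llvm;

// smax(a, b) >= a and smax(a, b) >= b; if either side is provably > 0,
// the maximum is > 0 regardless of the other side. (The committed code
// also falls back to both-operands-nonzero checks.)
static bool smaxKnownNonZero(const KnownBits &A, const KnownBits &B) {
  return A.isStrictlyPositive() || B.isStrictlyPositive();
}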
- case ISD::SMAX: - case ISD::SMIN: + case ISD::SMAX: { + KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1); + if (Op1.isStrictlyPositive()) + return true; + + KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1); + if (Op0.isStrictlyPositive()) + return true; + + if (Op1.isNonZero() && Op0.isNonZero()) + return true; + + return isKnownNeverZero(Op.getOperand(1), Depth + 1) && + isKnownNeverZero(Op.getOperand(0), Depth + 1); + } + case ISD::SMIN: { + KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1); + if (Op1.isNegative()) + return true; + + KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1); + if (Op0.isNegative()) + return true; + + if (Op1.isNonZero() && Op0.isNonZero()) + return true; + + return isKnownNeverZero(Op.getOperand(1), Depth + 1) && + isKnownNeverZero(Op.getOperand(0), Depth + 1); + } case ISD::UMIN: return isKnownNeverZero(Op.getOperand(1), Depth + 1) && isKnownNeverZero(Op.getOperand(0), Depth + 1); @@ -5343,11 +5651,27 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const { case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND: return isKnownNeverZero(Op.getOperand(0), Depth + 1); + case ISD::VSCALE: { + const Function &F = getMachineFunction().getFunction(); + const APInt &Multiplier = Op.getConstantOperandAPInt(0); + ConstantRange CR = + getVScaleRange(&F, Op.getScalarValueSizeInBits()).multiply(Multiplier); + if (!CR.contains(APInt(CR.getBitWidth(), 0))) + return true; + break; + } } return computeKnownBits(Op, Depth).isNonZero(); } +bool SelectionDAG::cannotBeOrderedNegativeFP(SDValue Op) const { + if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Op, true)) + return !C1->isNegative(); + + return Op.getOpcode() == ISD::FABS; +} + bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { // Check the obvious case. if (A == B) return true; @@ -5555,14 +5879,14 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, /// Gets or creates the specified node. SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { + SDVTList VTs = getVTList(VT); FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, getVTList(VT), std::nullopt); + AddNodeIDNode(ID, Opcode, VTs, std::nullopt); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) return SDValue(E, 0); - auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), - getVTList(VT)); + auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); CSEMap.InsertNode(N, IP); InsertNode(N); @@ -5683,8 +6007,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, N1.getValueType().getVectorElementCount()) && "Vector element count mismatch!"); assert(N1.getValueType().bitsLT(VT) && "Invalid sext node, dst < src!"); - if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) - return getNode(OpOpcode, DL, VT, N1.getOperand(0)); + if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) { + SDNodeFlags Flags; + if (OpOpcode == ISD::ZERO_EXTEND) + Flags.setNonNeg(N1->getFlags().hasNonNeg()); + return getNode(OpOpcode, DL, VT, N1.getOperand(0), Flags); + } if (OpOpcode == ISD::UNDEF) // sext(undef) = 0, because the top bits will all be the same. 
return getConstant(0, DL, VT); @@ -5700,8 +6028,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, N1.getValueType().getVectorElementCount()) && "Vector element count mismatch!"); assert(N1.getValueType().bitsLT(VT) && "Invalid zext node, dst < src!"); - if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x) - return getNode(ISD::ZERO_EXTEND, DL, VT, N1.getOperand(0)); + if (OpOpcode == ISD::ZERO_EXTEND) { // (zext (zext x)) -> (zext x) + SDNodeFlags Flags; + Flags.setNonNeg(N1->getFlags().hasNonNeg()); + return getNode(ISD::ZERO_EXTEND, DL, VT, N1.getOperand(0), Flags); + } if (OpOpcode == ISD::UNDEF) // zext(undef) = 0, because the top bits will be zero. return getConstant(0, DL, VT); @@ -5737,9 +6068,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(N1.getValueType().bitsLT(VT) && "Invalid anyext node, dst < src!"); if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || - OpOpcode == ISD::ANY_EXTEND) + OpOpcode == ISD::ANY_EXTEND) { + SDNodeFlags Flags; + if (OpOpcode == ISD::ZERO_EXTEND) + Flags.setNonNeg(N1->getFlags().hasNonNeg()); // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x) - return getNode(OpOpcode, DL, VT, N1.getOperand(0)); + return getNode(OpOpcode, DL, VT, N1.getOperand(0), Flags); + } if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); @@ -5874,6 +6209,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (N1.getValueType().getScalarType() == MVT::i1) return getNode(ISD::VECREDUCE_AND, DL, VT, N1); break; + case ISD::SPLAT_VECTOR: + assert(VT.isVector() && "Wrong return type!"); + // FIXME: Hexagon uses i32 scalar for a floating point zero vector so allow + // that for now. + assert((VT.getVectorElementType() == N1.getValueType() || + (VT.isFloatingPoint() && N1.getValueType() == MVT::i32) || + (VT.getVectorElementType().isInteger() && + N1.getValueType().isInteger() && + VT.getVectorElementType().bitsLE(N1.getValueType()))) && + "Wrong operand type!"); + break; } SDNode *N; @@ -5943,50 +6289,25 @@ static std::optional<APInt> FoldValue(unsigned Opcode, const APInt &C1, if (!C2.getBoolValue()) break; return C1.srem(C2); - case ISD::MULHS: { - unsigned FullWidth = C1.getBitWidth() * 2; - APInt C1Ext = C1.sext(FullWidth); - APInt C2Ext = C2.sext(FullWidth); - return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth()); - } - case ISD::MULHU: { - unsigned FullWidth = C1.getBitWidth() * 2; - APInt C1Ext = C1.zext(FullWidth); - APInt C2Ext = C2.zext(FullWidth); - return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth()); - } - case ISD::AVGFLOORS: { - unsigned FullWidth = C1.getBitWidth() + 1; - APInt C1Ext = C1.sext(FullWidth); - APInt C2Ext = C2.sext(FullWidth); - return (C1Ext + C2Ext).extractBits(C1.getBitWidth(), 1); - } - case ISD::AVGFLOORU: { - unsigned FullWidth = C1.getBitWidth() + 1; - APInt C1Ext = C1.zext(FullWidth); - APInt C2Ext = C2.zext(FullWidth); - return (C1Ext + C2Ext).extractBits(C1.getBitWidth(), 1); - } - case ISD::AVGCEILS: { - unsigned FullWidth = C1.getBitWidth() + 1; - APInt C1Ext = C1.sext(FullWidth); - APInt C2Ext = C2.sext(FullWidth); - return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1); - } - case ISD::AVGCEILU: { - unsigned FullWidth = C1.getBitWidth() + 1; - APInt C1Ext = C1.zext(FullWidth); - APInt C2Ext = C2.zext(FullWidth); - return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1); - } + case ISD::AVGFLOORS: + return APIntOps::avgFloorS(C1, C2); + case ISD::AVGFLOORU: + return 
APIntOps::avgFloorU(C1, C2); + case ISD::AVGCEILS: + return APIntOps::avgCeilS(C1, C2); + case ISD::AVGCEILU: + return APIntOps::avgCeilU(C1, C2); case ISD::ABDS: - return APIntOps::smax(C1, C2) - APIntOps::smin(C1, C2); + return APIntOps::abds(C1, C2); case ISD::ABDU: - return APIntOps::umax(C1, C2) - APIntOps::umin(C1, C2); + return APIntOps::abdu(C1, C2); + case ISD::MULHS: + return APIntOps::mulhs(C1, C2); + case ISD::MULHU: + return APIntOps::mulhu(C1, C2); } return std::nullopt; } - // Handle constant folding with UNDEF. // TODO: Handle more cases. static std::optional<APInt> FoldValueWithUndef(unsigned Opcode, const APInt &C1, @@ -6049,7 +6370,8 @@ bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) { } SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, - EVT VT, ArrayRef<SDValue> Ops) { + EVT VT, ArrayRef<SDValue> Ops, + SDNodeFlags Flags) { // If the opcode is a target-specific ISD node, there's nothing we can // do here and the operand rules may not line up with the below, so // bail early. @@ -6267,17 +6589,17 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, ElementCount NumElts = VT.getVectorElementCount(); - // See if we can fold through bitcasted integer ops. + // See if we can fold through any bitcasted integer ops. if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() && Ops[0].getValueType() == VT && Ops[1].getValueType() == VT && - Ops[0].getOpcode() == ISD::BITCAST && - Ops[1].getOpcode() == ISD::BITCAST) { + (Ops[0].getOpcode() == ISD::BITCAST || + Ops[1].getOpcode() == ISD::BITCAST)) { SDValue N1 = peekThroughBitcasts(Ops[0]); SDValue N2 = peekThroughBitcasts(Ops[1]); auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); auto *BV2 = dyn_cast<BuildVectorSDNode>(N2); - EVT BVVT = N1.getValueType(); - if (BV1 && BV2 && BVVT.isInteger() && BVVT == N2.getValueType()) { + if (BV1 && BV2 && N1.getValueType().isInteger() && + N2.getValueType().isInteger()) { bool IsLE = getDataLayout().isLittleEndian(); unsigned EltBits = VT.getScalarSizeInBits(); SmallVector<APInt> RawBits1, RawBits2; @@ -6293,15 +6615,22 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, RawBits.push_back(*Fold); } if (RawBits.size() == NumElts.getFixedValue()) { - // We have constant folded, but we need to cast this again back to - // the original (possibly legalized) type. + // We have constant folded, but we might need to cast this again back + // to the original (possibly legalized) type. + EVT BVVT, BVEltVT; + if (N1.getValueType() == VT) { + BVVT = N1.getValueType(); + BVEltVT = BV1->getOperand(0).getValueType(); + } else { + BVVT = N2.getValueType(); + BVEltVT = BV2->getOperand(0).getValueType(); + } + unsigned BVEltBits = BVEltVT.getSizeInBits(); SmallVector<APInt> DstBits; BitVector DstUndefs; BuildVectorSDNode::recastRawBits(IsLE, BVVT.getScalarSizeInBits(), DstBits, RawBits, DstUndefs, BitVector(RawBits.size(), false)); - EVT BVEltVT = BV1->getOperand(0).getValueType(); - unsigned BVEltBits = BVEltVT.getSizeInBits(); SmallVector<SDValue> Ops(DstBits.size(), getUNDEF(BVEltVT)); for (unsigned I = 0, E = DstBits.size(); I != E; ++I) { if (DstUndefs[I]) @@ -6406,7 +6735,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, } // Constant fold the scalar operands. - SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps); + SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags); // Legalize the (integer) scalar constant if necessary. 
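Looking back at the FoldValue hunk above: the open-coded widen-add-extract sequences are now single APIntOps calls. Their semantics on a couple of i8 values I picked:

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

void avgSemantics() {
  APInt A(8, 250), B(8, 100), C(8, 251);
  // Computed without losing the carry, unlike a plain (A + B).lshr(1).
  assert(APIntOps::avgFloorU(A, B) == APInt(8, 175)); // (250+100)/2
  assert(APIntOps::avgCeilU(A, B) == APInt(8, 175));  // 350 is even
  assert(APIntOps::avgCeilU(C, B) == APInt(8, 176));  // ceil(351/2)
  assert(APIntOps::abdu(A, B) == APInt(8, 150));      // |250 - 100|
}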
if (LegalSVT != SVT) @@ -6512,16 +6841,17 @@ SDValue SelectionDAG::getAssertAlign(const SDLoc &DL, SDValue Val, Align A) { if (A == Align(1)) return Val; + SDVTList VTs = getVTList(Val.getValueType()); FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::AssertAlign, getVTList(Val.getValueType()), {Val}); + AddNodeIDNode(ID, ISD::AssertAlign, VTs, {Val}); ID.AddInteger(A.value()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) return SDValue(E, 0); - auto *N = newSDNode<AssertAlignSDNode>(DL.getIROrder(), DL.getDebugLoc(), - Val.getValueType(), A); + auto *N = + newSDNode<AssertAlignSDNode>(DL.getIROrder(), DL.getDebugLoc(), VTs, A); createOperands(N, {Val}); CSEMap.InsertNode(N, IP); @@ -6660,6 +6990,25 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return getNode(ISD::AND, DL, VT, N1, getNOT(DL, N2, VT)); } break; + case ISD::SCMP: + case ISD::UCMP: + assert(N1.getValueType() == N2.getValueType() && + "Types of operands of UCMP/SCMP must match"); + assert(N1.getValueType().isVector() == VT.isVector() && + "Operands and return type must both be scalars or vectors"); + if (VT.isVector()) + assert(VT.getVectorElementCount() == + N1.getValueType().getVectorElementCount() && + "Result and operands must have the same number of elements"); + break; + case ISD::AVGFLOORS: + case ISD::AVGFLOORU: + case ISD::AVGCEILS: + case ISD::AVGCEILU: + assert(VT.isInteger() && "This operator does not apply to FP types!"); + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + break; case ISD::ABDS: case ISD::ABDU: assert(VT.isInteger() && "This operator does not apply to FP types!"); @@ -6976,7 +7325,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } // Perform trivial constant folding. - if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2})) + if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags)) return SV; // Canonicalize an UNDEF to the RHS, even over a constant. @@ -7207,6 +7556,22 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (N1.getValueType() == VT) return N1; break; + case ISD::VECTOR_COMPRESS: { + [[maybe_unused]] EVT VecVT = N1.getValueType(); + [[maybe_unused]] EVT MaskVT = N2.getValueType(); + [[maybe_unused]] EVT PassthruVT = N3.getValueType(); + assert(VT == VecVT && "Vector and result type don't match."); + assert(VecVT.isVector() && MaskVT.isVector() && PassthruVT.isVector() && + "All inputs must be vectors."); + assert(VecVT == PassthruVT && "Vector and passthru types don't match."); + assert(VecVT.getVectorElementCount() == MaskVT.getVectorElementCount() && + "Vector and mask must have same number of elements."); + + if (N1.isUndef() || N2.isUndef()) + return N3; + + break; + } } // Memoize node if it doesn't produce a glue result.
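For the VECTOR_COMPRESS case above, a scalar model of the node's semantics may help (lane count and values are illustrative): the mask-selected lanes are packed to the front and every remaining lane is taken from the passthru operand, which is also why an undef vector or mask simply folds to N3.

#include <array>

// compress(<a,b,c,d>, <1,0,1,0>, <p,q,r,s>) == <a,c,r,s>
std::array<int, 4> compress(std::array<int, 4> Vec, std::array<bool, 4> Mask,
                            std::array<int, 4> Passthru) {
  std::array<int, 4> Out = Passthru; // tail lanes come from the passthru
  unsigned J = 0;
  for (unsigned I = 0; I < 4; ++I)
    if (Mask[I])
      Out[J++] = Vec[I]; // selected lanes, packed to the front
  return Out;
}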
@@ -7888,12 +8253,11 @@ static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI, } } -SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, - SDValue Src, SDValue Size, Align Alignment, - bool isVol, bool AlwaysInline, bool isTailCall, - MachinePointerInfo DstPtrInfo, - MachinePointerInfo SrcPtrInfo, - const AAMDNodes &AAInfo, AAResults *AA) { +SDValue SelectionDAG::getMemcpy( + SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, + Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, + std::optional<bool> OverrideTailCall, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo, AAResults *AA) { // Check to see if we should lower the memcpy to loads and stores first. // For cases within the target-specified limits, this is the best choice. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -7948,6 +8312,18 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, Entry.Node = Size; Args.push_back(Entry); // FIXME: pass in SDLoc TargetLowering::CallLoweringInfo CLI(*this); + bool IsTailCall = false; + if (OverrideTailCall.has_value()) { + IsTailCall = *OverrideTailCall; + } else { + bool LowersToMemcpy = + TLI->getLibcallName(RTLIB::MEMCPY) == StringRef("memcpy"); + bool ReturnsFirstArg = CI && funcReturnsFirstArgOfCall(*CI); + IsTailCall = CI && CI->isTailCall() && + isInTailCallPosition(*CI, getTarget(), + ReturnsFirstArg && LowersToMemcpy); + } + CLI.setDebugLoc(dl) .setChain(Chain) .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY), @@ -7956,7 +8332,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, TLI->getPointerTy(getDataLayout())), std::move(Args)) .setDiscardResult() - .setTailCall(isTailCall); + .setTailCall(IsTailCall); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; @@ -8004,7 +8380,8 @@ SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl, SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, - bool isVol, bool isTailCall, + bool isVol, const CallInst *CI, + std::optional<bool> OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo, AAResults *AA) { @@ -8050,6 +8427,19 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, Entry.Node = Size; Args.push_back(Entry); // FIXME: pass in SDLoc TargetLowering::CallLoweringInfo CLI(*this); + + bool IsTailCall = false; + if (OverrideTailCall.has_value()) { + IsTailCall = *OverrideTailCall; + } else { + bool LowersToMemmove = + TLI->getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove"); + bool ReturnsFirstArg = CI && funcReturnsFirstArgOfCall(*CI); + IsTailCall = CI && CI->isTailCall() && + isInTailCallPosition(*CI, getTarget(), + ReturnsFirstArg && LowersToMemmove); + } + CLI.setDebugLoc(dl) .setChain(Chain) .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE), @@ -8058,7 +8448,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, TLI->getPointerTy(getDataLayout())), std::move(Args)) .setDiscardResult() - .setTailCall(isTailCall); + .setTailCall(IsTailCall); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; @@ -8106,7 +8496,8 @@ SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl, SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue 
Size, Align Alignment, - bool isVol, bool AlwaysInline, bool isTailCall, + bool isVol, bool AlwaysInline, + const CallInst *CI, MachinePointerInfo DstPtrInfo, const AAMDNodes &AAInfo) { // Check to see if we should lower the memset to stores first. @@ -8166,8 +8557,9 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, return Entry; }; + bool UseBZero = isNullConstant(Src) && BzeroName; // If zeroing out and bzero is present, use it. - if (isNullConstant(Src) && BzeroName) { + if (UseBZero) { TargetLowering::ArgListTy Args; Args.push_back(CreateEntry(Dst, PointerType::getUnqual(Ctx))); Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx))); @@ -8185,8 +8577,16 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, TLI->getPointerTy(DL)), std::move(Args)); } - - CLI.setDiscardResult().setTailCall(isTailCall); + bool LowersToMemset = + TLI->getLibcallName(RTLIB::MEMSET) == StringRef("memset"); + // If we're going to use bzero, make sure not to tail call unless the + // subsequent return doesn't need a value, as bzero doesn't return the first + // arg unlike memset. + bool ReturnsFirstArg = CI && funcReturnsFirstArgOfCall(*CI) && !UseBZero; + bool IsTailCall = + CI && CI->isTailCall() && + isInTailCallPosition(*CI, getTarget(), ReturnsFirstArg && LowersToMemset); + CLI.setDiscardResult().setTailCall(IsTailCall); std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; @@ -8324,11 +8724,10 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) { SDValue SelectionDAG::getMemIntrinsicNode( unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, - MachineMemOperand::Flags Flags, uint64_t Size, const AAMDNodes &AAInfo) { - if (!Size && MemVT.isScalableVector()) - Size = MemoryLocation::UnknownSize; - else if (!Size) - Size = MemVT.getStoreSize(); + MachineMemOperand::Flags Flags, LocationSize Size, + const AAMDNodes &AAInfo) { + if (Size.hasValue() && !Size.getValue()) + Size = LocationSize::precise(MemVT.getStoreSize()); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = @@ -8490,7 +8889,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, if (PtrInfo.V.isNull()) PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); - uint64_t Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize()); + LocationSize Size = LocationSize::precise(MemVT.getStoreSize()); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo, Ranges); @@ -8611,8 +9010,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); MachineFunction &MF = getMachineFunction(); - uint64_t Size = - MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize()); + LocationSize Size = LocationSize::precise(Val.getValueType().getStoreSize()); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); return getStore(Chain, dl, Val, Ptr, MMO); @@ -8665,8 +9063,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()), - Alignment, AAInfo); + PtrInfo, MMOFlags, LocationSize::precise(SVT.getStoreSize()), Alignment, + AAInfo); 
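The getMemcpy/getMemmove/getMemset signatures above drop the caller-computed isTailCall flag and decide tail-call eligibility at lowering time: the original CallInst must be a tail call in tail position, and when the caller returns the call's result, the emitted routine must be one that really returns its first argument (hence the LowersToMemcpy/LowersToMemmove/LowersToMemset checks, and the !UseBZero guard, since bzero returns nothing). In source terms the cases look like this (plain C++ illustration, not code from the patch):

    #include <cstddef>
    #include <cstring>

    // memcpy returns Dst, so a call in tail position may become "jmp memcpy"
    // even though its result feeds the caller's return.
    void *copyOut(void *Dst, const void *Src, std::size_t N) {
      return std::memcpy(Dst, Src, N);
    }

    // memset also returns Dst, but if a zeroing memset is lowered to bzero
    // the callee returns void, so with the result still used here the call
    // cannot be emitted as a tail call.
    void *zeroUsed(void *Dst, std::size_t N) {
      return std::memset(Dst, 0, N);
    }

    // With the result discarded, a bzero lowering can tail-call freely.
    void zeroDiscarded(void *Dst, std::size_t N) {
      std::memset(Dst, 0, N);
    }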
return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); } @@ -8760,7 +9158,7 @@ SDValue SelectionDAG::getLoadVP( if (PtrInfo.V.isNull()) PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); - uint64_t Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize()); + LocationSize Size = LocationSize::precise(MemVT.getStoreSize()); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo, Ranges); @@ -8913,8 +9311,8 @@ SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl, MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()), - Alignment, AAInfo); + PtrInfo, MMOFlags, LocationSize::precise(SVT.getStoreSize()), Alignment, + AAInfo); return getTruncStoreVP(Chain, dl, Val, Ptr, Mask, EVL, SVT, MMO, IsCompressing); } @@ -9001,29 +9399,6 @@ SDValue SelectionDAG::getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl, SDValue SelectionDAG::getStridedLoadVP( ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, - SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, - MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, - const MDNode *Ranges, bool IsExpanding) { - assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); - - MMOFlags |= MachineMemOperand::MOLoad; - assert((MMOFlags & MachineMemOperand::MOStore) == 0); - // If we don't have a PtrInfo, infer the trivial frame index case to simplify - // clients. - if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); - - uint64_t Size = MemoryLocation::UnknownSize; - MachineFunction &MF = getMachineFunction(); - MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, - Alignment, AAInfo, Ranges); - return getStridedLoadVP(AM, ExtType, VT, DL, Chain, Ptr, Offset, Stride, Mask, - EVL, MemVT, MMO, IsExpanding); -} - -SDValue SelectionDAG::getStridedLoadVP( - ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, - SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding) { bool Indexed = AM != ISD::UNINDEXED; assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!"); @@ -9055,17 +9430,6 @@ SDValue SelectionDAG::getStridedLoadVP( return V; } -SDValue SelectionDAG::getStridedLoadVP( - EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Stride, - SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, MaybeAlign Alignment, - MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, - const MDNode *Ranges, bool IsExpanding) { - SDValue Undef = getUNDEF(Ptr.getValueType()); - return getStridedLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, DL, Chain, Ptr, - Undef, Stride, Mask, EVL, PtrInfo, VT, Alignment, - MMOFlags, AAInfo, Ranges, IsExpanding); -} - SDValue SelectionDAG::getStridedLoadVP(EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Stride, SDValue Mask, SDValue EVL, @@ -9078,18 +9442,6 @@ SDValue SelectionDAG::getStridedLoadVP(EVT VT, const SDLoc &DL, SDValue Chain, SDValue SelectionDAG::getExtStridedLoadVP( ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT, SDValue Chain, - SDValue Ptr, SDValue Stride, SDValue Mask, SDValue EVL, - MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment, - MachineMemOperand::Flags MMOFlags, const 
AAMDNodes &AAInfo, - bool IsExpanding) { - SDValue Undef = getUNDEF(Ptr.getValueType()); - return getStridedLoadVP(ISD::UNINDEXED, ExtType, VT, DL, Chain, Ptr, Undef, - Stride, Mask, EVL, PtrInfo, MemVT, Alignment, - MMOFlags, AAInfo, nullptr, IsExpanding); -} - -SDValue SelectionDAG::getExtStridedLoadVP( - ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT, SDValue Chain, SDValue Ptr, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding) { SDValue Undef = getUNDEF(Ptr.getValueType()); @@ -9097,23 +9449,6 @@ SDValue SelectionDAG::getExtStridedLoadVP( Stride, Mask, EVL, MemVT, MMO, IsExpanding); } -SDValue SelectionDAG::getIndexedStridedLoadVP(SDValue OrigLoad, const SDLoc &DL, - SDValue Base, SDValue Offset, - ISD::MemIndexedMode AM) { - auto *SLD = cast<VPStridedLoadSDNode>(OrigLoad); - assert(SLD->getOffset().isUndef() && - "Strided load is already a indexed load!"); - // Don't propagate the invariant or dereferenceable flags. - auto MMOFlags = - SLD->getMemOperand()->getFlags() & - ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); - return getStridedLoadVP( - AM, SLD->getExtensionType(), OrigLoad.getValueType(), DL, SLD->getChain(), - Base, Offset, SLD->getStride(), SLD->getMask(), SLD->getVectorLength(), - SLD->getPointerInfo(), SLD->getMemoryVT(), SLD->getAlign(), MMOFlags, - SLD->getAAInfo(), nullptr, SLD->isExpandingLoad()); -} - SDValue SelectionDAG::getStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Stride, @@ -9150,26 +9485,6 @@ SDValue SelectionDAG::getStridedStoreVP(SDValue Chain, const SDLoc &DL, return V; } -SDValue SelectionDAG::getTruncStridedStoreVP( - SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Stride, - SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT SVT, - Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, - bool IsCompressing) { - assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); - - MMOFlags |= MachineMemOperand::MOStore; - assert((MMOFlags & MachineMemOperand::MOLoad) == 0); - - if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); - - MachineFunction &MF = getMachineFunction(); - MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, MMOFlags, MemoryLocation::UnknownSize, Alignment, AAInfo); - return getTruncStridedStoreVP(Chain, DL, Val, Ptr, Stride, Mask, EVL, SVT, - MMO, IsCompressing); -} - SDValue SelectionDAG::getTruncStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Stride, SDValue Mask, @@ -9219,38 +9534,6 @@ SDValue SelectionDAG::getTruncStridedStoreVP(SDValue Chain, const SDLoc &DL, return V; } -SDValue SelectionDAG::getIndexedStridedStoreVP(SDValue OrigStore, - const SDLoc &DL, SDValue Base, - SDValue Offset, - ISD::MemIndexedMode AM) { - auto *SST = cast<VPStridedStoreSDNode>(OrigStore); - assert(SST->getOffset().isUndef() && - "Strided store is already an indexed store!"); - SDVTList VTs = getVTList(Base.getValueType(), MVT::Other); - SDValue Ops[] = { - SST->getChain(), SST->getValue(), Base, Offset, SST->getStride(), - SST->getMask(), SST->getVectorLength()}; - FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops); - ID.AddInteger(SST->getMemoryVT().getRawBits()); - ID.AddInteger(SST->getRawSubclassData()); - ID.AddInteger(SST->getPointerInfo().getAddrSpace()); - void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) - return SDValue(E, 0); - - auto *N = 
newSDNode<VPStridedStoreSDNode>( - DL.getIROrder(), DL.getDebugLoc(), VTs, AM, SST->isTruncatingStore(), - SST->isCompressingStore(), SST->getMemoryVT(), SST->getMemOperand()); - createOperands(N, Ops); - - CSEMap.InsertNode(N, IP); - InsertNode(N); - SDValue V(N, 0); - NewSDValueDbgMsg(V, "Creating new node: ", this); - return V; -} - SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType) { @@ -9526,6 +9809,44 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, return V; } +SDValue SelectionDAG::getMaskedHistogram(SDVTList VTs, EVT MemVT, + const SDLoc &dl, ArrayRef<SDValue> Ops, + MachineMemOperand *MMO, + ISD::MemIndexType IndexType) { + assert(Ops.size() == 7 && "Incompatible number of operands"); + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, VTs, Ops); + ID.AddInteger(MemVT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData<MaskedHistogramSDNode>( + dl.getIROrder(), VTs, MemVT, MMO, IndexType)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast<MaskedHistogramSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + + auto *N = newSDNode<MaskedHistogramSDNode>(dl.getIROrder(), dl.getDebugLoc(), + VTs, MemVT, MMO, IndexType); + createOperands(N, Ops); + + assert(N->getMask().getValueType().getVectorElementCount() == + N->getIndex().getValueType().getVectorElementCount() && + "Vector width mismatch between mask and data"); + assert(isa<ConstantSDNode>(N->getScale()) && + N->getScale()->getAsAPIntVal().isPowerOf2() && + "Scale should be a constant power of 2"); + assert(N->getInc().getValueType().isInteger() && "Non-integer update value"); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + SDValue SelectionDAG::getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); @@ -9631,6 +9952,10 @@ SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) { if (ISD::matchUnaryPredicate(Y, isShiftTooBig, true)) return getUNDEF(X.getValueType()); + // shift i1/vXi1 X, Y --> X (any non-zero shift amount is undefined).
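getMaskedHistogram above CSEs and type-checks the new ISD::EXPERIMENTAL_VECTOR_HISTOGRAM node, whose effect is a per-lane read-modify-write. A scalar reference for the 'add' form, treating index and scale as plain element addressing (sketch only, not the target lowering):

    #include <cstddef>
    #include <cstdint>

    // Every enabled lane bumps the bucket it indexes; lanes that hit the
    // same bucket all accumulate, which is what makes this a histogram.
    void histogramAdd(int64_t *Buckets, const int64_t *Index, const bool *Mask,
                      int64_t Inc, std::size_t NumLanes) {
      for (std::size_t I = 0; I != NumLanes; ++I)
        if (Mask[I])
          Buckets[Index[I]] += Inc;
    }

As for the simplifyShift comment above: the i1 special case that follows holds because the only defined shift amount for a 1-bit value is zero, so any shift of an i1 that is not undefined must be the identity.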
+ if (X.getValueType().getScalarType() == MVT::i1) + return X; + return SDValue(); } @@ -9889,6 +10214,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, } break; } + case ISD::SADDO_CARRY: + case ISD::UADDO_CARRY: + case ISD::SSUBO_CARRY: + case ISD::USUBO_CARRY: + assert(VTList.NumVTs == 2 && Ops.size() == 3 && + "Invalid add/sub overflow op!"); + assert(VTList.VTs[0].isInteger() && VTList.VTs[1].isInteger() && + Ops[0].getValueType() == Ops[1].getValueType() && + Ops[0].getValueType() == VTList.VTs[0] && + Ops[2].getValueType() == VTList.VTs[1] && + "Binary operator types must match!"); + break; case ISD::SMUL_LOHI: case ISD::UMUL_LOHI: { assert(VTList.NumVTs == 2 && Ops.size() == 2 && "Invalid mul lo/hi op!"); @@ -11512,7 +11849,7 @@ SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op, raw_string_ostream ErrorFormatter(ErrorStr); ErrorFormatter << "Undefined external symbol "; ErrorFormatter << '"' << Symbol << '"'; - report_fatal_error(Twine(ErrorFormatter.str())); + report_fatal_error(Twine(ErrorStr)); } //===----------------------------------------------------------------------===// @@ -11524,6 +11861,10 @@ bool llvm::isNullConstant(SDValue V) { return Const != nullptr && Const->isZero(); } +bool llvm::isNullConstantOrUndef(SDValue V) { + return V.isUndef() || isNullConstant(V); +} + bool llvm::isNullFPConstant(SDValue V) { ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V); return Const != nullptr && Const->isZero() && !Const->isNegative(); @@ -11548,30 +11889,32 @@ bool llvm::isNeutralConstant(unsigned Opcode, SDNodeFlags Flags, SDValue V, unsigned OperandNo) { // NOTE: The cases should match with IR's ConstantExpr::getBinOpIdentity(). // TODO: Target-specific opcodes could be added. - if (auto *Const = isConstOrConstSplat(V)) { + if (auto *ConstV = isConstOrConstSplat(V, /*AllowUndefs*/ false, + /*AllowTruncation*/ true)) { + APInt Const = ConstV->getAPIntValue().trunc(V.getScalarValueSizeInBits()); switch (Opcode) { case ISD::ADD: case ISD::OR: case ISD::XOR: case ISD::UMAX: - return Const->isZero(); + return Const.isZero(); case ISD::MUL: - return Const->isOne(); + return Const.isOne(); case ISD::AND: case ISD::UMIN: - return Const->isAllOnes(); + return Const.isAllOnes(); case ISD::SMAX: - return Const->isMinSignedValue(); + return Const.isMinSignedValue(); case ISD::SMIN: - return Const->isMaxSignedValue(); + return Const.isMaxSignedValue(); case ISD::SUB: case ISD::SHL: case ISD::SRA: case ISD::SRL: - return OperandNo == 1 && Const->isZero(); + return OperandNo == 1 && Const.isZero(); case ISD::UDIV: case ISD::SDIV: - return OperandNo == 1 && Const->isOne(); + return OperandNo == 1 && Const.isOne(); } } else if (auto *ConstFP = isConstOrConstSplatFP(V)) { switch (Opcode) { @@ -11739,20 +12082,6 @@ HandleSDNode::~HandleSDNode() { DropOperands(); } -GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order, - const DebugLoc &DL, - const GlobalValue *GA, EVT VT, - int64_t o, unsigned TF) - : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { - TheGlobal = GA; -} - -AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, - EVT VT, unsigned SrcAS, - unsigned DestAS) - : SDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT)), - SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {} - MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT memvt, MachineMemOperand *mmo) : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) { @@ -11764,9 +12093,10 @@ 
MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, // We check here that the size of the memory operand fits within the size of // the MMO. This is because the MMO might indicate only a possible address // range instead of specifying the affected memory addresses precisely. - // TODO: Make MachineMemOperands aware of scalable vectors. - assert(memvt.getStoreSize().getKnownMinValue() <= MMO->getSize() && - "Size mismatch!"); + assert( + (!MMO->getType().isValid() || + TypeSize::isKnownLE(memvt.getStoreSize(), MMO->getSize().getValue())) && + "Size mismatch!"); } /// Profile - Gather unique data for the node. @@ -12911,7 +13241,7 @@ void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) { // Use of operator[] on the DenseMap may cause an insertion, which invalidates // the iterator, hence the need to make a copy to prevent a use-after-free. NodeExtraInfo NEI = I->second; - if (LLVM_LIKELY(!NEI.PCSections)) { + if (LLVM_LIKELY(!NEI.PCSections) && LLVM_LIKELY(!NEI.MMRA)) { // No deep copy required for the types of extra info set. // // FIXME: Investigate if other types of extra info also need deep copy. This diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index 66825d845c19..f2ab88851b78 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -91,11 +91,10 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other, } bool BaseIndexOffset::computeAliasing(const SDNode *Op0, - const std::optional<int64_t> NumBytes0, + const LocationSize NumBytes0, const SDNode *Op1, - const std::optional<int64_t> NumBytes1, + const LocationSize NumBytes1, const SelectionDAG &DAG, bool &IsAlias) { - BaseIndexOffset BasePtr0 = match(Op0, DAG); if (!BasePtr0.getBase().getNode()) return false; @@ -105,27 +104,24 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0, return false; int64_t PtrDiff; - if (NumBytes0 && NumBytes1 && - BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) { + if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) { // If the size of memory access is unknown, do not use it for analysis. - // One example of unknown size memory access is to load/store scalable - // vector objects on the stack. // BasePtr1 is PtrDiff away from BasePtr0.
They alias if none of the // following situations arise: - if (PtrDiff >= 0 && - *NumBytes0 != static_cast<int64_t>(MemoryLocation::UnknownSize)) { + if (PtrDiff >= 0 && NumBytes0.hasValue() && !NumBytes0.isScalable()) { // [----BasePtr0----] // [---BasePtr1--] // ========PtrDiff========> - IsAlias = !(*NumBytes0 <= PtrDiff); + IsAlias = !(static_cast<int64_t>(NumBytes0.getValue().getFixedValue()) <= + PtrDiff); return true; } - if (PtrDiff < 0 && - *NumBytes1 != static_cast<int64_t>(MemoryLocation::UnknownSize)) { + if (PtrDiff < 0 && NumBytes1.hasValue() && !NumBytes1.isScalable()) { // [----BasePtr0----] // [---BasePtr1--] // =====(-PtrDiff)====> - IsAlias = !((PtrDiff + *NumBytes1) <= 0); + IsAlias = !((PtrDiff + static_cast<int64_t>( + NumBytes1.getValue().getFixedValue())) <= 0); return true; } return false; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 7406a8ac1611..37b1131d2f8a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" @@ -43,7 +44,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/RuntimeLibcallUtil.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/StackMaps.h" @@ -79,6 +80,7 @@ #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MemoryModelRelaxationAnnotations.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" @@ -93,6 +95,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/InstructionCost.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" @@ -101,6 +104,7 @@ #include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Utils/Local.h" #include <cstddef> +#include <deque> #include <iterator> #include <limits> #include <optional> @@ -726,8 +730,17 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, // prevents it from being picked up by the earlier bitcast case. if (ValueVT.getVectorElementCount().isScalar() && (!ValueVT.isFloatingPoint() || !PartVT.isInteger())) { - Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, - DAG.getVectorIdxConstant(0, DL)); + // If we reach this condition and PartVT is FP, this means that + // ValueVT is also FP and both have a different size, otherwise we + // would have bitcasted them. Producing an EXTRACT_VECTOR_ELT here + // would be invalid since that would mean the smaller FP type has to + // be extended to the larger one. 
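Once computeAliasing has expressed both accesses as offsets from one base, the two IsAlias assignments above amount to an overlap test between the byte intervals [0, NumBytes0) and [PtrDiff, PtrDiff + NumBytes1), now guarded so that unknown or scalable sizes fall through to the conservative paths. A compact restatement of the test, assuming fixed known sizes (sketch):

    #include <cstdint>
    #include <optional>

    // Two accesses at offsets 0 and PtrDiff from the same base alias exactly
    // when [0, Size0) and [PtrDiff, PtrDiff + Size1) overlap. Returns
    // std::nullopt when the size needed for the test is unknown.
    std::optional<bool> aliasFromSameBase(int64_t PtrDiff,
                                          std::optional<int64_t> Size0,
                                          std::optional<int64_t> Size1) {
      if (PtrDiff >= 0 && Size0)
        return *Size0 > PtrDiff; // does access 0 reach into access 1?
      if (PtrDiff < 0 && Size1)
        return PtrDiff + *Size1 > 0; // does access 1 reach into access 0?
      return std::nullopt;
    }

The getCopyToPartsVector branch that follows implements the FP case just described: bitcast to the scalar type, then FP_EXTEND to the part type.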
+ if (PartVT.isFloatingPoint()) { + Val = DAG.getBitcast(ValueVT.getScalarType(), Val); + Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); + } else + Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, + DAG.getVectorIdxConstant(0, DL)); } else { uint64_t ValueSize = ValueVT.getFixedSizeInBits(); assert(PartVT.getFixedSizeInBits() > ValueSize && @@ -1241,51 +1254,67 @@ void SelectionDAGBuilder::visitDbgInfo(const Instruction &I) { It->Expr, Vals.size() > 1, It->DL, SDNodeOrder); } } - // We must early-exit here to prevent any DPValues from being emitted below, - // as we have just emitted the debug values resulting from assignment - // tracking analysis, making any existing DPValues redundant (and probably - // less correct). - return; } + // We must skip DbgVariableRecords if they've already been processed above as + // we have just emitted the debug values resulting from assignment tracking + // analysis, making any existing DbgVariableRecords redundant (and probably + // less correct). We still need to process DbgLabelRecords. This does sink + // DbgLabelRecords to the bottom of the group of debug records. That shouldn't + // be important as it does so deterministically and ordering between + // DbgLabelRecords and DbgVariableRecords is immaterial (other than for MIR/IR + // printing). + bool SkipDbgVariableRecords = DAG.getFunctionVarLocs(); // If there is any debug-info attached to this instruction, in the form of - // DPValue non-instruction debug-info records. - for (DPValue &DPV : I.getDbgValueRange()) { - DILocalVariable *Variable = DPV.getVariable(); - DIExpression *Expression = DPV.getExpression(); + // DbgRecord non-instruction debug-info records. + for (DbgRecord &DR : I.getDbgRecordRange()) { + if (DbgLabelRecord *DLR = dyn_cast<DbgLabelRecord>(&DR)) { + assert(DLR->getLabel() && "Missing label"); + SDDbgLabel *SDV = + DAG.getDbgLabel(DLR->getLabel(), DLR->getDebugLoc(), SDNodeOrder); + DAG.AddDbgLabel(SDV); + continue; + } + + if (SkipDbgVariableRecords) + continue; + DbgVariableRecord &DVR = cast<DbgVariableRecord>(DR); + DILocalVariable *Variable = DVR.getVariable(); + DIExpression *Expression = DVR.getExpression(); dropDanglingDebugInfo(Variable, Expression); - if (DPV.getType() == DPValue::LocationType::Declare) { - if (FuncInfo.PreprocessedDPVDeclares.contains(&DPV)) + if (DVR.getType() == DbgVariableRecord::LocationType::Declare) { + if (FuncInfo.PreprocessedDVRDeclares.contains(&DVR)) continue; - LLVM_DEBUG(dbgs() << "SelectionDAG visiting dbg_declare: " << DPV + LLVM_DEBUG(dbgs() << "SelectionDAG visiting dbg_declare: " << DVR << "\n"); - handleDebugDeclare(DPV.getVariableLocationOp(0), Variable, Expression, - DPV.getDebugLoc()); + handleDebugDeclare(DVR.getVariableLocationOp(0), Variable, Expression, + DVR.getDebugLoc()); continue; } - // A DPValue with no locations is a kill location. - SmallVector<Value *, 4> Values(DPV.location_ops()); + // A DbgVariableRecord with no locations is a kill location. + SmallVector<Value *, 4> Values(DVR.location_ops()); if (Values.empty()) { - handleKillDebugValue(Variable, Expression, DPV.getDebugLoc(), + handleKillDebugValue(Variable, Expression, DVR.getDebugLoc(), SDNodeOrder); continue; } - // A DPValue with an undef or absent location is also a kill location. + // A DbgVariableRecord with an undef or absent location is also a kill + // location.
if (llvm::any_of(Values, [](Value *V) { return !V || isa<UndefValue>(V); })) { - handleKillDebugValue(Variable, Expression, DPV.getDebugLoc(), + handleKillDebugValue(Variable, Expression, DVR.getDebugLoc(), SDNodeOrder); continue; } - bool IsVariadic = DPV.hasArgList(); - if (!handleDebugValue(Values, Variable, Expression, DPV.getDebugLoc(), + bool IsVariadic = DVR.hasArgList(); + if (!handleDebugValue(Values, Variable, Expression, DVR.getDebugLoc(), SDNodeOrder, IsVariadic)) { addDanglingDebugInfo(Values, Variable, Expression, IsVariadic, - DPV.getDebugLoc(), SDNodeOrder); + DVR.getDebugLoc(), SDNodeOrder); } } } @@ -1308,7 +1337,8 @@ void SelectionDAGBuilder::visit(const Instruction &I) { bool NodeInserted = false; std::unique_ptr<SelectionDAG::DAGNodeInsertedListener> InsertedListener; MDNode *PCSectionsMD = I.getMetadata(LLVMContext::MD_pcsections); - if (PCSectionsMD) { + MDNode *MMRA = I.getMetadata(LLVMContext::MD_mmra); + if (PCSectionsMD || MMRA) { InsertedListener = std::make_unique<SelectionDAG::DAGNodeInsertedListener>( DAG, [&](SDNode *) { NodeInserted = true; }); } @@ -1320,14 +1350,17 @@ void SelectionDAGBuilder::visit(const Instruction &I) { CopyToExportRegsIfNeeded(&I); // Handle metadata. - if (PCSectionsMD) { + if (PCSectionsMD || MMRA) { auto It = NodeMap.find(&I); if (It != NodeMap.end()) { - DAG.addPCSections(It->second.getNode(), PCSectionsMD); + if (PCSectionsMD) + DAG.addPCSections(It->second.getNode(), PCSectionsMD); + if (MMRA) + DAG.addMMRAMetadata(It->second.getNode(), MMRA); } else if (NodeInserted) { // This should not happen; if it does, don't let it go unnoticed so we can // fix it. Relevant visit*() function is probably missing a setValue(). - errs() << "warning: loosing !pcsections metadata [" + errs() << "warning: losing !pcsections and/or !mmra metadata [" << I.getModule()->getName() << "]\n"; LLVM_DEBUG(I.dump()); assert(false); @@ -1661,7 +1694,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values, if (!FragmentExpr) continue; SDDbgValue *SDV = DAG.getVRegDbgValue( - Var, *FragmentExpr, RegAndSize.first, false, DbgLoc, SDNodeOrder); + Var, *FragmentExpr, RegAndSize.first, false, DbgLoc, Order); DAG.AddDbgValue(SDV, false); Offset += RegisterSize; } @@ -1676,11 +1709,10 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values, } // We have created a SDDbgOperand for each Value in Values. - // Should use Order instead of SDNodeOrder?
assert(!LocationOps.empty()); - SDDbgValue *SDV = DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies, - /*IsIndirect=*/false, DbgLoc, - SDNodeOrder, IsVariadic); + SDDbgValue *SDV = + DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies, + /*IsIndirect=*/false, DbgLoc, Order, IsVariadic); DAG.AddDbgValue(SDV, /*isParameter=*/false); return true; } @@ -1771,6 +1803,13 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); + if (const ConstantPtrAuth *CPA = dyn_cast<ConstantPtrAuth>(C)) { + return DAG.getNode(ISD::PtrAuthGlobalAddress, getCurSDLoc(), VT, + getValue(CPA->getPointer()), getValue(CPA->getKey()), + getValue(CPA->getAddrDiscriminator()), + getValue(CPA->getDiscriminator())); + } + if (isa<ConstantPointerNull>(C)) { unsigned AS = V->getType()->getPointerAddressSpace(); return DAG.getConstant(0, getCurSDLoc(), @@ -2432,6 +2471,152 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, SL->SwitchCases.push_back(CB); } +// Collect dependencies on V recursively. This is used for the cost analysis in +// `shouldKeepJumpConditionsTogether`. +static bool collectInstructionDeps( + SmallMapVector<const Instruction *, bool, 8> *Deps, const Value *V, + SmallMapVector<const Instruction *, bool, 8> *Necessary = nullptr, + unsigned Depth = 0) { + // Return false if we have an incomplete count. + if (Depth >= SelectionDAG::MaxRecursionDepth) + return false; + + auto *I = dyn_cast<Instruction>(V); + if (I == nullptr) + return true; + + if (Necessary != nullptr) { + // This instruction is necessary for the other side of the condition so + // don't count it. + if (Necessary->contains(I)) + return true; + } + + // Already added this dep. + if (!Deps->try_emplace(I, false).second) + return true; + + for (unsigned OpIdx = 0, E = I->getNumOperands(); OpIdx < E; ++OpIdx) + if (!collectInstructionDeps(Deps, I->getOperand(OpIdx), Necessary, + Depth + 1)) + return false; + return true; +} + +bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether( + const FunctionLoweringInfo &FuncInfo, const BranchInst &I, + Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs, + TargetLoweringBase::CondMergingParams Params) const { + if (I.getNumSuccessors() != 2) + return false; + + if (!I.isConditional()) + return false; + + if (Params.BaseCost < 0) + return false; + + // Baseline cost. + InstructionCost CostThresh = Params.BaseCost; + + BranchProbabilityInfo *BPI = nullptr; + if (Params.LikelyBias || Params.UnlikelyBias) + BPI = FuncInfo.BPI; + if (BPI != nullptr) { + // See if we are either likely to get an early out or compute both lhs/rhs + // of the condition. + BasicBlock *IfFalse = I.getSuccessor(0); + BasicBlock *IfTrue = I.getSuccessor(1); + + std::optional<bool> Likely; + if (BPI->isEdgeHot(I.getParent(), IfTrue)) + Likely = true; + else if (BPI->isEdgeHot(I.getParent(), IfFalse)) + Likely = false; + + if (Likely) { + if (Opc == (*Likely ? Instruction::And : Instruction::Or)) + // It's likely we will have to compute both lhs and rhs of condition + CostThresh += Params.LikelyBias; + else { + if (Params.UnlikelyBias < 0) + return false; + // It's likely we will get an early out. + CostThresh -= Params.UnlikelyBias; + } + } + } + + if (CostThresh <= 0) + return false; + + // Collect "all" instructions that lhs condition is dependent on. + // Use map for stable iteration (to avoid non-determinism of iteration of + // SmallPtrSet). The `bool` value is just a dummy.
+ SmallMapVector<const Instruction *, bool, 8> LhsDeps, RhsDeps; + collectInstructionDeps(&LhsDeps, Lhs); + // Collect "all" instructions that rhs condition is dependent on AND are + // dependencies of lhs. This gives us an estimate on which instructions we + // stand to save by splitting the condition. + if (!collectInstructionDeps(&RhsDeps, Rhs, &LhsDeps)) + return false; + // Add the compare instruction itself unless it's a dependency on the LHS. + if (const auto *RhsI = dyn_cast<Instruction>(Rhs)) + if (!LhsDeps.contains(RhsI)) + RhsDeps.try_emplace(RhsI, false); + + const auto &TLI = DAG.getTargetLoweringInfo(); + const auto &TTI = + TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction()); + + InstructionCost CostOfIncluding = 0; + // See if this instruction will need to be computed independently of whether + // RHS is. + Value *BrCond = I.getCondition(); + auto ShouldCountInsn = [&RhsDeps, &BrCond](const Instruction *Ins) { + for (const auto *U : Ins->users()) { + // If user is independent of RHS calculation we don't need to count it. + if (auto *UIns = dyn_cast<Instruction>(U)) + if (UIns != BrCond && !RhsDeps.contains(UIns)) + return false; + } + return true; + }; + + // Prune instructions from RHS Deps that are dependencies of unrelated + // instructions. The value (SelectionDAG::MaxRecursionDepth) is fairly + // arbitrary and just meant to cap how much time we spend in the pruning + // loop. It's highly unlikely to come into effect. + const unsigned MaxPruneIters = SelectionDAG::MaxRecursionDepth; + // Stop after a certain point. No incorrectness from including too many + // instructions. + for (unsigned PruneIters = 0; PruneIters < MaxPruneIters; ++PruneIters) { + const Instruction *ToDrop = nullptr; + for (const auto &InsPair : RhsDeps) { + if (!ShouldCountInsn(InsPair.first)) { + ToDrop = InsPair.first; + break; + } + } + if (ToDrop == nullptr) + break; + RhsDeps.erase(ToDrop); + } + + for (const auto &InsPair : RhsDeps) { + // Finally accumulate latency that we can only attribute to computing the + // RHS condition. Use latency because we are essentially trying to calculate + // the cost of the dependency chain. + // Possible TODO: We could try to estimate ILP and make this more precise.
+ CostOfIncluding += + TTI.getInstructionCost(InsPair.first, TargetTransformInfo::TCK_Latency); + + if (CostOfIncluding > CostThresh) + return false; + } + return true; +} + void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, @@ -2646,8 +2831,13 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1)))) Opcode = Instruction::Or; - if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) && - match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) { + if (Opcode && + !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) && + match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value()))) && + !shouldKeepJumpConditionsTogether( + FuncInfo, I, Opcode, BOp0, BOp1, + DAG.getTargetLoweringInfo().getJumpConditionMergingParams( + Opcode, BOp0, BOp1))) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode, getEdgeProbability(BrMBB, Succ0MBB), getEdgeProbability(BrMBB, Succ1MBB), @@ -2870,7 +3060,8 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL, auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable; MachineMemOperand *MemRef = MF.getMachineMemOperand( - MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlign(PtrTy)); + MPInfo, Flags, LocationSize::precise(PtrTy.getSizeInBits() / 8), + DAG.getEVTAlign(PtrTy)); DAG.setNodeMemRefs(Node, {MemRef}); } if (PtrTy != PtrMemTy) @@ -3132,12 +3323,12 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { const BasicBlock *EHPadBB = I.getSuccessor(1); MachineBasicBlock *EHPadMBB = FuncInfo.MBBMap[EHPadBB]; - // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't + // Deopt and ptrauth bundles are lowered in helper functions, and we don't // have to do anything here to lower funclet bundles. assert(!I.hasOperandBundlesOtherThan( {LLVMContext::OB_deopt, LLVMContext::OB_gc_transition, LLVMContext::OB_gc_live, LLVMContext::OB_funclet, - LLVMContext::OB_cfguardtarget, + LLVMContext::OB_cfguardtarget, LLVMContext::OB_ptrauth, LLVMContext::OB_clang_arc_attachedcall}) && "Cannot lower invokes with arbitrary operand bundles yet!"); @@ -3161,7 +3352,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { EHPadMBB->setMachineBlockAddressTaken(); break; case Intrinsic::experimental_patchpoint_void: - case Intrinsic::experimental_patchpoint_i64: + case Intrinsic::experimental_patchpoint: visitPatchpoint(I, EHPadBB); break; case Intrinsic::experimental_gc_statepoint: @@ -3172,7 +3363,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { // special because it can be invoked, so we manually lower it to a DAG // node here. SmallVector<SDValue, 8> Ops; - Ops.push_back(getRoot()); // inchain + Ops.push_back(getControlRoot()); // inchain for the terminator node const TargetLowering &TLI = DAG.getTargetLoweringInfo(); Ops.push_back( DAG.getTargetConstant(Intrinsic::wasm_rethrow, getCurSDLoc(), @@ -3182,12 +3373,14 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { break; } } - } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) { + } else if (I.hasDeoptState()) { // Currently we do not lower any intrinsic calls with deopt operand bundles. // Eventually we will support lowering the @llvm.experimental.deoptimize // intrinsic, and right now there are no plans to support other intrinsics // with deopt state. 
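The visitBr change above consults shouldKeepJumpConditionsTogether before splitting a logical and/or: FindMergedConditions turns one branch into two so the right-hand side is skipped whenever the left decides the outcome, while the new cost model keeps the conditions merged when the right-hand side's dependency chain is cheaper than the extra branch. In source terms, the two lowerings being weighed look like this (hedged illustration, not code from the patch):

    bool A(), B(); // stand-ins for the two sides of "if (A() && B())"
    void Taken(), NotTaken();

    // Split form (FindMergedConditions): B's chain never runs when A is
    // false, at the price of a second conditional branch.
    void split() {
      if (A()) {
        if (B()) {
          Taken();
          return;
        }
      }
      NotTaken();
    }

    // Merged form: both sides are computed and combined, and one branch is
    // emitted; profitable when B's chain costs less than CostThresh.
    void merged() {
      if (A() & B()) { // non-short-circuiting '&' on purpose
        Taken();
        return;
      }
      NotTaken();
    }

Returning to the invoke lowering, the deopt-state case described in the comment above is dispatched to LowerCallSiteWithDeoptBundle below.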
LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB); + } else if (I.countOperandBundlesOfType(LLVMContext::OB_ptrauth)) { + LowerCallSiteWithPtrAuthBundle(cast<CallBase>(I), EHPadBB); } else { LowerCallTo(I, getValue(Callee), false, false, EHPadBB); } @@ -3349,11 +3542,13 @@ void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { return; // We may be able to ignore unreachable behind a noreturn call. - if (DAG.getTarget().Options.NoTrapAfterNoreturn) { - if (const CallInst *Call = dyn_cast_or_null<CallInst>(I.getPrevNode())) { - if (Call->doesNotReturn()) - return; - } + if (const CallInst *Call = dyn_cast_or_null<CallInst>(I.getPrevNode()); + Call && Call->doesNotReturn()) { + if (DAG.getTarget().Options.NoTrapAfterNoreturn) + return; + // Do not emit an additional trap instruction. + if (Call->isNonContinuableTrap()) + return; } DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); @@ -3440,12 +3635,8 @@ void SelectionDAGBuilder::visitSDiv(const User &I) { Op2, Flags)); } -void SelectionDAGBuilder::visitICmp(const User &I) { - ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; - if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I)) - predicate = IC->getPredicate(); - else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) - predicate = ICmpInst::Predicate(IC->getPredicate()); +void SelectionDAGBuilder::visitICmp(const ICmpInst &I) { + ICmpInst::Predicate predicate = I.getPredicate(); SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Opcode = getICmpCondCode(predicate); @@ -3467,12 +3658,8 @@ void SelectionDAGBuilder::visitICmp(const User &I) { setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); } -void SelectionDAGBuilder::visitFCmp(const User &I) { - FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; - if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I)) - predicate = FC->getPredicate(); - else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) - predicate = FCmpInst::Predicate(FC->getPredicate()); +void SelectionDAGBuilder::visitFCmp(const FCmpInst &I) { + FCmpInst::Predicate predicate = I.getPredicate(); SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); @@ -3712,7 +3899,11 @@ void SelectionDAGBuilder::visitUIToFP(const User &I) { SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); - setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); + SDNodeFlags Flags; + if (auto *PNI = dyn_cast<PossiblyNonNegInst>(&I)) + Flags.setNonNeg(PNI->hasNonNeg()); + + setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N, Flags)); } void SelectionDAGBuilder::visitSIToFP(const User &I) { @@ -4306,6 +4497,17 @@ static const MDNode *getRangeMetadata(const Instruction &I) { return I.getMetadata(LLVMContext::MD_range); } +static std::optional<ConstantRange> getRange(const Instruction &I) { + if (const auto *CB = dyn_cast<CallBase>(&I)) { + // see comment in getRangeMetadata about this check + if (CB->hasRetAttr(Attribute::NoUndef)) + return CB->getRange(); + } + if (const MDNode *Range = getRangeMetadata(I)) + return getConstantRangeFromMetadata(*Range); + return std::nullopt; +} + void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (I.isAtomic()) return visitAtomicLoad(I); @@ -4331,7 +4533,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Type *Ty = I.getType(); SmallVector<EVT, 4> ValueVTs, MemVTs; SmallVector<TypeSize, 
4> Offsets; - ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets, 0); + ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -4499,7 +4701,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SmallVector<EVT, 4> ValueVTs, MemVTs; SmallVector<TypeSize, 4> Offsets; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), - SrcV->getType(), ValueVTs, &MemVTs, &Offsets, 0); + SrcV->getType(), ValueVTs, &MemVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -4554,24 +4756,24 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, SDLoc sdl = getCurSDLoc(); auto getMaskedStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, - MaybeAlign &Alignment) { + Align &Alignment) { // llvm.masked.store.*(Src0, Ptr, alignment, Mask) Src0 = I.getArgOperand(0); Ptr = I.getArgOperand(1); - Alignment = cast<ConstantInt>(I.getArgOperand(2))->getMaybeAlignValue(); + Alignment = cast<ConstantInt>(I.getArgOperand(2))->getAlignValue(); Mask = I.getArgOperand(3); }; auto getCompressingStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, - MaybeAlign &Alignment) { + Align &Alignment) { // llvm.masked.compressstore.*(Src0, Ptr, Mask) Src0 = I.getArgOperand(0); Ptr = I.getArgOperand(1); Mask = I.getArgOperand(2); - Alignment = std::nullopt; + Alignment = I.getParamAlign(1).valueOrOne(); }; Value *PtrOperand, *MaskOperand, *Src0Operand; - MaybeAlign Alignment; + Align Alignment; if (IsCompressing) getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment); else @@ -4583,15 +4785,26 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); EVT VT = Src0.getValueType(); - if (!Alignment) - Alignment = DAG.getEVTAlign(VT); + + auto MMOFlags = MachineMemOperand::MOStore; + if (I.hasMetadata(LLVMContext::MD_nontemporal)) + MMOFlags |= MachineMemOperand::MONonTemporal; MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, - MemoryLocation::UnknownSize, *Alignment, I.getAAMetadata()); + MachinePointerInfo(PtrOperand), MMOFlags, + LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata()); + + const auto &TLI = DAG.getTargetLoweringInfo(); + const auto &TTI = + TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction()); SDValue StoreNode = - DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO, - ISD::UNINDEXED, false /* Truncating */, IsCompressing); + !IsCompressing && + TTI.hasConditionalLoadStoreForType(I.getArgOperand(0)->getType()) + ? TLI.visitMaskedStore(DAG, sdl, getMemoryRoot(), MMO, Ptr, Src0, + Mask) + : DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, + VT, MMO, ISD::UNINDEXED, /*Truncating=*/false, + IsCompressing); DAG.setRoot(StoreNode); setValue(&I, StoreNode); } @@ -4692,9 +4905,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(AS), MachineMemOperand::MOStore, - // TODO: Make MachineMemOperands aware of scalable - // vectors. 
- MemoryLocation::UnknownSize, Alignment, I.getAAMetadata()); + LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata()); if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); @@ -4720,24 +4931,24 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDLoc sdl = getCurSDLoc(); auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, - MaybeAlign &Alignment) { + Align &Alignment) { // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) Ptr = I.getArgOperand(0); - Alignment = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue(); + Alignment = cast<ConstantInt>(I.getArgOperand(1))->getAlignValue(); Mask = I.getArgOperand(2); Src0 = I.getArgOperand(3); }; auto getExpandingLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, - MaybeAlign &Alignment) { + Align &Alignment) { // @llvm.masked.expandload.*(Ptr, Mask, Src0) Ptr = I.getArgOperand(0); - Alignment = std::nullopt; + Alignment = I.getParamAlign(0).valueOrOne(); Mask = I.getArgOperand(1); Src0 = I.getArgOperand(2); }; Value *PtrOperand, *MaskOperand, *Src0Operand; - MaybeAlign Alignment; + Align Alignment; if (IsExpanding) getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment); else @@ -4749,9 +4960,6 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); EVT VT = Src0.getValueType(); - if (!Alignment) - Alignment = DAG.getEVTAlign(VT); - AAMDNodes AAInfo = I.getAAMetadata(); const MDNode *Ranges = getRangeMetadata(I); @@ -4761,16 +4969,31 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); + auto MMOFlags = MachineMemOperand::MOLoad; + if (I.hasMetadata(LLVMContext::MD_nontemporal)) + MMOFlags |= MachineMemOperand::MONonTemporal; + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, - MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); + MachinePointerInfo(PtrOperand), MMOFlags, + LocationSize::beforeOrAfterPointer(), Alignment, AAInfo, Ranges); - SDValue Load = - DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO, - ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding); + const auto &TLI = DAG.getTargetLoweringInfo(); + const auto &TTI = + TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction()); + // The Load/Res may point to different values and both of them are output + // variables. + SDValue Load; + SDValue Res; + if (!IsExpanding && + TTI.hasConditionalLoadStoreForType(Src0Operand->getType())) + Res = TLI.visitMaskedLoad(DAG, sdl, InChain, MMO, Load, Ptr, Src0, Mask); + else + Res = Load = + DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO, + ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding); if (AddToChain) PendingLoads.push_back(Load.getValue(1)); - setValue(&I, Load); + setValue(&I, Res); } void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { @@ -4799,9 +5022,8 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(AS), MachineMemOperand::MOLoad, - // TODO: Make MachineMemOperands aware of scalable - // vectors. 
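The visitMaskedLoad/visitMaskedStore changes above make two moves: the alignment of expandload/compressstore now comes from the pointer parameter's align attribute (valueOrOne) instead of being left unspecified, and targets that report hasConditionalLoadStoreForType lower the non-expanding scalar cases through the TLI hooks (for example with conditional-faulting moves) rather than the generic masked nodes. Whatever the lowering, the per-lane contract it must preserve is small enough to state directly (scalar sketch of one lane):

    // Memory is touched only when the mask bit is set, so a disabled lane
    // can never fault; a disabled load lane yields the passthru value.
    template <typename T>
    T maskedLoadLane(const T *Ptr, bool MaskBit, T Passthru) {
      return MaskBit ? *Ptr : Passthru;
    }

    template <typename T>
    void maskedStoreLane(T *Ptr, bool MaskBit, T Val) {
      if (MaskBit)
        *Ptr = Val;
    }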
- MemoryLocation::UnknownSize, Alignment, I.getAAMetadata(), Ranges); + LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata(), + Ranges); if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); @@ -4841,9 +5063,9 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(), - DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, SuccessOrdering, - FailureOrdering); + MachinePointerInfo(I.getPointerOperand()), Flags, + LocationSize::precise(MemVT.getStoreSize()), DAG.getEVTAlign(MemVT), + AAMDNodes(), nullptr, SSID, SuccessOrdering, FailureOrdering); SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain, @@ -4895,8 +5117,9 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(), - DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, Ordering); + MachinePointerInfo(I.getPointerOperand()), Flags, + LocationSize::precise(MemVT.getStoreSize()), DAG.getEVTAlign(MemVT), + AAMDNodes(), nullptr, SSID, Ordering); SDValue L = DAG.getAtomic(NT, dl, MemVT, InChain, @@ -4941,8 +5164,9 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(), - I.getAlign(), AAMDNodes(), nullptr, SSID, Order); + MachinePointerInfo(I.getPointerOperand()), Flags, + LocationSize::precise(MemVT.getStoreSize()), I.getAlign(), AAMDNodes(), + nullptr, SSID, Order); InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); @@ -4978,8 +5202,9 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(), - I.getAlign(), AAMDNodes(), nullptr, SSID, Ordering); + MachinePointerInfo(I.getPointerOperand()), Flags, + LocationSize::precise(MemVT.getStoreSize()), I.getAlign(), AAMDNodes(), + nullptr, SSID, Ordering); SDValue Val = getValue(I.getValueOperand()); if (Val.getValueType() != MemVT) @@ -5064,6 +5289,17 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Create the node. SDValue Result; + + if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) { + auto *Token = Bundle->Inputs[0].get(); + SDValue ConvControlToken = getValue(Token); + assert(Ops.back().getValueType() != MVT::Glue && + "Did not expect another glue node here."); + ConvControlToken = + DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken); + Ops.push_back(ConvControlToken); + } + // In some cases, custom collection of operands from CallInst I may be needed.
TLI.CollectTargetIntrinsicOperands(I, Ops, DAG); if (IsTgtIntrinsic) { @@ -5106,9 +5342,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, Result = DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne()); } - - setValue(&I, Result); } + + setValue(&I, Result); } /// GetSignificand - Get the significand and build it into a floating-point @@ -5831,12 +6067,15 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( // incorrect hoisting of the DBG_VALUE to the function entry). // Notice that we allow one dbg.value per IR level argument, to accommodate // for the situation with fragments above. + // If there is no node for the value being handled, we return true to skip + // the normal generation of debug info, as it would kill existing debug + // info for the parameter in case of duplicates. if (VariableIsFunctionInputArg) { unsigned ArgNo = Arg->getArgNo(); if (ArgNo >= FuncInfo.DescribedArgs.size()) FuncInfo.DescribedArgs.resize(ArgNo + 1, false); else if (!IsInPrologue && FuncInfo.DescribedArgs.test(ArgNo)) - return false; + return !NodeMap[V].getNode(); FuncInfo.DescribedArgs.set(ArgNo); } } @@ -6065,6 +6304,85 @@ bool SelectionDAGBuilder::visitEntryValueDbgValue( } /// Lower the call to the specified intrinsic function. +void SelectionDAGBuilder::visitConvergenceControl(const CallInst &I, + unsigned Intrinsic) { + SDLoc sdl = getCurSDLoc(); + switch (Intrinsic) { + case Intrinsic::experimental_convergence_anchor: + setValue(&I, DAG.getNode(ISD::CONVERGENCECTRL_ANCHOR, sdl, MVT::Untyped)); + break; + case Intrinsic::experimental_convergence_entry: + setValue(&I, DAG.getNode(ISD::CONVERGENCECTRL_ENTRY, sdl, MVT::Untyped)); + break; + case Intrinsic::experimental_convergence_loop: { + auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl); + auto *Token = Bundle->Inputs[0].get(); + setValue(&I, DAG.getNode(ISD::CONVERGENCECTRL_LOOP, sdl, MVT::Untyped, + getValue(Token))); + break; + } + } +} + +void SelectionDAGBuilder::visitVectorHistogram(const CallInst &I, + unsigned IntrinsicID) { + // For now, we're only lowering an 'add' histogram. + // We can add others later, e.g. saturating adds, min/max. 
+ assert(IntrinsicID == Intrinsic::experimental_vector_histogram_add && + "Tried to lower unsupported histogram type"); + SDLoc sdl = getCurSDLoc(); + Value *Ptr = I.getOperand(0); + SDValue Inc = getValue(I.getOperand(1)); + SDValue Mask = getValue(I.getOperand(2)); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + DataLayout TargetDL = DAG.getDataLayout(); + EVT VT = Inc.getValueType(); + Align Alignment = DAG.getEVTAlign(VT); + + const MDNode *Ranges = getRangeMetadata(I); + + SDValue Root = DAG.getRoot(); + SDValue Base; + SDValue Index; + ISD::MemIndexType IndexType; + SDValue Scale; + bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this, + I.getParent(), VT.getScalarStoreSize()); + + unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace(); + + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(AS), + MachineMemOperand::MOLoad | MachineMemOperand::MOStore, + MemoryLocation::UnknownSize, Alignment, I.getAAMetadata(), Ranges); + + if (!UniformBase) { + Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); + Index = getValue(Ptr); + IndexType = ISD::SIGNED_SCALED; + Scale = + DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); + } + + EVT IdxVT = Index.getValueType(); + EVT EltTy = IdxVT.getVectorElementType(); + if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) { + EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy); + Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index); + } + + SDValue ID = DAG.getTargetConstant(IntrinsicID, sdl, MVT::i32); + + SDValue Ops[] = {Root, Inc, Mask, Base, Index, Scale, ID}; + SDValue Histogram = DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), VT, sdl, + Ops, MMO, IndexType); + + setValue(&I, Histogram); + DAG.setRoot(Histogram); +} + +/// Lower the call to the specified intrinsic function. void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -6142,14 +6460,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Align SrcAlign = MCI.getSourceAlign().valueOrOne(); Align Alignment = std::min(DstAlign, SrcAlign); bool isVol = MCI.isVolatile(); - bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memcpy DAG // node. SDValue Root = isVol ? getRoot() : getMemoryRoot(); - SDValue MC = DAG.getMemcpy( - Root, sdl, Op1, Op2, Op3, Alignment, isVol, - /* AlwaysInline */ false, isTC, MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); + SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol, + /* AlwaysInline */ false, &I, std::nullopt, + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)), + I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MC); return; } @@ -6164,13 +6482,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Align SrcAlign = MCI.getSourceAlign().valueOrOne(); Align Alignment = std::min(DstAlign, SrcAlign); bool isVol = MCI.isVolatile(); - bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memcpy DAG // node. 
- SDValue MC = DAG.getMemcpy( - getRoot(), sdl, Dst, Src, Size, Alignment, isVol, - /* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); + SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol, + /* AlwaysInline */ true, &I, std::nullopt, + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)), + I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MC); return; } @@ -6182,11 +6500,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // @llvm.memset defines 0 and 1 to both mean no alignment. Align Alignment = MSI.getDestAlign().valueOrOne(); bool isVol = MSI.isVolatile(); - bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); SDValue Root = isVol ? getRoot() : getMemoryRoot(); SDValue MS = DAG.getMemset( Root, sdl, Op1, Op2, Op3, Alignment, isVol, /* AlwaysInline */ false, - isTC, MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata()); + &I, MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata()); updateDAGForMaybeTailCall(MS); return; } @@ -6199,10 +6516,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // @llvm.memset defines 0 and 1 to both mean no alignment. Align DstAlign = MSII.getDestAlign().valueOrOne(); bool isVol = MSII.isVolatile(); - bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); SDValue Root = isVol ? getRoot() : getMemoryRoot(); SDValue MC = DAG.getMemset(Root, sdl, Dst, Value, Size, DstAlign, isVol, - /* AlwaysInline */ true, isTC, + /* AlwaysInline */ true, &I, MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata()); updateDAGForMaybeTailCall(MC); @@ -6218,12 +6534,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Align SrcAlign = MMI.getSourceAlign().valueOrOne(); Align Alignment = std::min(DstAlign, SrcAlign); bool isVol = MMI.isVolatile(); - bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memmove DAG // node. SDValue Root = isVol ? getRoot() : getMemoryRoot(); - SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol, - isTC, MachinePointerInfo(I.getArgOperand(0)), + SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol, &I, + /* OverrideTailCall */ std::nullopt, + MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MM); @@ -6329,7 +6645,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; } case Intrinsic::dbg_assign: { - // Debug intrinsics are handled seperately in assignment tracking mode. + // Debug intrinsics are handled separately in assignment tracking mode. if (AssignmentTrackingEnabled) return; // If assignment tracking hasn't been enabled then fall through and treat @@ -6337,7 +6653,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, [[fallthrough]]; } case Intrinsic::dbg_value: { - // Debug intrinsics are handled seperately in assignment tracking mode. + // Debug intrinsics are handled separately in assignment tracking mode. 
if (AssignmentTrackingEnabled) return; const DbgValueInst &DI = cast<DbgValueInst>(I); @@ -6473,6 +6789,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::fabs: case Intrinsic::sin: case Intrinsic::cos: + case Intrinsic::tan: + case Intrinsic::asin: + case Intrinsic::acos: + case Intrinsic::atan: + case Intrinsic::sinh: + case Intrinsic::cosh: + case Intrinsic::tanh: case Intrinsic::exp10: case Intrinsic::floor: case Intrinsic::ceil: @@ -6483,22 +6806,31 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::roundeven: case Intrinsic::canonicalize: { unsigned Opcode; + // clang-format off switch (Intrinsic) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. - case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; - case Intrinsic::fabs: Opcode = ISD::FABS; break; - case Intrinsic::sin: Opcode = ISD::FSIN; break; - case Intrinsic::cos: Opcode = ISD::FCOS; break; - case Intrinsic::exp10: Opcode = ISD::FEXP10; break; - case Intrinsic::floor: Opcode = ISD::FFLOOR; break; - case Intrinsic::ceil: Opcode = ISD::FCEIL; break; - case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; - case Intrinsic::rint: Opcode = ISD::FRINT; break; - case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; - case Intrinsic::round: Opcode = ISD::FROUND; break; - case Intrinsic::roundeven: Opcode = ISD::FROUNDEVEN; break; + case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; + case Intrinsic::fabs: Opcode = ISD::FABS; break; + case Intrinsic::sin: Opcode = ISD::FSIN; break; + case Intrinsic::cos: Opcode = ISD::FCOS; break; + case Intrinsic::tan: Opcode = ISD::FTAN; break; + case Intrinsic::asin: Opcode = ISD::FASIN; break; + case Intrinsic::acos: Opcode = ISD::FACOS; break; + case Intrinsic::atan: Opcode = ISD::FATAN; break; + case Intrinsic::sinh: Opcode = ISD::FSINH; break; + case Intrinsic::cosh: Opcode = ISD::FCOSH; break; + case Intrinsic::tanh: Opcode = ISD::FTANH; break; + case Intrinsic::exp10: Opcode = ISD::FEXP10; break; + case Intrinsic::floor: Opcode = ISD::FFLOOR; break; + case Intrinsic::ceil: Opcode = ISD::FCEIL; break; + case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; + case Intrinsic::rint: Opcode = ISD::FRINT; break; + case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; + case Intrinsic::round: Opcode = ISD::FROUND; break; + case Intrinsic::roundeven: Opcode = ISD::FROUNDEVEN; break; case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break; } + // clang-format on setValue(&I, DAG.getNode(Opcode, sdl, getValue(I.getArgOperand(0)).getValueType(), @@ -6510,6 +6842,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::lrint: case Intrinsic::llrint: { unsigned Opcode; + // clang-format off switch (Intrinsic) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
case Intrinsic::lround: Opcode = ISD::LROUND; break; @@ -6517,6 +6850,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::lrint: Opcode = ISD::LRINT; break; case Intrinsic::llrint: Opcode = ISD::LLRINT; break; } + // clang-format on EVT RetVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(Opcode, sdl, RetVT, @@ -6710,7 +7044,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, auto MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, + MPI, MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer(), TempAlign); Chain = DAG.getGetFPEnv(Chain, sdl, Temp, EnvVT, MMO); Res = DAG.getLoad(EnvVT, sdl, Chain, Temp, MPI); @@ -6739,7 +7073,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Chain = DAG.getStore(Chain, sdl, Env, Temp, MPI, TempAlign, MachineMemOperand::MOStore); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, + MPI, MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(), TempAlign); Chain = DAG.getSetFPEnv(Chain, sdl, Temp, EnvVT, MMO); } @@ -6781,6 +7115,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DAG.setRoot(Res.getValue(1)); return; } + case Intrinsic::readsteadycounter: { + SDValue Op = getRoot(); + Res = DAG.getNode(ISD::READSTEADYCOUNTER, sdl, + DAG.getVTList(MVT::i64, MVT::Other), Op); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); + return; + } case Intrinsic::bitreverse: setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl, getValue(I.getArgOperand(0)).getValueType(), @@ -6918,6 +7260,20 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, DAG.getNode(ISD::ABS, sdl, Op1.getValueType(), Op1)); return; } + case Intrinsic::scmp: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + setValue(&I, DAG.getNode(ISD::SCMP, sdl, DestVT, Op1, Op2)); + break; + } + case Intrinsic::ucmp: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + setValue(&I, DAG.getNode(ISD::UCMP, sdl, DestVT, Op1, Op2)); + break; + } case Intrinsic::stacksave: { SDValue Op = getRoot(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); @@ -6948,11 +7304,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::stackguard: { MachineFunction &MF = DAG.getMachineFunction(); const Module &M = *MF.getFunction().getParent(); + EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDValue Chain = getRoot(); if (TLI.useLoadStackGuardNode()) { Res = getLoadStackGuard(DAG, sdl, Chain); + Res = DAG.getPtrExtOrTrunc(Res, sdl, PtrTy); } else { - EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); const Value *Global = TLI.getSDagStackGuard(M); Align Align = DAG.getDataLayout().getPrefTypeAlign(Global->getType()); Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global), @@ -7018,8 +7375,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::codeview_annotation: { // Emit a label associated with this metadata. 
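Editor's note: the new scmp/ucmp cases above map directly onto the ISD::SCMP/ISD::UCMP three-way comparison nodes. Their semantics in standalone C++ (a sketch; the real intrinsics are polymorphic over integer and vector types):

```cpp
#include <cstdint>

// llvm.scmp/llvm.ucmp return -1, 0, or 1 for less/equal/greater; the only
// difference is whether the operands compare signed or unsigned.
int8_t scmp(int64_t A, int64_t B) { return (A > B) - (A < B); }
int8_t ucmp(uint64_t A, uint64_t B) { return (A > B) - (A < B); }
```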
MachineFunction &MF = DAG.getMachineFunction(); - MCSymbol *Label = - MF.getMMI().getContext().createTempSymbol("annotation", true); + MCSymbol *Label = MF.getContext().createTempSymbol("annotation", true); Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata(); MF.addCodeViewAnnotation(Label, cast<MDNode>(MD)); Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label); @@ -7117,6 +7473,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; } + case Intrinsic::allow_runtime_check: + case Intrinsic::allow_ubsan_check: + setValue(&I, getValue(ConstantInt::getTrue(I.getType()))); + return; + case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: case Intrinsic::usub_with_overflow: @@ -7223,11 +7584,16 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::invariant_end: // Discard region information. return; - case Intrinsic::clear_cache: - /// FunctionName may be null. - if (const char *FunctionName = TLI.getClearCacheBuiltinName()) - lowerCallToExternalSymbol(I, FunctionName); + case Intrinsic::clear_cache: { + SDValue InputChain = DAG.getRoot(); + SDValue StartVal = getValue(I.getArgOperand(0)); + SDValue EndVal = getValue(I.getArgOperand(1)); + Res = DAG.getNode(ISD::CLEAR_CACHE, sdl, DAG.getVTList(MVT::Other), + {InputChain, StartVal, EndVal}); + setValue(&I, Res); + DAG.setRoot(Res); return; + } case Intrinsic::donothing: case Intrinsic::seh_try_begin: case Intrinsic::seh_scope_begin: @@ -7239,7 +7605,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, visitStackmap(I); return; case Intrinsic::experimental_patchpoint_void: - case Intrinsic::experimental_patchpoint_i64: + case Intrinsic::experimental_patchpoint: visitPatchpoint(I); return; case Intrinsic::experimental_gc_statepoint: @@ -7263,8 +7629,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, llvm_unreachable("instrprof failed to lower mcdc parameters"); case Intrinsic::instrprof_mcdc_tvbitmap_update: llvm_unreachable("instrprof failed to lower an mcdc tvbitmap update"); - case Intrinsic::instrprof_mcdc_condbitmap_update: - llvm_unreachable("instrprof failed to lower an mcdc condbitmap update"); case Intrinsic::localescape: { MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); @@ -7279,9 +7643,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, assert(FuncInfo.StaticAllocaMap.count(Slot) && "can only escape static allocas"); int FI = FuncInfo.StaticAllocaMap[Slot]; - MCSymbol *FrameAllocSym = - MF.getMMI().getContext().getOrCreateFrameAllocSymbol( - GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx); + MCSymbol *FrameAllocSym = MF.getContext().getOrCreateFrameAllocSymbol( + GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, TII->get(TargetOpcode::LOCAL_ESCAPE)) .addSym(FrameAllocSym) @@ -7300,9 +7663,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, auto *Idx = cast<ConstantInt>(I.getArgOperand(2)); unsigned IdxVal = unsigned(Idx->getLimitedValue(std::numeric_limits<int>::max())); - MCSymbol *FrameAllocSym = - MF.getMMI().getContext().getOrCreateFrameAllocSymbol( - GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal); + MCSymbol *FrameAllocSym = MF.getContext().getOrCreateFrameAllocSymbol( + GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal); Value *FP = I.getArgOperand(1); SDValue FPVal = getValue(FP); @@ -7544,9 +7906,19 @@ void 
SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Ptr = getValue(I.getOperand(0)); SDValue Mask = getValue(I.getOperand(1)); - EVT PtrVT = Ptr.getValueType(); - assert(PtrVT == Mask.getValueType() && - "Pointers with different index type are not supported by SDAG"); + // On arm64_32, pointers are 32 bits when stored in memory, but + // zero-extended to 64 bits when in registers. Thus the mask is 32 bits to + // match the index type, but the pointer is 64 bits, so the mask must be + // zero-extended up to 64 bits to match the pointer. + EVT PtrVT = + TLI.getValueType(DAG.getDataLayout(), I.getOperand(0)->getType()); + EVT MemVT = + TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType()); + assert(PtrVT == Ptr.getValueType()); + assert(MemVT == Mask.getValueType()); + if (MemVT != PtrVT) + Mask = DAG.getPtrExtOrTrunc(Mask, sdl, PtrVT); + setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr, Mask)); return; } @@ -7612,6 +7984,37 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, Trunc); return; } + case Intrinsic::experimental_vector_partial_reduce_add: { + SDValue OpNode = getValue(I.getOperand(1)); + EVT ReducedTy = EVT::getEVT(I.getType()); + EVT FullTy = OpNode.getValueType(); + + unsigned Stride = ReducedTy.getVectorMinNumElements(); + unsigned ScaleFactor = FullTy.getVectorMinNumElements() / Stride; + + // Collect all of the subvectors + std::deque<SDValue> Subvectors; + Subvectors.push_back(getValue(I.getOperand(0))); + for (unsigned i = 0; i < ScaleFactor; i++) { + auto SourceIndex = DAG.getVectorIdxConstant(i * Stride, sdl); + Subvectors.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, ReducedTy, + {OpNode, SourceIndex})); + } + + // Flatten the subvector tree + while (Subvectors.size() > 1) { + Subvectors.push_back(DAG.getNode(ISD::ADD, sdl, ReducedTy, + {Subvectors[0], Subvectors[1]})); + Subvectors.pop_front(); + Subvectors.pop_front(); + } + + assert(Subvectors.size() == 1 && + "There should only be one subvector after tree flattening"); + + setValue(&I, Subvectors[0]); + return; + } case Intrinsic::experimental_cttz_elts: { auto DL = getCurSDLoc(); SDValue Op = getValue(I.getOperand(0)); @@ -7630,20 +8033,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Op = DAG.getSetCC(DL, OpVT, Op, AllZero, ISD::SETNE); } - // Find the smallest "sensible" element type to use for the expansion. - ConstantRange CR( - APInt(64, OpVT.getVectorElementCount().getKnownMinValue())); - if (OpVT.isScalableVT()) - CR = CR.umul_sat(getVScaleRange(I.getCaller(), 64)); - // If the zero-is-poison flag is set, we can assume the upper limit // of the result is VF-1. - if (!cast<ConstantSDNode>(getValue(I.getOperand(1)))->isZero()) - CR = CR.subtract(APInt(64, 1)); - - unsigned EltWidth = I.getType()->getScalarSizeInBits(); - EltWidth = std::min(EltWidth, (unsigned)CR.getActiveBits()); - EltWidth = std::max(llvm::bit_ceil(EltWidth), (unsigned)8); + bool ZeroIsPoison = + !cast<ConstantSDNode>(getValue(I.getOperand(1)))->isZero(); + ConstantRange VScaleRange(1, true); // Dummy value.
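Editor's note: the experimental_vector_partial_reduce_add expansion a few hunks up is easy to misread, so here is a standalone C++ model that reproduces its queue discipline on plain integer vectors (names are illustrative, not LLVM API):

```cpp
#include <cassert>
#include <cstddef>
#include <deque>
#include <utility>
#include <vector>

using Vec = std::vector<int>;

static Vec addVec(const Vec &A, const Vec &B) {
  Vec R(A.size());
  for (size_t I = 0; I < A.size(); ++I)
    R[I] = A[I] + B[I];
  return R;
}

// Split the wide input into accumulator-sized subvectors, then repeatedly add
// the two front entries and push the sum to the back until one remains.
Vec partialReduceAdd(Vec Acc, const Vec &Wide) {
  size_t Stride = Acc.size();
  assert(Wide.size() % Stride == 0 && "input must be a multiple of the accumulator");
  std::deque<Vec> Subvectors;
  Subvectors.push_back(std::move(Acc));
  for (size_t I = 0; I < Wide.size(); I += Stride)
    Subvectors.push_back(Vec(Wide.begin() + I, Wide.begin() + I + Stride));
  while (Subvectors.size() > 1) {
    Subvectors.push_back(addVec(Subvectors[0], Subvectors[1]));
    Subvectors.pop_front();
    Subvectors.pop_front();
  }
  return Subvectors.front();
}
```

Pushing each pairwise sum to the back of the deque means later additions combine earlier sums, so the adds form a tree rather than a serial chain.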
+ if (isa<ScalableVectorType>(I.getOperand(0)->getType())) + VScaleRange = getVScaleRange(I.getCaller(), 64); + unsigned EltWidth = TLI.getBitWidthForCttzElements( + I.getType(), OpVT.getVectorElementCount(), ZeroIsPoison, &VScaleRange); MVT NewEltTy = MVT::getIntegerVT(EltWidth); @@ -7699,21 +8097,37 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, ResultVT, Vec, Index)); return; } - case Intrinsic::experimental_vector_reverse: + case Intrinsic::vector_reverse: visitVectorReverse(I); return; - case Intrinsic::experimental_vector_splice: + case Intrinsic::vector_splice: visitVectorSplice(I); return; case Intrinsic::callbr_landingpad: visitCallBrLandingPad(I); return; - case Intrinsic::experimental_vector_interleave2: + case Intrinsic::vector_interleave2: visitVectorInterleave(I); return; - case Intrinsic::experimental_vector_deinterleave2: + case Intrinsic::vector_deinterleave2: visitVectorDeinterleave(I); return; + case Intrinsic::experimental_vector_compress: + setValue(&I, DAG.getNode(ISD::VECTOR_COMPRESS, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), Flags)); + return; + case Intrinsic::experimental_convergence_anchor: + case Intrinsic::experimental_convergence_entry: + case Intrinsic::experimental_convergence_loop: + visitConvergenceControl(I, Intrinsic); + return; + case Intrinsic::experimental_vector_histogram_add: { + visitVectorHistogram(I, Intrinsic); + return; + } } } @@ -7727,16 +8141,8 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( SDValue Chain = DAG.getRoot(); SmallVector<SDValue, 4> Opers; Opers.push_back(Chain); - if (FPI.isUnaryOp()) { - Opers.push_back(getValue(FPI.getArgOperand(0))); - } else if (FPI.isTernaryOp()) { - Opers.push_back(getValue(FPI.getArgOperand(0))); - Opers.push_back(getValue(FPI.getArgOperand(1))); - Opers.push_back(getValue(FPI.getArgOperand(2))); - } else { - Opers.push_back(getValue(FPI.getArgOperand(0))); - Opers.push_back(getValue(FPI.getArgOperand(1))); - } + for (unsigned I = 0, E = FPI.getNonMetadataArgCount(); I != E; ++I) + Opers.push_back(getValue(FPI.getArgOperand(I))); auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) { assert(Result.getNode()->getNumValues() == 2); @@ -7841,6 +8247,11 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) { ResOPC = IsZeroUndef ? ISD::VP_CTTZ_ZERO_UNDEF : ISD::VP_CTTZ; break; } + case Intrinsic::vp_cttz_elts: { + bool IsZeroPoison = cast<ConstantInt>(VPIntrin.getArgOperand(1))->isOne(); + ResOPC = IsZeroPoison ? ISD::VP_CTTZ_ELTS_ZERO_UNDEF : ISD::VP_CTTZ_ELTS; + break; + } #define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \ case Intrinsic::VPID: \ ResOPC = ISD::VPSD; \ @@ -7880,7 +8291,7 @@ void SelectionDAGBuilder::visitVPLoad( SDValue InChain = AddToChain ? 
DAG.getRoot() : DAG.getEntryNode(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, - MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); + LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges); LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2], MMO, false /*IsExpanding */); if (AddToChain) @@ -7903,8 +8314,8 @@ void SelectionDAGBuilder::visitVPGather( unsigned AS = PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(AS), MachineMemOperand::MOLoad, - MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); + MachinePointerInfo(AS), MachineMemOperand::MOLoad, + LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges); SDValue Base, Index, Scale; ISD::MemIndexType IndexType; bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale, @@ -7944,7 +8355,7 @@ void SelectionDAGBuilder::visitVPStore( SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, - MemoryLocation::UnknownSize, *Alignment, AAInfo); + LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo); ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset, OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED, /* IsTruncating */ false, /*IsCompressing*/ false); @@ -7967,7 +8378,7 @@ void SelectionDAGBuilder::visitVPScatter( PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(AS), MachineMemOperand::MOStore, - MemoryLocation::UnknownSize, *Alignment, AAInfo); + LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo); SDValue Base, Index, Scale; ISD::MemIndexType IndexType; bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale, @@ -8007,9 +8418,10 @@ void SelectionDAGBuilder::visitVPStridedLoad( MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); bool AddToChain = !AA || !AA->pointsToConstantMemory(ML); SDValue InChain = AddToChain ? 
DAG.getRoot() : DAG.getEntryNode(); + unsigned AS = PtrOperand->getType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, - MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); + MachinePointerInfo(AS), MachineMemOperand::MOLoad, + LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges); SDValue LD = DAG.getStridedLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2], OpValues[3], MMO, @@ -8029,9 +8441,10 @@ void SelectionDAGBuilder::visitVPStridedStore( if (!Alignment) Alignment = DAG.getEVTAlign(VT.getScalarType()); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); + unsigned AS = PtrOperand->getType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, - MemoryLocation::UnknownSize, *Alignment, AAInfo); + MachinePointerInfo(AS), MachineMemOperand::MOStore, + LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo); SDValue ST = DAG.getStridedStoreVP( getMemoryRoot(), DL, OpValues[0], OpValues[1], @@ -8191,7 +8604,9 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic( case ISD::VP_CTLZ: case ISD::VP_CTLZ_ZERO_UNDEF: case ISD::VP_CTTZ: - case ISD::VP_CTTZ_ZERO_UNDEF: { + case ISD::VP_CTTZ_ZERO_UNDEF: + case ISD::VP_CTTZ_ELTS_ZERO_UNDEF: + case ISD::VP_CTTZ_ELTS: { SDValue Result = DAG.getNode(Opcode, DL, VTs, {OpValues[0], OpValues[2], OpValues[3]}); setValue(&VPIntrin, Result); @@ -8208,7 +8623,7 @@ SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain, // Insert a label before the invoke call to mark the try range. This can be // used to detect deletion of the invoke via the MachineModuleInfo. - BeginLabel = MMI.getContext().createTempSymbol(); + BeginLabel = MF.getContext().createTempSymbol(); // For SjLj, keep track of which landing pads go with which invokes // so as to maintain the ordering of pads in the LSDA. @@ -8230,11 +8645,10 @@ SDValue SelectionDAGBuilder::lowerEndEH(SDValue Chain, const InvokeInst *II, assert(BeginLabel && "BeginLabel should've been set"); MachineFunction &MF = DAG.getMachineFunction(); - MachineModuleInfo &MMI = MF.getMMI(); // Insert a label at the end of the invoke call to mark the try range. This // can be used to detect deletion of the invoke via the MachineModuleInfo. - MCSymbol *EndLabel = MMI.getContext().createTempSymbol(); + MCSymbol *EndLabel = MF.getContext().createTempSymbol(); Chain = DAG.getEHLabel(getCurSDLoc(), Chain, EndLabel); // Inform MachineModuleInfo of range. 
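Editor's note: the strided VP load/store hunks above only retag the memory operands, but it helps to recall what the node does. A scalar C++ model of llvm.experimental.vp.strided.load, under the simplifying assumptions that elements are i32 and disabled lanes merge to zero (the real intrinsic leaves them undefined):

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Lane I reads the element Stride bytes past the previous one, starting at
// Base, but only for lanes below EVL whose mask bit is set.
std::vector<int32_t> vpStridedLoad(const uint8_t *Base, ptrdiff_t Stride,
                                   const std::vector<bool> &Mask, size_t EVL) {
  std::vector<int32_t> Result(Mask.size(), 0);
  for (size_t I = 0; I < Mask.size(); ++I)
    if (I < EVL && Mask[I]) {
      int32_t V;
      std::memcpy(&V, Base + I * Stride, sizeof(V)); // stride need not be aligned
      Result[I] = V;
    }
  return Result;
}
```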
@@ -8289,15 +8703,16 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, if (EHPadBB) { DAG.setRoot(lowerEndEH(getRoot(), cast_or_null<InvokeInst>(CLI.CB), EHPadBB, BeginLabel)); + Result.second = getRoot(); } return Result; } void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, - bool isTailCall, - bool isMustTailCall, - const BasicBlock *EHPadBB) { + bool isTailCall, bool isMustTailCall, + const BasicBlock *EHPadBB, + const TargetLowering::PtrAuthInfo *PAI) { auto &DL = DAG.getDataLayout(); FunctionType *FTy = CB.getFunctionType(); Type *RetTy = CB.getType(); @@ -8388,6 +8803,12 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, } } + SDValue ConvControlToken; + if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_convergencectrl)) { + auto *Token = Bundle->Inputs[0].get(); + ConvControlToken = getValue(Token); + } + TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()) .setChain(getRoot()) @@ -8396,7 +8817,17 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, .setConvergent(CB.isConvergent()) .setIsPreallocated( CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0) - .setCFIType(CFIType); + .setCFIType(CFIType) + .setConvergenceControlToken(ConvControlToken); + + // Set the pointer authentication info if we have it. + if (PAI) { + if (!TLI.supportPtrAuthBundles()) + report_fatal_error( + "This target doesn't support calls with ptrauth operand bundles."); + CLI.setPtrAuth(*PAI); + } + std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); if (Result.first.getNode()) { @@ -8609,11 +9040,10 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { // because the return pointer needs to be adjusted by the size of // the copied memory. 
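Editor's note: for context on the mempcpy lowering around this point, the libc semantics are simply memcpy plus a returned end pointer, which is why the surrounding code must adjust the result by Size and cannot emit the copy as a tail call:

```cpp
#include <cstddef>
#include <cstring>

// mempcpy is memcpy that returns one past the last byte written.
void *mempcpyModel(void *Dst, const void *Src, size_t Size) {
  std::memcpy(Dst, Src, Size);
  return static_cast<char *>(Dst) + Size;
}
```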
SDValue Root = getMemoryRoot(); - SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, false, false, - /*isTailCall=*/false, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)), - I.getAAMetadata()); + SDValue MC = DAG.getMemcpy( + Root, sdl, Dst, Src, Size, Alignment, false, false, /*CI=*/nullptr, + std::nullopt, MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata()); assert(MC.getNode() != nullptr && "** memcpy should not be lowered as TailCall in mempcpy context **"); DAG.setRoot(MC); @@ -8837,6 +9267,48 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitUnaryFloatCall(I, ISD::FCOS)) return; break; + case LibFunc_tan: + case LibFunc_tanf: + case LibFunc_tanl: + if (visitUnaryFloatCall(I, ISD::FTAN)) + return; + break; + case LibFunc_asin: + case LibFunc_asinf: + case LibFunc_asinl: + if (visitUnaryFloatCall(I, ISD::FASIN)) + return; + break; + case LibFunc_acos: + case LibFunc_acosf: + case LibFunc_acosl: + if (visitUnaryFloatCall(I, ISD::FACOS)) + return; + break; + case LibFunc_atan: + case LibFunc_atanf: + case LibFunc_atanl: + if (visitUnaryFloatCall(I, ISD::FATAN)) + return; + break; + case LibFunc_sinh: + case LibFunc_sinhf: + case LibFunc_sinhl: + if (visitUnaryFloatCall(I, ISD::FSINH)) + return; + break; + case LibFunc_cosh: + case LibFunc_coshf: + case LibFunc_coshl: + if (visitUnaryFloatCall(I, ISD::FCOSH)) + return; + break; + case LibFunc_tanh: + case LibFunc_tanhf: + case LibFunc_tanhl: + if (visitUnaryFloatCall(I, ISD::FTANH)) + return; + break; case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl: @@ -8942,18 +9414,24 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { } } + if (I.countOperandBundlesOfType(LLVMContext::OB_ptrauth)) { + LowerCallSiteWithPtrAuthBundle(cast<CallBase>(I), /*EHPadBB=*/nullptr); + return; + } + // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't // have to do anything here to lower funclet bundles. // CFGuardTarget bundles are lowered in LowerCallTo. assert(!I.hasOperandBundlesOtherThan( {LLVMContext::OB_deopt, LLVMContext::OB_funclet, LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated, - LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi}) && + LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi, + LLVMContext::OB_convergencectrl}) && "Cannot lower calls with arbitrary operand bundles!"); SDValue Callee = getValue(I.getCalledOperand()); - if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) + if (I.hasDeoptState()) LowerCallSiteWithDeoptBundle(&I, Callee, nullptr); else // Check if we can potentially perform a tail call. More detailed checking @@ -8962,6 +9440,39 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall()); } +void SelectionDAGBuilder::LowerCallSiteWithPtrAuthBundle( + const CallBase &CB, const BasicBlock *EHPadBB) { + auto PAB = CB.getOperandBundle("ptrauth"); + const Value *CalleeV = CB.getCalledOperand(); + + // Gather the call ptrauth data from the operand bundle: + // [ i32 <key>, i64 <discriminator> ] + const auto *Key = cast<ConstantInt>(PAB->Inputs[0]); + const Value *Discriminator = PAB->Inputs[1]; + + assert(Key->getType()->isIntegerTy(32) && "Invalid ptrauth key"); + assert(Discriminator->getType()->isIntegerTy(64) && + "Invalid ptrauth discriminator"); + + // Look through ptrauth constants to find the raw callee. 
+ // Do a direct unauthenticated call if we found it and everything matches. + if (const auto *CalleeCPA = dyn_cast<ConstantPtrAuth>(CalleeV)) + if (CalleeCPA->isKnownCompatibleWith(Key, Discriminator, + DAG.getDataLayout())) + return LowerCallTo(CB, getValue(CalleeCPA->getPointer()), CB.isTailCall(), + CB.isMustTailCall(), EHPadBB); + + // Functions should never be ptrauth-called directly. + assert(!isa<Function>(CalleeV) && "invalid direct ptrauth call"); + + // Otherwise, do an authenticated indirect call. + TargetLowering::PtrAuthInfo PAI = {Key->getZExtValue(), + getValue(Discriminator)}; + + LowerCallTo(CB, getValue(CalleeV), CB.isTailCall(), CB.isMustTailCall(), + EHPadBB, &PAI); +} + namespace { /// AsmOperandInfo - This contains information for each constraint that we are @@ -9055,10 +9566,15 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, // Otherwise, create a stack slot and emit a store to it before the asm. Type *Ty = OpVal->getType(); auto &DL = DAG.getDataLayout(); - uint64_t TySize = DL.getTypeAllocSize(Ty); + TypeSize TySize = DL.getTypeAllocSize(Ty); MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo().CreateStackObject( - TySize, DL.getPrefTypeAlign(Ty), false); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + int StackID = 0; + if (TySize.isScalable()) + StackID = TFI->getStackIDForScalableVectors(); + int SSFI = MF.getFrameInfo().CreateStackObject(TySize.getKnownMinValue(), + DL.getPrefTypeAlign(Ty), false, + nullptr, StackID); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL)); Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot, MachinePointerInfo::getFixedStack(MF, SSFI), @@ -9629,9 +10145,12 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, break; } - assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || - OpInfo.ConstraintType == TargetLowering::C_Register) && - "Unknown constraint type!"); + if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && + OpInfo.ConstraintType != TargetLowering::C_Register) { + emitInlineAsmError(Call, "unknown asm constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + return; + } // TODO: Support this. 
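Editor's note: a standalone sketch of the decision the new LowerCallSiteWithPtrAuthBundle makes. The types here are hypothetical stand-ins, and the real compatibility check (ConstantPtrAuth::isKnownCompatibleWith) also consults the DataLayout; this only models the control flow:

```cpp
#include <cstdint>
#include <optional>

// Hypothetical stand-in for a signed-pointer (ptrauth) constant callee.
struct PtrAuthConstant {
  const void *RawCallee; // the underlying unsigned function pointer
  uint32_t Key;
  uint64_t Discriminator;
};

enum class CallKind { DirectUnauthenticated, IndirectAuthenticated };

// A ptrauth constant known compatible with the bundle's (key, discriminator)
// pair can be called directly; anything else goes through an authenticated
// indirect call.
CallKind classifyPtrAuthCall(const std::optional<PtrAuthConstant> &Callee,
                             uint32_t BundleKey, uint64_t BundleDisc) {
  if (Callee && Callee->Key == BundleKey &&
      Callee->Discriminator == BundleDisc)
    return CallKind::DirectUnauthenticated;
  return CallKind::IndirectAuthenticated;
}
```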
if (OpInfo.isIndirect) { @@ -9812,8 +10331,8 @@ void SelectionDAGBuilder::emitInlineAsmError(const CallBase &Call, return; SmallVector<SDValue, 1> Ops; - for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i) - Ops.push_back(DAG.getUNDEF(ValueVTs[i])); + for (const EVT &VT : ValueVTs) + Ops.push_back(DAG.getUNDEF(VT)); setValue(&Call, DAG.getMergeValues(Ops, getCurSDLoc())); } @@ -9859,19 +10378,16 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I, SDValue Op) { - const MDNode *Range = getRangeMetadata(I); - if (!Range) - return Op; + std::optional<ConstantRange> CR = getRange(I); - ConstantRange CR = getConstantRangeFromMetadata(*Range); - if (CR.isFullSet() || CR.isEmptySet() || CR.isUpperWrapped()) + if (!CR || CR->isFullSet() || CR->isEmptySet() || CR->isUpperWrapped()) return Op; - APInt Lo = CR.getUnsignedMin(); + APInt Lo = CR->getUnsignedMin(); if (!Lo.isMinValue()) return Op; - APInt Hi = CR.getUnsignedMax(); + APInt Hi = CR->getUnsignedMax(); unsigned Bits = std::max(Hi.getActiveBits(), static_cast<unsigned>(IntegerType::MIN_INT_BITS)); @@ -10035,12 +10551,12 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { /// Lower llvm.experimental.patchpoint directly to its target opcode. void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, const BasicBlock *EHPadBB) { - // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, - // i32 <numBytes>, - // i8* <target>, - // i32 <numArgs>, - // [Args...], - // [live variables...]) + // <ty> @llvm.experimental.patchpoint.<ty>(i64 <id>, + // i32 <numBytes>, + // i8* <target>, + // i32 <numArgs>, + // [Args...], + // [live variables...]) CallingConv::ID CC = CB.getCallingConv(); bool IsAnyRegCC = CC == CallingConv::AnyReg; @@ -10078,6 +10594,8 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); SDNode *CallEnd = Result.second.getNode(); + if (CallEnd->getOpcode() == ISD::EH_LABEL) + CallEnd = CallEnd->getOperand(0).getNode(); if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) CallEnd = CallEnd->getOperand(0).getNode(); @@ -10279,14 +10797,14 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { CLI.Ins.clear(); Type *OrigRetTy = CLI.RetTy; SmallVector<EVT, 4> RetTys; - SmallVector<uint64_t, 4> Offsets; + SmallVector<TypeSize, 4> Offsets; auto &DL = CLI.DAG.getDataLayout(); - ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets, 0); + ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets); if (CLI.IsPostTypeLegalization) { // If we are lowering a libcall after legalization, split the return type. SmallVector<EVT, 4> OldRetTys; - SmallVector<uint64_t, 4> OldOffsets; + SmallVector<TypeSize, 4> OldOffsets; RetTys.swap(OldRetTys); Offsets.swap(OldOffsets); @@ -10298,7 +10816,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8; RetTys.append(NumRegs, RegisterVT); for (unsigned j = 0; j != NumRegs; ++j) - Offsets.push_back(Offset + j * RegisterVTByteSZ); + Offsets.push_back(TypeSize::getFixed(Offset + j * RegisterVTByteSZ)); } } @@ -11555,17 +12073,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, // table branch. 
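Editor's note: the reworked lowerRangeToAssertZExt above now goes through getRange(I); the width computation it performs can be modeled in a few lines of C++. This is simplified: the real code only bails on full/empty/wrapped ranges and later rounds the width up to a usable integer type.

```cpp
#include <algorithm>
#include <bit>
#include <cstdint>
#include <optional>

// An AssertZext is only useful when the range starts at 0 (upper bits known
// zero); its narrow type needs just enough bits for the range's unsigned max,
// with a floor of 1 bit, matching IntegerType::MIN_INT_BITS.
std::optional<unsigned> assertZExtBits(uint64_t Lo, uint64_t Hi) {
  if (Lo != 0)
    return std::nullopt;
  return static_cast<unsigned>(std::max<uint64_t>(std::bit_width(Hi), 1));
}
```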
if (FallthroughUnreachable) { Function &CurFunc = CurMF->getFunction(); - bool HasBranchTargetEnforcement = false; - if (CurFunc.hasFnAttribute("branch-target-enforcement")) { - HasBranchTargetEnforcement = - CurFunc.getFnAttribute("branch-target-enforcement") - .getValueAsBool(); - } else { - HasBranchTargetEnforcement = - CurMF->getMMI().getModule()->getModuleFlag( - "branch-target-enforcement"); - } - if (!HasBranchTargetEnforcement) + if (!CurFunc.hasFnAttribute("branch-target-enforcement")) JTH->FallthroughUnreachable = true; } @@ -11997,9 +12505,8 @@ void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) { // VECTOR_SHUFFLE doesn't support a scalable mask so use a dedicated node. if (VT.isScalableVector()) { - MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); setValue(&I, DAG.getNode(ISD::VECTOR_SPLICE, DL, VT, V1, V2, - DAG.getConstant(Imm, DL, IdxVT))); + DAG.getVectorIdxConstant(Imm, DL))); return; } @@ -12092,12 +12599,12 @@ void SelectionDAGBuilder::visitCallBrLandingPad(const CallInst &I) { // getRegistersForValue may produce 1 to many registers based on whether // the OpInfo.ConstraintVT is legal on the target or not. - for (size_t i = 0, e = OpInfo.AssignedRegs.Regs.size(); i != e; ++i) { + for (unsigned &Reg : OpInfo.AssignedRegs.Regs) { Register OriginalDef = FollowCopyChain(MRI, InitialDef++); if (Register::isPhysicalRegister(OriginalDef)) FuncInfo.MBB->addLiveIn(OriginalDef); // Update the assigned registers to use the original defs. - OpInfo.AssignedRegs.Regs[i] = OriginalDef; + Reg = OriginalDef; } SDValue V = OpInfo.AssignedRegs.getCopyFromRegs( diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 40e2f791f59e..1a98fbd7589f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -21,11 +21,11 @@ #include "llvm/CodeGen/AssignmentTrackingAnalysis.h" #include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SwitchLoweringUtils.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Instruction.h" #include "llvm/Support/BranchProbability.h" @@ -385,6 +385,11 @@ public: N = NewN; } + bool shouldKeepJumpConditionsTogether( + const FunctionLoweringInfo &FuncInfo, const BranchInst &I, + Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs, + TargetLoweringBase::CondMergingParams Params) const; + void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, @@ -401,7 +406,8 @@ public: void CopyToExportRegsIfNeeded(const Value *V); void ExportFromCurrentBlock(const Value *V); void LowerCallTo(const CallBase &CB, SDValue Callee, bool IsTailCall, - bool IsMustTailCall, const BasicBlock *EHPadBB = nullptr); + bool IsMustTailCall, const BasicBlock *EHPadBB = nullptr, + const TargetLowering::PtrAuthInfo *PAI = nullptr); // Lower range metadata from 0 to N to assert zext to an integer of nearest // floor power of two. @@ -444,7 +450,7 @@ public: ArrayRef<const Use> GCTransitionArgs; /// The ID that the resulting STATEPOINT instruction has to report. 
- unsigned ID = -1; + uint64_t ID = -1; /// Information regarding the underlying call instruction. TargetLowering::CallLoweringInfo CLI; @@ -485,6 +491,9 @@ public: bool VarArgDisallowed, bool ForceVoidReturnTy); + void LowerCallSiteWithPtrAuthBundle(const CallBase &CB, + const BasicBlock *EHPadBB); + /// Returns the type of FrameIndex and TargetFrameIndex nodes. MVT getFrameIndexTy() { return DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()); @@ -554,8 +563,8 @@ private: void visitShl (const User &I) { visitShift(I, ISD::SHL); } void visitLShr(const User &I) { visitShift(I, ISD::SRL); } void visitAShr(const User &I) { visitShift(I, ISD::SRA); } - void visitICmp(const User &I); - void visitFCmp(const User &I); + void visitICmp(const ICmpInst &I); + void visitFCmp(const FCmpInst &I); // Visit the conversion instructions void visitTrunc(const User &I); void visitZExt(const User &I); @@ -618,6 +627,8 @@ private: void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI); + void visitConvergenceControl(const CallInst &I, unsigned Intrinsic); + void visitVectorHistogram(const CallInst &I, unsigned IntrinsicID); void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT, const SmallVectorImpl<SDValue> &OpValues); void visitVPStore(const VPIntrinsic &VPIntrin, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 9ebef642e423..16fc52caebb7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -27,6 +26,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -75,6 +75,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { } return "<<Unknown Node #" + utostr(getOpcode()) + ">>"; + // clang-format off #ifndef NDEBUG case ISD::DELETED_NODE: return "<<Deleted Node!>>"; #endif @@ -96,6 +97,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; case ISD::ATOMIC_LOAD_FADD: return "AtomicLoadFAdd"; + case ISD::ATOMIC_LOAD_FMIN: return "AtomicLoadFMin"; + case ISD::ATOMIC_LOAD_FMAX: return "AtomicLoadFMax"; case ISD::ATOMIC_LOAD_UINC_WRAP: return "AtomicLoadUIncWrap"; case ISD::ATOMIC_LOAD_UDEC_WRAP: @@ -104,6 +107,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ATOMIC_STORE: return "AtomicStore"; case ISD::PCMARKER: return "PCMarker"; case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; + case ISD::READSTEADYCOUNTER: return "ReadSteadyCounter"; case ISD::SRCVALUE: return "SrcValue"; case ISD::MDNODE_SDNODE: return "MDNode"; case ISD::EntryToken: return "EntryToken"; @@ -123,6 +127,7 @@ std::string 
SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ConstantFP: return "ConstantFP"; case ISD::GlobalAddress: return "GlobalAddress"; case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; + case ISD::PtrAuthGlobalAddress: return "PtrAuthGlobalAddress"; case ISD::FrameIndex: return "FrameIndex"; case ISD::JumpTable: return "JumpTable"; case ISD::JUMP_TABLE_DEBUG_INFO: @@ -164,6 +169,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { if (cast<ConstantSDNode>(this)->isOpaque()) return "OpaqueTargetConstant"; return "TargetConstant"; + case ISD::TargetConstantFP: return "TargetConstantFP"; case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; @@ -206,6 +212,20 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FCOS: return "fcos"; case ISD::STRICT_FCOS: return "strict_fcos"; case ISD::FSINCOS: return "fsincos"; + case ISD::FTAN: return "ftan"; + case ISD::STRICT_FTAN: return "strict_ftan"; + case ISD::FASIN: return "fasin"; + case ISD::STRICT_FASIN: return "strict_fasin"; + case ISD::FACOS: return "facos"; + case ISD::STRICT_FACOS: return "strict_facos"; + case ISD::FATAN: return "fatan"; + case ISD::STRICT_FATAN: return "strict_fatan"; + case ISD::FSINH: return "fsinh"; + case ISD::STRICT_FSINH: return "strict_fsinh"; + case ISD::FCOSH: return "fcosh"; + case ISD::STRICT_FCOSH: return "strict_fcosh"; + case ISD::FTANH: return "ftanh"; + case ISD::STRICT_FTANH: return "strict_ftanh"; case ISD::FTRUNC: return "ftrunc"; case ISD::STRICT_FTRUNC: return "strict_ftrunc"; case ISD::FFLOOR: return "ffloor"; @@ -285,6 +305,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SMAX: return "smax"; case ISD::UMIN: return "umin"; case ISD::UMAX: return "umax"; + case ISD::SCMP: return "scmp"; + case ISD::UCMP: return "ucmp"; case ISD::FLDEXP: return "fldexp"; case ISD::STRICT_FLDEXP: return "strict_fldexp"; @@ -295,6 +317,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SETCCCARRY: return "setcccarry"; case ISD::STRICT_FSETCC: return "strict_fsetcc"; case ISD::STRICT_FSETCCS: return "strict_fsetccs"; + case ISD::FPTRUNC_ROUND: return "fptrunc_round"; case ISD::SELECT: return "select"; case ISD::VSELECT: return "vselect"; case ISD::SELECT_CC: return "select_cc"; @@ -379,7 +402,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FP_TO_FP16: return "fp_to_fp16"; case ISD::STRICT_FP_TO_FP16: return "strict_fp_to_fp16"; case ISD::BF16_TO_FP: return "bf16_to_fp"; + case ISD::STRICT_BF16_TO_FP: return "strict_bf16_to_fp"; case ISD::FP_TO_BF16: return "fp_to_bf16"; + case ISD::STRICT_FP_TO_BF16: return "strict_fp_to_bf16"; case ISD::LROUND: return "lround"; case ISD::STRICT_LROUND: return "strict_lround"; case ISD::LLROUND: return "llround"; @@ -409,6 +434,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::MSTORE: return "masked_store"; case ISD::MGATHER: return "masked_gather"; case ISD::MSCATTER: return "masked_scatter"; + case ISD::VECTOR_COMPRESS: return "vector_compress"; case ISD::VAARG: return "vaarg"; case ISD::VACOPY: return "vacopy"; case ISD::VAEND: return "vaend"; @@ -446,6 +472,12 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SET_FPMODE: return "set_fpmode"; case ISD::RESET_FPMODE: return "reset_fpmode"; + // Convergence control instructions + case ISD::CONVERGENCECTRL_ANCHOR: return 
"convergencectrl_anchor"; + case ISD::CONVERGENCECTRL_ENTRY: return "convergencectrl_entry"; + case ISD::CONVERGENCECTRL_LOOP: return "convergencectrl_loop"; + case ISD::CONVERGENCECTRL_GLUE: return "convergencectrl_glue"; + // Bit manipulation case ISD::ABS: return "abs"; case ISD::BITREVERSE: return "bitreverse"; @@ -461,6 +493,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::INIT_TRAMPOLINE: return "init_trampoline"; case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; + // clang-format on + case ISD::CONDCODE: switch (cast<CondCodeSDNode>(this)->get()) { default: llvm_unreachable("Unknown setcc condition!"); @@ -513,6 +547,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { return "stackmap"; case ISD::PATCHPOINT: return "patchpoint"; + case ISD::CLEAR_CACHE: + return "clear_cache"; + + case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM: + return "histogram"; // Vector Predication #define BEGIN_REGISTER_VP_SDNODE(SDID, LEGALARG, NAME, ...) \ @@ -828,6 +867,18 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { } else if (const MemSDNode *M = dyn_cast<MemSDNode>(this)) { OS << "<"; printMemOperand(OS, *M->getMemOperand(), G); + if (auto *A = dyn_cast<AtomicSDNode>(M)) + if (A->getOpcode() == ISD::ATOMIC_LOAD) { + bool doExt = true; + switch (A->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: OS << ", anyext"; break; + case ISD::SEXTLOAD: OS << ", sext"; break; + case ISD::ZEXTLOAD: OS << ", zext"; break; + } + if (doExt) + OS << " from " << A->getMemoryVT(); + } OS << ">"; } else if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(this)) { @@ -879,6 +930,13 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { MD->printAsOperand(OS, G->getMachineFunction().getFunction().getParent()); OS << ']'; } + + if (MDNode *MMRA = G ? G->getMMRAMetadata(this) : nullptr) { + OS << " [mmra "; + MMRA->printAsOperand(OS, + G->getMachineFunction().getFunction().getParent()); + OS << ']'; + } } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 359d738d2ca0..df3d207d85d3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -48,7 +48,6 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePassRegistry.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -61,6 +60,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -78,6 +78,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/IR/PrintPasses.h" #include "llvm/IR/Statepoint.h" #include "llvm/IR/Type.h" @@ -310,15 +311,6 @@ namespace llvm { } // end namespace llvm -// EmitInstrWithCustomInserter - This method should be implemented by targets -// that mark instructions with the 'usesCustomInserter' flag. 
These -// instructions are special in various ways, which require special support to -// insert. The specified MachineInstr is created but not inserted into any -// basic blocks, and this method is called to expand it into a sequence of -// instructions, potentially also creating new basic blocks and control flow. -// When new basic blocks are inserted and the edges from MBB to its successors -// are modified, the method should insert pairs of <OldSucc, NewSucc> into the -// DenseMap. MachineBasicBlock * TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const { @@ -341,9 +333,49 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, // SelectionDAGISel code //===----------------------------------------------------------------------===// -SelectionDAGISel::SelectionDAGISel(char &ID, TargetMachine &tm, - CodeGenOptLevel OL) - : MachineFunctionPass(ID), TM(tm), FuncInfo(new FunctionLoweringInfo()), +SelectionDAGISelLegacy::SelectionDAGISelLegacy( + char &ID, std::unique_ptr<SelectionDAGISel> S) + : MachineFunctionPass(ID), Selector(std::move(S)) { + initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); + initializeBranchProbabilityInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); + initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); + initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +bool SelectionDAGISelLegacy::runOnMachineFunction(MachineFunction &MF) { + // If we already selected that function, we do not need to run SDISel. + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::Selected)) + return false; + + // Do some sanity-checking on the command-line options. + if (EnableFastISelAbort && !Selector->TM.Options.EnableFastISel) + report_fatal_error("-fast-isel-abort > 0 requires -fast-isel"); + + // Decide what flavour of variable location debug-info will be used, before + // we change the optimisation level. + MF.setUseDebugInstrRef(MF.shouldUseDebugInstrRef()); + + // Reset the target options before resetting the optimization + // level below. + // FIXME: This is a horrible hack and should be processed via + // codegen looking at the optimization level explicitly when + // it wants to look at it. + Selector->TM.resetTargetOptions(MF.getFunction()); + // Reset OptLevel to None for optnone functions. + CodeGenOptLevel NewOptLevel = skipFunction(MF.getFunction()) + ? 
CodeGenOptLevel::None + : Selector->OptLevel; + + Selector->MF = &MF; + OptLevelChanger OLC(*Selector, NewOptLevel); + Selector->initializeAnalysisResults(*this); + return Selector->runOnMachineFunction(MF); +} + +SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOptLevel OL) + : TM(tm), FuncInfo(new FunctionLoweringInfo()), SwiftError(new SwiftErrorValueTracking()), CurDAG(new SelectionDAG(tm, OL)), SDB(std::make_unique<SelectionDAGBuilder>(*CurDAG, *FuncInfo, *SwiftError, @@ -361,14 +393,17 @@ SelectionDAGISel::~SelectionDAGISel() { delete SwiftError; } -void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { +void SelectionDAGISelLegacy::getAnalysisUsage(AnalysisUsage &AU) const { + CodeGenOptLevel OptLevel = Selector->OptLevel; if (OptLevel != CodeGenOptLevel::None) AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<GCModuleInfo>(); AU.addRequired<StackProtector>(); AU.addPreserved<GCModuleInfo>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); +#ifndef NDEBUG AU.addRequired<TargetTransformInfoWrapperPass>(); +#endif AU.addRequired<AssumptionCacheTracker>(); if (UseMBPI && OptLevel != CodeGenOptLevel::None) AU.addRequired<BranchProbabilityInfoWrapperPass>(); @@ -406,66 +441,128 @@ static void computeUsesMSVCFloatingPoint(const Triple &TT, const Function &F, } } -bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { +PreservedAnalyses +SelectionDAGISelPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { // If we already selected that function, we do not need to run SDISel. - if (mf.getProperties().hasProperty( + if (MF.getProperties().hasProperty( MachineFunctionProperties::Property::Selected)) - return false; - // Do some sanity-checking on the command-line options. - assert((!EnableFastISelAbort || TM.Options.EnableFastISel) && - "-fast-isel-abort > 0 requires -fast-isel"); - - const Function &Fn = mf.getFunction(); - MF = &mf; + return PreservedAnalyses::all(); -#ifndef NDEBUG - StringRef FuncName = Fn.getName(); - MatchFilterFuncName = isFunctionInPrintList(FuncName); -#else - (void)MatchFilterFuncName; -#endif + // Do some sanity-checking on the command-line options. + if (EnableFastISelAbort && !Selector->TM.Options.EnableFastISel) + report_fatal_error("-fast-isel-abort > 0 requires -fast-isel"); // Decide what flavour of variable location debug-info will be used, before // we change the optimisation level. - bool InstrRef = mf.shouldUseDebugInstrRef(); - mf.setUseDebugInstrRef(InstrRef); + MF.setUseDebugInstrRef(MF.shouldUseDebugInstrRef()); // Reset the target options before resetting the optimization // level below. // FIXME: This is a horrible hack and should be processed via // codegen looking at the optimization level explicitly when // it wants to look at it. - TM.resetTargetOptions(Fn); + Selector->TM.resetTargetOptions(MF.getFunction()); // Reset OptLevel to None for optnone functions. - CodeGenOptLevel NewOptLevel = OptLevel; - if (OptLevel != CodeGenOptLevel::None && skipFunction(Fn)) - NewOptLevel = CodeGenOptLevel::None; - OptLevelChanger OLC(*this, NewOptLevel); + // TODO: Add a function analysis to handle this. + Selector->MF = &MF; + // Reset OptLevel to None for optnone functions. + CodeGenOptLevel NewOptLevel = MF.getFunction().hasOptNone() + ? 
CodeGenOptLevel::None + : Selector->OptLevel; + + OptLevelChanger OLC(*Selector, NewOptLevel); + Selector->initializeAnalysisResults(MFAM); + Selector->runOnMachineFunction(MF); + + return getMachineFunctionPassPreservedAnalyses(); +} + +void SelectionDAGISel::initializeAnalysisResults( + MachineFunctionAnalysisManager &MFAM) { + auto &FAM = MFAM.getResult<FunctionAnalysisManagerMachineFunctionProxy>(*MF) + .getManager(); + auto &MAMP = MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(*MF); + Function &Fn = MF->getFunction(); +#ifndef NDEBUG + FuncName = Fn.getName(); + MatchFilterFuncName = isFunctionInPrintList(FuncName); +#else + (void)MatchFilterFuncName; +#endif TII = MF->getSubtarget().getInstrInfo(); TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); - LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Fn); - GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; + LibInfo = &FAM.getResult<TargetLibraryAnalysis>(Fn); + GFI = Fn.hasGC() ? &FAM.getResult<GCFunctionAnalysis>(Fn) : nullptr; ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn); - AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(mf.getFunction()); - auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + AC = &FAM.getResult<AssumptionAnalysis>(Fn); + auto *PSI = MAMP.getCachedResult<ProfileSummaryAnalysis>(*Fn.getParent()); BlockFrequencyInfo *BFI = nullptr; + FAM.getResult<BlockFrequencyAnalysis>(Fn); if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOptLevel::None) - BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI(); + BFI = &FAM.getResult<BlockFrequencyAnalysis>(Fn); FunctionVarLocs const *FnVarLocs = nullptr; if (isAssignmentTrackingEnabled(*Fn.getParent())) - FnVarLocs = getAnalysis<AssignmentTrackingAnalysis>().getResults(); + FnVarLocs = &FAM.getResult<DebugAssignmentTrackingAnalysis>(Fn); + + auto *UA = FAM.getCachedResult<UniformityInfoAnalysis>(Fn); + CurDAG->init(*MF, *ORE, MFAM, LibInfo, UA, PSI, BFI, FnVarLocs); - ISEL_DUMP(dbgs() << "\n\n\n=== " << FuncName << "\n"); + // Now get the optional analyzes if we want to. + // This is based on the possibly changed OptLevel (after optnone is taken + // into account). That's unfortunate but OK because it just means we won't + // ask for passes that have been required anyway. + + if (UseMBPI && OptLevel != CodeGenOptLevel::None) + FuncInfo->BPI = &FAM.getResult<BranchProbabilityAnalysis>(Fn); + else + FuncInfo->BPI = nullptr; + + if (OptLevel != CodeGenOptLevel::None) + AA = &FAM.getResult<AAManager>(Fn); + else + AA = nullptr; + + SP = &FAM.getResult<SSPLayoutAnalysis>(Fn); + +#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS + TTI = &FAM.getResult<TargetIRAnalysis>(Fn); +#endif +} + +void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) { + Function &Fn = MF->getFunction(); +#ifndef NDEBUG + FuncName = Fn.getName(); + MatchFilterFuncName = isFunctionInPrintList(FuncName); +#else + (void)MatchFilterFuncName; +#endif + + TII = MF->getSubtarget().getInstrInfo(); + TLI = MF->getSubtarget().getTargetLowering(); + RegInfo = &MF->getRegInfo(); + LibInfo = &MFP.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Fn); + GFI = Fn.hasGC() ? 
&MFP.getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) + : nullptr; + ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn); + AC = &MFP.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(Fn); + auto *PSI = &MFP.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + BlockFrequencyInfo *BFI = nullptr; + if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOptLevel::None) + BFI = &MFP.getAnalysis<LazyBlockFrequencyInfoPass>().getBFI(); + + FunctionVarLocs const *FnVarLocs = nullptr; + if (isAssignmentTrackingEnabled(*Fn.getParent())) + FnVarLocs = MFP.getAnalysis<AssignmentTrackingAnalysis>().getResults(); UniformityInfo *UA = nullptr; - if (auto *UAPass = getAnalysisIfAvailable<UniformityInfoWrapperPass>()) + if (auto *UAPass = MFP.getAnalysisIfAvailable<UniformityInfoWrapperPass>()) UA = &UAPass->getUniformityInfo(); - CurDAG->init(*MF, *ORE, this, LibInfo, UA, PSI, BFI, FnVarLocs); - FuncInfo->set(Fn, *MF, CurDAG); - SwiftError->setFunction(*MF); + CurDAG->init(*MF, *ORE, &MFP, LibInfo, UA, PSI, BFI, FnVarLocs); // Now get the optional analyzes if we want to. // This is based on the possibly changed OptLevel (after optnone is taken @@ -473,15 +570,33 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // ask for passes that have been required anyway. if (UseMBPI && OptLevel != CodeGenOptLevel::None) - FuncInfo->BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); + FuncInfo->BPI = + &MFP.getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); else FuncInfo->BPI = nullptr; if (OptLevel != CodeGenOptLevel::None) - AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + AA = &MFP.getAnalysis<AAResultsWrapperPass>().getAAResults(); else AA = nullptr; + SP = &MFP.getAnalysis<StackProtector>().getLayoutInfo(); + +#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS + TTI = &MFP.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn); +#endif +} + +bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { + SwiftError->setFunction(mf); + const Function &Fn = mf.getFunction(); + + bool InstrRef = mf.shouldUseDebugInstrRef(); + + FuncInfo->set(MF->getFunction(), *MF, CurDAG); + + ISEL_DUMP(dbgs() << "\n\n\n=== " << FuncName << '\n'); + SDB->init(GFI, AA, AC, LibInfo); MF->setHasInlineAsm(false); @@ -632,16 +747,16 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // that COPY instructions also need DBG_VALUE, if it is the only // user of LDI->second. MachineInstr *CopyUseMI = nullptr; - for (MachineRegisterInfo::use_instr_iterator - UI = RegInfo->use_instr_begin(LDI->second), - E = RegInfo->use_instr_end(); UI != E; ) { - MachineInstr *UseMI = &*(UI++); - if (UseMI->isDebugValue()) continue; - if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) { - CopyUseMI = UseMI; continue; + for (MachineInstr &UseMI : RegInfo->use_instructions(LDI->second)) { + if (UseMI.isDebugValue()) + continue; + if (UseMI.isCopy() && !CopyUseMI && UseMI.getParent() == EntryMBB) { + CopyUseMI = &UseMI; + continue; } // Otherwise this is another use or second copy use. - CopyUseMI = nullptr; break; + CopyUseMI = nullptr; + break; } if (CopyUseMI && TRI.getRegSizeInBits(LDI->second, MRI) == @@ -680,9 +795,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { } } - // Determine if there is a call to setjmp in the machine function. 
- MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice()); - // Determine if floating point is used for msvc computeUsesMSVCFloatingPoint(TM.getTargetTriple(), Fn, MF->getMMI()); @@ -779,11 +891,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { StringRef GroupName = "sdag"; StringRef GroupDescription = "Instruction Selection and Scheduling"; std::string BlockName; - bool MatchFilterBB = false; (void)MatchFilterBB; -#ifndef NDEBUG - TargetTransformInfo &TTI = - getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*FuncInfo->Fn); -#endif + bool MatchFilterBB = false; + (void)MatchFilterBB; // Pre-type legalization allow creation of any node types. CurDAG->NewNodesMustHaveLegalTypes = false; @@ -807,8 +916,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { << "'\n"; CurDAG->dump()); -#ifndef NDEBUG - if (TTI.hasBranchDivergence()) +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + if (TTI->hasBranchDivergence()) CurDAG->VerifyDAGDivergence(); #endif @@ -827,8 +936,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { << "'\n"; CurDAG->dump()); -#ifndef NDEBUG - if (TTI.hasBranchDivergence()) +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + if (TTI->hasBranchDivergence()) CurDAG->VerifyDAGDivergence(); #endif @@ -849,8 +958,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { << "'\n"; CurDAG->dump()); -#ifndef NDEBUG - if (TTI.hasBranchDivergence()) +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + if (TTI->hasBranchDivergence()) CurDAG->VerifyDAGDivergence(); #endif @@ -873,8 +982,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { << "'\n"; CurDAG->dump()); -#ifndef NDEBUG - if (TTI.hasBranchDivergence()) +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + if (TTI->hasBranchDivergence()) CurDAG->VerifyDAGDivergence(); #endif } @@ -891,8 +1000,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { << "'\n"; CurDAG->dump()); -#ifndef NDEBUG - if (TTI.hasBranchDivergence()) +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + if (TTI->hasBranchDivergence()) CurDAG->VerifyDAGDivergence(); #endif @@ -907,8 +1016,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { << "'\n"; CurDAG->dump()); -#ifndef NDEBUG - if (TTI.hasBranchDivergence()) +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + if (TTI->hasBranchDivergence()) CurDAG->VerifyDAGDivergence(); #endif @@ -927,8 +1036,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { << "'\n"; CurDAG->dump()); -#ifndef NDEBUG - if (TTI.hasBranchDivergence()) +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + if (TTI->hasBranchDivergence()) CurDAG->VerifyDAGDivergence(); #endif } @@ -947,8 +1056,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { << "'\n"; CurDAG->dump()); -#ifndef NDEBUG - if (TTI.hasBranchDivergence()) +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + if (TTI->hasBranchDivergence()) CurDAG->VerifyDAGDivergence(); #endif @@ -967,8 +1076,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { << "'\n"; CurDAG->dump()); -#ifndef NDEBUG - if (TTI.hasBranchDivergence()) +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + if (TTI->hasBranchDivergence()) CurDAG->VerifyDAGDivergence(); #endif @@ -1059,6 +1168,8 @@ public: SDNode *CurNode = &*ISelPosition; if (MDNode *MD = DAG.getPCSections(CurNode)) DAG.addPCSections(N, MD); + if (MDNode *MMRA = DAG.getMMRAMetadata(CurNode)) + DAG.addMMRAMetadata(N, MMRA); } }; @@ -1336,13 +1447,12 @@ void SelectionDAGISel::reportIPToStateForBlocks(MachineFunction *MF) { llvm::WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo(); if (!EHInfo) return; - for (auto MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) { - MachineBasicBlock *MBB = &*MBBI; - const BasicBlock *BB = MBB->getBasicBlock(); + for (MachineBasicBlock &MBB : *MF) { + const 
BasicBlock *BB = MBB.getBasicBlock(); int State = EHInfo->BlockToStateMap[BB]; if (BB->getFirstMayFaultInst()) { // Report IP range only for blocks with Faulty inst - auto MBBb = MBB->getFirstNonPHI(); + auto MBBb = MBB.getFirstNonPHI(); MachineInstr *MIb = &*MBBb; if (MIb->isTerminator()) continue; @@ -1351,16 +1461,16 @@ void SelectionDAGISel::reportIPToStateForBlocks(MachineFunction *MF) { MCSymbol *BeginLabel = MMI.getContext().createTempSymbol(); MCSymbol *EndLabel = MMI.getContext().createTempSymbol(); EHInfo->addIPToStateRange(State, BeginLabel, EndLabel); - BuildMI(*MBB, MBBb, SDB->getCurDebugLoc(), + BuildMI(MBB, MBBb, SDB->getCurDebugLoc(), TII->get(TargetOpcode::EH_LABEL)) .addSym(BeginLabel); - auto MBBe = MBB->instr_end(); + auto MBBe = MBB.instr_end(); MachineInstr *MIe = &*(--MBBe); // insert before (possible multiple) terminators while (MIe->isTerminator()) MIe = &*(--MBBe); ++MBBe; - BuildMI(*MBB, MBBe, SDB->getCurDebugLoc(), + BuildMI(MBB, MBBe, SDB->getCurDebugLoc(), TII->get(TargetOpcode::EH_LABEL)) .addSym(EndLabel); } @@ -1461,13 +1571,12 @@ static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) { if (DI && processDbgDeclare(FuncInfo, DI->getAddress(), DI->getExpression(), DI->getVariable(), DI->getDebugLoc())) FuncInfo.PreprocessedDbgDeclares.insert(DI); - - for (const DPValue &DPV : I.getDbgValueRange()) { - if (DPV.getType() == DPValue::LocationType::Declare && - processDbgDeclare(FuncInfo, DPV.getVariableLocationOp(0), - DPV.getExpression(), DPV.getVariable(), - DPV.getDebugLoc())) - FuncInfo.PreprocessedDPVDeclares.insert(&DPV); + for (const DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) { + if (DVR.Type == DbgVariableRecord::LocationType::Declare && + processDbgDeclare(FuncInfo, DVR.getVariableLocationOp(0), + DVR.getExpression(), DVR.getVariable(), + DVR.getDebugLoc())) + FuncInfo.PreprocessedDVRDeclares.insert(&DVR); } } } @@ -1555,7 +1664,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } // Iterate over all basic blocks in the function. - StackProtector &SP = getAnalysis<StackProtector>(); for (const BasicBlock *LLVMBB : RPOT) { if (OptLevel != CodeGenOptLevel::None) { bool AllPredsVisited = true; @@ -1670,7 +1778,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { raw_string_ostream InstStr(InstStrStorage); InstStr << *Inst; - R << ": " << InstStr.str(); + R << ": " << InstStrStorage; } reportFastISelFailure(*MF, *ORE, R, EnableFastISelAbort > 2); @@ -1719,7 +1827,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { std::string InstStrStorage; raw_string_ostream InstStr(InstStrStorage); InstStr << *Inst; - R << ": " << InstStr.str(); + R << ": " << InstStrStorage; } reportFastISelFailure(*MF, *ORE, R, ShouldAbort); @@ -1731,7 +1839,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->recomputeInsertPt(); } - if (SP.shouldEmitSDCheck(*LLVMBB)) { + if (SP->shouldEmitSDCheck(*LLVMBB)) { bool FunctionBasedInstrumentation = TLI->getSSPStackGuardCheck(*Fn.getParent()); SDB->SPDescriptor.initialize(LLVMBB, FuncInfo->MBBMap[LLVMBB], @@ -1768,7 +1876,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (Fn.getParent()->getModuleFlag("eh-asynch")) reportIPToStateForBlocks(MF); - SP.copyToMachineFrameInfo(MF->getFrameInfo()); + SP->copyToMachineFrameInfo(MF->getFrameInfo()); SwiftError->propagateVRegs(); @@ -2008,8 +2116,8 @@ SelectionDAGISel::FinishBasicBlock() { // from the original BB before switch expansion. 
Note that PHI nodes can
    // occur multiple times in PHINodesToUpdate.  We have to be very careful to
    // handle them the right number of times.
-  for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
-    FuncInfo->MBB = Succs[i];
+  for (MachineBasicBlock *Succ : Succs) {
+    FuncInfo->MBB = Succ;
     FuncInfo->InsertPt = FuncInfo->MBB->end();
     // FuncInfo->MBB may have been removed from the CFG if a branch was
     // constant folded.
@@ -2112,24 +2220,27 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
 /// by tblgen.  Others should not call it.
 void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops,
                                                      const SDLoc &DL) {
-  std::vector<SDValue> InOps;
-  std::swap(InOps, Ops);
+  // Change the vector of SDValue into a list of HandleSDNodes, because x86
+  // might call replaceAllUses when matching an address.
+
+  std::list<HandleSDNode> Handles;
 
-  Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0
-  Ops.push_back(InOps[InlineAsm::Op_AsmString]);  // 1
-  Ops.push_back(InOps[InlineAsm::Op_MDNode]);     // 2, !srcloc
-  Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]);  // 3 (SideEffect, AlignStack)
+  Handles.emplace_back(Ops[InlineAsm::Op_InputChain]); // 0
+  Handles.emplace_back(Ops[InlineAsm::Op_AsmString]);  // 1
+  Handles.emplace_back(Ops[InlineAsm::Op_MDNode]);     // 2, !srcloc
+  Handles.emplace_back(
+      Ops[InlineAsm::Op_ExtraInfo]); // 3 (SideEffect, AlignStack)
 
-  unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size();
-  if (InOps[e-1].getValueType() == MVT::Glue)
+  unsigned i = InlineAsm::Op_FirstOperand, e = Ops.size();
+  if (Ops[e - 1].getValueType() == MVT::Glue)
    --e;  // Don't process a glue operand if it is here.
 
   while (i != e) {
-    InlineAsm::Flag Flags(InOps[i]->getAsZExtVal());
+    InlineAsm::Flag Flags(Ops[i]->getAsZExtVal());
     if (!Flags.isMemKind() && !Flags.isFuncKind()) {
       // Just skip over this operand, copying the operands verbatim.
-      Ops.insert(Ops.end(), InOps.begin() + i,
-                 InOps.begin() + i + Flags.getNumOperandRegisters() + 1);
+      Handles.insert(Handles.end(), Ops.begin() + i,
+                     Ops.begin() + i + Flags.getNumOperandRegisters() + 1);
       i += Flags.getNumOperandRegisters() + 1;
     } else {
       assert(Flags.getNumOperandRegisters() == 1 &&
@@ -2139,10 +2250,10 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops,
       if (Flags.isUseOperandTiedToDef(TiedToOperand)) {
         // We need the constraint ID from the operand this is tied to.
         unsigned CurOp = InlineAsm::Op_FirstOperand;
-        Flags = InlineAsm::Flag(InOps[CurOp]->getAsZExtVal());
+        Flags = InlineAsm::Flag(Ops[CurOp]->getAsZExtVal());
         for (; TiedToOperand; --TiedToOperand) {
           CurOp += Flags.getNumOperandRegisters() + 1;
-          Flags = InlineAsm::Flag(InOps[CurOp]->getAsZExtVal());
+          Flags = InlineAsm::Flag(Ops[CurOp]->getAsZExtVal());
         }
       }
 
@@ -2150,7 +2261,7 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops,
       std::vector<SDValue> SelOps;
       const InlineAsm::ConstraintCode ConstraintID =
           Flags.getMemoryConstraintID();
-      if (SelectInlineAsmMemoryOperand(InOps[i+1], ConstraintID, SelOps))
+      if (SelectInlineAsmMemoryOperand(Ops[i + 1], ConstraintID, SelOps))
        report_fatal_error("Could not match memory address. 
Inline asm" " failure!"); @@ -2159,15 +2270,19 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, : InlineAsm::Kind::Func, SelOps.size()); Flags.setMemConstraint(ConstraintID); - Ops.push_back(CurDAG->getTargetConstant(Flags, DL, MVT::i32)); - llvm::append_range(Ops, SelOps); + Handles.emplace_back(CurDAG->getTargetConstant(Flags, DL, MVT::i32)); + Handles.insert(Handles.end(), SelOps.begin(), SelOps.end()); i += 2; } } // Add the glue input back if present. - if (e != InOps.size()) - Ops.push_back(InOps.back()); + if (e != Ops.size()) + Handles.emplace_back(Ops.back()); + + Ops.clear(); + for (auto &handle : Handles) + Ops.push_back(handle.getValue()); } /// findGlueUse - Return use of MVT::Glue value produced by the specified @@ -2371,6 +2486,21 @@ void SelectionDAGISel::Select_MEMBARRIER(SDNode *N) { N->getOperand(0)); } +void SelectionDAGISel::Select_CONVERGENCECTRL_ANCHOR(SDNode *N) { + CurDAG->SelectNodeTo(N, TargetOpcode::CONVERGENCECTRL_ANCHOR, + N->getValueType(0)); +} + +void SelectionDAGISel::Select_CONVERGENCECTRL_ENTRY(SDNode *N) { + CurDAG->SelectNodeTo(N, TargetOpcode::CONVERGENCECTRL_ENTRY, + N->getValueType(0)); +} + +void SelectionDAGISel::Select_CONVERGENCECTRL_LOOP(SDNode *N) { + CurDAG->SelectNodeTo(N, TargetOpcode::CONVERGENCECTRL_LOOP, + N->getValueType(0), N->getOperand(0)); +} + void SelectionDAGISel::pushStackMapLiveVariable(SmallVectorImpl<SDValue> &Ops, SDValue OpVal, SDLoc DL) { SDNode *OpNode = OpVal.getNode(); @@ -3118,6 +3248,15 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::JUMP_TABLE_DEBUG_INFO: Select_JUMP_TABLE_DEBUG_INFO(NodeToMatch); return; + case ISD::CONVERGENCECTRL_ANCHOR: + Select_CONVERGENCECTRL_ANCHOR(NodeToMatch); + return; + case ISD::CONVERGENCECTRL_ENTRY: + Select_CONVERGENCECTRL_ENTRY(NodeToMatch); + return; + case ISD::CONVERGENCECTRL_LOOP: + Select_CONVERGENCECTRL_LOOP(NodeToMatch); + return; } assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); @@ -4238,5 +4377,5 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) { else Msg << "unknown intrinsic #" << iid; } - report_fatal_error(Twine(Msg.str())); + report_fatal_error(Twine(msg)); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index b66eeb6d2bb1..ac28f6289478 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -299,7 +299,7 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const { } else { O << "CROSS RC COPY"; } - return O.str(); + return s; } void ScheduleDAGSDNodes::getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index cf32350036d4..4268da8670d5 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -26,13 +26,13 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h" -#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/RuntimeLibcallUtil.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include 
"llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GCStrategy.h" @@ -340,6 +340,9 @@ static std::pair<SDValue, SDNode *> lowerCallFromStatepointLoweringInfo( // to grab the return value from the return register(s), or it can be a LOAD // to load a value returned by reference via a stack slot. + if (CallEnd->getOpcode() == ISD::EH_LABEL) + CallEnd = CallEnd->getOperand(0).getNode(); + bool HasDef = !SI.CLI.RetTy->isVoidTy(); if (HasDef) { if (CallEnd->getOpcode() == ISD::LOAD) @@ -1287,7 +1290,7 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { if (SD.isUndef() && SD.getValueType().getSizeInBits() <= 64) { // Lowering relocate(undef) as arbitrary constant. Current constant value // is chosen such that it's unlikely to be a valid pointer. - setValue(&Relocate, DAG.getTargetConstant(0xFEFEFEFE, SDLoc(SD), MVT::i64)); + setValue(&Relocate, DAG.getConstant(0xFEFEFEFE, SDLoc(SD), MVT::i64)); return; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index b8ed02e268b1..140c97ccd90b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -62,9 +62,10 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, // the return. Ignore following attributes because they don't affect the // call sequence. AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs()); - for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable, - Attribute::DereferenceableOrNull, Attribute::NoAlias, - Attribute::NonNull, Attribute::NoUndef}) + for (const auto &Attr : + {Attribute::Alignment, Attribute::Dereferenceable, + Attribute::DereferenceableOrNull, Attribute::NoAlias, + Attribute::NonNull, Attribute::NoUndef, Attribute::Range}) CallerAttrs.removeAttribute(Attr); if (CallerAttrs.hasAttributes()) @@ -208,7 +209,7 @@ bool TargetLowering::findOptimalMemOpLowering( // Use the largest integer type whose alignment constraints are satisfied. // We only need to check DstAlign here as SrcAlign is always greater or // equal to DstAlign (or zero). - VT = MVT::i64; + VT = MVT::LAST_INTEGER_VALUETYPE; if (Op.isFixedDstAlign()) while (Op.getDstAlign() < (VT.getSizeInBits() / 8) && !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign())) @@ -216,7 +217,7 @@ bool TargetLowering::findOptimalMemOpLowering( assert(VT.isInteger()); // Find the largest legal integer type. - MVT LVT = MVT::i64; + MVT LVT = MVT::LAST_INTEGER_VALUETYPE; while (!isTypeLegal(LVT)) LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1); assert(LVT.isInteger()); @@ -491,7 +492,7 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // If the address is not even local to this DSO we will have to load it from // a got and then add the offset. - if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) + if (!TM.shouldAssumeDSOLocal(GV)) return false; // If the code is position independent we will have to add a base register. 
@@ -544,7 +545,8 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, if (!C.isSubsetOf(DemandedBits)) { EVT VT = Op.getValueType(); SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT); - SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC); + SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC, + Op->getFlags()); return TLO.CombineTo(Op, NewOp); } @@ -585,6 +587,10 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, if (VT.isVector()) return false; + assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth && + Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth && + "ShrinkDemandedOp only supports operands that have the same size!"); + // Don't do this if the node has another user, which may require the // full value. if (!Op.getNode()->hasOneUse()) @@ -742,6 +748,13 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( break; } + case ISD::FREEZE: { + SDValue N0 = Op.getOperand(0); + if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts, + /*PoisonOnly=*/false)) + return N0; + break; + } case ISD::AND: { LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); @@ -783,10 +796,10 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( case ISD::SHL: { // If we are only demanding sign bits then we can use the shift source // directly. - if (const APInt *MaxSA = - DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) { + if (std::optional<uint64_t> MaxSA = + DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) { SDValue Op0 = Op.getOperand(0); - unsigned ShAmt = MaxSA->getZExtValue(); + unsigned ShAmt = *MaxSA; unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1); unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero(); @@ -938,11 +951,11 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts( // Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1). // or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1). -static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG, +static SDValue combineShiftToAVG(SDValue Op, + TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, - const APInt &DemandedElts, - unsigned Depth) { + const APInt &DemandedElts, unsigned Depth) { assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) && "SRL or SRA node is required here!"); // Is the right shift using an immediate value of 1? @@ -993,6 +1006,7 @@ static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG, // If the shift is unsigned (srl): // - Needs >= 1 zero bit for both operands. // - Needs 1 demanded bit zero and >= 2 sign bits. 
+ SelectionDAG &DAG = TLO.DAG; unsigned ShiftOpc = Op.getOpcode(); bool IsSigned = false; unsigned KnownBits; @@ -1046,12 +1060,14 @@ static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG, unsigned MinWidth = std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8); EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth)); + if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits()) + return SDValue(); if (VT.isVector()) NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount()); - if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) { + if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) { // If we could not transform, and (both) adds are nuw/nsw, we can use the // larger type size to do the transform. - if (!TLI.isOperationLegalOrCustom(AVGOpc, VT)) + if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT)) return SDValue(); if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0), Add.getOperand(1)) && @@ -1062,6 +1078,12 @@ static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG, return SDValue(); } + // Don't create a AVGFLOOR node with a scalar constant unless its legal as + // this is likely to stop other folds (reassociation, value tracking etc.) + if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) && + (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB))) + return SDValue(); + SDLoc DL(Op); SDValue ResultAVG = DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT), @@ -1096,7 +1118,6 @@ bool TargetLowering::SimplifyDemandedBits( APInt DemandedBits = OriginalDemandedBits; APInt DemandedElts = OriginalDemandedElts; SDLoc dl(Op); - auto &DL = TLO.DAG.getDataLayout(); // Undef operand. if (Op.isUndef()) @@ -1372,7 +1393,7 @@ bool TargetLowering::SimplifyDemandedBits( // using the bits from the RHS. Below, we use knowledge about the RHS to // simplify the LHS, here we're using information from the LHS to simplify // the RHS. - if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) { + if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) { // Do not increment Depth here; that can cause an infinite loop. KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth); // If the LHS already has zeros where RHSC does, this 'and' is dead. @@ -1424,11 +1445,9 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1)) return true; - assert(!Known.hasConflict() && "Bits known to be one AND zero?"); if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts, Known2, TLO, Depth + 1)) return true; - assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If all of the demanded bits are known one on one side, return the other. // These bits cannot contribute to the result of the 'and'. @@ -1476,7 +1495,7 @@ bool TargetLowering::SimplifyDemandedBits( } return true; } - assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts, Known2, TLO, Depth + 1)) { if (Flags.hasDisjoint()) { @@ -1485,7 +1504,6 @@ bool TargetLowering::SimplifyDemandedBits( } return true; } - assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. 
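
For reference, the scalar shape of the patterns combineShiftToAVG above is matching, as a standalone C++ sketch (not part of the patch): widening first makes the add overflow-free, so the shifted sum is exactly the unsigned floor/ceiling average.

  #include <cstdint>

  // ext(avgflooru(A, B)) == shr(add(ext(A), ext(B)), 1)
  uint8_t avgflooru(uint8_t A, uint8_t B) {
    return uint8_t((uint16_t(A) + uint16_t(B)) >> 1);
  }

  // ext(avgceilu(A, B)) == shr(add(ext(A), ext(B), 1), 1)
  uint8_t avgceilu(uint8_t A, uint8_t B) {
    return uint8_t((uint16_t(A) + uint16_t(B) + 1) >> 1);
  }
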
@@ -1551,11 +1569,9 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1)) return true; - assert(!Known.hasConflict() && "Bits known to be one AND zero?"); if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO, Depth + 1)) return true; - assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. @@ -1651,8 +1667,6 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts, Known2, TLO, Depth + 1)) return true; - assert(!Known.hasConflict() && "Bits known to be one AND zero?"); - assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) @@ -1668,8 +1682,6 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts, Known2, TLO, Depth + 1)) return true; - assert(!Known.hasConflict() && "Bits known to be one AND zero?"); - assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // Only known if known in both the LHS and RHS. Known = Known.intersectWith(Known2); @@ -1681,8 +1693,6 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts, Known2, TLO, Depth + 1)) return true; - assert(!Known.hasConflict() && "Bits known to be one AND zero?"); - assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) @@ -1724,9 +1734,9 @@ bool TargetLowering::SimplifyDemandedBits( SDValue Op1 = Op.getOperand(1); EVT ShiftVT = Op1.getValueType(); - if (const APInt *SA = - TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) { - unsigned ShAmt = SA->getZExtValue(); + if (std::optional<uint64_t> KnownSA = + TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) { + unsigned ShAmt = *KnownSA; if (ShAmt == 0) return TLO.CombineTo(Op, Op0); @@ -1736,9 +1746,9 @@ bool TargetLowering::SimplifyDemandedBits( // TODO - support non-uniform vector amounts. if (Op0.getOpcode() == ISD::SRL) { if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) { - if (const APInt *SA2 = - TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) { - unsigned C1 = SA2->getZExtValue(); + if (std::optional<uint64_t> InnerSA = + TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) { + unsigned C1 = *InnerSA; unsigned Opc = ISD::SHL; int Diff = ShAmt - C1; if (Diff < 0) { @@ -1776,9 +1786,9 @@ bool TargetLowering::SimplifyDemandedBits( // TODO - support non-uniform vector amounts. 
if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() && InnerOp.hasOneUse()) { - if (const APInt *SA2 = - TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) { - unsigned InnerShAmt = SA2->getZExtValue(); + if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount( + InnerOp, DemandedElts, Depth + 2)) { + unsigned InnerShAmt = *SA2; if (InnerShAmt < ShAmt && InnerShAmt < InnerBits && DemandedBits.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) && @@ -1807,7 +1817,6 @@ bool TargetLowering::SimplifyDemandedBits( } return true; } - assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero <<= ShAmt; Known.One <<= ShAmt; // low bits known zero. @@ -1823,11 +1832,33 @@ bool TargetLowering::SimplifyDemandedBits( } } + // TODO: Can we merge this fold with the one below? // Try shrinking the operation as long as the shift amount will still be // in range. - if ((ShAmt < DemandedBits.getActiveBits()) && - ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) - return true; + if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() && + Op.getNode()->hasOneUse()) { + // Search for the smallest integer type with free casts to and from + // Op's type. For expedience, just check power-of-2 integer types. + unsigned DemandedSize = DemandedBits.getActiveBits(); + for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize); + SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) { + EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits); + if (isNarrowingProfitable(VT, SmallVT) && + isTypeDesirableForOp(ISD::SHL, SmallVT) && + isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) && + (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) { + assert(DemandedSize <= SmallVTBits && + "Narrowed below demanded bits?"); + // We found a type with free casts. + SDValue NarrowShl = TLO.DAG.getNode( + ISD::SHL, dl, SmallVT, + TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)), + TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl)); + return TLO.CombineTo( + Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl)); + } + } + } // Narrow shift to lower half - similar to ShrinkDemandedOp. // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K)) @@ -1849,8 +1880,8 @@ bool TargetLowering::SimplifyDemandedBits( Flags.setNoSignedWrap(IsNSW); Flags.setNoUnsignedWrap(IsNUW); SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0); - SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant( - ShAmt, HalfVT, dl, TLO.LegalTypes()); + SDValue NewShiftAmt = + TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl); SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp, NewShiftAmt, Flags); SDValue NewExt = @@ -1883,9 +1914,9 @@ bool TargetLowering::SimplifyDemandedBits( // If we are only demanding sign bits then we can use the shift source // directly. - if (const APInt *MaxSA = - TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) { - unsigned ShAmt = MaxSA->getZExtValue(); + if (std::optional<uint64_t> MaxSA = + TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) { + unsigned ShAmt = *MaxSA; unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1); unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero(); @@ -1899,14 +1930,9 @@ bool TargetLowering::SimplifyDemandedBits( SDValue Op1 = Op.getOperand(1); EVT ShiftVT = Op1.getValueType(); - // Try to match AVG patterns. 
- if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits, - DemandedElts, Depth + 1)) - return TLO.CombineTo(Op, AVG); - - if (const APInt *SA = - TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) { - unsigned ShAmt = SA->getZExtValue(); + if (std::optional<uint64_t> KnownSA = + TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) { + unsigned ShAmt = *KnownSA; if (ShAmt == 0) return TLO.CombineTo(Op, Op0); @@ -1916,9 +1942,9 @@ bool TargetLowering::SimplifyDemandedBits( // TODO - support non-uniform vector amounts. if (Op0.getOpcode() == ISD::SHL) { if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) { - if (const APInt *SA2 = - TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) { - unsigned C1 = SA2->getZExtValue(); + if (std::optional<uint64_t> InnerSA = + TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) { + unsigned C1 = *InnerSA; unsigned Opc = ISD::SRL; int Diff = ShAmt - C1; if (Diff < 0) { @@ -1951,8 +1977,8 @@ bool TargetLowering::SimplifyDemandedBits( ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) || TLO.DAG.MaskedValueIsZero(Op0, HiBits))) { SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0); - SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant( - ShAmt, HalfVT, dl, TLO.LegalTypes()); + SDValue NewShiftAmt = + TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl); SDValue NewShift = TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt); return TLO.CombineTo( @@ -1964,7 +1990,6 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1)) return true; - assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero.lshrInPlace(ShAmt); Known.One.lshrInPlace(ShAmt); // High bits known zero. @@ -1984,6 +2009,12 @@ bool TargetLowering::SimplifyDemandedBits( // shift amounts. Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); } + + // Try to match AVG patterns (after shift simplification). + if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits, + DemandedElts, Depth + 1)) + return TLO.CombineTo(Op, AVG); + break; } case ISD::SRA: { @@ -2005,22 +2036,17 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.isOne()) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1)); - // Try to match AVG patterns. - if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits, - DemandedElts, Depth + 1)) - return TLO.CombineTo(Op, AVG); - - if (const APInt *SA = - TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) { - unsigned ShAmt = SA->getZExtValue(); + if (std::optional<uint64_t> KnownSA = + TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) { + unsigned ShAmt = *KnownSA; if (ShAmt == 0) return TLO.CombineTo(Op, Op0); // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target // supports sext_inreg. 
if (Op0.getOpcode() == ISD::SHL) { - if (const APInt *InnerSA = - TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) { + if (std::optional<uint64_t> InnerSA = + TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) { unsigned LowBits = BitWidth - ShAmt; EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits); if (VT.isVector()) @@ -2060,7 +2086,6 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1)) return true; - assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero.lshrInPlace(ShAmt); Known.One.lshrInPlace(ShAmt); @@ -2095,6 +2120,12 @@ bool TargetLowering::SimplifyDemandedBits( } } } + + // Try to match AVG patterns (after shift simplification). + if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits, + DemandedElts, Depth + 1)) + return TLO.CombineTo(Op, AVG); + break; } case ISD::FSHL: @@ -2288,9 +2319,8 @@ bool TargetLowering::SimplifyDemandedBits( // the right place. unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL; if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) { - EVT ShiftAmtTy = getShiftAmountTy(VT, DL); unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ; - SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy); + SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl); SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt); return TLO.CombineTo(Op, NewOp); } @@ -2330,8 +2360,8 @@ bool TargetLowering::SimplifyDemandedBits( if (!AlreadySignExtended) { // Compute the correct shift amount type, which must be getShiftAmountTy // for scalar types after legalization. - SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl, - getShiftAmountTy(VT, DL)); + SDValue ShiftAmt = + TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt)); } @@ -2350,7 +2380,6 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO, Depth + 1)) return true; - assert(!Known.hasConflict() && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the // top bits of the result. @@ -2423,7 +2452,6 @@ bool TargetLowering::SimplifyDemandedBits( } return true; } - assert(!Known.hasConflict() && "Bits known to be one AND zero?"); assert(Known.getBitWidth() == InBits && "Src width has changed?"); Known = Known.zext(BitWidth); @@ -2473,7 +2501,6 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO, Depth + 1)) return true; - assert(!Known.hasConflict() && "Bits known to be one AND zero?"); assert(Known.getBitWidth() == InBits && "Src width has changed?"); // If the sign bit is known one, the top bits match. 
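
The fold (sra (shl x, c1), c1) -> sext_inreg noted above is the DAG form of a familiar scalar idiom; a sketch for 32-bit values and c1 == 24, assuming two's complement (not part of the patch):

  #include <cstdint>

  // An arithmetic right shift after the left shift sign-extends the low
  // 8 bits of X, i.e. sext_inreg(X, i8).
  int32_t sextInReg8(int32_t X) {
    return int32_t(uint32_t(X) << 24) >> 24; // same value as int32_t(int8_t(X))
  }
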
@@ -2519,7 +2546,6 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO, Depth + 1)) return true; - assert(!Known.hasConflict() && "Bits known to be one AND zero?"); assert(Known.getBitWidth() == InBits && "Src width has changed?"); Known = Known.anyext(BitWidth); @@ -2560,22 +2586,31 @@ bool TargetLowering::SimplifyDemandedBits( break; if (Src.getNode()->hasOneUse()) { - const APInt *ShAmtC = - TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts); - if (!ShAmtC || ShAmtC->uge(BitWidth)) + if (isTruncateFree(Src, VT) && + !isTruncateFree(Src.getValueType(), VT)) { + // If truncate is only free at trunc(srl), do not turn it into + // srl(trunc). The check is done by first check the truncate is free + // at Src's opcode(srl), then check the truncate is not done by + // referencing sub-register. In test, if both trunc(srl) and + // srl(trunc)'s trunc are free, srl(trunc) performs better. If only + // trunc(srl)'s trunc is free, trunc(srl) is better. break; - uint64_t ShVal = ShAmtC->getZExtValue(); + } + + std::optional<uint64_t> ShAmtC = + TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2); + if (!ShAmtC || *ShAmtC >= BitWidth) + break; + uint64_t ShVal = *ShAmtC; APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth); HighBits.lshrInPlace(ShVal); HighBits = HighBits.trunc(BitWidth); - if (!(HighBits & DemandedBits)) { // None of the shifted in bits are needed. Add a truncate of the // shift input, then shift it. - SDValue NewShAmt = TLO.DAG.getConstant( - ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes())); + SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl); SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0)); return TLO.CombineTo( @@ -2585,7 +2620,6 @@ bool TargetLowering::SimplifyDemandedBits( break; } - assert(!Known.hasConflict() && "Bits known to be one AND zero?"); break; } case ISD::AssertZext: { @@ -2596,7 +2630,6 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known, TLO, Depth + 1)) return true; - assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero |= ~InMask; Known.One &= (~Known.Zero); @@ -2753,8 +2786,7 @@ bool TargetLowering::SimplifyDemandedBits( unsigned CTZ = DemandedBits.countr_zero(); ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts); if (C && C->getAPIntValue().countr_zero() == CTZ) { - EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout()); - SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy); + SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl); SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC); return TLO.CombineTo(Op, Shl); } @@ -2777,10 +2809,16 @@ bool TargetLowering::SimplifyDemandedBits( unsigned DemandedBitsLZ = DemandedBits.countl_zero(); APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ); KnownBits KnownOp0, KnownOp1; - if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, KnownOp0, TLO, - Depth + 1) || - SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO, + auto GetDemandedBitsLHSMask = [&](APInt Demanded, + const KnownBits &KnownRHS) { + if (Op.getOpcode() == ISD::MUL) + Demanded.clearHighBits(KnownRHS.countMinTrailingZeros()); + return Demanded; + }; + if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO, Depth + 1) || + SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1), + DemandedElts, KnownOp0, 
TLO, Depth + 1) || // See if the operation should be performed at a smaller bit width. ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) { if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) { @@ -2852,9 +2890,9 @@ bool TargetLowering::SimplifyDemandedBits( return 0; }; - auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y, unsigned ShlAmt) { - EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout()); - SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy); + auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y, + unsigned ShlAmt) { + SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl); SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC); SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl); return TLO.CombineTo(Op, Res); @@ -2879,9 +2917,9 @@ bool TargetLowering::SimplifyDemandedBits( if (Op.getOpcode() == ISD::MUL) { Known = KnownBits::mul(KnownOp0, KnownOp1); } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB. - Known = KnownBits::computeForAddSub(Op.getOpcode() == ISD::ADD, - Flags.hasNoSignedWrap(), KnownOp0, - KnownOp1); + Known = KnownBits::computeForAddSub( + Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(), + Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1); } break; } @@ -2912,7 +2950,7 @@ bool TargetLowering::SimplifyDemandedBits( const SDNode *N = Op.getNode(); for (SDNode *Op : llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) { - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) + if (auto *C = dyn_cast<ConstantSDNode>(Op)) if (C->isOpaque()) return false; } @@ -3187,6 +3225,20 @@ bool TargetLowering::SimplifyDemandedVectorElts( } break; } + case ISD::FREEZE: { + SDValue N0 = Op.getOperand(0); + if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts, + /*PoisonOnly=*/false)) + return TLO.CombineTo(Op, N0); + + // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE + // freeze(op(x, ...)) -> op(freeze(x), ...). + if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1) + return TLO.CombineTo( + Op, TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, + TLO.DAG.getFreeze(N0.getOperand(0)))); + break; + } case ISD::BUILD_VECTOR: { // Check all elements and simplify any unused elements with UNDEF. if (!DemandedElts.isAllOnes()) { @@ -3527,6 +3579,10 @@ bool TargetLowering::SimplifyDemandedVectorElts( } [[fallthrough]]; } + case ISD::AVGCEILS: + case ISD::AVGCEILU: + case ISD::AVGFLOORS: + case ISD::AVGFLOORU: case ISD::OR: case ISD::XOR: case ISD::SUB: @@ -3789,7 +3845,15 @@ bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode( Op.getOpcode() == ISD::INTRINSIC_VOID) && "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op" " is a target node!"); - return false; + + // If Op can't create undef/poison and none of its operands are undef/poison + // then Op is never undef/poison. 
+  return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
+                                              /*ConsiderFlags*/ true, Depth) &&
+         all_of(Op->ops(), [&](SDValue V) {
+           return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
+                                                       Depth + 1);
+         });
 }
 
 bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
@@ -4087,17 +4151,12 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
                                XVT, KeptBits))
     return SDValue();
 
-  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
-  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
-
-  // Unfold into:  ((%x << C) a>> C) cond %x
+  // Unfold into: sext_inreg(%x) cond %x
   // Where 'cond' will be either 'eq' or 'ne'.
-  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
-  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
-  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
-  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
-
-  return T2;
+  SDValue SExtInReg = DAG.getNode(
+      ISD::SIGN_EXTEND_INREG, DL, XVT, X,
+      DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
+  return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
 }
 
 // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
@@ -4204,9 +4263,7 @@ SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
     return SDValue();
 
   // (X - Y) == Y --> X == Y << 1
-  EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
-                                 !DCI.isBeforeLegalize());
-  SDValue One = DAG.getConstant(1, DL, ShiftVT);
+  SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
   SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
   if (!DCI.isCalledByLegalizer())
     DCI.AddToWorklist(YShl1.getNode());
@@ -4594,10 +4651,13 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       isa<LoadSDNode>(N0.getOperand(0)) &&
       N0.getOperand(0).getNode()->hasOneUse() &&
       isa<ConstantSDNode>(N0.getOperand(1))) {
-    LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
+    auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
    APInt bestMask;
    unsigned bestWidth = 0, bestOffset = 0;
-    if (Lod->isSimple() && Lod->isUnindexed()) {
+    if (Lod->isSimple() && Lod->isUnindexed() &&
+        (Lod->getMemoryVT().isByteSized() ||
+         isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
+      unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
      unsigned origWidth = N0.getValueSizeInBits();
      unsigned maskWidth = origWidth;
      // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
      if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
        origWidth = Lod->getMemoryVT().getSizeInBits();
      const APInt &Mask = N0.getConstantOperandAPInt(1);
-      for (unsigned width = origWidth / 2; width>=8; width /= 2) {
+      // Only consider power-of-2 widths (and at least one byte) as candidates
+      // for the narrowed load.
+      for (unsigned width = 8; width < origWidth; width *= 2) {
+        EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
+        if (!shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT))
+          continue;
        APInt newMask = APInt::getLowBitsSet(maskWidth, width);
-        for (unsigned offset=0; offset<origWidth/width; offset++) {
+        // Avoid accessing any padding here for now (we could use memWidth
+        // instead of origWidth here otherwise). 
+ unsigned maxOffset = origWidth - width; + for (unsigned offset = 0; offset <= maxOffset; offset += 8) { if (Mask.isSubsetOf(newMask)) { - if (Layout.isLittleEndian()) - bestOffset = (uint64_t)offset * (width/8); - else - bestOffset = (origWidth/width - offset - 1) * (width/8); - bestMask = Mask.lshr(offset * (width/8) * 8); - bestWidth = width; - break; + unsigned ptrOffset = + Layout.isLittleEndian() ? offset : memWidth - width - offset; + unsigned IsFast = 0; + Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8); + if (allowsMemoryAccess( + *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(), + NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) && + IsFast) { + bestOffset = ptrOffset / 8; + bestMask = Mask.lshr(offset); + bestWidth = width; + break; + } } - newMask <<= width; + newMask <<= 8; } + if (bestWidth) + break; } } if (bestWidth) { EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth); - if (newVT.isRound() && - shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) { - SDValue Ptr = Lod->getBasePtr(); - if (bestOffset != 0) - Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(bestOffset), - dl); - SDValue NewLoad = - DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, - Lod->getPointerInfo().getWithOffset(bestOffset), - Lod->getOriginalAlign()); - return DAG.getSetCC(dl, VT, - DAG.getNode(ISD::AND, dl, newVT, NewLoad, - DAG.getConstant(bestMask.trunc(bestWidth), - dl, newVT)), - DAG.getConstant(0LL, dl, newVT), Cond); - } + SDValue Ptr = Lod->getBasePtr(); + if (bestOffset != 0) + Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset)); + SDValue NewLoad = + DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, + Lod->getPointerInfo().getWithOffset(bestOffset), + Lod->getOriginalAlign()); + SDValue And = + DAG.getNode(ISD::AND, dl, newVT, NewLoad, + DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT)); + return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond); } } @@ -4722,21 +4793,25 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond); } else if ((N1C->isZero() || N1C->isOne()) && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { - // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC - if (N0.getOpcode() == ISD::SETCC && + // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are + // excluded as they are handled below whilst checking for foldBooleans. + if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) && isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) && (N0.getValueType() == MVT::i1 || - getBooleanContents(N0.getOperand(0).getValueType()) == - ZeroOrOneBooleanContent)) { + getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) && + DAG.MaskedValueIsZero( + N0, APInt::getBitsSetFrom(N0.getValueSizeInBits(), 1))) { bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne()); if (TrueWhenTrue) return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); // Invert the condition. 
- ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType()); - if (DCI.isBeforeLegalizeOps() || - isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); + if (N0.getOpcode() == ISD::SETCC) { + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType()); + if (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType())) + return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); + } } if ((N0.getOpcode() == ISD::XOR || @@ -5038,16 +5113,15 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) && N0.getOpcode() == ISD::AND) { if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - EVT ShiftTy = - getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. unsigned ShCt = AndRHS->getAPIntValue().logBase2(); if (AndRHS->getAPIntValue().isPowerOf2() && !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) { - return DAG.getNode(ISD::TRUNCATE, dl, VT, - DAG.getNode(ISD::SRL, dl, ShValTy, N0, - DAG.getConstant(ShCt, dl, ShiftTy))); + return DAG.getNode( + ISD::TRUNCATE, dl, VT, + DAG.getNode(ISD::SRL, dl, ShValTy, N0, + DAG.getShiftAmountConstant(ShCt, ShValTy, dl))); } } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) { // (X & 8) == 8 --> (X & 8) >> 3 @@ -5055,9 +5129,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, unsigned ShCt = C1.logBase2(); if (C1.isPowerOf2() && !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) { - return DAG.getNode(ISD::TRUNCATE, dl, VT, - DAG.getNode(ISD::SRL, dl, ShValTy, N0, - DAG.getConstant(ShCt, dl, ShiftTy))); + return DAG.getNode( + ISD::TRUNCATE, dl, VT, + DAG.getNode(ISD::SRL, dl, ShValTy, N0, + DAG.getShiftAmountConstant(ShCt, ShValTy, dl))); } } } @@ -5065,7 +5140,6 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (C1.getSignificantBits() <= 64 && !isLegalICmpImmediate(C1.getSExtValue())) { - EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize()); // (X & -256) == 256 -> (X >> 8) == 1 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && N0.getOpcode() == ISD::AND && N0.hasOneUse()) { @@ -5074,9 +5148,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countr_zero(); if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) { - SDValue Shift = - DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0), - DAG.getConstant(ShiftBits, dl, ShiftTy)); + SDValue Shift = DAG.getNode( + ISD::SRL, dl, ShValTy, N0.getOperand(0), + DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl)); SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy); return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); } @@ -5103,8 +5177,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (ShiftBits && NewC.getSignificantBits() <= 64 && isLegalICmpImmediate(NewC.getSExtValue()) && !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) { - SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0, - DAG.getConstant(ShiftBits, dl, ShiftTy)); + SDValue Shift = + DAG.getNode(ISD::SRL, dl, ShValTy, N0, + 
DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl)); SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy); return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond); } @@ -5547,7 +5622,7 @@ std::pair<unsigned, const TargetRegisterClass *> TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, StringRef Constraint, MVT VT) const { - if (Constraint.empty() || Constraint[0] != '{') + if (!Constraint.starts_with("{")) return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr)); assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?"); @@ -5642,7 +5717,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL, // The return value of the call is this value. As such, there is no // corresponding argument. assert(!Call.getType()->isVoidTy() && "Bad inline asm!"); - if (StructType *STy = dyn_cast<StructType>(Call.getType())) { + if (auto *STy = dyn_cast<StructType>(Call.getType())) { OpInfo.ConstraintVT = getSimpleValueType(DL, STy->getElementType(ResNo)); } else { @@ -6017,6 +6092,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, /// Given an exact SDIV by a constant, create a multiplication /// with the multiplicative inverse of the constant. +/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242 static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDNode *> &Created) { @@ -6039,11 +6115,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, Divisor.ashrInPlace(Shift); UseSRA = true; } - // Calculate the multiplicative inverse, using Newton's method. - APInt t; - APInt Factor = Divisor; - while ((t = Divisor * Factor) != 1) - Factor *= APInt(Divisor.getBitWidth(), 2) - t; + APInt Factor = Divisor.multiplicativeInverse(); Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT)); Factors.push_back(DAG.getConstant(Factor, dl, SVT)); return true; @@ -6070,10 +6142,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, } SDValue Res = Op0; - - // Shift the value upfront if it is even, so the LSB is one. if (UseSRA) { - // TODO: For UDIV use SRL instead of SRA. SDNodeFlags Flags; Flags.setExact(true); Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags); @@ -6083,6 +6152,69 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, return DAG.getNode(ISD::MUL, dl, VT, Res, Factor); } +/// Given an exact UDIV by a constant, create a multiplication +/// with the multiplicative inverse of the constant. +/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242 +static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl<SDNode *> &Created) { + EVT VT = N->getValueType(0); + EVT SVT = VT.getScalarType(); + EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShSVT = ShVT.getScalarType(); + + bool UseSRL = false; + SmallVector<SDValue, 16> Shifts, Factors; + + auto BuildUDIVPattern = [&](ConstantSDNode *C) { + if (C->isZero()) + return false; + APInt Divisor = C->getAPIntValue(); + unsigned Shift = Divisor.countr_zero(); + if (Shift) { + Divisor.lshrInPlace(Shift); + UseSRL = true; + } + // Calculate the multiplicative inverse modulo BW. + APInt Factor = Divisor.multiplicativeInverse(); + Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT)); + Factors.push_back(DAG.getConstant(Factor, dl, SVT)); + return true; + }; + + SDValue Op1 = N->getOperand(1); + + // Collect all magic values from the build vector. 
+ if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern)) + return SDValue(); + + SDValue Shift, Factor; + if (Op1.getOpcode() == ISD::BUILD_VECTOR) { + Shift = DAG.getBuildVector(ShVT, dl, Shifts); + Factor = DAG.getBuildVector(VT, dl, Factors); + } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) { + assert(Shifts.size() == 1 && Factors.size() == 1 && + "Expected matchUnaryPredicate to return one element for scalable " + "vectors"); + Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]); + Factor = DAG.getSplatVector(VT, dl, Factors[0]); + } else { + assert(isa<ConstantSDNode>(Op1) && "Expected a constant"); + Shift = Shifts[0]; + Factor = Factors[0]; + } + + SDValue Res = N->getOperand(0); + if (UseSRL) { + SDNodeFlags Flags; + Flags.setExact(true); + Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, Flags); + Created.push_back(Res.getNode()); + } + + return DAG.getNode(ISD::MUL, dl, VT, Res, Factor); +} + SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl<SDNode *> &Created) const { @@ -6342,20 +6474,16 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, return SDValue(); } + // If the udiv has an 'exact' bit we can use a simpler lowering. + if (N->getFlags().hasExact()) + return BuildExactUDIV(*this, N, dl, DAG, Created); + SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // Try to use leading zeros of the dividend to reduce the multiplier and // avoid expensive fixups. - // TODO: Support vectors. - unsigned LeadingZeros = 0; - if (!VT.isVector() && isa<ConstantSDNode>(N1)) { - assert(!isOneConstant(N1) && "Unexpected divisor"); - LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros(); - // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros in - // the dividend exceeds the leading zeros for the divisor. - LeadingZeros = std::min(LeadingZeros, N1->getAsAPIntVal().countl_zero()); - } + unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros(); bool UseNPQ = false, UsePreShift = false, UsePostShift = false; SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors; @@ -6374,7 +6502,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, MagicFactor = NPQFactor = DAG.getUNDEF(SVT); } else { UnsignedDivisionByConstantInfo magics = - UnsignedDivisionByConstantInfo::get(Divisor, LeadingZeros); + UnsignedDivisionByConstantInfo::get( + Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero())); MagicFactor = DAG.getConstant(magics.Magic, dl, SVT); @@ -6572,7 +6701,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, EVT VT = REMNode.getValueType(); EVT SVT = VT.getScalarType(); - EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize()); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); EVT ShSVT = ShVT.getScalarType(); // If MUL is unavailable, we cannot proceed in any case. @@ -6632,10 +6761,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, // P = inv(D0, 2^W) // 2^W requires W + 1 bits, so we have to extend and then truncate. 
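  // Concrete instance of the fold (illustrative, not from the patch):
  // W = 8, D = 6 = 3 * 2^1, so D0 = 3, K = 1,
  // P = inv(3) mod 2^8 = 171 (3 * 171 == 513 == 1 (mod 256)), and
  // Q = floor(255 / 6) = 42. Then X % 6 == 0 iff rotr8(X * 171, 1) <= 42:
  // X = 12 gives rotr8(4, 1) = 2 <= 42, while X = 13 gives
  // rotr8(175, 1) = 215 > 42.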
unsigned W = D.getBitWidth();
-  APInt P = D0.zext(W + 1)
-                .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
-                .trunc(W);
-  assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
+  APInt P = D0.multiplicativeInverse();
   assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");

   // Q = floor((2^W - 1) u/ D)
@@ -6804,6 +6930,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
                                   SDValue CompTargetNode, ISD::CondCode Cond,
                                   DAGCombinerInfo &DCI, const SDLoc &DL,
                                   SmallVectorImpl<SDNode *> &Created) const {
+  // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
   // Fold:
   //   (seteq/ne (srem N, D), 0)
   // To:
   //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
   // - D must be constant, with D = D0 * 2^K where D0 is odd
   // - P is the multiplicative inverse of D0 modulo 2^W
   // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
   // - Q = floor((2 * A) / (2^K))
   // where W is the width of the common type of N and D.
+  //
+  // When D is a power of two (and thus D0 is 1), the normal
+  // formulas for A and Q do not apply, because the derivation
+  // depends on D not dividing 2^(W-1), and thus theorem ZRS
+  // does not apply. This specifically fails when N = INT_MIN.
+  //
+  // Instead, for power-of-two D, we use:
+  // - A = 2^(W-1)
+  //   |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0, 2^W - 1]
+  // - Q = 2^(W-K) - 1
+  //   |-> Test that the top K bits are zero after rotation
   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
          "Only applicable for (in)equality comparisons.");

   EVT VT = REMNode.getValueType();
   EVT SVT = VT.getScalarType();
-  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
+  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
   EVT ShSVT = ShVT.getScalarType();

   // If we are after ops legalization, and MUL is unavailable, we cannot
   // expand.
@@ -6878,10 +7016,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
   // P = inv(D0, 2^W)
   // 2^W requires W + 1 bits, so we have to extend and then truncate.
   unsigned W = D.getBitWidth();
-  APInt P = D0.zext(W + 1)
-                .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
-                .trunc(W);
-  assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
+  APInt P = D0.multiplicativeInverse();
   assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");

   // A = floor((2^(W - 1) - 1) / D0) & -2^K
@@ -6902,6 +7037,14 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
   assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
          "We are expecting that K is always less than all-ones for ShSVT");

+  // If D was a power of two, apply the alternate constant derivation.
+  if (D0.isOne()) {
+    // A = 2^(W-1)
+    A = APInt::getSignedMinValue(W);
+    // Q = 2^(W-K) - 1
+    Q = APInt::getAllOnes(W - K).zext(W);
+  }
+
   // If the divisor is 1 the result can be constant-folded. Likewise, we
   // don't care about INT_MIN lanes, those can be set to undef if appropriate.
   if (D.isOne()) {
@@ -7599,7 +7742,7 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
 //
 // For division, we can compute the remainder using the algorithm described
 // above, subtract it from the dividend to get an exact multiple of Constant.
-// Then multiply that extact multiply by the multiplicative inverse modulo
+// Then multiply that exact multiple by the multiplicative inverse modulo
 // (1 << (BitWidth / 2)) to get the quotient.
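// Concretely (illustrative, full width W = 32): for N = 1000 and D = 7 the
// remainder pass yields R = 6; N - R = 994 is an exact multiple of 7, so
// Q = 994 * inv(7) = 994 * 0xB6DB6DB7 == 142 (mod 2^32).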
// If Constant is even, we can shift right the dividend and the divisor by the @@ -7734,10 +7877,7 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N, // Multiply by the multiplicative inverse of the divisor modulo // (1 << BitWidth). - APInt Mod = APInt::getSignedMinValue(BitWidth + 1); - APInt MulFactor = Divisor.zext(BitWidth + 1); - MulFactor = MulFactor.multiplicativeInverse(Mod); - MulFactor = MulFactor.trunc(BitWidth); + APInt MulFactor = Divisor.multiplicativeInverse(); SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend, DAG.getConstant(MulFactor, dl, VT)); @@ -7797,7 +7937,7 @@ static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) { InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL); ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask, VL); - ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask, + ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask, VL); } else { // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW)) @@ -7819,12 +7959,12 @@ static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) { SDValue One = DAG.getConstant(1, DL, ShVT); if (IsFSHL) { ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL); - SDValue ShY1 = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, One, Mask, VL); - ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, ShY1, InvShAmt, Mask, VL); + SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL); + ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL); } else { SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL); ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL); - ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, ShAmt, Mask, VL); + ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL); } } return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL); @@ -8327,6 +8467,70 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, return SDValue(); } +SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N, + SelectionDAG &DAG) const { + SDLoc DL(N); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + unsigned Opc = N->getOpcode(); + EVT VT = N->getValueType(0); + EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + bool IsMax = Opc == ISD::FMAXIMUM; + SDNodeFlags Flags = N->getFlags(); + + // First, implement comparison not propagating NaN. If no native fmin or fmax + // available, use plain select with setcc instead. + SDValue MinMax; + unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE; + unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM; + + // FIXME: We should probably define fminnum/fmaxnum variants with correct + // signed zero behavior. + bool MinMaxMustRespectOrderedZero = false; + + if (isOperationLegalOrCustom(CompOpcIeee, VT)) { + MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags); + MinMaxMustRespectOrderedZero = true; + } else if (isOperationLegalOrCustom(CompOpc, VT)) { + MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags); + } else { + if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT)) + return DAG.UnrollVectorOp(N); + + // NaN (if exists) will be propagated later, so orderness doesn't matter. + SDValue Compare = + DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? 
ISD::SETGT : ISD::SETLT);
+    MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
+  }
+
+  // Propagate the NaN if either operand is NaN.
+  if (!N->getFlags().hasNoNaNs() &&
+      (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
+    ConstantFP *FPNaN = ConstantFP::get(
+        *DAG.getContext(), APFloat::getNaN(DAG.EVTToAPFloatSemantics(VT)));
+    MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
+                           DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
+  }
+
+  // fminimum/fmaximum requires -0.0 to be ordered as less than +0.0.
+  if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
+      !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
+    SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
+                                  DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
+    SDValue TestZero =
+        DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
+    SDValue LCmp = DAG.getSelect(
+        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
+        MinMax, Flags);
+    SDValue RCmp = DAG.getSelect(
+        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
+        LCmp, Flags);
+    MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
+  }
+
+  return MinMax;
+}
+
 /// Returns a true value if this FPClassTest can be performed with an ordered
 /// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
 /// std::nullopt if it cannot be performed as a compare with 0.
@@ -8683,11 +8887,21 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
   }

   // v = (v * 0x01010101...) >> (Len - 8)
-  SDValue Mask01 =
-      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
-  return DAG.getNode(ISD::SRL, dl, VT,
-                     DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
-                     DAG.getConstant(Len - 8, dl, ShVT));
+  SDValue V;
+  if (isOperationLegalOrCustomOrPromote(
+          ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
+    SDValue Mask01 =
+        DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+    V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
+  } else {
+    V = Op;
+    for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
+      SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
+      V = DAG.getNode(ISD::ADD, dl, VT, V,
+                      DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
+    }
+  }
+  return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
 }

 SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
@@ -8717,7 +8931,7 @@ SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
   // v = v - ((v >> 1) & 0x55555555...)
   Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
-                     DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
+                     DAG.getNode(ISD::VP_SRL, dl, VT, Op,
                                  DAG.getConstant(1, dl, ShVT), Mask, VL),
                      Mask55, Mask, VL);
   Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
@@ -8725,13 +8939,13 @@ SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
   // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
   Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
   Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
-                     DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
+                     DAG.getNode(ISD::VP_SRL, dl, VT, Op,
                                  DAG.getConstant(2, dl, ShVT), Mask, VL),
                      Mask33, Mask, VL);
   Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
   // v = (v + (v >> 4)) & 0x0F0F0F0F...
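  // In scalar form (illustrative, Len = 32) the whole reduction reads:
  //   v -= (v >> 1) & 0x55555555;                      // 2-bit popcounts
  //   v  = (v & 0x33333333) + ((v >> 2) & 0x33333333); // 4-bit popcounts
  //   v  = (v + (v >> 4)) & 0x0F0F0F0F;                // per-byte popcounts
  //   return (v * 0x01010101) >> 24;                   // sum the bytes, or
  //   v += v << 8; v += v << 16; return v >> 24;       // the MUL-free tail
  // where the shift+add tail is the fallback introduced above.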
- Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(4, dl, ShVT), + Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT), Mask, VL), Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL); Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL); @@ -8740,11 +8954,23 @@ SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const { return Op; // v = (v * 0x01010101...) >> (Len - 8) - SDValue Mask01 = - DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT); - return DAG.getNode(ISD::VP_LSHR, dl, VT, - DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL), - DAG.getConstant(Len - 8, dl, ShVT), Mask, VL); + SDValue V; + if (isOperationLegalOrCustomOrPromote( + ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) { + SDValue Mask01 = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT); + V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL); + } else { + V = Op; + for (unsigned Shift = 8; Shift < Len; Shift *= 2) { + SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl); + V = DAG.getNode(ISD::VP_ADD, dl, VT, V, + DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL), + Mask, VL); + } + } + return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT), + Mask, VL); } SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const { @@ -8816,7 +9042,7 @@ SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const { for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) { SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT); Op = DAG.getNode(ISD::VP_OR, dl, VT, Op, - DAG.getNode(ISD::VP_LSHR, dl, VT, Op, Tmp, Mask, VL), Mask, + DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask, VL); } Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getConstant(-1, dl, VT), Mask, @@ -8940,17 +9166,50 @@ SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const { return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL); } +SDValue TargetLowering::expandVPCTTZElements(SDNode *N, + SelectionDAG &DAG) const { + // %cond = to_bool_vec %source + // %splat = splat /*val=*/VL + // %tz = step_vector + // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat + // %r = vp.reduce.umin %v + SDLoc DL(N); + SDValue Source = N->getOperand(0); + SDValue Mask = N->getOperand(1); + SDValue EVL = N->getOperand(2); + EVT SrcVT = Source.getValueType(); + EVT ResVT = N->getValueType(0); + EVT ResVecVT = + EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount()); + + // Convert to boolean vector. 
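  // End-to-end example (illustrative): source = {0, 0, 5, 0} with an
  // all-true mask and EVL = 4 gives cond = {0, 0, 1, 0}, so
  // vp.select(cond, step_vector = {0, 1, 2, 3}, splat(EVL) = {4, 4, 4, 4})
  // is {4, 4, 2, 4}, and the UMIN reduction returns 2, the index of the
  // first non-zero element, or EVL when every element is zero.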
+ if (SrcVT.getScalarType() != MVT::i1) { + SDValue AllZero = DAG.getConstant(0, DL, SrcVT); + SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + SrcVT.getVectorElementCount()); + Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero, + DAG.getCondCode(ISD::SETNE), Mask, EVL); + } + + SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT); + SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL); + SDValue StepVec = DAG.getStepVector(DL, ResVecVT); + SDValue Select = + DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL); + return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL); +} + SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative) const { SDLoc dl(N); EVT VT = N->getValueType(0); - EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); SDValue Op = N->getOperand(0); // abs(x) -> smax(x,sub(0,x)) if (!IsNegative && isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::SMAX, VT)) { SDValue Zero = DAG.getConstant(0, dl, VT); + Op = DAG.getFreeze(Op); return DAG.getNode(ISD::SMAX, dl, VT, Op, DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); } @@ -8967,8 +9226,8 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, // 0 - abs(x) -> smin(x, sub(0,x)) if (IsNegative && isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::SMIN, VT)) { - Op = DAG.getFreeze(Op); SDValue Zero = DAG.getConstant(0, dl, VT); + Op = DAG.getFreeze(Op); return DAG.getNode(ISD::SMIN, dl, VT, Op, DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); } @@ -8982,9 +9241,9 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, return SDValue(); Op = DAG.getFreeze(Op); - SDValue Shift = - DAG.getNode(ISD::SRA, dl, VT, Op, - DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT)); + SDValue Shift = DAG.getNode( + ISD::SRA, dl, VT, Op, + DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl)); SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift); // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y) @@ -9018,15 +9277,87 @@ SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const { DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS), DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS)); - // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs)) - // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs)) EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); ISD::CondCode CC = IsSigned ? 
ISD::CondCode::SETGT : ISD::CondCode::SETUGT; SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC); + + // Branchless expansion iff cmp result is allbits: + // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs))) + // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs))) + if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) { + SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS); + SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp); + return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor); + } + + // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs)) + // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs)) return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS), DAG.getNode(ISD::SUB, dl, VT, RHS, LHS)); } +SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const { + SDLoc dl(N); + EVT VT = N->getValueType(0); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + unsigned Opc = N->getOpcode(); + bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU; + bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS; + unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB; + unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR; + unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL; + unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS || + Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) && + "Unknown AVG node"); + + // If the operands are already extended, we can add+shift. + bool IsExt = + (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 && + DAG.ComputeNumSignBits(RHS) >= 2) || + (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 && + DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1); + if (IsExt) { + SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS); + if (!IsFloor) + Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT)); + return DAG.getNode(ShiftOpc, dl, VT, Sum, + DAG.getShiftAmountConstant(1, VT, dl)); + } + + // For scalars, see if we can efficiently extend/truncate to use add+shift. + if (VT.isScalarInteger()) { + unsigned BW = VT.getScalarSizeInBits(); + EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW); + if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) { + LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS); + RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS); + SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS); + if (!IsFloor) + Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg, + DAG.getConstant(1, dl, ExtVT)); + // Just use SRL as we will be truncating away the extended sign bits. 
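  // (SRL is safe here: the extended operands make the sum fit in BW + 1 of
  // the 2 * BW bits, so arithmetic and logical shifts by one agree on every
  // bit the TRUNCATE keeps; for the unsigned case SRL is the natural shift
  // anyway.)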
+ Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg, + DAG.getShiftAmountConstant(1, ExtVT, dl)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg); + } + } + + // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1)) + // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1)) + // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1)) + // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1)) + LHS = DAG.getFreeze(LHS); + RHS = DAG.getFreeze(RHS); + SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS); + SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS); + SDValue Shift = + DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl)); + return DAG.getNode(SumOpc, dl, VT, Sign, Shift); +} + SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const { SDLoc dl(N); EVT VT = N->getValueType(0); @@ -9103,7 +9434,7 @@ SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const { case MVT::i16: Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT), Mask, EVL); - Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT), + Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT), Mask, EVL); return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL); case MVT::i32: @@ -9113,11 +9444,11 @@ SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const { Mask, EVL); Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT), Mask, EVL); - Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT), + Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT), Mask, EVL); Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT), Mask, EVL); - Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT), + Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT), Mask, EVL); Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL); Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL); @@ -9137,19 +9468,19 @@ SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const { DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL); Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT), Mask, EVL); - Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT), + Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT), Mask, EVL); Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4, DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL); - Tmp3 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT), + Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT), Mask, EVL); Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3, DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL); - Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(40, dl, SHVT), + Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT), Mask, EVL); Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2, DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL); - Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(56, dl, SHVT), + Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT), Mask, EVL); Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL); Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL); @@ -9248,7 +9579,7 @@ SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const { Tmp = (Sz > 8 ? 
DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op); // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4) - Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT), + Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT), Mask, EVL); Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT), Mask, EVL); @@ -9259,7 +9590,7 @@ SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const { Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL); // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2) - Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT), + Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT), Mask, EVL); Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT), Mask, EVL); @@ -9270,7 +9601,7 @@ SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const { Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL); // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1) - Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT), + Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT), Mask, EVL); Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT), Mask, EVL); @@ -9330,9 +9661,8 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, for (unsigned Idx = 0; Idx < NumElem; ++Idx) { unsigned ShiftIntoIdx = (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx); - SDValue ShiftAmount = - DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(), - LoadVT, SL, /*LegalTypes=*/false); + SDValue ShiftAmount = DAG.getShiftAmountConstant( + ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL); SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount); SDValue Elt = DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask); @@ -9592,9 +9922,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { } // aggregate the two parts - SDValue ShiftAmount = - DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(), - DAG.getDataLayout())); + SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl); SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount); Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo); @@ -9706,8 +10034,8 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, unsigned IncrementSize = NumBits / 8; // Divide the stored value in two parts. - SDValue ShiftAmount = DAG.getConstant( - NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout())); + SDValue ShiftAmount = + DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl); SDValue Lo = Val; // If Val is a constant, replace the upper bits with 0. The SRL will constant // fold and not use the upper bits. A smaller constant may be easier to @@ -10110,6 +10438,41 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { return DAG.getSelect(dl, VT, Overflow, Result, SumDiff); } +SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const { + unsigned Opcode = Node->getOpcode(); + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + EVT VT = LHS.getValueType(); + EVT ResVT = Node->getValueType(0); + EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDLoc dl(Node); + + auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT); + auto GTPredicate = (Opcode == ISD::UCMP ? 
ISD::SETUGT : ISD::SETGT); + SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate); + SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate); + + // We can't perform arithmetic on i1 values. Extending them would + // probably result in worse codegen, so let's just use two selects instead. + // Some targets are also just better off using selects rather than subtraction + // because one of the conditions can be merged with one of the selects. + // And finally, if we don't know the contents of high bits of a boolean value + // we can't perform any arithmetic either. + if (shouldExpandCmpUsingSelects() || BoolVT.getScalarSizeInBits() == 1 || + getBooleanContents(BoolVT) == UndefinedBooleanContent) { + SDValue SelectZeroOrOne = + DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT), + DAG.getConstant(0, dl, ResVT)); + return DAG.getSelect(dl, ResVT, IsLT, DAG.getConstant(-1, dl, ResVT), + SelectZeroOrOne); + } + + if (getBooleanContents(BoolVT) == ZeroOrNegativeOneBooleanContent) + std::swap(IsGT, IsLT); + return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl, + ResVT); +} + SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const { unsigned Opcode = Node->getOpcode(); bool IsSigned = Opcode == ISD::SSHLSAT; @@ -10149,6 +10512,122 @@ SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const { return DAG.getSelect(dl, VT, Cond, SatVal, Result); } +void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, + bool Signed, EVT WideVT, + const SDValue LL, const SDValue LH, + const SDValue RL, const SDValue RH, + SDValue &Lo, SDValue &Hi) const { + // We can fall back to a libcall with an illegal type for the MUL if we + // have a libcall big enough. + // Also, we can fall back to a division in some cases, but that's a big + // performance hit in the general case. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (WideVT == MVT::i16) + LC = RTLIB::MUL_I16; + else if (WideVT == MVT::i32) + LC = RTLIB::MUL_I32; + else if (WideVT == MVT::i64) + LC = RTLIB::MUL_I64; + else if (WideVT == MVT::i128) + LC = RTLIB::MUL_I128; + + if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) { + // We'll expand the multiplication by brute force because we have no other + // options. This is a trivially-generalized version of the code from + // Hacker's Delight (itself derived from Knuth's Algorithm M from section + // 4.3.1). 
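
The same halves decomposition in plain scalar form, as a 32 x 32 -> 64 bit unsigned multiply built only from 32-bit operations (a sketch with illustrative names, not code from this patch):

#include <cstdint>

void wideMul32(uint32_t A, uint32_t B, uint32_t &Lo, uint32_t &Hi) {
  uint32_t AL = A & 0xFFFF, AH = A >> 16;
  uint32_t BL = B & 0xFFFF, BH = B >> 16;
  uint32_t T = AL * BL;             // low partial product
  uint32_t U = AH * BL + (T >> 16); // add the carry; cannot overflow 32 bits
  uint32_t V = AL * BH + (U & 0xFFFF);
  Lo = (T & 0xFFFF) | (V << 16);
  Hi = AH * BH + (U >> 16) + (V >> 16);
}

The DAG code below performs the same computation on the half-words LL/LH/RL/RH, plus the RH*LL and RL*LH cross terms, because the operands already arrive split into halves.
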
+ EVT VT = LL.getValueType(); + unsigned Bits = VT.getSizeInBits(); + unsigned HalfBits = Bits >> 1; + SDValue Mask = + DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT); + SDValue LLL = DAG.getNode(ISD::AND, dl, VT, LL, Mask); + SDValue RLL = DAG.getNode(ISD::AND, dl, VT, RL, Mask); + + SDValue T = DAG.getNode(ISD::MUL, dl, VT, LLL, RLL); + SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask); + + SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl); + SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift); + SDValue LLH = DAG.getNode(ISD::SRL, dl, VT, LL, Shift); + SDValue RLH = DAG.getNode(ISD::SRL, dl, VT, RL, Shift); + + SDValue U = DAG.getNode(ISD::ADD, dl, VT, + DAG.getNode(ISD::MUL, dl, VT, LLH, RLL), TH); + SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask); + SDValue UH = DAG.getNode(ISD::SRL, dl, VT, U, Shift); + + SDValue V = DAG.getNode(ISD::ADD, dl, VT, + DAG.getNode(ISD::MUL, dl, VT, LLL, RLH), UL); + SDValue VH = DAG.getNode(ISD::SRL, dl, VT, V, Shift); + + SDValue W = + DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LLH, RLH), + DAG.getNode(ISD::ADD, dl, VT, UH, VH)); + Lo = DAG.getNode(ISD::ADD, dl, VT, TL, + DAG.getNode(ISD::SHL, dl, VT, V, Shift)); + + Hi = DAG.getNode(ISD::ADD, dl, VT, W, + DAG.getNode(ISD::ADD, dl, VT, + DAG.getNode(ISD::MUL, dl, VT, RH, LL), + DAG.getNode(ISD::MUL, dl, VT, RL, LH))); + } else { + // Attempt a libcall. + SDValue Ret; + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(Signed); + CallOptions.setIsPostTypeLegalization(true); + if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) { + // Halves of WideVT are packed into registers in different order + // depending on platform endianness. This is usually handled by + // the C calling convention, but we can't defer to it in + // the legalizer. + SDValue Args[] = {LL, LH, RL, RH}; + Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first; + } else { + SDValue Args[] = {LH, LL, RH, RL}; + Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first; + } + assert(Ret.getOpcode() == ISD::MERGE_VALUES && + "Ret value is a collection of constituent nodes holding result."); + if (DAG.getDataLayout().isLittleEndian()) { + // Same as above. + Lo = Ret.getOperand(0); + Hi = Ret.getOperand(1); + } else { + Lo = Ret.getOperand(1); + Hi = Ret.getOperand(0); + } + } +} + +void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, + bool Signed, const SDValue LHS, + const SDValue RHS, SDValue &Lo, + SDValue &Hi) const { + EVT VT = LHS.getValueType(); + assert(RHS.getValueType() == VT && "Mismatching operand types"); + + SDValue HiLHS; + SDValue HiRHS; + if (Signed) { + // The high part is obtained by SRA'ing all but one of the bits of low + // part. 
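  // (For i32 LHS = -7, for instance, LHS >> 31 is all-ones, so the pair
  // (HiLHS, LHS) is exactly the 64-bit sign extension of -7 that the wide
  // multiply expects.)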
+ unsigned LoSize = VT.getFixedSizeInBits(); + HiLHS = DAG.getNode( + ISD::SRA, dl, VT, LHS, + DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout()))); + HiRHS = DAG.getNode( + ISD::SRA, dl, VT, RHS, + DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout()))); + } else { + HiLHS = DAG.getConstant(0, dl, VT); + HiRHS = DAG.getConstant(0, dl, VT); + } + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); + forceExpandWideMUL(DAG, dl, Signed, WideVT, LHS, HiLHS, RHS, HiRHS, Lo, Hi); +} + SDValue TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { assert((Node->getOpcode() == ISD::SMULFIX || @@ -10213,6 +10692,7 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { SDValue Lo, Hi; unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI; unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU; + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2); if (isOperationLegalOrCustom(LoHiOp, VT)) { SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS); Lo = Result.getValue(0); @@ -10220,10 +10700,21 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { } else if (isOperationLegalOrCustom(HiOp, VT)) { Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS); + } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) { + // Try for a multiplication using a wider type. + unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS); + SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS); + SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt); + Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); + SDValue Shifted = + DAG.getNode(ISD::SRA, dl, WideVT, Res, + DAG.getShiftAmountConstant(VTSize, WideVT, dl)); + Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted); } else if (VT.isVector()) { return SDValue(); } else { - report_fatal_error("Unable to expand fixed point multiplication."); + forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi); } if (Scale == VTSize) @@ -10235,9 +10726,8 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { // The result will need to be shifted right by the scale since both operands // are scaled. The result is given to us in 2 halves, so we only want part of // both in the result. - EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout()); SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo, - DAG.getConstant(Scale, dl, ShiftTy)); + DAG.getShiftAmountConstant(Scale, VT, dl)); if (!Saturating) return Result; @@ -10265,7 +10755,7 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { if (Scale == 0) { SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo, - DAG.getConstant(VTSize - 1, dl, ShiftTy)); + DAG.getShiftAmountConstant(VTSize - 1, VT, dl)); SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE); // Saturated to SatMin if wide product is negative, and SatMax if wide // product is positive ... @@ -10332,13 +10822,12 @@ TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, // RHS down by RHSShift, we can emit a regular division with a final scaling // factor of Scale. - EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout()); if (LHSShift) LHS = DAG.getNode(ISD::SHL, dl, VT, LHS, - DAG.getConstant(LHSShift, dl, ShiftTy)); + DAG.getShiftAmountConstant(LHSShift, VT, dl)); if (RHSShift) RHS = DAG.getNode(Signed ? 
ISD::SRA : ISD::SRL, dl, VT, RHS, - DAG.getConstant(RHSShift, dl, ShiftTy)); + DAG.getShiftAmountConstant(RHSShift, VT, dl)); SDValue Quot; if (Signed) { @@ -10481,8 +10970,7 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result, if (C.isPowerOf2()) { // smulo(x, signed_min) is same as umulo(x, signed_min). bool UseArithShift = isSigned && !C.isMinSignedValue(); - EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout()); - SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy); + SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl); Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt); Overflow = DAG.getSetCC(dl, SetCCVT, DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL, @@ -10514,84 +11002,21 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result, RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul); - SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl, - getShiftAmountTy(WideVT, DAG.getDataLayout())); + SDValue ShiftAmt = + DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl); TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt)); } else { if (VT.isVector()) return false; - // We can fall back to a libcall with an illegal type for the MUL if we - // have a libcall big enough. - // Also, we can fall back to a division in some cases, but that's a big - // performance hit in the general case. - RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - if (WideVT == MVT::i16) - LC = RTLIB::MUL_I16; - else if (WideVT == MVT::i32) - LC = RTLIB::MUL_I32; - else if (WideVT == MVT::i64) - LC = RTLIB::MUL_I64; - else if (WideVT == MVT::i128) - LC = RTLIB::MUL_I128; - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); - - SDValue HiLHS; - SDValue HiRHS; - if (isSigned) { - // The high part is obtained by SRA'ing all but one of the bits of low - // part. - unsigned LoSize = VT.getFixedSizeInBits(); - HiLHS = - DAG.getNode(ISD::SRA, dl, VT, LHS, - DAG.getConstant(LoSize - 1, dl, - getPointerTy(DAG.getDataLayout()))); - HiRHS = - DAG.getNode(ISD::SRA, dl, VT, RHS, - DAG.getConstant(LoSize - 1, dl, - getPointerTy(DAG.getDataLayout()))); - } else { - HiLHS = DAG.getConstant(0, dl, VT); - HiRHS = DAG.getConstant(0, dl, VT); - } - - // Here we're passing the 2 arguments explicitly as 4 arguments that are - // pre-lowered to the correct types. This all depends upon WideVT not - // being a legal type for the architecture and thus has to be split to - // two arguments. - SDValue Ret; - TargetLowering::MakeLibCallOptions CallOptions; - CallOptions.setSExt(isSigned); - CallOptions.setIsPostTypeLegalization(true); - if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) { - // Halves of WideVT are packed into registers in different order - // depending on platform endianness. This is usually handled by - // the C calling convention, but we can't defer to it in - // the legalizer. - SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; - Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first; - } else { - SDValue Args[] = { HiLHS, LHS, HiRHS, RHS }; - Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first; - } - assert(Ret.getOpcode() == ISD::MERGE_VALUES && - "Ret value is a collection of constituent nodes holding result."); - if (DAG.getDataLayout().isLittleEndian()) { - // Same as above. 
- BottomHalf = Ret.getOperand(0); - TopHalf = Ret.getOperand(1); - } else { - BottomHalf = Ret.getOperand(1); - TopHalf = Ret.getOperand(0); - } + forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf); } Result = BottomHalf; if (isSigned) { - SDValue ShiftAmt = DAG.getConstant( - VT.getScalarSizeInBits() - 1, dl, - getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout())); + SDValue ShiftAmt = DAG.getShiftAmountConstant( + VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl); SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt); Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE); } else { @@ -10628,7 +11053,7 @@ SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const { SDValue Lo, Hi; std::tie(Lo, Hi) = DAG.SplitVector(Op, dl); - Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi); + Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags()); VT = HalfVT; } } @@ -10809,6 +11234,128 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node, return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select); } +SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op, + const SDLoc &dl, + SelectionDAG &DAG) const { + EVT OperandVT = Op.getValueType(); + if (OperandVT.getScalarType() == ResultVT.getScalarType()) + return Op; + EVT ResultIntVT = ResultVT.changeTypeToInteger(); + // We are rounding binary64/binary128 -> binary32 -> bfloat16. This + // can induce double-rounding which may alter the results. We can + // correct for this using a trick explained in: Boldo, Sylvie, and + // Guillaume Melquiond. "When double rounding is odd." 17th IMACS + // World Congress. 2005. + unsigned BitSize = OperandVT.getScalarSizeInBits(); + EVT WideIntVT = OperandVT.changeTypeToInteger(); + SDValue OpAsInt = DAG.getBitcast(WideIntVT, Op); + SDValue SignBit = + DAG.getNode(ISD::AND, dl, WideIntVT, OpAsInt, + DAG.getConstant(APInt::getSignMask(BitSize), dl, WideIntVT)); + SDValue AbsWide; + if (isOperationLegalOrCustom(ISD::FABS, OperandVT)) { + AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op); + } else { + SDValue ClearedSign = DAG.getNode( + ISD::AND, dl, WideIntVT, OpAsInt, + DAG.getConstant(APInt::getSignedMaxValue(BitSize), dl, WideIntVT)); + AbsWide = DAG.getBitcast(OperandVT, ClearedSign); + } + SDValue AbsNarrow = DAG.getFPExtendOrRound(AbsWide, dl, ResultVT); + SDValue AbsNarrowAsWide = DAG.getFPExtendOrRound(AbsNarrow, dl, OperandVT); + + // We can keep the narrow value as-is if narrowing was exact (no + // rounding error), the wide value was NaN (the narrow value is also + // NaN and should be preserved) or if we rounded to the odd value. + SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, AbsNarrow); + SDValue One = DAG.getConstant(1, dl, ResultIntVT); + SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT); + SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One); + EVT ResultIntVTCCVT = getSetCCResultType( + DAG.getDataLayout(), *DAG.getContext(), And.getValueType()); + SDValue Zero = DAG.getConstant(0, dl, ResultIntVT); + // The result is already odd so we don't need to do anything. + SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE); + + EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + AbsWide.getValueType()); + // We keep results which are exact, odd or NaN. 
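  // (The forced-odd low bit acts as a sticky bit: it records that the wide
  // result was inexact while never sitting exactly on a tie for the final
  // round-to-nearest-even step, which is what makes the second rounding
  // harmless -- see the Boldo-Melquiond reference above.)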
+ SDValue KeepNarrow = + DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETUEQ); + KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd); + // We morally performed a round-down if AbsNarrow is smaller than + // AbsWide. + SDValue NarrowIsRd = + DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT); + // If the narrow value is odd or exact, pick it. + // Otherwise, narrow is even and corresponds to either the rounded-up + // or rounded-down value. If narrow is the rounded-down value, we want + // the rounded-up value as it will be odd. + SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne); + SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust); + Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted); + int ShiftAmount = BitSize - ResultVT.getScalarSizeInBits(); + SDValue ShiftCnst = DAG.getShiftAmountConstant(ShiftAmount, WideIntVT, dl); + SignBit = DAG.getNode(ISD::SRL, dl, WideIntVT, SignBit, ShiftCnst); + SignBit = DAG.getNode(ISD::TRUNCATE, dl, ResultIntVT, SignBit); + Op = DAG.getNode(ISD::OR, dl, ResultIntVT, Op, SignBit); + return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op); +} + +SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const { + assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!"); + SDValue Op = Node->getOperand(0); + EVT VT = Node->getValueType(0); + SDLoc dl(Node); + if (VT.getScalarType() == MVT::bf16) { + if (Node->getConstantOperandVal(1) == 1) { + return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0)); + } + EVT OperandVT = Op.getValueType(); + SDValue IsNaN = DAG.getSetCC( + dl, + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT), + Op, Op, ISD::SETUO); + + // We are rounding binary64/binary128 -> binary32 -> bfloat16. This + // can induce double-rounding which may alter the results. We can + // correct for this using a trick explained in: Boldo, Sylvie, and + // Guillaume Melquiond. "When double rounding is odd." 17th IMACS + // World Congress. 2005. + EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32; + EVT I32 = F32.changeTypeToInteger(); + Op = expandRoundInexactToOdd(F32, Op, dl, DAG); + Op = DAG.getNode(ISD::BITCAST, dl, I32, Op); + + // Conversions should set NaN's quiet bit. This also prevents NaNs from + // turning into infinities. + SDValue NaN = + DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32)); + + // Factor in the contribution of the low 16 bits. + SDValue One = DAG.getConstant(1, dl, I32); + SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op, + DAG.getShiftAmountConstant(16, I32, dl)); + Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One); + SDValue RoundingBias = + DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb); + SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias); + + // Don't round if we had a NaN, we don't want to turn 0x7fffffff into + // 0x80000000. + Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add); + + // Now that we have rounded, shift the bits into position. + Op = DAG.getNode(ISD::SRL, dl, I32, Op, + DAG.getShiftAmountConstant(16, I32, dl)); + Op = DAG.getNode(ISD::BITCAST, dl, I32, Op); + EVT I16 = I32.isVector() ? 
I32.changeVectorElementType(MVT::i16) : MVT::i16; + Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op); + return DAG.getNode(ISD::BITCAST, dl, VT, Op); + } + return SDValue(); +} + SDValue TargetLowering::expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const { assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!"); @@ -10883,6 +11430,108 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node, MachinePointerInfo::getUnknownStack(MF)); } +SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node, + SelectionDAG &DAG) const { + SDLoc DL(Node); + SDValue Vec = Node->getOperand(0); + SDValue Mask = Node->getOperand(1); + SDValue Passthru = Node->getOperand(2); + + EVT VecVT = Vec.getValueType(); + EVT ScalarVT = VecVT.getScalarType(); + EVT MaskVT = Mask.getValueType(); + EVT MaskScalarVT = MaskVT.getScalarType(); + + // Needs to be handled by targets that have scalable vector types. + if (VecVT.isScalableVector()) + report_fatal_error("Cannot expand masked_compress for scalable vectors."); + + SDValue StackPtr = DAG.CreateStackTemporary( + VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false)); + int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); + + MVT PositionVT = getVectorIdxTy(DAG.getDataLayout()); + SDValue Chain = DAG.getEntryNode(); + SDValue OutPos = DAG.getConstant(0, DL, PositionVT); + + bool HasPassthru = !Passthru.isUndef(); + + // If we have a passthru vector, store it on the stack, overwrite the matching + // positions and then re-write the last element that was potentially + // overwritten even though mask[i] = false. + if (HasPassthru) + Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo); + + SDValue LastWriteVal; + APInt PassthruSplatVal; + bool IsSplatPassthru = + ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal); + + if (IsSplatPassthru) { + // As we do not know which position we wrote to last, we cannot simply + // access that index from the passthru vector. So we first check if passthru + // is a splat vector, to use any element ... + LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT); + } else if (HasPassthru) { + // ... if it is not a splat vector, we need to get the passthru value at + // position = popcount(mask) and re-load it from the stack before it is + // overwritten in the loop below. + SDValue Popcount = DAG.getNode( + ISD::TRUNCATE, DL, MaskVT.changeVectorElementType(MVT::i1), Mask); + Popcount = DAG.getNode(ISD::ZERO_EXTEND, DL, + MaskVT.changeVectorElementType(ScalarVT), Popcount); + Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, ScalarVT, Popcount); + SDValue LastElmtPtr = + getVectorElementPointer(DAG, StackPtr, VecVT, Popcount); + LastWriteVal = DAG.getLoad( + ScalarVT, DL, Chain, LastElmtPtr, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); + Chain = LastWriteVal.getValue(1); + } + + unsigned NumElms = VecVT.getVectorNumElements(); + for (unsigned I = 0; I < NumElms; I++) { + SDValue Idx = DAG.getVectorIdxConstant(I, DL); + + SDValue ValI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec, Idx); + SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos); + Chain = DAG.getStore( + Chain, DL, ValI, OutPtr, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); + + // Get the mask value and add it to the current output position. This + // either increments by 1 if MaskI is true or adds 0 otherwise. 
+      // Freeze in case we have poison/undef mask entries.
+      SDValue MaskI = DAG.getFreeze(
+          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MaskScalarVT, Mask, Idx));
+      MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
+      MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
+      OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
+
+      if (HasPassthru && I == NumElms - 1) {
+        SDValue EndOfVector =
+            DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
+        SDValue AllLanesSelected =
+            DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
+        OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
+        OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
+
+        // Keep the last ValI if all lanes were selected; otherwise,
+        // overwrite the last write with the passthru value.
+        LastWriteVal =
+            DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI, LastWriteVal);
+        Chain = DAG.getStore(
+            Chain, DL, LastWriteVal, OutPtr,
+            MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+      }
+    }
+
+  return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
+}
+
 bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
                                            SDValue &LHS, SDValue &RHS,
                                            SDValue &CC, SDValue Mask,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
index ab57d08e527e..239572bf773e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -161,9 +161,11 @@ class ShrinkWrap : public MachineFunctionPass {
   /// Current MachineFunction.
   MachineFunction *MachineFunc = nullptr;

-  /// Is `true` for block numbers where we can guarantee no stack access
-  /// or computation of stack-relative addresses on any CFG path including
-  /// the block itself.
+  /// Is `true` for the block numbers where we assume possible stack accesses
+  /// or computation of stack-relative addresses on any CFG path including the
+  /// block itself. Is `false` for basic blocks where we can guarantee the
+  /// opposite. False positives won't lead to incorrect analysis results, so
+  /// this conservative approach is sound.
   BitVector StackAddressUsedBlockInfo;

   /// Check if \p MI uses or defines a callee-saved register or
@@ -223,12 +225,12 @@ class ShrinkWrap : public MachineFunctionPass {
   /// Initialize the pass for \p MF.
void init(MachineFunction &MF) { RCI.runOnMachineFunction(MF); - MDT = &getAnalysis<MachineDominatorTree>(); - MPDT = &getAnalysis<MachinePostDominatorTree>(); + MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); + MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree(); Save = nullptr; Restore = nullptr; - MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); - MLI = &getAnalysis<MachineLoopInfo>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(); + MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); EntryFreq = MBFI->getEntryFreq(); const TargetSubtargetInfo &Subtarget = MF.getSubtarget(); @@ -259,10 +261,10 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); - AU.addRequired<MachineBlockFrequencyInfo>(); - AU.addRequired<MachineDominatorTree>(); - AU.addRequired<MachinePostDominatorTree>(); - AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); + AU.addRequired<MachineDominatorTreeWrapperPass>(); + AU.addRequired<MachinePostDominatorTreeWrapperPass>(); + AU.addRequired<MachineLoopInfoWrapperPass>(); AU.addRequired<MachineOptimizationRemarkEmitterPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -286,10 +288,10 @@ char ShrinkWrap::ID = 0; char &llvm::ShrinkWrapID = ShrinkWrap::ID; INITIALIZE_PASS_BEGIN(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false) @@ -668,8 +670,8 @@ bool ShrinkWrap::postShrinkWrapping(bool HasCandidate, MachineFunction &MF, Save = NewSave; Restore = NewRestore; - MDT->runOnMachineFunction(MF); - MPDT->runOnMachineFunction(MF); + MDT->recalculate(MF); + MPDT->recalculate(MF); assert((MDT->dominates(Save, Restore) && MPDT->dominates(Restore, Save)) && "Incorrect save or restore point due to dominance relations"); @@ -948,6 +950,9 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; + // Initially, conservatively assume that stack addresses can be used in each + // basic block and change the state only for those basic blocks for which we + // were able to prove the opposite. StackAddressUsedBlockInfo.resize(MF.getNumBlockIDs(), true); bool HasCandidate = performShrinkWrapping(RPOT, RS.get()); StackAddressUsedBlockInfo.clear(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp index 515b5764a094..054f7d721596 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -150,9 +150,7 @@ static void MarkBlocksLiveIn(BasicBlock *BB, if (!LiveBBs.insert(BB).second) return; // already been here. 
- df_iterator_default_set<BasicBlock*> Visited; - - for (BasicBlock *B : inverse_depth_first_ext(BB, Visited)) + for (BasicBlock *B : inverse_depth_first(BB)) LiveBBs.insert(B); } @@ -201,10 +199,10 @@ SjLjEHPrepareImpl::setupFunctionContext(Function &F, // Create an alloca for the incoming jump buffer ptr and the new jump buffer // that needs to be restored on all exits from the function. This is an alloca // because the value needs to be added to the global context list. - auto &DL = F.getParent()->getDataLayout(); + auto &DL = F.getDataLayout(); const Align Alignment = DL.getPrefTypeAlign(FunctionContextTy); FuncCtx = new AllocaInst(FunctionContextTy, DL.getAllocaAddrSpace(), nullptr, - Alignment, "fn_context", &EntryBB->front()); + Alignment, "fn_context", EntryBB->begin()); // Fill in the function context structure. for (LandingPadInst *LPI : LPads) { @@ -273,7 +271,7 @@ void SjLjEHPrepareImpl::lowerIncomingArguments(Function &F) { Value *TrueValue = ConstantInt::getTrue(F.getContext()); Value *UndefValue = UndefValue::get(Ty); Instruction *SI = SelectInst::Create( - TrueValue, &AI, UndefValue, AI.getName() + ".tmp", &*AfterAllocaInsPt); + TrueValue, &AI, UndefValue, AI.getName() + ".tmp", AfterAllocaInsPt); AI.replaceAllUsesWith(SI); // Reset the operand, because it was clobbered by the RAUW above. @@ -388,7 +386,7 @@ bool SjLjEHPrepareImpl::setupEntryBlockAndCallSites(Function &F) { if (Function *Callee = II->getCalledFunction()) if (Callee->getIntrinsicID() == Intrinsic::donothing) { // Remove the NOP invoke. - BranchInst::Create(II->getNormalDest(), II); + BranchInst::Create(II->getNormalDest(), II->getIterator()); II->eraseFromParent(); continue; } @@ -447,7 +445,7 @@ bool SjLjEHPrepareImpl::setupEntryBlockAndCallSites(Function &F) { // Record the call site value for the back end so it stays associated with // the invoke. 
- CallInst::Create(CallSiteFn, CallSiteNum, "", Invokes[I]); + CallInst::Create(CallSiteFn, CallSiteNum, "", Invokes[I]->getIterator()); } // Mark call instructions that aren't nounwind as no-action (call_site == @@ -464,8 +462,8 @@ bool SjLjEHPrepareImpl::setupEntryBlockAndCallSites(Function &F) { } // Register the function context and make sure it's known to not throw - CallInst *Register = - CallInst::Create(RegisterFn, FuncCtx, "", EntryBB->getTerminator()); + CallInst *Register = CallInst::Create( + RegisterFn, FuncCtx, "", EntryBB->getTerminator()->getIterator()); Register->setDoesNotThrow(); // Following any allocas not in the entry block, update the saved SP in the @@ -482,7 +480,8 @@ bool SjLjEHPrepareImpl::setupEntryBlockAndCallSites(Function &F) { } Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); StackAddr->insertAfter(&I); - new StoreInst(StackAddr, StackPtr, true, StackAddr->getNextNode()); + new StoreInst(StackAddr, StackPtr, true, + std::next(StackAddr->getIterator())); } } @@ -492,7 +491,7 @@ bool SjLjEHPrepareImpl::setupEntryBlockAndCallSites(Function &F) { Instruction *InsertPoint = Return; if (CallInst *CI = Return->getParent()->getTerminatingMustTailCall()) InsertPoint = CI; - CallInst::Create(UnregisterFn, FuncCtx, "", InsertPoint); + CallInst::Create(UnregisterFn, FuncCtx, "", InsertPoint->getIterator()); } return true; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp index 8b80c6ccb438..1b92a5aa59d1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp @@ -18,28 +18,43 @@ using namespace llvm; #define DEBUG_TYPE "slotindexes" -char SlotIndexes::ID = 0; +AnalysisKey SlotIndexesAnalysis::Key; -SlotIndexes::SlotIndexes() : MachineFunctionPass(ID) { - initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); +SlotIndexesAnalysis::Result +SlotIndexesAnalysis::run(MachineFunction &MF, + MachineFunctionAnalysisManager &) { + return Result(MF); +} + +PreservedAnalyses +SlotIndexesPrinterPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + OS << "Slot indexes in machine function: " << MF.getName() << '\n'; + MFAM.getResult<SlotIndexesAnalysis>(MF).print(OS); + return PreservedAnalyses::all(); +} +char SlotIndexesWrapperPass::ID = 0; + +SlotIndexesWrapperPass::SlotIndexesWrapperPass() : MachineFunctionPass(ID) { + initializeSlotIndexesWrapperPassPass(*PassRegistry::getPassRegistry()); } SlotIndexes::~SlotIndexes() { // The indexList's nodes are all allocated in the BumpPtrAllocator. - indexList.clearAndLeakNodesUnsafely(); + indexList.clear(); } -INITIALIZE_PASS(SlotIndexes, DEBUG_TYPE, - "Slot index numbering", false, false) +INITIALIZE_PASS(SlotIndexesWrapperPass, DEBUG_TYPE, "Slot index numbering", + false, false) STATISTIC(NumLocalRenum, "Number of local renumberings"); -void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const { +void SlotIndexesWrapperPass::getAnalysisUsage(AnalysisUsage &au) const { au.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(au); } -void SlotIndexes::releaseMemory() { +void SlotIndexes::clear() { mi2iMap.clear(); MBBRanges.clear(); idx2MBBMap.clear(); @@ -47,7 +62,7 @@ void SlotIndexes::releaseMemory() { ileAllocator.Reset(); } -bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { +void SlotIndexes::analyze(MachineFunction &fn) { // Compute numbering as follows: // Grab an iterator to the start of the index list. 
@@ -75,7 +90,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { MBBRanges.resize(mf->getNumBlockIDs()); idx2MBBMap.reserve(mf->size()); - indexList.push_back(createEntry(nullptr, index)); + indexList.push_back(*createEntry(nullptr, index)); // Iterate over the function. for (MachineBasicBlock &MBB : *mf) { @@ -87,7 +102,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { continue; // Insert a store index for the instr. - indexList.push_back(createEntry(&MI, index += SlotIndex::InstrDist)); + indexList.push_back(*createEntry(&MI, index += SlotIndex::InstrDist)); // Save this base index in the maps. mi2iMap.insert(std::make_pair( @@ -95,7 +110,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { } // We insert one blank instructions between basic blocks. - indexList.push_back(createEntry(nullptr, index += SlotIndex::InstrDist)); + indexList.push_back(*createEntry(nullptr, index += SlotIndex::InstrDist)); MBBRanges[MBB.getNumber()].first = blockStartIndex; MBBRanges[MBB.getNumber()].second = SlotIndex(&indexList.back(), @@ -107,9 +122,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { llvm::sort(idx2MBBMap, less_first()); LLVM_DEBUG(mf->print(dbgs(), this)); - - // And we're done! - return false; } void SlotIndexes::removeMachineInstrFromMaps(MachineInstr &MI, @@ -242,22 +254,23 @@ void SlotIndexes::packIndexes() { Entry.setIndex(Index * SlotIndex::InstrDist); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void SlotIndexes::dump() const { +void SlotIndexes::print(raw_ostream &OS) const { for (const IndexListEntry &ILE : indexList) { - dbgs() << ILE.getIndex() << " "; + OS << ILE.getIndex() << ' '; - if (ILE.getInstr()) { - dbgs() << *ILE.getInstr(); - } else { - dbgs() << "\n"; - } + if (ILE.getInstr()) + OS << *ILE.getInstr(); + else + OS << '\n'; } for (unsigned i = 0, e = MBBRanges.size(); i != e; ++i) - dbgs() << "%bb." << i << "\t[" << MBBRanges[i].first << ';' - << MBBRanges[i].second << ")\n"; + OS << "%bb." << i << "\t[" << MBBRanges[i].first << ';' + << MBBRanges[i].second << ")\n"; } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void SlotIndexes::dump() const { print(dbgs()); } #endif // Print a SlotIndex to a raw_ostream. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp index cdb8099e354b..9f91ee493415 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp @@ -56,7 +56,7 @@ INITIALIZE_PASS_END(SpillPlacement, DEBUG_TYPE, void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); AU.addRequiredTransitive<EdgeBundles>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -200,7 +200,7 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { // Compute total ingoing and outgoing block frequencies for all bundles. 
BlockFrequencies.resize(mf.getNumBlockIDs()); - MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(); setThreshold(MBFI->getEntryFreq()); for (auto &I : mf) { unsigned Num = I.getNumber(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp index d6c0a782465e..b671e5103875 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp @@ -184,8 +184,7 @@ void SplitAnalysis::analyzeUses() { // Remove duplicates, keeping the smaller slot for each instruction. // That is what we want for early clobbers. - UseSlots.erase(std::unique(UseSlots.begin(), UseSlots.end(), - SlotIndex::isSameInstr), + UseSlots.erase(llvm::unique(UseSlots, SlotIndex::isSameInstr), UseSlots.end()); // Compute per-live block info. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp index 37f7aa929005..341ec629bedd 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp @@ -517,12 +517,12 @@ char &llvm::StackColoringID = StackColoring::ID; INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE, "Merge disjoint stack slots", false, false) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) INITIALIZE_PASS_END(StackColoring, DEBUG_TYPE, "Merge disjoint stack slots", false, false) void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<SlotIndexes>(); + AU.addRequired<SlotIndexesWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -773,6 +773,10 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { void StackColoring::calculateLocalLiveness() { unsigned NumIters = 0; bool changed = true; + // Create BitVector outside the loop and reuse them to avoid repeated heap + // allocations. + BitVector LocalLiveIn; + BitVector LocalLiveOut; while (changed) { changed = false; ++NumIters; @@ -784,7 +788,7 @@ void StackColoring::calculateLocalLiveness() { BlockLifetimeInfo &BlockInfo = BI->second; // Compute LiveIn by unioning together the LiveOut sets of all preds. - BitVector LocalLiveIn; + LocalLiveIn.clear(); for (MachineBasicBlock *Pred : BB->predecessors()) { LivenessMap::const_iterator I = BlockLiveness.find(Pred); // PR37130: transformations prior to stack coloring can @@ -801,7 +805,7 @@ void StackColoring::calculateLocalLiveness() { // because we already handle the case where the BEGIN comes // before the END when collecting the markers (and building the // BEGIN/END vectors). - BitVector LocalLiveOut = LocalLiveIn; + LocalLiveOut = LocalLiveIn; LocalLiveOut.reset(BlockInfo.End); LocalLiveOut |= BlockInfo.Begin; @@ -960,14 +964,14 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { MFI->setObjectSSPLayout(SI.second, FromKind); // The new alloca might not be valid in a llvm.dbg.declare for this - // variable, so undef out the use to make the verifier happy. + // variable, so poison out the use to make the verifier happy. 
+ // variable, so poison out the use to make the verifier happy.
AllocaInst *FromAI = const_cast<AllocaInst *>(From); if (FromAI->isUsedByMetadata()) - ValueAsMetadata::handleRAUW(FromAI, UndefValue::get(FromAI->getType())); + ValueAsMetadata::handleRAUW(FromAI, PoisonValue::get(FromAI->getType())); for (auto &Use : FromAI->uses()) { if (BitCastInst *BCI = dyn_cast<BitCastInst>(Use.get())) if (BCI->isUsedByMetadata()) - ValueAsMetadata::handleRAUW(BCI, UndefValue::get(BCI->getType())); + ValueAsMetadata::handleRAUW(BCI, PoisonValue::get(BCI->getType())); } // Note that this will not replace uses in MMOs (which we'll update below), @@ -1179,7 +1183,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { << "********** Function: " << Func.getName() << '\n'); MF = &Func; MFI = &MF->getFrameInfo(); - Indexes = &getAnalysis<SlotIndexes>(); + Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI(); BlockLiveness.clear(); BasicBlocks.clear(); BasicBlockNumbering.clear(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp index 5d3903ed84ce..940aecd1cb36 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp @@ -62,11 +62,14 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass { int Align; int Offset; SlotType SlotTy; + bool Scalable; SlotData(const MachineFrameInfo &MFI, const int ValOffset, const int Idx) : Slot(Idx), Size(MFI.getObjectSize(Idx)), Align(MFI.getObjectAlign(Idx).value()), - Offset(MFI.getObjectOffset(Idx) - ValOffset), SlotTy(Invalid) { + Offset(MFI.getObjectOffset(Idx) - ValOffset), SlotTy(Invalid), + Scalable(false) { + Scalable = MFI.getStackID(Idx) == TargetStackID::ScalableVector; if (MFI.isSpillSlotObjectIndex(Idx)) SlotTy = SlotType::Spill; else if (Idx == MFI.getStackProtectorIndex()) @@ -75,9 +78,12 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass { SlotTy = SlotType::Variable; } - // we use this to sort in reverse order, so that the layout is displayed - // correctly - bool operator<(const SlotData &Rhs) const { return Offset > Rhs.Offset; } + // We use this to sort in reverse order, so that the layout is displayed + // correctly. Scalable slots are sorted to the end of the list. + bool operator<(const SlotData &Rhs) const { + return std::make_tuple(!Scalable, Offset) > + std::make_tuple(!Rhs.Scalable, Rhs.Offset); + } }; StackFrameLayoutAnalysisPass() : MachineFunctionPass(ID) {} @@ -153,7 +159,7 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass { Rem << Prefix << ore::NV("Offset", D.Offset) << "], Type: " << ore::NV("Type", getTypeString(D.SlotTy)) << ", Align: " << ore::NV("Align", D.Align) - << ", Size: " << ore::NV("Size", D.Size); + << ", Size: " << ore::NV("Size", ElementCount::get(D.Size, D.Scalable)); } void emitSourceLocRemark(const MachineFunction &MF, const DILocalVariable *N, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp index 778ac1f5701c..687acd90b405 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -126,8 +126,7 @@ bool StackMapLiveness::calculateLiveness(MachineFunction &MF) { for (auto &MBB : MF) { LLVM_DEBUG(dbgs() << "****** BB " << MBB.getName() << " ******\n"); LiveRegs.init(*TRI); - // FIXME: This should probably be addLiveOuts(). 
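// [Editor's note: addLiveOuts() differs from addLiveOutsNoPristines() in
// that it also adds pristine registers: callee-saved registers that the
// prologue saves and the epilogue restores but that no instruction in
// between touches. They stay live across the whole function body, so a
// patchpoint's recorded live set must include them; the change below
// implements the FIXME deleted above.]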
- LiveRegs.addLiveOutsNoPristines(MBB); + LiveRegs.addLiveOuts(MBB); bool HasStackMap = false; // Reverse iterate over all instructions and add the current live register // set to an instruction if we encounter a patchpoint instruction. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp index c180f4d8f036..df06577e14e7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveDebugVariables.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalUnion.h" #include "llvm/CodeGen/LiveIntervals.h" @@ -64,6 +65,7 @@ namespace { MachineFrameInfo *MFI = nullptr; const TargetInstrInfo *TII = nullptr; const MachineBlockFrequencyInfo *MBFI = nullptr; + SlotIndexes *Indexes = nullptr; // SSIntervals - Spill slot intervals. std::vector<LiveInterval*> SSIntervals; @@ -146,12 +148,20 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired<SlotIndexes>(); - AU.addPreserved<SlotIndexes>(); + AU.addRequired<SlotIndexesWrapperPass>(); + AU.addPreserved<SlotIndexesWrapperPass>(); AU.addRequired<LiveStacks>(); - AU.addRequired<MachineBlockFrequencyInfo>(); - AU.addPreserved<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); + AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>(); AU.addPreservedID(MachineDominatorsID); + + // In some Target's pipeline, register allocation (RA) might be + // split into multiple phases based on register class. So, this pass + // may be invoked multiple times requiring it to save these analyses to be + // used by RA later. + AU.addPreserved<LiveIntervalsWrapperPass>(); + AU.addPreserved<LiveDebugVariables>(); + MachineFunctionPass::getAnalysisUsage(AU); } @@ -175,9 +185,9 @@ char &llvm::StackSlotColoringID = StackSlotColoring::ID; INITIALIZE_PASS_BEGIN(StackSlotColoring, DEBUG_TYPE, "Stack Slot Coloring", false, false) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) INITIALIZE_PASS_DEPENDENCY(LiveStacks) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_END(StackSlotColoring, DEBUG_TYPE, "Stack Slot Coloring", false, false) @@ -214,13 +224,10 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { li.incrementWeight( LiveIntervals::getSpillWeight(false, true, MBFI, MI)); } - for (MachineInstr::mmo_iterator MMOI = MI.memoperands_begin(), - EE = MI.memoperands_end(); - MMOI != EE; ++MMOI) { - MachineMemOperand *MMO = *MMOI; + for (MachineMemOperand *MMO : MI.memoperands()) { if (const FixedStackPseudoSourceValue *FSV = - dyn_cast_or_null<FixedStackPseudoSourceValue>( - MMO->getPseudoValue())) { + dyn_cast_or_null<FixedStackPseudoSourceValue>( + MMO->getPseudoValue())) { int FI = FSV->getFrameIndex(); if (FI >= 0) SSRefs[FI].push_back(MMO); @@ -390,8 +397,8 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { const PseudoSourceValue *NewSV = MF.getPSVManager().getFixedStack(NewFI); SmallVectorImpl<MachineMemOperand *> &RefMMOs = SSRefs[SS]; - for (unsigned i = 0, e = RefMMOs.size(); i != e; ++i) - RefMMOs[i]->setValue(NewSV); + for (MachineMemOperand *MMO : RefMMOs) + MMO->setValue(NewSV); } // Rewrite all MO_FrameIndex operands. 
Look for dead stores. @@ -480,13 +487,14 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS, StoreSize))) continue; if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1 || - LoadSize != StoreSize) + LoadSize != StoreSize || !MFI->isSpillSlotObjectIndex(FirstSS)) continue; ++NumDead; changed = true; - if (NextMI->findRegisterUseOperandIdx(LoadReg, true, nullptr) != -1) { + if (NextMI->findRegisterUseOperandIdx(LoadReg, /*TRI=*/nullptr, true) != + -1) { ++NumDead; toErase.push_back(&*ProbableLoadMI); } @@ -495,8 +503,11 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { ++I; } - for (MachineInstr *MI : toErase) + for (MachineInstr *MI : toErase) { + if (Indexes) + Indexes->removeMachineInstrFromMaps(*MI); MI->eraseFromParent(); + } return changed; } @@ -513,7 +524,8 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { MFI = &MF.getFrameInfo(); TII = MF.getSubtarget().getInstrInfo(); LS = &getAnalysis<LiveStacks>(); - MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(); + Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI(); bool Changed = false; @@ -537,8 +549,8 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { Next = -1; SSIntervals.clear(); - for (unsigned i = 0, e = SSRefs.size(); i != e; ++i) - SSRefs[i].clear(); + for (auto &RefMMOs : SSRefs) + RefMMOs.clear(); SSRefs.clear(); OrigAlignments.clear(); OrigSizes.clear(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp index 8922fa589813..e741a0fc49fb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp @@ -104,7 +104,8 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters, // for the Case Statement'" (1994), but builds the MinPartitions array in // reverse order to make it easier to reconstruct the partitions in ascending // order. In the choice between two optimal partitionings, it picks the one - // which yields more jump tables. + // which yields more jump tables. The algorithm is described in + // https://arxiv.org/pdf/1910.02351v2 // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. SmallVector<unsigned, 8> MinPartitions(N); @@ -574,4 +575,4 @@ SwitchCG::SwitchLowering::computeSplitWorkItemInfo( assert(FirstRight <= W.LastCluster); return SplitWorkItemInfo{LastLeft, FirstRight, LeftProb, RightProb}; -}
\ No newline at end of file +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp index bf3d2088e196..25f20d9c899b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp @@ -40,7 +40,7 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineBranchProbabilityInfoWrapperPass>(); AU.addRequired<LazyMachineBlockFrequencyInfoPass>(); AU.addRequired<ProfileSummaryInfoWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -84,7 +84,7 @@ bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; - auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + auto MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI(); auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); auto *MBFI = (PSI && PSI->hasProfileSummary()) ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() : diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp index 5ed67bd0a121..c5fa4e6211a6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp @@ -68,6 +68,18 @@ static cl::opt<unsigned> TailDupIndirectBranchSize( "end with indirect branches."), cl::init(20), cl::Hidden); +static cl::opt<unsigned> + TailDupPredSize("tail-dup-pred-size", + cl::desc("Maximum predecessors (maximum successors at the " + "same time) to consider tail duplicating blocks."), + cl::init(16), cl::Hidden); + +static cl::opt<unsigned> + TailDupSuccSize("tail-dup-succ-size", + cl::desc("Maximum successors (maximum predecessors at the " + "same time) to consider tail duplicating blocks."), + cl::init(16), cl::Hidden); + static cl::opt<bool> TailDupVerify("tail-dup-verify", cl::desc("Verify sanity of PHI instructions during taildup"), @@ -85,7 +97,6 @@ void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc, TII = MF->getSubtarget().getInstrInfo(); TRI = MF->getSubtarget().getRegisterInfo(); MRI = &MF->getRegInfo(); - MMI = &MF->getMMI(); MBPI = MBPIin; MBFI = MBFIin; PSI = PSIin; @@ -189,8 +200,7 @@ bool TailDuplicator::tailDuplicateAndUpdate( // Update SSA form. if (!SSAUpdateVRs.empty()) { - for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { - unsigned VReg = SSAUpdateVRs[i]; + for (unsigned VReg : SSAUpdateVRs) { SSAUpdate.Initialize(VReg); // If the original definition is still around, add it as an available @@ -241,8 +251,7 @@ bool TailDuplicator::tailDuplicateAndUpdate( // Eliminate some of the copies inserted by tail duplication to maintain // SSA form. - for (unsigned i = 0, e = Copies.size(); i != e; ++i) { - MachineInstr *Copy = Copies[i]; + for (MachineInstr *Copy : Copies) { if (!Copy->isCopy()) continue; Register Dst = Copy->getOperand(0).getReg(); @@ -565,6 +574,14 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, if (TailBB.isSuccessor(&TailBB)) return false; + // Duplicating a BB which has both multiple predecessors and successors will + // result in a complex CFG and also may cause huge amount of PHI nodes. If we + // want to remove this limitation, we have to address + // https://github.com/llvm/llvm-project/issues/78578. 
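// [Editor's note: with P predecessors and S successors, duplicating the
// tail block into every predecessor yields up to P copies, each ending in
// S edges, i.e. O(P*S) new CFG edges, and each successor's PHIs gain
// roughly P new incoming values. The guard below only fires when both
// factors exceed their limits (16 by default).]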
+ if (TailBB.pred_size() > TailDupPredSize && + TailBB.succ_size() > TailDupSuccSize) + return false; + // Set the limit on the cost to duplicate. When optimizing for size, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp index 4783742a14ad..3cd1bb296d28 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -919,7 +919,7 @@ bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst, // instruction is known to not increase the critical path, then don't match // that pattern. bool TargetInstrInfo::getMachineCombinerPatterns( - MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns, + MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns, bool DoRegPressureReduce) const { bool Commute; if (isReassociationCandidate(Root, Commute)) { @@ -941,13 +941,17 @@ bool TargetInstrInfo::getMachineCombinerPatterns( } /// Return true when a code sequence can improve loop throughput. -bool -TargetInstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const { +bool TargetInstrInfo::isThroughputPattern(unsigned Pattern) const { return false; } +CombinerObjective +TargetInstrInfo::getCombinerObjective(unsigned Pattern) const { + return CombinerObjective::Default; +} + std::pair<unsigned, unsigned> -TargetInstrInfo::getReassociationOpcodes(MachineCombinerPattern Pattern, +TargetInstrInfo::getReassociationOpcodes(unsigned Pattern, const MachineInstr &Root, const MachineInstr &Prev) const { bool AssocCommutRoot = isAssociativeAndCommutative(Root); @@ -1036,7 +1040,7 @@ TargetInstrInfo::getReassociationOpcodes(MachineCombinerPattern Pattern, // Return a pair of boolean flags showing if the new root and new prev operands // must be swapped. See visual example of the rule in // TargetInstrInfo::getReassociationOpcodes. -static std::pair<bool, bool> mustSwapOperands(MachineCombinerPattern Pattern) { +static std::pair<bool, bool> mustSwapOperands(unsigned Pattern) { switch (Pattern) { default: llvm_unreachable("Unexpected pattern"); @@ -1051,13 +1055,34 @@ static std::pair<bool, bool> mustSwapOperands(MachineCombinerPattern Pattern) { } } +void TargetInstrInfo::getReassociateOperandIndices( + const MachineInstr &Root, unsigned Pattern, + std::array<unsigned, 5> &OperandIndices) const { + switch (Pattern) { + case MachineCombinerPattern::REASSOC_AX_BY: + OperandIndices = {1, 1, 1, 2, 2}; + break; + case MachineCombinerPattern::REASSOC_AX_YB: + OperandIndices = {2, 1, 2, 2, 1}; + break; + case MachineCombinerPattern::REASSOC_XA_BY: + OperandIndices = {1, 2, 1, 1, 2}; + break; + case MachineCombinerPattern::REASSOC_XA_YB: + OperandIndices = {2, 2, 2, 1, 1}; + break; + default: + llvm_unreachable("unexpected MachineCombinerPattern"); + } +} + /// Attempt the reassociation transformation to reduce critical path length. /// See the above comments before getMachineCombinerPatterns(). 
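// [Editor's note: a concrete instance of REASSOC_AX_BY, the first pattern
// handled below. Given
//   Prev: B = A op X
//   Root: C = B op Y
// the transform emits
//   NewVR = X op Y
//   C     = A op NewVR
// so a late-arriving A feeds one operation instead of two. In the new
// OperandIndices encoding, element 0 is the operand of Root that Prev
// defines, and elements 1..4 are the operand indices of A, B, X and Y.]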
void TargetInstrInfo::reassociateOps( - MachineInstr &Root, MachineInstr &Prev, - MachineCombinerPattern Pattern, + MachineInstr &Root, MachineInstr &Prev, unsigned Pattern, SmallVectorImpl<MachineInstr *> &InsInstrs, SmallVectorImpl<MachineInstr *> &DelInstrs, + ArrayRef<unsigned> OperandIndices, DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { MachineFunction *MF = Root.getMF(); MachineRegisterInfo &MRI = MF->getRegInfo(); @@ -1065,29 +1090,10 @@ void TargetInstrInfo::reassociateOps( const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI); - // This array encodes the operand index for each parameter because the - // operands may be commuted. Each row corresponds to a pattern value, - // and each column specifies the index of A, B, X, Y. - unsigned OpIdx[4][4] = { - { 1, 1, 2, 2 }, - { 1, 2, 2, 1 }, - { 2, 1, 1, 2 }, - { 2, 2, 1, 1 } - }; - - int Row; - switch (Pattern) { - case MachineCombinerPattern::REASSOC_AX_BY: Row = 0; break; - case MachineCombinerPattern::REASSOC_AX_YB: Row = 1; break; - case MachineCombinerPattern::REASSOC_XA_BY: Row = 2; break; - case MachineCombinerPattern::REASSOC_XA_YB: Row = 3; break; - default: llvm_unreachable("unexpected MachineCombinerPattern"); - } - - MachineOperand &OpA = Prev.getOperand(OpIdx[Row][0]); - MachineOperand &OpB = Root.getOperand(OpIdx[Row][1]); - MachineOperand &OpX = Prev.getOperand(OpIdx[Row][2]); - MachineOperand &OpY = Root.getOperand(OpIdx[Row][3]); + MachineOperand &OpA = Prev.getOperand(OperandIndices[1]); + MachineOperand &OpB = Root.getOperand(OperandIndices[2]); + MachineOperand &OpX = Prev.getOperand(OperandIndices[3]); + MachineOperand &OpY = Root.getOperand(OperandIndices[4]); MachineOperand &OpC = Root.getOperand(0); Register RegA = OpA.getReg(); @@ -1126,11 +1132,62 @@ void TargetInstrInfo::reassociateOps( std::swap(KillX, KillY); } + unsigned PrevFirstOpIdx, PrevSecondOpIdx; + unsigned RootFirstOpIdx, RootSecondOpIdx; + switch (Pattern) { + case MachineCombinerPattern::REASSOC_AX_BY: + PrevFirstOpIdx = OperandIndices[1]; + PrevSecondOpIdx = OperandIndices[3]; + RootFirstOpIdx = OperandIndices[2]; + RootSecondOpIdx = OperandIndices[4]; + break; + case MachineCombinerPattern::REASSOC_AX_YB: + PrevFirstOpIdx = OperandIndices[1]; + PrevSecondOpIdx = OperandIndices[3]; + RootFirstOpIdx = OperandIndices[4]; + RootSecondOpIdx = OperandIndices[2]; + break; + case MachineCombinerPattern::REASSOC_XA_BY: + PrevFirstOpIdx = OperandIndices[3]; + PrevSecondOpIdx = OperandIndices[1]; + RootFirstOpIdx = OperandIndices[2]; + RootSecondOpIdx = OperandIndices[4]; + break; + case MachineCombinerPattern::REASSOC_XA_YB: + PrevFirstOpIdx = OperandIndices[3]; + PrevSecondOpIdx = OperandIndices[1]; + RootFirstOpIdx = OperandIndices[4]; + RootSecondOpIdx = OperandIndices[2]; + break; + default: + llvm_unreachable("unexpected MachineCombinerPattern"); + } + + // Basically BuildMI but doesn't add implicit operands by default. + auto buildMINoImplicit = [](MachineFunction &MF, const MIMetadata &MIMD, + const MCInstrDesc &MCID, Register DestReg) { + return MachineInstrBuilder( + MF, MF.CreateMachineInstr(MCID, MIMD.getDL(), /*NoImpl=*/true)) + .setPCSections(MIMD.getPCSections()) + .addReg(DestReg, RegState::Define); + }; + // Create new instructions for insertion. 
MachineInstrBuilder MIB1 = - BuildMI(*MF, MIMetadata(Prev), TII->get(NewPrevOpc), NewVR) - .addReg(RegX, getKillRegState(KillX)) - .addReg(RegY, getKillRegState(KillY)); + buildMINoImplicit(*MF, MIMetadata(Prev), TII->get(NewPrevOpc), NewVR); + for (const auto &MO : Prev.explicit_operands()) { + unsigned Idx = MO.getOperandNo(); + // Skip the result operand we'd already added. + if (Idx == 0) + continue; + if (Idx == PrevFirstOpIdx) + MIB1.addReg(RegX, getKillRegState(KillX)); + else if (Idx == PrevSecondOpIdx) + MIB1.addReg(RegY, getKillRegState(KillY)); + else + MIB1.add(MO); + } + MIB1.copyImplicitOps(Prev); if (SwapRootOperands) { std::swap(RegA, NewVR); @@ -1138,9 +1195,20 @@ void TargetInstrInfo::reassociateOps( } MachineInstrBuilder MIB2 = - BuildMI(*MF, MIMetadata(Root), TII->get(NewRootOpc), RegC) - .addReg(RegA, getKillRegState(KillA)) - .addReg(NewVR, getKillRegState(KillNewVR)); + buildMINoImplicit(*MF, MIMetadata(Root), TII->get(NewRootOpc), RegC); + for (const auto &MO : Root.explicit_operands()) { + unsigned Idx = MO.getOperandNo(); + // Skip the result operand. + if (Idx == 0) + continue; + if (Idx == RootFirstOpIdx) + MIB2 = MIB2.addReg(RegA, getKillRegState(KillA)); + else if (Idx == RootSecondOpIdx) + MIB2 = MIB2.addReg(NewVR, getKillRegState(KillNewVR)); + else + MIB2 = MIB2.add(MO); + } + MIB2.copyImplicitOps(Root); // Propagate FP flags from the original instructions. // But clear poison-generating flags because those may not be valid now. @@ -1177,32 +1245,24 @@ void TargetInstrInfo::reassociateOps( } void TargetInstrInfo::genAlternativeCodeSequence( - MachineInstr &Root, MachineCombinerPattern Pattern, + MachineInstr &Root, unsigned Pattern, SmallVectorImpl<MachineInstr *> &InsInstrs, SmallVectorImpl<MachineInstr *> &DelInstrs, DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const { MachineRegisterInfo &MRI = Root.getMF()->getRegInfo(); // Select the previous instruction in the sequence based on the input pattern. - MachineInstr *Prev = nullptr; - switch (Pattern) { - case MachineCombinerPattern::REASSOC_AX_BY: - case MachineCombinerPattern::REASSOC_XA_BY: - Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); - break; - case MachineCombinerPattern::REASSOC_AX_YB: - case MachineCombinerPattern::REASSOC_XA_YB: - Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg()); - break; - default: - llvm_unreachable("Unknown pattern for machine combiner"); - } + std::array<unsigned, 5> OperandIndices; + getReassociateOperandIndices(Root, Pattern, OperandIndices); + MachineInstr *Prev = + MRI.getUniqueVRegDef(Root.getOperand(OperandIndices[0]).getReg()); // Don't reassociate if Prev and Root are in different blocks. 
if (Prev->getParent() != Root.getParent()) return; - reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); + reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, OperandIndices, + InstIdxForVirtReg); } MachineTraceStrategy TargetInstrInfo::getMachineCombinerTraceStrategy() const { @@ -1365,7 +1425,7 @@ bool TargetInstrInfo::getMemOperandWithOffset( const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const { SmallVector<const MachineOperand *, 4> BaseOps; - unsigned Width; + LocationSize Width = 0; if (!getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, OffsetIsScalable, Width, TRI) || BaseOps.size() != 1) @@ -1470,8 +1530,7 @@ bool TargetInstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const { // since the split part may not be placed in a contiguous region. It may also // be more beneficial to augment the linker to ensure contiguous layout of // split functions within the same section as specified by the attribute. - if (MF.getFunction().hasSection() || - MF.getFunction().hasFnAttribute("implicit-section-name")) + if (MF.getFunction().hasSection()) return false; // We don't want to proceed further for cold functions @@ -1554,7 +1613,8 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI, SmallVector<uint64_t, 8> Ops; DIExpression::appendOffset(Ops, Offset); Ops.push_back(dwarf::DW_OP_deref_size); - Ops.push_back(MMO->getSize()); + Ops.push_back(MMO->getSize().hasValue() ? MMO->getSize().getValue() + : ~UINT64_C(0)); Expr = DIExpression::prependOpcodes(Expr, Ops); return ParamLoadedValue(*BaseOp, Expr); } @@ -1690,7 +1750,7 @@ std::string TargetInstrInfo::createMIROperandComment( OS << Info; } - return OS.str(); + return Flags; } int FlagIdx = MI.findInlineAsmFlagIdx(OpIdx); @@ -1724,7 +1784,7 @@ std::string TargetInstrInfo::createMIROperandComment( F.getRegMayBeFolded()) OS << " foldable"; - return OS.str(); + return Flags; } TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() = default; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp index 6c58e21b28bb..2be7fc90a0e7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -28,13 +28,13 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h" -#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/RuntimeLibcallUtil.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" @@ -98,131 +98,6 @@ static cl::opt<bool> DisableStrictNodeMutation("disable-strictnode-mutation", cl::desc("Don't mutate strict-float node to a legalize node"), cl::init(false), cl::Hidden); -static bool darwinHasSinCos(const Triple &TT) { - assert(TT.isOSDarwin() && "should be called with darwin triple"); - // Don't bother with 32 bit x86. - if (TT.getArch() == Triple::x86) - return false; - // Macos < 10.9 has no sincos_stret. - if (TT.isMacOSX()) - return !TT.isMacOSXVersionLT(10, 9) && TT.isArch64Bit(); - // iOS < 7.0 has no sincos_stret. 
- if (TT.isiOS()) - return !TT.isOSVersionLT(7, 0); - // Any other darwin such as WatchOS/TvOS is new enough. - return true; -} - -void TargetLoweringBase::InitLibcalls(const Triple &TT) { -#define HANDLE_LIBCALL(code, name) \ - setLibcallName(RTLIB::code, name); -#include "llvm/IR/RuntimeLibcalls.def" -#undef HANDLE_LIBCALL - // Initialize calling conventions to their default. - for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC) - setLibcallCallingConv((RTLIB::Libcall)LC, CallingConv::C); - - // For IEEE quad-precision libcall names, PPC uses "kf" instead of "tf". - if (TT.isPPC()) { - setLibcallName(RTLIB::ADD_F128, "__addkf3"); - setLibcallName(RTLIB::SUB_F128, "__subkf3"); - setLibcallName(RTLIB::MUL_F128, "__mulkf3"); - setLibcallName(RTLIB::DIV_F128, "__divkf3"); - setLibcallName(RTLIB::POWI_F128, "__powikf2"); - setLibcallName(RTLIB::FPEXT_F32_F128, "__extendsfkf2"); - setLibcallName(RTLIB::FPEXT_F64_F128, "__extenddfkf2"); - setLibcallName(RTLIB::FPROUND_F128_F32, "__trunckfsf2"); - setLibcallName(RTLIB::FPROUND_F128_F64, "__trunckfdf2"); - setLibcallName(RTLIB::FPTOSINT_F128_I32, "__fixkfsi"); - setLibcallName(RTLIB::FPTOSINT_F128_I64, "__fixkfdi"); - setLibcallName(RTLIB::FPTOSINT_F128_I128, "__fixkfti"); - setLibcallName(RTLIB::FPTOUINT_F128_I32, "__fixunskfsi"); - setLibcallName(RTLIB::FPTOUINT_F128_I64, "__fixunskfdi"); - setLibcallName(RTLIB::FPTOUINT_F128_I128, "__fixunskfti"); - setLibcallName(RTLIB::SINTTOFP_I32_F128, "__floatsikf"); - setLibcallName(RTLIB::SINTTOFP_I64_F128, "__floatdikf"); - setLibcallName(RTLIB::SINTTOFP_I128_F128, "__floattikf"); - setLibcallName(RTLIB::UINTTOFP_I32_F128, "__floatunsikf"); - setLibcallName(RTLIB::UINTTOFP_I64_F128, "__floatundikf"); - setLibcallName(RTLIB::UINTTOFP_I128_F128, "__floatuntikf"); - setLibcallName(RTLIB::OEQ_F128, "__eqkf2"); - setLibcallName(RTLIB::UNE_F128, "__nekf2"); - setLibcallName(RTLIB::OGE_F128, "__gekf2"); - setLibcallName(RTLIB::OLT_F128, "__ltkf2"); - setLibcallName(RTLIB::OLE_F128, "__lekf2"); - setLibcallName(RTLIB::OGT_F128, "__gtkf2"); - setLibcallName(RTLIB::UO_F128, "__unordkf2"); - } - - // A few names are different on particular architectures or environments. - if (TT.isOSDarwin()) { - // For f16/f32 conversions, Darwin uses the standard naming scheme, instead - // of the gnueabi-style __gnu_*_ieee. - // FIXME: What about other targets? - setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2"); - setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); - - // Some darwins have an optimized __bzero/bzero function. 
- switch (TT.getArch()) { - case Triple::x86: - case Triple::x86_64: - if (TT.isMacOSX() && !TT.isMacOSXVersionLT(10, 6)) - setLibcallName(RTLIB::BZERO, "__bzero"); - break; - case Triple::aarch64: - case Triple::aarch64_32: - setLibcallName(RTLIB::BZERO, "bzero"); - break; - default: - break; - } - - if (darwinHasSinCos(TT)) { - setLibcallName(RTLIB::SINCOS_STRET_F32, "__sincosf_stret"); - setLibcallName(RTLIB::SINCOS_STRET_F64, "__sincos_stret"); - if (TT.isWatchABI()) { - setLibcallCallingConv(RTLIB::SINCOS_STRET_F32, - CallingConv::ARM_AAPCS_VFP); - setLibcallCallingConv(RTLIB::SINCOS_STRET_F64, - CallingConv::ARM_AAPCS_VFP); - } - } - } else { - setLibcallName(RTLIB::FPEXT_F16_F32, "__gnu_h2f_ieee"); - setLibcallName(RTLIB::FPROUND_F32_F16, "__gnu_f2h_ieee"); - } - - if (TT.isGNUEnvironment() || TT.isOSFuchsia() || - (TT.isAndroid() && !TT.isAndroidVersionLT(9))) { - setLibcallName(RTLIB::SINCOS_F32, "sincosf"); - setLibcallName(RTLIB::SINCOS_F64, "sincos"); - setLibcallName(RTLIB::SINCOS_F80, "sincosl"); - setLibcallName(RTLIB::SINCOS_F128, "sincosl"); - setLibcallName(RTLIB::SINCOS_PPCF128, "sincosl"); - } - - if (TT.isPS()) { - setLibcallName(RTLIB::SINCOS_F32, "sincosf"); - setLibcallName(RTLIB::SINCOS_F64, "sincos"); - } - - if (TT.isOSOpenBSD()) { - setLibcallName(RTLIB::STACKPROTECTOR_CHECK_FAIL, nullptr); - } - - if (TT.isOSWindows() && !TT.isOSCygMing()) { - setLibcallName(RTLIB::LDEXP_F32, nullptr); - setLibcallName(RTLIB::LDEXP_F80, nullptr); - setLibcallName(RTLIB::LDEXP_F128, nullptr); - setLibcallName(RTLIB::LDEXP_PPCF128, nullptr); - - setLibcallName(RTLIB::FREXP_F32, nullptr); - setLibcallName(RTLIB::FREXP_F80, nullptr); - setLibcallName(RTLIB::FREXP_F128, nullptr); - setLibcallName(RTLIB::FREXP_PPCF128, nullptr); - } -} - /// GetFPLibCall - Helper to return the right libcall for the given floating /// point type, or UNKNOWN_LIBCALL if there is none. RTLIB::Libcall RTLIB::getFPLibCall(EVT VT, @@ -267,6 +142,9 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { } else if (OpVT == MVT::f80) { if (RetVT == MVT::f128) return FPEXT_F80_F128; + } else if (OpVT == MVT::bf16) { + if (RetVT == MVT::f32) + return FPEXT_BF16_F32; } return UNKNOWN_LIBCALL; @@ -696,41 +574,42 @@ RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { } } -/// InitCmpLibcallCCs - Set default comparison libcall CC. 
-static void InitCmpLibcallCCs(ISD::CondCode *CCs) { - std::fill(CCs, CCs + RTLIB::UNKNOWN_LIBCALL, ISD::SETCC_INVALID); - CCs[RTLIB::OEQ_F32] = ISD::SETEQ; - CCs[RTLIB::OEQ_F64] = ISD::SETEQ; - CCs[RTLIB::OEQ_F128] = ISD::SETEQ; - CCs[RTLIB::OEQ_PPCF128] = ISD::SETEQ; - CCs[RTLIB::UNE_F32] = ISD::SETNE; - CCs[RTLIB::UNE_F64] = ISD::SETNE; - CCs[RTLIB::UNE_F128] = ISD::SETNE; - CCs[RTLIB::UNE_PPCF128] = ISD::SETNE; - CCs[RTLIB::OGE_F32] = ISD::SETGE; - CCs[RTLIB::OGE_F64] = ISD::SETGE; - CCs[RTLIB::OGE_F128] = ISD::SETGE; - CCs[RTLIB::OGE_PPCF128] = ISD::SETGE; - CCs[RTLIB::OLT_F32] = ISD::SETLT; - CCs[RTLIB::OLT_F64] = ISD::SETLT; - CCs[RTLIB::OLT_F128] = ISD::SETLT; - CCs[RTLIB::OLT_PPCF128] = ISD::SETLT; - CCs[RTLIB::OLE_F32] = ISD::SETLE; - CCs[RTLIB::OLE_F64] = ISD::SETLE; - CCs[RTLIB::OLE_F128] = ISD::SETLE; - CCs[RTLIB::OLE_PPCF128] = ISD::SETLE; - CCs[RTLIB::OGT_F32] = ISD::SETGT; - CCs[RTLIB::OGT_F64] = ISD::SETGT; - CCs[RTLIB::OGT_F128] = ISD::SETGT; - CCs[RTLIB::OGT_PPCF128] = ISD::SETGT; - CCs[RTLIB::UO_F32] = ISD::SETNE; - CCs[RTLIB::UO_F64] = ISD::SETNE; - CCs[RTLIB::UO_F128] = ISD::SETNE; - CCs[RTLIB::UO_PPCF128] = ISD::SETNE; +void RTLIB::initCmpLibcallCCs(ISD::CondCode *CmpLibcallCCs) { + std::fill(CmpLibcallCCs, CmpLibcallCCs + RTLIB::UNKNOWN_LIBCALL, + ISD::SETCC_INVALID); + CmpLibcallCCs[RTLIB::OEQ_F32] = ISD::SETEQ; + CmpLibcallCCs[RTLIB::OEQ_F64] = ISD::SETEQ; + CmpLibcallCCs[RTLIB::OEQ_F128] = ISD::SETEQ; + CmpLibcallCCs[RTLIB::OEQ_PPCF128] = ISD::SETEQ; + CmpLibcallCCs[RTLIB::UNE_F32] = ISD::SETNE; + CmpLibcallCCs[RTLIB::UNE_F64] = ISD::SETNE; + CmpLibcallCCs[RTLIB::UNE_F128] = ISD::SETNE; + CmpLibcallCCs[RTLIB::UNE_PPCF128] = ISD::SETNE; + CmpLibcallCCs[RTLIB::OGE_F32] = ISD::SETGE; + CmpLibcallCCs[RTLIB::OGE_F64] = ISD::SETGE; + CmpLibcallCCs[RTLIB::OGE_F128] = ISD::SETGE; + CmpLibcallCCs[RTLIB::OGE_PPCF128] = ISD::SETGE; + CmpLibcallCCs[RTLIB::OLT_F32] = ISD::SETLT; + CmpLibcallCCs[RTLIB::OLT_F64] = ISD::SETLT; + CmpLibcallCCs[RTLIB::OLT_F128] = ISD::SETLT; + CmpLibcallCCs[RTLIB::OLT_PPCF128] = ISD::SETLT; + CmpLibcallCCs[RTLIB::OLE_F32] = ISD::SETLE; + CmpLibcallCCs[RTLIB::OLE_F64] = ISD::SETLE; + CmpLibcallCCs[RTLIB::OLE_F128] = ISD::SETLE; + CmpLibcallCCs[RTLIB::OLE_PPCF128] = ISD::SETLE; + CmpLibcallCCs[RTLIB::OGT_F32] = ISD::SETGT; + CmpLibcallCCs[RTLIB::OGT_F64] = ISD::SETGT; + CmpLibcallCCs[RTLIB::OGT_F128] = ISD::SETGT; + CmpLibcallCCs[RTLIB::OGT_PPCF128] = ISD::SETGT; + CmpLibcallCCs[RTLIB::UO_F32] = ISD::SETNE; + CmpLibcallCCs[RTLIB::UO_F64] = ISD::SETNE; + CmpLibcallCCs[RTLIB::UO_F128] = ISD::SETNE; + CmpLibcallCCs[RTLIB::UO_PPCF128] = ISD::SETNE; } /// NOTE: The TargetMachine owns TLOF. -TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { +TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) + : TM(tm), Libcalls(TM.getTargetTriple()) { initActions(); // Perform these initializations only once. @@ -763,10 +642,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { MinCmpXchgSizeInBits = 0; SupportsUnalignedAtomics = false; - std::fill(std::begin(LibcallRoutineNames), std::end(LibcallRoutineNames), nullptr); - - InitLibcalls(TM.getTargetTriple()); - InitCmpLibcallCCs(CmpLibcallCCs); + RTLIB::initCmpLibcallCCs(CmpLibcallCCs); } void TargetLoweringBase::initActions() { @@ -780,6 +656,12 @@ void TargetLoweringBase::initActions() { std::fill(std::begin(TargetDAGCombineArray), std::end(TargetDAGCombineArray), 0); + // Let extending atomic loads be unsupported by default. 
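// [Editor's note: Expand for an atomic extending load roughly means the
// extend stays a separate node; ISel will not fold (zext (atomic_load x))
// into a single extending atomic load. A target whose atomic i32 loads
// naturally zero-extend to i64 could opt back in from its TargetLowering
// constructor, e.g. (hypothetical target, same API as the loop below):
//   setAtomicLoadExtAction(ISD::ZEXTLOAD, MVT::i64, MVT::i32, Legal);]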
+ for (MVT ValVT : MVT::all_valuetypes()) + for (MVT MemVT : MVT::all_valuetypes()) + setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, ValVT, MemVT, + Expand); + // We're somewhat special casing MVT::i2 and MVT::i4. Ideally we want to // remove this and targets should individually set these types if not legal. for (ISD::NodeType NT : enum_seq(ISD::DELETED_NODE, ISD::BUILTIN_OP_END, @@ -860,6 +742,9 @@ void TargetLoweringBase::initActions() { setOperationAction({ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}, VT, Expand); + // [US]CMP default to expand + setOperationAction({ISD::UCMP, ISD::SCMP}, VT, Expand); + // Halving adds setOperationAction( {ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS, ISD::AVGCEILU}, VT, @@ -883,7 +768,8 @@ void TargetLoweringBase::initActions() { setOperationAction( {ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG, ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG, - ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT}, + ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::FTAN, ISD::FACOS, + ISD::FASIN, ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH}, VT, Expand); // Constrained floating-point operations default to expand. @@ -907,6 +793,9 @@ void TargetLoweringBase::initActions() { // Named vector shuffles default to expand. setOperationAction(ISD::VECTOR_SPLICE, VT, Expand); + // Only some target support this vector operation. Most need to expand it. + setOperationAction(ISD::VECTOR_COMPRESS, VT, Expand); + // VP operations default to expand. #define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) \ setOperationAction(ISD::SDOPC, VT, Expand); @@ -924,6 +813,9 @@ void TargetLoweringBase::initActions() { // Most targets also ignore the @llvm.readcyclecounter intrinsic. setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Expand); + // Most targets also ignore the @llvm.readsteadycounter intrinsic. + setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Expand); + // ConstantFP nodes default to expand. Targets can either change this to // Legal, in which case all fp constants are legal, or use isFPImmLegal() // to optimize expansions for certain constants. @@ -932,12 +824,17 @@ void TargetLoweringBase::initActions() { Expand); // These library functions default to expand. - setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP, - ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, ISD::FNEARBYINT, - ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, ISD::LROUND, - ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN}, + setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, + ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, + ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, + ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT, + ISD::FROUNDEVEN, ISD::FTAN, ISD::FACOS, ISD::FASIN, + ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH}, {MVT::f32, MVT::f64, MVT::f128}, Expand); + setOperationAction({ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN, ISD::FCOSH, + ISD::FSINH, ISD::FTANH}, + MVT::f16, Promote); // Default ISD::TRAP to expand (which turns it into abort). setOperationAction(ISD::TRAP, MVT::Other, Expand); @@ -955,6 +852,10 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::SET_FPMODE, VT, Expand); } setOperationAction(ISD::RESET_FPMODE, MVT::Other, Expand); + + // This one by default will call __clear_cache unless the target + // wants something different. 
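// [Editor's note: the LibCall action lowers a node to a call into the
// compiler runtime rather than expanding it inline; for CLEAR_CACHE the
// callee is __clear_cache(void *Begin, void *End) from compiler-rt or
// libgcc, which invalidates the instruction cache over a range of
// freshly written (e.g. JITed) code.]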
+ setOperationAction(ISD::CLEAR_CACHE, MVT::Other, LibCall); } MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL, @@ -962,13 +863,12 @@ MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL, return MVT::getIntegerVT(DL.getPointerSizeInBits(0)); } -EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, const DataLayout &DL, - bool LegalTypes) const { +EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, + const DataLayout &DL) const { assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); if (LHSTy.isVector()) return LHSTy; - MVT ShiftVT = - LegalTypes ? getScalarShiftAmountTy(DL, LHSTy) : getPointerTy(DL); + MVT ShiftVT = getScalarShiftAmountTy(DL, LHSTy); // If any possible shift value won't fit in the prefered type, just use // something safe. Assume it will be legalized when the shift is expanded. if (ShiftVT.getSizeInBits() < Log2_32_Ceil(LHSTy.getSizeInBits())) @@ -996,6 +896,24 @@ bool TargetLoweringBase::isFreeAddrSpaceCast(unsigned SrcAS, return TM.isNoopAddrSpaceCast(SrcAS, DestAS); } +unsigned TargetLoweringBase::getBitWidthForCttzElements( + Type *RetTy, ElementCount EC, bool ZeroIsPoison, + const ConstantRange *VScaleRange) const { + // Find the smallest "sensible" element type to use for the expansion. + ConstantRange CR(APInt(64, EC.getKnownMinValue())); + if (EC.isScalable()) + CR = CR.umul_sat(*VScaleRange); + + if (ZeroIsPoison) + CR = CR.subtract(APInt(64, 1)); + + unsigned EltWidth = RetTy->getScalarSizeInBits(); + EltWidth = std::min(EltWidth, (unsigned)CR.getActiveBits()); + EltWidth = std::max(llvm::bit_ceil(EltWidth), (unsigned)8); + + return EltWidth; +} + void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) { // If the command-line option was specified, ignore this request. if (!JumpIsExpensiveOverride.getNumOccurrences()) @@ -1332,9 +1250,6 @@ TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI, /// this allows us to compute derived properties we expose. void TargetLoweringBase::computeRegisterProperties( const TargetRegisterInfo *TRI) { - static_assert(MVT::VALUETYPE_SIZE <= MVT::MAX_ALLOWED_VALUETYPE, - "Too many value types for ValueTypeActions to hold!"); - // Everything defaults to needing one register. for (unsigned i = 0; i != MVT::VALUETYPE_SIZE; ++i) { NumRegistersForVT[i] = 1; @@ -1430,15 +1345,20 @@ void TargetLoweringBase::computeRegisterProperties( // conversions). if (!isTypeLegal(MVT::f16)) { // Allow targets to control how we legalize half. - if (softPromoteHalfType()) { + bool SoftPromoteHalfType = softPromoteHalfType(); + bool UseFPRegsForHalfType = !SoftPromoteHalfType || useFPRegsForHalfType(); + + if (!UseFPRegsForHalfType) { NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16]; RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16]; - TransformToType[MVT::f16] = MVT::f32; - ValueTypeActions.setTypeAction(MVT::f16, TypeSoftPromoteHalf); } else { NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32]; RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32]; - TransformToType[MVT::f16] = MVT::f32; + } + TransformToType[MVT::f16] = MVT::f32; + if (SoftPromoteHalfType) { + ValueTypeActions.setTypeAction(MVT::f16, TypeSoftPromoteHalf); + } else { ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat); } } @@ -1733,15 +1653,8 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType, else if (attr.hasRetAttr(Attribute::ZExt)) ExtendKind = ISD::ZERO_EXTEND; - // FIXME: C calling convention requires the return type to be promoted to - // at least 32-bit. 
But this is not necessary for non-C calling - // conventions. The frontend should mark functions whose return values - // require promoting with signext or zeroext attributes. - if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { - MVT MinVT = TLI.getRegisterType(MVT::i32); - if (VT.bitsLT(MinVT)) - VT = MinVT; - } + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) + VT = TLI.getTypeForExtReturn(ReturnType->getContext(), VT, ExtendKind); unsigned NumParts = TLI.getNumRegistersForCallingConv(ReturnType->getContext(), CC, VT); @@ -1759,8 +1672,16 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType, else if (attr.hasRetAttr(Attribute::ZExt)) Flags.setZExt(); - for (unsigned i = 0; i < NumParts; ++i) - Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isfixed=*/true, 0, 0)); + for (unsigned i = 0; i < NumParts; ++i) { + ISD::ArgFlagsTy OutFlags = Flags; + if (NumParts > 1 && i == 0) + OutFlags.setSplit(); + else if (i == NumParts - 1 && i != 0) + OutFlags.setSplitEnd(); + + Outs.push_back( + ISD::OutputArg(OutFlags, PartVT, VT, /*isfixed=*/true, 0, 0)); + } } } @@ -1967,6 +1888,10 @@ bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL, // The default implementation of this implements a conservative RISCy, r+r and // r+i addr mode. + // Scalable offsets not supported + if (AM.ScalableOffset) + return false; + // Allows a sign-extended 16-bit immediate field. if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) return false; @@ -2025,7 +1950,8 @@ void TargetLoweringBase::insertSSPDeclarations(Module &M) const { // FreeBSD has "__stack_chk_guard" defined externally on libc.so if (M.getDirectAccessExternalData() && !TM.getTargetTriple().isWindowsGNUEnvironment() && - !(TM.getTargetTriple().isPPC64() && TM.getTargetTriple().isOSFreeBSD()) && + !(TM.getTargetTriple().isPPC64() && + TM.getTargetTriple().isOSFreeBSD()) && (!TM.getTargetTriple().isOSDarwin() || TM.getRelocationModel() == Reloc::Static)) GV->setDSOLocal(true); @@ -2186,7 +2112,7 @@ static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) { if (IsDisabled) RecipType = RecipType.substr(1); - if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize)) + if (RecipType == VTName || RecipType == VTNameNoSize) return IsDisabled ? 
TargetLoweringBase::ReciprocalEstimate::Disabled : TargetLoweringBase::ReciprocalEstimate::Enabled; } @@ -2236,7 +2162,7 @@ static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) { continue; RecipType = RecipType.substr(0, RefPos); - if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize)) + if (RecipType == VTName || RecipType == VTNameNoSize) return RefSteps; } @@ -2292,7 +2218,7 @@ bool TargetLoweringBase::isLoadBitCastBeneficial( } void TargetLoweringBase::finalizeLowering(MachineFunction &MF) const { - MF.getRegInfo().freezeReservedRegs(MF); + MF.getRegInfo().freezeReservedRegs(); } MachineMemOperand::Flags TargetLoweringBase::getLoadMemOperandFlags( diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index a69b71451736..0d3e4ba5662e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -43,6 +43,7 @@ #include "llvm/IR/PseudoProbe.h" #include "llvm/IR/Type.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCAsmInfoDarwin.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionCOFF.h" @@ -212,13 +213,11 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, // identify N64 from just a triple. TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; - // We don't support PC-relative LSDA references in GAS so we use the default - // DW_EH_PE_absptr for those. // FreeBSD must be explicit about the data size and using pcrel since it's // assembler/linker won't do the automatic conversion that the Linux tools // do. - if (TgtM.getTargetTriple().isOSFreeBSD()) { + if (isPositionIndependent() || TgtM.getTargetTriple().isOSFreeBSD()) { PersonalityEncoding |= dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; } @@ -479,7 +478,7 @@ static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { Name == ".llvmbc" || Name == ".llvmcmd") return SectionKind::getMetadata(); - if (Name.empty() || Name[0] != '.') return K; + if (!Name.starts_with(".")) return K; // Default implementation based on some magic section names. if (Name == ".bss" || Name.starts_with(".bss.") || @@ -525,6 +524,8 @@ static unsigned getELFSectionType(StringRef Name, SectionKind K) { if (hasPrefix(Name, ".llvm.offloading")) return ELF::SHT_LLVM_OFFLOADING; + if (Name == ".llvm.lto") + return ELF::SHT_LLVM_LTO; if (K.isBSS() || K.isThreadBSS()) return ELF::SHT_NOBITS; @@ -635,21 +636,22 @@ static SmallString<128> getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind, Mangler &Mang, const TargetMachine &TM, unsigned EntrySize, bool UniqueSectionName) { - SmallString<128> Name; + SmallString<128> Name = + getSectionPrefixForGlobal(Kind, TM.isLargeGlobalValue(GO)); if (Kind.isMergeableCString()) { // We also need alignment here. // FIXME: this is getting the alignment of the character, not the // alignment of the global! 
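// [Editor's note: the rewrites below use GlobalValue::getDataLayout(), a
// newer convenience accessor equivalent to getParent()->getDataLayout();
// the same mechanical change appears throughout this file.]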
- Align Alignment = GO->getParent()->getDataLayout().getPreferredAlign( + Align Alignment = GO->getDataLayout().getPreferredAlign( cast<GlobalVariable>(GO)); - std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + "."; - Name = SizeSpec + utostr(Alignment.value()); + Name += ".str"; + Name += utostr(EntrySize); + Name += "."; + Name += utostr(Alignment.value()); } else if (Kind.isMergeableConst()) { - Name = ".rodata.cst"; + Name += ".cst"; Name += utostr(EntrySize); - } else { - Name = getSectionPrefixForGlobal(Kind, TM.isLargeGlobalValue(GO)); } bool HasPrefix = false; @@ -732,15 +734,20 @@ calcUniqueIDUpdateFlagsAndSize(const GlobalObject *GO, StringRef SectionName, Ctx.isELFGenericMergeableSection(SectionName); // If this is the first ocurrence of this section name, treat it as the // generic section - if (!SymbolMergeable && !SeenSectionNameBefore) - return MCContext::GenericSectionID; + if (!SymbolMergeable && !SeenSectionNameBefore) { + if (TM.getSeparateNamedSections()) + return NextUniqueID++; + else + return MCContext::GenericSectionID; + } // Symbols must be placed into sections with compatible entry sizes. Generate // unique sections for symbols that have not been assigned to compatible // sections. const auto PreviousID = Ctx.getELFUniqueIDForEntsize(SectionName, Flags, EntrySize); - if (PreviousID) + if (PreviousID && (!TM.getSeparateNamedSections() || + *PreviousID == MCContext::GenericSectionID)) return *PreviousID; // If the user has specified the same section name as would be created @@ -796,10 +803,6 @@ static MCSection *selectExplicitSectionGlobal( SectionName = Attrs.getAttribute("data-section").getValueAsString(); } } - const Function *F = dyn_cast<Function>(GO); - if (F && F->hasFnAttribute("implicit-section-name")) { - SectionName = F->getFnAttribute("implicit-section-name").getValueAsString(); - } // Infer section flags from the section name if we can. Kind = getELFKindForNamedSection(SectionName, Kind); @@ -933,7 +936,7 @@ MCSection *TargetLoweringObjectFileELF::getUniqueSectionForFunction( unsigned Flags = getELFSectionFlags(Kind); // If the function's section names is pre-determined via pragma or a // section attribute, call selectExplicitSectionGlobal. - if (F.hasSection() || F.hasFnAttribute("implicit-section-name")) + if (F.hasSection()) return selectExplicitSectionGlobal( &F, Kind, TM, getContext(), getMangler(), NextUniqueID, Used.count(&F), /* ForceUnique = */true); @@ -1034,7 +1037,7 @@ MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock( // name, or a unique ID for the section. SmallString<128> Name; StringRef FunctionSectionName = MBB.getParent()->getSection()->getName(); - if (FunctionSectionName.equals(".text") || + if (FunctionSectionName == ".text" || FunctionSectionName.starts_with(".text.")) { // Function is in a regular .text section. StringRef FunctionName = MBB.getParent()->getName(); @@ -1297,11 +1300,6 @@ MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( } } - const Function *F = dyn_cast<Function>(GO); - if (F && F->hasFnAttribute("implicit-section-name")) { - SectionName = F->getFnAttribute("implicit-section-name").getValueAsString(); - } - // Parse the section specifier and create it if valid. StringRef Segment, Section; unsigned TAA = 0, StubSize = 0; @@ -1362,7 +1360,7 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal( // FIXME: Alignment check should be handled by section classifier. 
if (Kind.isMergeable1ByteCString() && - GO->getParent()->getDataLayout().getPreferredAlign( + GO->getDataLayout().getPreferredAlign( cast<GlobalVariable>(GO)) < Align(32)) return CStringSection; @@ -1370,7 +1368,7 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal( // externally visible label, this runs into issues with certain linker // versions. if (Kind.isMergeable2ByteCString() && !GO->hasExternalLinkage() && - GO->getParent()->getDataLayout().getPreferredAlign( + GO->getDataLayout().getPreferredAlign( cast<GlobalVariable>(GO)) < Align(32)) return UStringSection; @@ -1558,7 +1556,7 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo, const MCSection &Section) { - if (!AsmInfo.isSectionAtomizableBySymbols(Section)) + if (!MCAsmInfoDarwin::isSectionAtomizableBySymbols(Section)) return true; // FIXME: we should be able to use private labels for sections that can't be @@ -1699,7 +1697,7 @@ MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( } } - return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName, + return getContext().getCOFFSection(Name, Characteristics, COMDATSymName, Selection); } @@ -1758,12 +1756,12 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( if (getContext().getTargetTriple().isWindowsGNUEnvironment()) raw_svector_ostream(Name) << '$' << ComdatGV->getName(); - return getContext().getCOFFSection(Name, Characteristics, Kind, - COMDATSymName, Selection, UniqueID); + return getContext().getCOFFSection(Name, Characteristics, COMDATSymName, + Selection, UniqueID); } else { SmallString<256> TmpData; getMangler().getNameWithPrefix(TmpData, GO, /*CannotUsePrivateLabel=*/true); - return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData, + return getContext().getCOFFSection(Name, Characteristics, TmpData, Selection, UniqueID); } } @@ -1820,9 +1818,9 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable( Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; unsigned UniqueID = NextUniqueID++; - return getContext().getCOFFSection( - SecName, Characteristics, Kind, COMDATSymName, - COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID); + return getContext().getCOFFSection(SecName, Characteristics, COMDATSymName, + COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, + UniqueID); } bool TargetLoweringObjectFileCOFF::shouldPutJumpTableInFunctionSection( @@ -1849,10 +1847,8 @@ void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer, GetObjCImageInfo(M, Version, Flags, Section); if (!Section.empty()) { auto &C = getContext(); - auto *S = C.getCOFFSection(Section, - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getReadOnly()); + auto *S = C.getCOFFSection(Section, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ); Streamer.switchSection(S); Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); Streamer.emitInt32(Version); @@ -1932,21 +1928,17 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, if (T.isWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) { StaticCtorSection = Ctx.getCOFFSection(".CRT$XCU", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getReadOnly()); + COFF::IMAGE_SCN_MEM_READ); StaticDtorSection = Ctx.getCOFFSection(".CRT$XTX", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getReadOnly()); + COFF::IMAGE_SCN_MEM_READ); } else { StaticCtorSection = 
Ctx.getCOFFSection( ".ctors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE, - SectionKind::getData()); + COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE); StaticDtorSection = Ctx.getCOFFSection( ".dtors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE, - SectionKind::getData()); + COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE); } } @@ -1984,8 +1976,7 @@ static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx, if (AddPrioritySuffix) OS << format("%05u", Priority); MCSectionCOFF *Sec = Ctx.getCOFFSection( - Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, - SectionKind::getReadOnly()); + Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ); return Ctx.getAssociativeCOFFSection(Sec, KeySym, 0); } @@ -1996,8 +1987,7 @@ static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx, return Ctx.getAssociativeCOFFSection( Ctx.getCOFFSection(Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, - SectionKind::getData()), + COFF::IMAGE_SCN_MEM_WRITE), KeySym, 0); } @@ -2115,7 +2105,7 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForConstant( } if (!COMDATSymName.empty()) - return getContext().getCOFFSection(".rdata", Characteristics, Kind, + return getContext().getCOFFSection(".rdata", Characteristics, COMDATSymName, COFF::IMAGE_COMDAT_SELECT_ANY); } @@ -2141,7 +2131,7 @@ static const Comdat *getWasmComdat(const GlobalValue *GV) { return C; } -static unsigned getWasmSectionFlags(SectionKind K) { +static unsigned getWasmSectionFlags(SectionKind K, bool Retain) { unsigned Flags = 0; if (K.isThreadLocal()) @@ -2150,11 +2140,22 @@ static unsigned getWasmSectionFlags(SectionKind K) { if (K.isMergeableCString()) Flags |= wasm::WASM_SEG_FLAG_STRINGS; + if (Retain) + Flags |= wasm::WASM_SEG_FLAG_RETAIN; + // TODO(sbc): Add suport for K.isMergeableConst() return Flags; } +void TargetLoweringObjectFileWasm::getModuleMetadata(Module &M) { + SmallVector<GlobalValue *, 4> Vec; + collectUsedGlobalVariables(M, Vec, false); + for (GlobalValue *GV : Vec) + if (auto *GO = dyn_cast<GlobalObject>(GV)) + Used.insert(GO); +} + MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { // We don't support explict section names for functions in the wasm object @@ -2178,16 +2179,18 @@ MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal( Group = C->getName(); } - unsigned Flags = getWasmSectionFlags(Kind); + unsigned Flags = getWasmSectionFlags(Kind, Used.count(GO)); MCSectionWasm *Section = getContext().getWasmSection( Name, Kind, Flags, Group, MCContext::GenericSectionID); return Section; } -static MCSectionWasm *selectWasmSectionForGlobal( - MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang, - const TargetMachine &TM, bool EmitUniqueSection, unsigned *NextUniqueID) { +static MCSectionWasm * +selectWasmSectionForGlobal(MCContext &Ctx, const GlobalObject *GO, + SectionKind Kind, Mangler &Mang, + const TargetMachine &TM, bool EmitUniqueSection, + unsigned *NextUniqueID, bool Retain) { StringRef Group = ""; if (const Comdat *C = getWasmComdat(GO)) { Group = C->getName(); @@ -2212,7 +2215,7 @@ static MCSectionWasm *selectWasmSectionForGlobal( (*NextUniqueID)++; } - unsigned Flags = getWasmSectionFlags(Kind); + unsigned Flags = getWasmSectionFlags(Kind, Retain); return Ctx.getWasmSection(Name, 
Kind, Flags, Group, UniqueID); } @@ -2230,9 +2233,11 @@ MCSection *TargetLoweringObjectFileWasm::SelectSectionForGlobal( else EmitUniqueSection = TM.getDataSections(); EmitUniqueSection |= GO->hasComdat(); + bool Retain = Used.count(GO); + EmitUniqueSection |= Retain; return selectWasmSectionForGlobal(getContext(), GO, Kind, getMangler(), TM, - EmitUniqueSection, &NextUniqueID); + EmitUniqueSection, &NextUniqueID, Retain); } bool TargetLoweringObjectFileWasm::shouldPutJumpTableInFunctionSection( @@ -2318,7 +2323,7 @@ bool TargetLoweringObjectFileXCOFF::ShouldSetSSPCanaryBitInTB( MCSymbol * TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(const MachineFunction *MF) { - MCSymbol *EHInfoSym = MF->getMMI().getContext().getOrCreateSymbol( + MCSymbol *EHInfoSym = MF->getContext().getOrCreateSymbol( "__ehinfo." + Twine(MF->getFunctionNumber())); cast<MCSymbolXCOFF>(EHInfoSym)->setEHInfo(); return EHInfoSym; @@ -2402,6 +2407,15 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference( SmallString<128> Name; getNameWithPrefix(Name, GO, TM); + // AIX TLS local-dynamic does not need the external reference for the + // "_$TLSML" symbol. + if (GO->getThreadLocalMode() == GlobalVariable::LocalDynamicTLSModel && + GO->hasName() && GO->getName() == "_$TLSML") { + return getContext().getXCOFFSection( + Name, SectionKind::getData(), + XCOFF::CsectProperties(XCOFF::XMC_TC, XCOFF::XTY_SD)); + } + XCOFF::StorageMappingClass SMC = isa<Function>(GO) ? XCOFF::XMC_DS : XCOFF::XMC_UA; if (GO->isThreadLocal()) @@ -2424,8 +2438,10 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal( if (GVar->hasAttribute("toc-data")) { SmallString<128> Name; getNameWithPrefix(Name, GO, TM); + XCOFF::SymbolType symType = + GO->hasCommonLinkage() ? XCOFF::XTY_CM : XCOFF::XTY_SD; return getContext().getXCOFFSection( - Name, Kind, XCOFF::CsectProperties(XCOFF::XMC_TD, XCOFF::XTY_SD), + Name, Kind, XCOFF::CsectProperties(XCOFF::XMC_TD, symType), /* MultiSymbolsAllowed*/ true); } @@ -2653,17 +2669,34 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForFunctionDescriptor( MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry( const MCSymbol *Sym, const TargetMachine &TM) const { - // Use TE storage-mapping class when large code model is enabled so that - // the chance of needing -bbigtoc is decreased. Also, the toc-entry for - // EH info is never referenced directly using instructions so it can be - // allocated with TE storage-mapping class. + const XCOFF::StorageMappingClass SMC = [](const MCSymbol *Sym, + const TargetMachine &TM) { + const MCSymbolXCOFF *XSym = cast<MCSymbolXCOFF>(Sym); + + // The "_$TLSML" symbol for TLS local-dynamic mode requires XMC_TC, + // otherwise the AIX assembler will complain. + if (XSym->getSymbolTableName() == "_$TLSML") + return XCOFF::XMC_TC; + + // Use large code model toc entries for ehinfo symbols as they are + // never referenced directly. The runtime loads their TOC entry + // addresses from the trace-back table. + if (XSym->isEHInfo()) + return XCOFF::XMC_TE; + + // If the symbol does not have a code model specified use the module value. + if (!XSym->hasPerSymbolCodeModel()) + return TM.getCodeModel() == CodeModel::Large ? XCOFF::XMC_TE + : XCOFF::XMC_TC; + + return XSym->getPerSymbolCodeModel() == MCSymbolXCOFF::CM_Large + ? 
XCOFF::XMC_TE + : XCOFF::XMC_TC; + }(Sym, TM); + return getContext().getXCOFFSection( cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), SectionKind::getData(), - XCOFF::CsectProperties((TM.getCodeModel() == CodeModel::Large || - cast<MCSymbolXCOFF>(Sym)->isEHInfo()) - ? XCOFF::XMC_TE - : XCOFF::XMC_TC, - XCOFF::XTY_SD)); + XCOFF::CsectProperties(SMC, XCOFF::XTY_SD)); } MCSection *TargetLoweringObjectFileXCOFF::getSectionForLSDA( @@ -2693,8 +2726,7 @@ MCSection *TargetLoweringObjectFileGOFF::getExplicitSectionGlobal( MCSection *TargetLoweringObjectFileGOFF::getSectionForLSDA( const Function &F, const MCSymbol &FnSym, const TargetMachine &TM) const { std::string Name = ".gcc_exception_table." + F.getName().str(); - return getContext().getGOFFSection(Name, SectionKind::getData(), nullptr, - nullptr); + return getContext().getGOFFSection(Name, SectionKind::getData(), nullptr, 0); } MCSection *TargetLoweringObjectFileGOFF::SelectSectionForGlobal( @@ -2702,7 +2734,7 @@ MCSection *TargetLoweringObjectFileGOFF::SelectSectionForGlobal( auto *Symbol = TM.getSymbol(GO); if (Kind.isBSS()) return getContext().getGOFFSection(Symbol->getName(), SectionKind::getBSS(), - nullptr, nullptr); + nullptr, 0); return getContext().getObjectFileInfo()->getTextSection(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp index af5d10103f78..5bf1d265092f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/TargetFrameLowering.h" @@ -21,7 +22,7 @@ using namespace llvm; /// DisableFramePointerElim - This returns true if frame pointer elimination /// optimization should be disabled for the given machine function. bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const { - // Check to see if the target want to forcably keep frame pointer. + // Check to see if the target wants to forcibly keep the frame pointer. if (MF.getSubtarget().getFrameLowering()->keepFramePointer(MF)) return true; @@ -34,11 +35,27 @@ bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const { return true; if (FP == "non-leaf") return MF.getFrameInfo().hasCalls(); - if (FP == "none") + if (FP == "none" || FP == "reserved") return false; llvm_unreachable("unknown frame pointer flag"); } +bool TargetOptions::FramePointerIsReserved(const MachineFunction &MF) const { + // Check to see if the target wants to forcibly keep the frame pointer. + if (MF.getSubtarget().getFrameLowering()->keepFramePointer(MF)) + return true; + + const Function &F = MF.getFunction(); + + if (!F.hasFnAttribute("frame-pointer")) + return false; + + StringRef FP = F.getFnAttribute("frame-pointer").getValueAsString(); + return StringSwitch<bool>(FP) + .Cases("all", "non-leaf", "reserved", true) + .Case("none", false); +} + /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume /// that the rounding mode of the FPU can change from its default.
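The TargetOptionsImpl hunk above teaches both queries about a new "reserved" value of the "frame-pointer" attribute: frame-pointer elimination stays allowed, but the register is still treated as reserved. A standalone sketch of the two predicates over the four recognized values, assuming only the mapping visible in the hunk:

#include <cassert>
#include <string_view>

// Sketch of the two queries above. The real code first consults the target
// (keepFramePointer) and bails if the attribute is absent; this sketch only
// models the attribute-value mapping.
static bool disableFPElim(std::string_view FP, bool HasCalls) {
  if (FP == "all") return true;
  if (FP == "non-leaf") return HasCalls;               // keep FP only when non-leaf
  if (FP == "none" || FP == "reserved") return false;  // elimination allowed...
  assert(false && "unknown frame pointer flag");
  return false;
}

static bool fpIsReserved(std::string_view FP) {
  // ...but "reserved" still keeps the register out of the allocator's hands.
  return FP == "all" || FP == "non-leaf" || FP == "reserved";
}

int main() {
  assert(!disableFPElim("reserved", true) && fpIsReserved("reserved"));
  assert(disableFPElim("all", false) && fpIsReserved("all"));
  assert(!disableFPElim("none", true) && !fpIsReserved("none"));
}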
bool TargetOptions::HonorSignDependentRoundingFPMath() const { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp index 599ca4818904..3658e8320a0c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -205,6 +205,10 @@ static cl::opt<bool> MISchedPostRA( static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden, cl::desc("Run live interval analysis earlier in the pipeline")); +static cl::opt<bool> DisableReplaceWithVecLib( + "disable-replace-with-vec-lib", cl::Hidden, + cl::desc("Disable replace with vector math call pass")); + /// Option names for limiting the codegen pipeline. /// Those are used in error reporting and we didn't want /// to duplicate their names all over the place. @@ -856,7 +860,7 @@ void TargetPassConfig::addIRPasses() { if (getOptLevel() != CodeGenOptLevel::None && !DisableConstantHoisting) addPass(createConstantHoistingPass()); - if (getOptLevel() != CodeGenOptLevel::None) + if (getOptLevel() != CodeGenOptLevel::None && !DisableReplaceWithVecLib) addPass(createReplaceWithVeclibLegacyPass()); if (getOptLevel() != CodeGenOptLevel::None && !DisablePartialLibcallInlining) @@ -867,6 +871,9 @@ void TargetPassConfig::addIRPasses() { // passes since it emits those kinds of intrinsics. addPass(createExpandVectorPredicationPass()); + // Instrument function entry after all inlining. + addPass(createPostInlineEntryExitInstrumenterPass()); + // Add scalarization of target's unsupported masked memory intrinsics pass. // the unsupported intrinsic will be replaced with a chain of basic blocks, // that stores/loads element one-by-one if the appropriate mask bit is set. @@ -918,7 +925,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { // on catchpads and cleanuppads because it does not outline them into // funclets. Catchswitch blocks are not lowered in SelectionDAG, so we // should remove PHIs there. - addPass(createWinEHPass(/*DemoteCatchSwitchPHIOnly=*/false)); + addPass(createWinEHPass(/*DemoteCatchSwitchPHIOnly=*/true)); addPass(createWasmEHPass()); break; case ExceptionHandling::None: @@ -1221,19 +1228,13 @@ void TargetPassConfig::addMachinePasses() { addPass(createMIRAddFSDiscriminatorsPass( sampleprof::FSDiscriminatorPass::PassLast)); + bool NeedsBBSections = + TM->getBBSectionsType() != llvm::BasicBlockSection::None; // Machine function splitter uses the basic block sections feature. Both - // cannot be enabled at the same time. Basic block sections takes precedence. - // FIXME: In principle, BasicBlockSection::Labels and splitting can used - // together. Update this check once we have addressed any issues. - if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) { - if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) { - addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass( - TM->getBBSectionsFuncListBuf())); - addPass(llvm::createBasicBlockPathCloningPass()); - } - addPass(llvm::createBasicBlockSectionsPass()); - } else if (TM->Options.EnableMachineFunctionSplitter || - EnableMachineFunctionSplitter) { + // cannot be enabled at the same time. We do not apply machine function + // splitter if -basic-block-sections is requested. 
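The TargetPassConfig hunk continuing below gates two late passes on two bits: whether basic-block sections were requested (which suppresses the machine function splitter) and whether the BB address map alone forces the BasicBlockSections pass to run. A toy model of that ordering; the flag and function names here are illustrative, not LLVM's:

#include <cstdio>

enum class BBSectionsType { None, List, Labels };

// Toy model of the pass-pipeline gating in the hunk below.
static void addLateMachinePasses(BBSectionsType BBSections, bool BBAddrMap,
                                 bool SplitterRequested) {
  bool NeedsBBSections = BBSections != BBSectionsType::None;

  // The splitter and BB sections both slice functions up; only one may run,
  // and BB sections wins.
  if (!NeedsBBSections && SplitterRequested)
    std::puts("run MachineFunctionSplitter");

  // BB sections (plus the profile reader and path cloning for the List
  // flavor) also run when only the BB address map is requested.
  if (NeedsBBSections || BBAddrMap) {
    if (BBSections == BBSectionsType::List) {
      std::puts("run BasicBlockSectionsProfileReader");
      std::puts("run BasicBlockPathCloning");
    }
    std::puts("run BasicBlockSections");
  }
}

int main() {
  addLateMachinePasses(BBSectionsType::None, /*BBAddrMap=*/true,
                       /*SplitterRequested=*/true);
}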
+ if (!NeedsBBSections && (TM->Options.EnableMachineFunctionSplitter || + EnableMachineFunctionSplitter)) { const std::string ProfileFile = getFSProfileFile(TM); if (!ProfileFile.empty()) { if (EnableFSDiscriminator) { @@ -1250,6 +1251,16 @@ void TargetPassConfig::addMachinePasses() { } addPass(createMachineFunctionSplitterPass()); } + // We run the BasicBlockSections pass if either we need BB sections or BB + // address map (or both). + if (NeedsBBSections || TM->Options.BBAddrMap) { + if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) { + addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass( + TM->getBBSectionsFuncListBuf())); + addPass(llvm::createBasicBlockPathCloningPass()); + } + addPass(llvm::createBasicBlockSectionsPass()); + } addPostBBSections(); @@ -1423,6 +1434,8 @@ void TargetPassConfig::addFastRegAlloc() { void TargetPassConfig::addOptimizedRegAlloc() { addPass(&DetectDeadLanesID); + addPass(&InitUndefID); + addPass(&ProcessImplicitDefsID); // LiveVariables currently requires pure SSA form. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp index c50b1cf94227..ffc8055dd27e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -21,11 +21,11 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -50,20 +50,16 @@ static cl::opt<unsigned> "high compile time cost in global splitting."), cl::init(5000)); -TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, - regclass_iterator RCB, regclass_iterator RCE, - const char *const *SRINames, - const LaneBitmask *SRILaneMasks, - LaneBitmask SRICoveringLanes, - const RegClassInfo *const RCIs, - const MVT::SimpleValueType *const RCVTLists, - unsigned Mode) - : InfoDesc(ID), SubRegIndexNames(SRINames), - SubRegIndexLaneMasks(SRILaneMasks), - RegClassBegin(RCB), RegClassEnd(RCE), - CoveringLanes(SRICoveringLanes), - RCInfos(RCIs), RCVTLists(RCVTLists), HwMode(Mode) { -} +TargetRegisterInfo::TargetRegisterInfo( + const TargetRegisterInfoDesc *ID, regclass_iterator RCB, + regclass_iterator RCE, const char *const *SRINames, + const SubRegCoveredBits *SubIdxRanges, const LaneBitmask *SRILaneMasks, + LaneBitmask SRICoveringLanes, const RegClassInfo *const RCIs, + const MVT::SimpleValueType *const RCVTLists, unsigned Mode) + : InfoDesc(ID), SubRegIndexNames(SRINames), SubRegIdxRanges(SubIdxRanges), + SubRegIndexLaneMasks(SRILaneMasks), RegClassBegin(RCB), RegClassEnd(RCE), + CoveringLanes(SRICoveringLanes), RCInfos(RCIs), RCVTLists(RCVTLists), + HwMode(Mode) {} TargetRegisterInfo::~TargetRegisterInfo() = default; @@ -478,16 +474,11 @@ bool TargetRegisterInfo::isCalleeSavedPhysReg( } bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const { - return !MF.getFunction().hasFnAttribute("no-realign-stack"); + return MF.getFrameInfo().isStackRealignable(); } bool TargetRegisterInfo::shouldRealignStack(const MachineFunction &MF) const { - const MachineFrameInfo &MFI = 
MF.getFrameInfo(); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); - const Function &F = MF.getFunction(); - return F.hasFnAttribute("stackrealign") || - (MFI.getMaxAlign() > TFI->getStackAlign()) || - F.hasFnAttribute(Attribute::StackAlignment); + return MF.getFrameInfo().shouldRealignStack(); } bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0, @@ -596,6 +587,18 @@ bool TargetRegisterInfo::getCoveringSubRegIndexes( return BestIdx; } +unsigned TargetRegisterInfo::getSubRegIdxSize(unsigned Idx) const { + assert(Idx && Idx < getNumSubRegIndices() && + "This is not a subregister index"); + return SubRegIdxRanges[HwMode * getNumSubRegIndices() + Idx].Size; +} + +unsigned TargetRegisterInfo::getSubRegIdxOffset(unsigned Idx) const { + assert(Idx && Idx < getNumSubRegIndices() && + "This is not a subregister index"); + return SubRegIdxRanges[HwMode * getNumSubRegIndices() + Idx].Offset; +} + Register TargetRegisterInfo::lookThruCopyLike(Register SrcReg, const MachineRegisterInfo *MRI) const { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 74d7904aee33..665d57841a97 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -26,6 +26,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/TwoAddressInstructionPass.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -36,10 +37,12 @@ #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -86,7 +89,7 @@ static cl::opt<unsigned> MaxDataFlowEdge( namespace { -class TwoAddressInstructionPass : public MachineFunctionPass { +class TwoAddressInstructionImpl { MachineFunction *MF = nullptr; const TargetInstrInfo *TII = nullptr; const TargetRegisterInfo *TRI = nullptr; @@ -186,43 +189,113 @@ class TwoAddressInstructionPass : public MachineFunctionPass { bool processStatepoint(MachineInstr *MI, TiedOperandMap &TiedOperands); public: + TwoAddressInstructionImpl(MachineFunction &MF, MachineFunctionPass *P); + TwoAddressInstructionImpl(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); + void setOptLevel(CodeGenOptLevel Level) { OptLevel = Level; } + bool run(); +}; + +class TwoAddressInstructionLegacyPass : public MachineFunctionPass { +public: static char ID; // Pass identification, replacement for typeid - TwoAddressInstructionPass() : MachineFunctionPass(ID) { - initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); + TwoAddressInstructionLegacyPass() : MachineFunctionPass(ID) { + initializeTwoAddressInstructionLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + /// Pass entry point. + bool runOnMachineFunction(MachineFunction &MF) override { + TwoAddressInstructionImpl Impl(MF, this); + // Disable optimizations if requested. We cannot skip the whole pass as some + // fixups are necessary for correctness. 
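The new getSubRegIdxSize/getSubRegIdxOffset accessors above index one flat, row-major table: entry HwMode * getNumSubRegIndices() + Idx, with index 0 reserved for "no subregister". A miniature version of that lookup with made-up numbers:

#include <cassert>

// Sketch of the flat per-hw-mode table the accessors above index into.
// Row = hw mode, column = sub-register index, stored row-major.
struct SubRegCoveredBits { unsigned Offset, Size; };

static constexpr unsigned NumSubRegIndices = 3; // index 0 means "no subreg"

// Two hypothetical hw modes x three indices.
static constexpr SubRegCoveredBits Ranges[2 * NumSubRegIndices] = {
    {0, 0}, {0, 32}, {32, 32}, // mode 0: sub_lo = bits [0,32), sub_hi = [32,64)
    {0, 0}, {0, 16}, {16, 16}, // mode 1: narrower registers
};

static unsigned subRegIdxSize(unsigned HwMode, unsigned Idx) {
  assert(Idx && Idx < NumSubRegIndices && "not a subregister index");
  return Ranges[HwMode * NumSubRegIndices + Idx].Size;
}

int main() {
  assert(subRegIdxSize(0, 2) == 32);
  assert(subRegIdxSize(1, 2) == 16);
}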
+ if (skipFunction(MF.getFunction())) + Impl.setOptLevel(CodeGenOptLevel::None); + return Impl.run(); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addUsedIfAvailable<AAResultsWrapperPass>(); - AU.addUsedIfAvailable<LiveVariables>(); - AU.addPreserved<LiveVariables>(); - AU.addPreserved<SlotIndexes>(); - AU.addPreserved<LiveIntervals>(); + AU.addUsedIfAvailable<LiveVariablesWrapperPass>(); + AU.addPreserved<LiveVariablesWrapperPass>(); + AU.addPreserved<SlotIndexesWrapperPass>(); + AU.addPreserved<LiveIntervalsWrapperPass>(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); } - - /// Pass entry point. - bool runOnMachineFunction(MachineFunction&) override; }; } // end anonymous namespace -char TwoAddressInstructionPass::ID = 0; +PreservedAnalyses +TwoAddressInstructionPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + // Disable optimizations if requested. We cannot skip the whole pass as some + // fixups are necessary for correctness. + TwoAddressInstructionImpl Impl(MF, MFAM); + if (MF.getFunction().hasOptNone()) + Impl.setOptLevel(CodeGenOptLevel::None); + + MFPropsModifier _(*this, MF); + bool Changed = Impl.run(); + if (!Changed) + return PreservedAnalyses::all(); + auto PA = getMachineFunctionPassPreservedAnalyses(); + PA.preserve<LiveIntervalsAnalysis>(); + PA.preserve<LiveVariablesAnalysis>(); + PA.preserve<MachineDominatorTreeAnalysis>(); + PA.preserve<MachineLoopAnalysis>(); + PA.preserve<SlotIndexesAnalysis>(); + PA.preserveSet<CFGAnalyses>(); + return PA; +} + +char TwoAddressInstructionLegacyPass::ID = 0; -char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; +char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionLegacyPass::ID; -INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, DEBUG_TYPE, - "Two-Address instruction pass", false, false) +INITIALIZE_PASS_BEGIN(TwoAddressInstructionLegacyPass, DEBUG_TYPE, + "Two-Address instruction pass", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(TwoAddressInstructionPass, DEBUG_TYPE, - "Two-Address instruction pass", false, false) +INITIALIZE_PASS_END(TwoAddressInstructionLegacyPass, DEBUG_TYPE, + "Two-Address instruction pass", false, false) + +TwoAddressInstructionImpl::TwoAddressInstructionImpl( + MachineFunction &Func, MachineFunctionAnalysisManager &MFAM) + : MF(&Func), TII(Func.getSubtarget().getInstrInfo()), + TRI(Func.getSubtarget().getRegisterInfo()), + InstrItins(Func.getSubtarget().getInstrItineraryData()), + MRI(&Func.getRegInfo()), + LV(MFAM.getCachedResult<LiveVariablesAnalysis>(Func)), + LIS(MFAM.getCachedResult<LiveIntervalsAnalysis>(Func)), + OptLevel(Func.getTarget().getOptLevel()) { + auto &FAM = MFAM.getResult<FunctionAnalysisManagerMachineFunctionProxy>(Func) + .getManager(); + AA = FAM.getCachedResult<AAManager>(Func.getFunction()); +} + +TwoAddressInstructionImpl::TwoAddressInstructionImpl(MachineFunction &Func, + MachineFunctionPass *P) + : MF(&Func), TII(Func.getSubtarget().getInstrInfo()), + TRI(Func.getSubtarget().getRegisterInfo()), + InstrItins(Func.getSubtarget().getInstrItineraryData()), + MRI(&Func.getRegInfo()), OptLevel(Func.getTarget().getOptLevel()) { + auto *LVWrapper = P->getAnalysisIfAvailable<LiveVariablesWrapperPass>(); + LV = LVWrapper ? &LVWrapper->getLV() : nullptr; + auto *LISWrapper = P->getAnalysisIfAvailable<LiveIntervalsWrapperPass>(); + LIS = LISWrapper ? 
&LISWrapper->getLIS() : nullptr; + if (auto *AAPass = P->getAnalysisIfAvailable<AAResultsWrapperPass>()) + AA = &AAPass->getAAResults(); + else + AA = nullptr; +} /// Return the MachineInstr* if it is the single def of the Reg in current BB. MachineInstr * -TwoAddressInstructionPass::getSingleDef(Register Reg, +TwoAddressInstructionImpl::getSingleDef(Register Reg, MachineBasicBlock *BB) const { MachineInstr *Ret = nullptr; for (MachineInstr &DefMI : MRI->def_instructions(Reg)) { @@ -243,7 +316,7 @@ TwoAddressInstructionPass::getSingleDef(Register Reg, /// %Tmp2 = copy %ToReg; /// MaxLen specifies the maximum length of the copy chain the func /// can walk through. -bool TwoAddressInstructionPass::isRevCopyChain(Register FromReg, Register ToReg, +bool TwoAddressInstructionImpl::isRevCopyChain(Register FromReg, Register ToReg, int Maxlen) { Register TmpReg = FromReg; for (int i = 0; i < Maxlen; i++) { @@ -263,7 +336,7 @@ bool TwoAddressInstructionPass::isRevCopyChain(Register FromReg, Register ToReg, /// in the MBB that defines the specified register and the two-address /// instruction which is being processed. It also returns the last def location /// by reference. -bool TwoAddressInstructionPass::noUseAfterLastDef(Register Reg, unsigned Dist, +bool TwoAddressInstructionImpl::noUseAfterLastDef(Register Reg, unsigned Dist, unsigned &LastDef) { LastDef = 0; unsigned LastUse = Dist; @@ -286,7 +359,7 @@ bool TwoAddressInstructionPass::noUseAfterLastDef(Register Reg, unsigned Dist, /// Return true if the specified MI is a copy instruction or an extract_subreg /// instruction. It also returns the source and destination registers and /// whether they are physical registers by reference. -bool TwoAddressInstructionPass::isCopyToReg(MachineInstr &MI, Register &SrcReg, +bool TwoAddressInstructionImpl::isCopyToReg(MachineInstr &MI, Register &SrcReg, Register &DstReg, bool &IsSrcPhys, bool &IsDstPhys) const { SrcReg = 0; @@ -306,7 +379,7 @@ bool TwoAddressInstructionPass::isCopyToReg(MachineInstr &MI, Register &SrcReg, return true; } -bool TwoAddressInstructionPass::isPlainlyKilled(const MachineInstr *MI, +bool TwoAddressInstructionImpl::isPlainlyKilled(const MachineInstr *MI, LiveRange &LR) const { // This is to match the kill flag version where undefs don't have kill flags. if (!LR.hasAtLeastOneValue()) @@ -320,7 +393,7 @@ bool TwoAddressInstructionPass::isPlainlyKilled(const MachineInstr *MI, /// Test if the given register value, which is used by the /// given instruction, is killed by the given instruction. -bool TwoAddressInstructionPass::isPlainlyKilled(const MachineInstr *MI, +bool TwoAddressInstructionImpl::isPlainlyKilled(const MachineInstr *MI, Register Reg) const { // FIXME: Sometimes tryInstructionTransform() will add instructions and // test whether they can be folded before keeping them. In this case it @@ -339,12 +412,12 @@ bool TwoAddressInstructionPass::isPlainlyKilled(const MachineInstr *MI, }); } - return MI->killsRegister(Reg); + return MI->killsRegister(Reg, /*TRI=*/nullptr); } /// Test if the register used by the given operand is killed by the operand's /// instruction. -bool TwoAddressInstructionPass::isPlainlyKilled( +bool TwoAddressInstructionImpl::isPlainlyKilled( const MachineOperand &MO) const { return MO.isKill() || isPlainlyKilled(MO.getParent(), MO.getReg()); } @@ -366,7 +439,7 @@ bool TwoAddressInstructionPass::isPlainlyKilled( /// /// If allowFalsePositives is true then likely kills are treated as kills even /// if it can't be proven that they are kills. 
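The TwoAddressInstruction rework above follows the usual new-pass-manager porting shape: the logic moves into an Impl class with one constructor per pass manager, and the legacy pass and the new-PM pass both become thin adapters around a shared run(). A skeleton of that shape with stand-in types (nothing below is LLVM API):

#include <cstdio>

// Stand-ins for MachineFunction, MachineFunctionPass and
// MachineFunctionAnalysisManager.
struct MachineFunction {};
struct LegacyPassHandle {};
struct NewPMAnalysisManager {};

class TwoAddressImpl {
  MachineFunction *MF;
public:
  // One constructor per pass manager: each pulls its (possibly cached)
  // analyses through its own mechanism, then the shared run() does the work.
  TwoAddressImpl(MachineFunction &F, LegacyPassHandle *) : MF(&F) {}
  TwoAddressImpl(MachineFunction &F, NewPMAnalysisManager &) : MF(&F) {}
  bool run() {
    std::printf("rewriting two-address instrs (%p)\n", static_cast<void *>(MF));
    return true;
  }
};

// Thin adapters, one per pass manager.
bool legacyRunOnMachineFunction(MachineFunction &F, LegacyPassHandle *P) {
  return TwoAddressImpl(F, P).run();
}
bool newPMRun(MachineFunction &F, NewPMAnalysisManager &MFAM) {
  return TwoAddressImpl(F, MFAM).run();
}

int main() {
  MachineFunction F;
  LegacyPassHandle P;
  NewPMAnalysisManager AM;
  legacyRunOnMachineFunction(F, &P);
  newPMRun(F, AM);
}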
-bool TwoAddressInstructionPass::isKilled(MachineInstr &MI, Register Reg, +bool TwoAddressInstructionImpl::isKilled(MachineInstr &MI, Register Reg, bool allowFalsePositives) const { MachineInstr *DefMI = &MI; while (true) { @@ -411,7 +484,7 @@ static bool isTwoAddrUse(MachineInstr &MI, Register Reg, Register &DstReg) { /// Given a register, if all its uses are in the same basic block, return the /// last use instruction if it's a copy or a two-address use. -MachineInstr *TwoAddressInstructionPass::findOnlyInterestingUse( +MachineInstr *TwoAddressInstructionImpl::findOnlyInterestingUse( Register Reg, MachineBasicBlock *MBB, bool &IsCopy, Register &DstReg, bool &IsDstPhys) const { MachineOperand *UseOp = nullptr; @@ -468,7 +541,7 @@ static MCRegister getMappedReg(Register Reg, } /// Return true if the two registers are equal or aliased. -bool TwoAddressInstructionPass::regsAreCompatible(Register RegA, +bool TwoAddressInstructionImpl::regsAreCompatible(Register RegA, Register RegB) const { if (RegA == RegB) return true; @@ -478,7 +551,7 @@ bool TwoAddressInstructionPass::regsAreCompatible(Register RegA, } /// From RegMap remove entries mapped to a physical register which overlaps MO. -void TwoAddressInstructionPass::removeMapRegEntry( +void TwoAddressInstructionImpl::removeMapRegEntry( const MachineOperand &MO, DenseMap<Register, Register> &RegMap) const { assert( (MO.isReg() || MO.isRegMask()) && @@ -510,7 +583,7 @@ void TwoAddressInstructionPass::removeMapRegEntry( /// /// After the MUL instruction, $rdx contains different value than in the COPY /// instruction. So %2 should not map to $rdx after MUL. -void TwoAddressInstructionPass::removeClobberedSrcRegMap(MachineInstr *MI) { +void TwoAddressInstructionImpl::removeClobberedSrcRegMap(MachineInstr *MI) { if (MI->isCopy()) { // If a virtual register is copied to its mapped physical register, it // doesn't change the potential coalescing between them, so we don't remove @@ -546,7 +619,7 @@ void TwoAddressInstructionPass::removeClobberedSrcRegMap(MachineInstr *MI) { } // Returns true if Reg is equal or aliased to at least one register in Set. -bool TwoAddressInstructionPass::regOverlapsSet( +bool TwoAddressInstructionImpl::regOverlapsSet( const SmallVectorImpl<Register> &Set, Register Reg) const { for (unsigned R : Set) if (TRI->regsOverlap(R, Reg)) @@ -557,7 +630,7 @@ bool TwoAddressInstructionPass::regOverlapsSet( /// Return true if it's potentially profitable to commute the two-address /// instruction that's being processed. -bool TwoAddressInstructionPass::isProfitableToCommute(Register RegA, +bool TwoAddressInstructionImpl::isProfitableToCommute(Register RegA, Register RegB, Register RegC, MachineInstr *MI, @@ -662,7 +735,7 @@ bool TwoAddressInstructionPass::isProfitableToCommute(Register RegA, /// Commute a two-address instruction and update the basic block, distance map, /// and live variables if needed. Return true if it is successful. -bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI, +bool TwoAddressInstructionImpl::commuteInstruction(MachineInstr *MI, unsigned DstIdx, unsigned RegBIdx, unsigned RegCIdx, @@ -693,7 +766,7 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI, /// Return true if it is profitable to convert the given 2-address instruction /// to a 3-address one. 
-bool TwoAddressInstructionPass::isProfitableToConv3Addr(Register RegA, +bool TwoAddressInstructionImpl::isProfitableToConv3Addr(Register RegA, Register RegB) { // Look for situations like this: // %reg1024 = MOV r1 @@ -710,7 +783,7 @@ bool TwoAddressInstructionPass::isProfitableToConv3Addr(Register RegA, /// Convert the specified two-address instruction into a three address one. /// Return true if this transformation was successful. -bool TwoAddressInstructionPass::convertInstTo3Addr( +bool TwoAddressInstructionImpl::convertInstTo3Addr( MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, Register RegA, Register RegB, unsigned &Dist) { MachineInstrSpan MIS(mi, MBB); @@ -752,7 +825,7 @@ bool TwoAddressInstructionPass::convertInstTo3Addr( /// Scan forward recursively for only uses, update maps if the use is a copy or /// a two-address instruction. -void TwoAddressInstructionPass::scanUses(Register DstReg) { +void TwoAddressInstructionImpl::scanUses(Register DstReg) { SmallVector<Register, 4> VirtRegPairs; bool IsDstPhys; bool IsCopy = false; @@ -805,7 +878,7 @@ void TwoAddressInstructionPass::scanUses(Register DstReg) { /// coalesced to r0 (from the input side). v1025 is mapped to r1. v1026 is /// potentially joined with r1 on the output side. It's worthwhile to commute /// 'add' to eliminate a copy. -void TwoAddressInstructionPass::processCopy(MachineInstr *MI) { +void TwoAddressInstructionImpl::processCopy(MachineInstr *MI) { if (Processed.count(MI)) return; @@ -831,7 +904,7 @@ void TwoAddressInstructionPass::processCopy(MachineInstr *MI) { /// If there is one more local instruction that reads 'Reg' and it kills 'Reg, /// consider moving the instruction below the kill instruction in order to /// eliminate the need for the copy. -bool TwoAddressInstructionPass::rescheduleMIBelowKill( +bool TwoAddressInstructionImpl::rescheduleMIBelowKill( MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, Register Reg) { // Bail immediately if we don't have LV or LIS available. We use them to find @@ -998,7 +1071,7 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill( /// Return true if the re-scheduling will put the given instruction too close /// to the defs of its register dependencies. -bool TwoAddressInstructionPass::isDefTooClose(Register Reg, unsigned Dist, +bool TwoAddressInstructionImpl::isDefTooClose(Register Reg, unsigned Dist, MachineInstr *MI) { for (MachineInstr &DefMI : MRI->def_instructions(Reg)) { if (DefMI.getParent() != MBB || DefMI.isCopy() || DefMI.isCopyLike()) @@ -1019,7 +1092,7 @@ bool TwoAddressInstructionPass::isDefTooClose(Register Reg, unsigned Dist, /// If there is one more local instruction that reads 'Reg' and it kills 'Reg, /// consider moving the kill instruction above the current two-address /// instruction in order to eliminate the need for the copy. -bool TwoAddressInstructionPass::rescheduleKillAboveMI( +bool TwoAddressInstructionImpl::rescheduleKillAboveMI( MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, Register Reg) { // Bail immediately if we don't have LV or LIS available. We use them to find @@ -1171,7 +1244,7 @@ bool TwoAddressInstructionPass::rescheduleKillAboveMI( /// to commute operands in the instruction. /// /// Returns true if the transformation happened. Otherwise, returns false. 
-bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, +bool TwoAddressInstructionImpl::tryInstructionCommute(MachineInstr *MI, unsigned DstOpIdx, unsigned BaseOpIdx, bool BaseOpKilled, @@ -1236,11 +1309,9 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, /// (either because they were untied, or because mi was rescheduled, and will /// be visited again later). If the shouldOnlyCommute flag is true, only /// instruction commutation is attempted. -bool TwoAddressInstructionPass:: -tryInstructionTransform(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - unsigned SrcIdx, unsigned DstIdx, - unsigned &Dist, bool shouldOnlyCommute) { +bool TwoAddressInstructionImpl::tryInstructionTransform( + MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, + unsigned SrcIdx, unsigned DstIdx, unsigned &Dist, bool shouldOnlyCommute) { if (OptLevel == CodeGenOptLevel::None) return false; @@ -1355,8 +1426,10 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, << "2addr: NEW INST: " << *NewMIs[1]); // Transform the instruction, now that it no longer has a load. - unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA); - unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB); + unsigned NewDstIdx = + NewMIs[1]->findRegisterDefOperandIdx(regA, /*TRI=*/nullptr); + unsigned NewSrcIdx = + NewMIs[1]->findRegisterUseOperandIdx(regB, /*TRI=*/nullptr); MachineBasicBlock::iterator NewMI = NewMIs[1]; bool TransformResult = tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist, true); @@ -1371,19 +1444,22 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, if (MO.isReg() && MO.getReg().isVirtual()) { if (MO.isUse()) { if (MO.isKill()) { - if (NewMIs[0]->killsRegister(MO.getReg())) + if (NewMIs[0]->killsRegister(MO.getReg(), /*TRI=*/nullptr)) LV->replaceKillInstruction(MO.getReg(), MI, *NewMIs[0]); else { - assert(NewMIs[1]->killsRegister(MO.getReg()) && + assert(NewMIs[1]->killsRegister(MO.getReg(), + /*TRI=*/nullptr) && "Kill missing after load unfold!"); LV->replaceKillInstruction(MO.getReg(), MI, *NewMIs[1]); } } } else if (LV->removeVirtualRegisterDead(MO.getReg(), MI)) { - if (NewMIs[1]->registerDefIsDead(MO.getReg())) + if (NewMIs[1]->registerDefIsDead(MO.getReg(), + /*TRI=*/nullptr)) LV->addVirtualRegisterDead(MO.getReg(), *NewMIs[1]); else { - assert(NewMIs[0]->registerDefIsDead(MO.getReg()) && + assert(NewMIs[0]->registerDefIsDead(MO.getReg(), + /*TRI=*/nullptr) && "Dead flag missing after load unfold!"); LV->addVirtualRegisterDead(MO.getReg(), *NewMIs[0]); } @@ -1435,8 +1511,8 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, // Collect tied operands of MI that need to be handled. // Rewrite trivial cases immediately. // Return true if any tied operands were found, including the trivial ones. -bool TwoAddressInstructionPass:: -collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { +bool TwoAddressInstructionImpl::collectTiedOperands( + MachineInstr *MI, TiedOperandMap &TiedOperands) { bool AnyOps = false; unsigned NumOps = MI->getNumOperands(); @@ -1474,10 +1550,9 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { // Process a list of tied MI operands that all use the same source register. // The tied pairs are of the form (SrcIdx, DstIdx).
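collectTiedOperands (above) and processTiedPairs (continuing below) handle the two-address constraint itself: a tied destination operand must reuse a source register, so the generic fix is to copy the source into the destination first. The rewrite in miniature, in ordinary C++ terms:

#include <cassert>

// The constraint this pass enforces, in miniature: a two-address "add"
// destroys its first operand (dst and lhs share a register), so the
// three-address form  c = a + b  is rewritten as  c = a; c += b.
static void addTwoAddress(int &dst, int rhs) { dst += rhs; }

static int lowerAdd(int a, int b) {
  int c = a;           // the COPY inserted for the tied operand pair
  addTwoAddress(c, b); // dst is tied to the first source
  return c;
}

int main() {
  assert(lowerAdd(2, 3) == 5);
}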
-void -TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, - TiedPairList &TiedPairs, - unsigned &Dist) { +void TwoAddressInstructionImpl::processTiedPairs(MachineInstr *MI, + TiedPairList &TiedPairs, + unsigned &Dist) { bool IsEarlyClobber = llvm::any_of(TiedPairs, [MI](auto const &TP) { return MI->getOperand(TP.second).isEarlyClobber(); }); @@ -1663,7 +1738,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, // and replaces all uses of RegA with RegB. // No extra COPY instruction is necessary because the tied use is killed at // STATEPOINT. -bool TwoAddressInstructionPass::processStatepoint( +bool TwoAddressInstructionImpl::processStatepoint( MachineInstr *MI, TiedOperandMap &TiedOperands) { bool NeedCopy = false; @@ -1750,25 +1825,7 @@ bool TwoAddressInstructionPass::processStatepoint( } /// Reduce two-address instructions to two operands. -bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { - MF = &Func; - const TargetMachine &TM = MF->getTarget(); - MRI = &MF->getRegInfo(); - TII = MF->getSubtarget().getInstrInfo(); - TRI = MF->getSubtarget().getRegisterInfo(); - InstrItins = MF->getSubtarget().getInstrItineraryData(); - LV = getAnalysisIfAvailable<LiveVariables>(); - LIS = getAnalysisIfAvailable<LiveIntervals>(); - if (auto *AAPass = getAnalysisIfAvailable<AAResultsWrapperPass>()) - AA = &AAPass->getAAResults(); - else - AA = nullptr; - OptLevel = TM.getOptLevel(); - // Disable optimizations if requested. We cannot skip the whole pass as some - // fixups are necessary for correctness. - if (skipFunction(Func.getFunction())) - OptLevel = CodeGenOptLevel::None; - +bool TwoAddressInstructionImpl::run() { bool MadeChange = false; LLVM_DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); @@ -1923,27 +1980,33 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { /// /// undef %dst:ssub0 = COPY %v1 /// %dst:ssub1 = COPY %v2 -void TwoAddressInstructionPass:: -eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { +void TwoAddressInstructionImpl::eliminateRegSequence( + MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; Register DstReg = MI.getOperand(0).getReg(); SmallVector<Register, 4> OrigRegs; + VNInfo *DefVN = nullptr; if (LIS) { OrigRegs.push_back(MI.getOperand(0).getReg()); for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) OrigRegs.push_back(MI.getOperand(i).getReg()); + if (LIS->hasInterval(DstReg)) { + DefVN = LIS->getInterval(DstReg) + .Query(LIS->getInstructionIndex(MI)) + .valueOut(); + } } + LaneBitmask UndefLanes = LaneBitmask::getNone(); bool DefEmitted = false; - bool DefIsPartial = false; for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) { MachineOperand &UseMO = MI.getOperand(i); Register SrcReg = UseMO.getReg(); unsigned SubIdx = MI.getOperand(i+1).getImm(); // Nothing needs to be inserted for undef operands. if (UseMO.isUndef()) { - DefIsPartial = true; + UndefLanes |= TRI->getSubRegIndexLaneMask(SubIdx); continue; } @@ -1991,11 +2054,25 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { MI.removeOperand(j); } else { if (LIS) { - // Force interval recomputation if we moved from full definition - // of register to partial. - if (DefIsPartial && LIS->hasInterval(DstReg) && - MRI->shouldTrackSubRegLiveness(DstReg)) + // Force live interval recomputation if we moved to a partial definition + // of the register. Undef flags must be propagated to uses of the undefined + // subregister for accurate interval computation.
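eliminateRegSequence now accumulates the lanes of undef REG_SEQUENCE inputs into a LaneBitmask and, per the comment above, marks overlapping uses undef before dropping the interval. The lane arithmetic in isolation, with a hypothetical two-subregister lane layout:

#include <cassert>
#include <cstdint>

// Lane masks in miniature: one bit per lane of the full register.
using LaneBitmask = uint64_t;

// Hypothetical mapping: subreg 1 covers lanes 0-1, subreg 2 covers lanes 2-3.
static constexpr LaneBitmask LanesOf(unsigned SubIdx) {
  return SubIdx == 1 ? 0b0011 : 0b1100;
}

int main() {
  // REG_SEQUENCE with an undef input for subreg 2:
  LaneBitmask UndefLanes = 0;
  UndefLanes |= LanesOf(2); // the operand was undef, so record its lanes

  // A later use reading subreg 2 overlaps the undef lanes, so it can be
  // marked undef as well; a use of subreg 1 cannot.
  assert((UndefLanes & LanesOf(2)) != 0);
  assert((UndefLanes & LanesOf(1)) == 0);
}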
+ if (UndefLanes.any() && DefVN && MRI->shouldTrackSubRegLiveness(DstReg)) { + auto &LI = LIS->getInterval(DstReg); + for (MachineOperand &UseOp : MRI->use_operands(DstReg)) { + unsigned SubReg = UseOp.getSubReg(); + if (UseOp.isUndef() || !SubReg) + continue; + auto *VN = + LI.getVNInfoAt(LIS->getInstructionIndex(*UseOp.getParent())); + if (DefVN != VN) + continue; + LaneBitmask LaneMask = TRI->getSubRegIndexLaneMask(SubReg); + if ((UndefLanes & LaneMask).any()) + UseOp.setIsUndef(true); + } LIS->removeInterval(DstReg); + } LIS->RemoveMachineInstrFromMaps(MI); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp index 053caf518bd1..0940759ddc42 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp @@ -136,6 +136,7 @@ public: class TypePromotionImpl { unsigned TypeSize = 0; + const TargetLowering *TLI = nullptr; LLVMContext *Ctx = nullptr; unsigned RegisterBitWidth = 0; SmallPtrSet<Value *, 16> AllVisited; @@ -272,64 +273,58 @@ bool TypePromotionImpl::isSink(Value *V) { /// Return whether this instruction can safely wrap. bool TypePromotionImpl::isSafeWrap(Instruction *I) { - // We can support a potentially wrapping instruction (I) if: + // We can support a potentially wrapping Add/Sub instruction (I) if: // - It is only used by an unsigned icmp. // - The icmp uses a constant. - // - The wrapping value (I) is decreasing, i.e would underflow - wrapping - // around zero to become a larger number than before. // - The wrapping instruction (I) also uses a constant. // - // We can then use the two constants to calculate whether the result would - // wrap in respect to itself in the original bitwidth. If it doesn't wrap, - // just underflows the range, the icmp would give the same result whether the - // result has been truncated or not. We calculate this by: - // - Zero extending both constants, if needed, to RegisterBitWidth. - // - Take the absolute value of I's constant, adding this to the icmp const. - // - Check that this value is not out of range for small type. If it is, it - // means that it has underflowed enough to wrap around the icmp constant. + // This is a common pattern emitted to check if a value is within a range. // // For example: // - // %sub = sub i8 %a, 2 - // %cmp = icmp ule i8 %sub, 254 + // %sub = sub i8 %a, C1 + // %cmp = icmp ule i8 %sub, C2 + // + // or + // + // %add = add i8 %a, C1 + // %cmp = icmp ule i8 %add, C2. + // + // We will treat an add as though it were a subtract by -C1. To promote + // the Add/Sub we will zero extend the LHS and the subtracted amount. For Add, + // this means we need to negate the constant, zero extend to RegisterBitWidth, + // and negate in the larger type. // - // If %a = 0, %sub = -2 == FE == 254 - // But if this is evalulated as a i32 - // %sub = -2 == FF FF FF FE == 4294967294 - // So the unsigned compares (i8 and i32) would not yield the same result. + // This will produce a value in the range [-zext(C1), zext(X)-zext(C1)] where + // C1 is the subtracted amount. This is either a small unsigned number or a + // large unsigned number in the promoted type. // - // Another way to look at it is: - // %a - 2 <= 254 - // %a + 2 <= 254 + 2 - // %a <= 256 - // And we can't represent 256 in the i8 format, so we don't support it. + // Now we need to correct the compare constant C2. Values >= C1 in the + // original add result range have been remapped to large values in the + // promoted range.
If the compare constant fell into this range we need to + // remap it as well. We can do this as -(zext(-C2)). // - // Whereas: + // For example: // - // %sub i8 %a, 1 + // %sub = sub i8 %a, 2 // %cmp = icmp ule i8 %sub, 254 // - // If %a = 0, %sub = -1 == FF == 255 - // As i32: - // %sub = -1 == FF FF FF FF == 4294967295 + // becomes // - // In this case, the unsigned compare results would be the same and this - // would also be true for ult, uge and ugt: - // - (255 < 254) == (0xFFFFFFFF < 254) == false - // - (255 <= 254) == (0xFFFFFFFF <= 254) == false - // - (255 > 254) == (0xFFFFFFFF > 254) == true - // - (255 >= 254) == (0xFFFFFFFF >= 254) == true + // %zext = zext %a to i32 + // %sub = sub i32 %zext, 2 + // %cmp = icmp ule i32 %sub, 4294967294 // - // To demonstrate why we can't handle increasing values: + // Another example: // - // %add = add i8 %a, 2 - // %cmp = icmp ult i8 %add, 127 + // %sub = sub i8 %a, 1 + // %cmp = icmp ule i8 %sub, 254 // - // If %a = 254, %add = 256 == (i8 1) - // As i32: - // %add = 256 + // becomes // - // (1 < 127) != (256 < 127) + // %zext = zext %a to i32 + // %sub = sub i32 %zext, 1 + // %cmp = icmp ule i32 %sub, 254 unsigned Opc = I->getOpcode(); if (Opc != Instruction::Add && Opc != Instruction::Sub) @@ -356,25 +351,32 @@ bool TypePromotionImpl::isSafeWrap(Instruction *I) { APInt OverflowConst = cast<ConstantInt>(I->getOperand(1))->getValue(); if (Opc == Instruction::Sub) OverflowConst = -OverflowConst; - if (!OverflowConst.isNonPositive()) - return false; - // Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that: - // zext(x) + sext(C1) <u zext(C2) if C1 < 0 and C1 >s C2 - // zext(x) + sext(C1) <u sext(C2) if C1 < 0 and C1 <=s C2 - if (OverflowConst.sgt(ICmpConst)) { - LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext " + // If the constant is positive, we will end up filling the promoted bits with + // all 1s. Make sure that results in a cheap add constant. + if (!OverflowConst.isNonPositive()) { + // We don't have the true promoted width, just use 64 so we can create an + // int64_t for the isLegalAddImmediate call. + if (OverflowConst.getBitWidth() >= 64) + return false; + + APInt NewConst = -((-OverflowConst).zext(64)); + if (!TLI->isLegalAddImmediate(NewConst.getSExtValue())) + return false; + } + + SafeWrap.insert(I); + + if (OverflowConst == 0 || OverflowConst.ugt(ICmpConst)) { + LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for " << "const of " << *I << "\n"); - SafeWrap.insert(I); - return true; - } else { - LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext " - << "const of " << *I << " and " << *CI << "\n"); - SafeWrap.insert(I); - SafeWrap.insert(CI); return true; } - return false; + + LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for " + << "const of " << *I << " and " << *CI << "\n"); + SafeWrap.insert(CI); + return true; } bool TypePromotionImpl::shouldPromote(Value *V) { @@ -488,18 +490,24 @@ void IRPromoter::PromoteTree() { continue; if (auto *Const = dyn_cast<ConstantInt>(Op)) { - // For subtract, we don't need to sext the constant. We only put it in + // For subtract, we only need to zext the constant. We only put it in // SafeWrap because SafeWrap.size() is used elsewhere. - // For cmp, we need to sign extend a constant appearing in either - // operand. For add, we should only sign extend the RHS. 
- Constant *NewConst = - ConstantInt::get(Const->getContext(), - (SafeWrap.contains(I) && - (I->getOpcode() == Instruction::ICmp || i == 1) && - I->getOpcode() != Instruction::Sub) - ? Const->getValue().sext(PromotedWidth) - : Const->getValue().zext(PromotedWidth)); - I->setOperand(i, NewConst); + // For Add and ICmp we need to find how far the constant is from the + // top of its original unsigned range and place it the same distance + // from the top of its new unsigned range. We can do this by negating + // the constant, zero extending it, then negating in the new type. + APInt NewConst; + if (SafeWrap.contains(I)) { + if (I->getOpcode() == Instruction::ICmp) + NewConst = -((-Const->getValue()).zext(PromotedWidth)); + else if (I->getOpcode() == Instruction::Add && i == 1) + NewConst = -((-Const->getValue()).zext(PromotedWidth)); + else + NewConst = Const->getValue().zext(PromotedWidth); + } else + NewConst = Const->getValue().zext(PromotedWidth); + + I->setOperand(i, ConstantInt::get(Const->getContext(), NewConst)); } else if (isa<UndefValue>(Op)) I->setOperand(i, ConstantInt::get(ExtTy, 0)); } @@ -635,7 +643,7 @@ void IRPromoter::ConvertTruncs() { ConstantInt *Mask = ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue()); Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask); - if (SrcTy != ExtTy) + if (SrcTy->getBitWidth() > ExtTy->getBitWidth()) Masked = Builder.CreateTrunc(Masked, ExtTy); if (auto *I = dyn_cast<Instruction>(Masked)) @@ -916,12 +924,12 @@ bool TypePromotionImpl::run(Function &F, const TargetMachine *TM, SafeToPromote.clear(); SafeWrap.clear(); bool MadeChange = false; - const DataLayout &DL = F.getParent()->getDataLayout(); + const DataLayout &DL = F.getDataLayout(); const TargetSubtargetInfo *SubtargetInfo = TM->getSubtargetImpl(F); - const TargetLowering *TLI = SubtargetInfo->getTargetLowering(); + TLI = SubtargetInfo->getTargetLowering(); RegisterBitWidth = TTI.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedValue(); - Ctx = &F.getParent()->getContext(); + Ctx = &F.getContext(); // Return the preferred integer width of the instruction, or zero if we // shouldn't try. 
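The remapped compare constant -(zext(-C2)) from the TypePromotion hunks above is easy to sanity-check exhaustively for the worked i8 example in the comments (sub i8 %a, 2; icmp ule i8 %sub, 254). A brute-force check of the arithmetic only, entirely outside LLVM:

#include <cassert>
#include <cstdint>

int main() {
  // Original i8 pattern:   %sub = sub i8 %a, 2 ; %cmp = icmp ule i8 %sub, 254
  // Promoted i32 pattern:  %zext = zext i8 %a ; %sub = sub i32 %zext, 2
  //                        %cmp = icmp ule i32 %sub, -(zext(-254)) = 0xFFFFFFFE
  const uint8_t C1 = 2, C2 = 254;
  const uint32_t C2p = -uint32_t(uint8_t(-C2)); // -(zext(-C2)) = 0xFFFFFFFE

  for (unsigned a = 0; a < 256; ++a) {
    bool narrow = uint8_t(uint8_t(a) - C1) <= C2;
    bool wide = uint32_t(a) - C1 <= C2p; // unsigned wrap is well defined
    assert(narrow == wide);              // same verdict for every i8 input
  }
}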
@@ -937,6 +945,8 @@ bool TypePromotionImpl::run(Function &F, const TargetMachine *TM, return 0; EVT PromotedVT = TLI->getTypeToTransformTo(*Ctx, SrcVT); + if (TLI->isSExtCheaperThanZExt(SrcVT, PromotedVT)) + return 0; if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) { LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register " << "for promoted type\n"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp index 1a60e9abbe2e..8194f3ca5610 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp @@ -89,8 +89,8 @@ INITIALIZE_PASS(UnreachableMachineBlockElim, "unreachable-mbb-elimination", char &llvm::UnreachableMachineBlockElimID = UnreachableMachineBlockElim::ID; void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreserved<MachineLoopInfo>(); - AU.addPreserved<MachineDominatorTree>(); + AU.addPreserved<MachineLoopInfoWrapperPass>(); + AU.addPreserved<MachineDominatorTreeWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -98,8 +98,12 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { df_iterator_default_set<MachineBasicBlock*> Reachable; bool ModifiedPHI = false; - MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>(); - MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); + MachineDominatorTreeWrapperPass *MDTWrapper = + getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>(); + MachineDominatorTree *MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr; + MachineLoopInfoWrapperPass *MLIWrapper = + getAnalysisIfAvailable<MachineLoopInfoWrapperPass>(); + MachineLoopInfo *MLI = MLIWrapper ? &MLIWrapper->getLI() : nullptr; // Mark all reachable blocks. for (MachineBasicBlock *BB : depth_first_ext(&F, Reachable)) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp index fc1cbfefb0db..0cddf59d0ca2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp @@ -130,12 +130,12 @@ bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) { // Now see if there are no other dependencies to instructions already // in the packet. 
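The UnreachableMachineBlockElim hunk above shows the mechanical half of the analysis-wrapper migration: passes now fetch the wrapper pass and unwrap it to the analysis result, preserving the may-be-absent behavior of getAnalysisIfAvailable. The null-safe unwrap with stand-in types:

#include <cstdio>

// Stand-ins: the wrapper owns the analysis result, much like
// MachineDominatorTreeWrapperPass::getDomTree() in the hunk above.
struct DomTree { void query() const { std::puts("dominance query"); } };
struct DomTreeWrapperPass {
  DomTree DT;
  DomTree &getDomTree() { return DT; }
};

// Hypothetical analogue of getAnalysisIfAvailable<>: may return null.
static DomTreeWrapperPass *getWrapperIfAvailable(bool Available) {
  static DomTreeWrapperPass Wrapper;
  return Available ? &Wrapper : nullptr;
}

int main() {
  DomTreeWrapperPass *W = getWrapperIfAvailable(true);
  DomTree *MDT = W ? &W->getDomTree() : nullptr; // same unwrap as the hunk
  if (MDT)
    MDT->query();
}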
if (IsTop) { - for (unsigned i = 0, e = Packet.size(); i != e; ++i) - if (hasDependence(Packet[i], SU)) + for (const SUnit *U : Packet) + if (hasDependence(U, SU)) return false; } else { - for (unsigned i = 0, e = Packet.size(); i != e; ++i) - if (hasDependence(SU, Packet[i])) + for (const SUnit *U : Packet) + if (hasDependence(SU, U)) return false; } return true; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp index ba3b9e00e34e..b0f736a49c20 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp @@ -181,6 +181,7 @@ std::string EVT::getEVTString() const { case MVT::Metadata: return "Metadata"; case MVT::Untyped: return "Untyped"; case MVT::funcref: return "funcref"; + case MVT::exnref: return "exnref"; case MVT::externref: return "externref"; case MVT::aarch64svcount: return "aarch64svcount"; @@ -206,21 +207,6 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { assert(isExtended() && "Type is not extended!"); return LLVMTy; case MVT::isVoid: return Type::getVoidTy(Context); - case MVT::i1: return Type::getInt1Ty(Context); - case MVT::i2: return Type::getIntNTy(Context, 2); - case MVT::i4: return Type::getIntNTy(Context, 4); - case MVT::i8: return Type::getInt8Ty(Context); - case MVT::i16: return Type::getInt16Ty(Context); - case MVT::i32: return Type::getInt32Ty(Context); - case MVT::i64: return Type::getInt64Ty(Context); - case MVT::i128: return IntegerType::get(Context, 128); - case MVT::f16: return Type::getHalfTy(Context); - case MVT::bf16: return Type::getBFloatTy(Context); - case MVT::f32: return Type::getFloatTy(Context); - case MVT::f64: return Type::getDoubleTy(Context); - case MVT::f80: return Type::getX86_FP80Ty(Context); - case MVT::f128: return Type::getFP128Ty(Context); - case MVT::ppcf128: return Type::getPPC_FP128Ty(Context); case MVT::x86mmx: return Type::getX86_MMXTy(Context); case MVT::aarch64svcount: return TargetExtType::get(Context, "aarch64.svcount"); @@ -228,356 +214,19 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::i64x8: return IntegerType::get(Context, 512); case MVT::externref: return Type::getWasm_ExternrefTy(Context); case MVT::funcref: return Type::getWasm_FuncrefTy(Context); - case MVT::v1i1: - return FixedVectorType::get(Type::getInt1Ty(Context), 1); - case MVT::v2i1: - return FixedVectorType::get(Type::getInt1Ty(Context), 2); - case MVT::v4i1: - return FixedVectorType::get(Type::getInt1Ty(Context), 4); - case MVT::v8i1: - return FixedVectorType::get(Type::getInt1Ty(Context), 8); - case MVT::v16i1: - return FixedVectorType::get(Type::getInt1Ty(Context), 16); - case MVT::v32i1: - return FixedVectorType::get(Type::getInt1Ty(Context), 32); - case MVT::v64i1: - return FixedVectorType::get(Type::getInt1Ty(Context), 64); - case MVT::v128i1: - return FixedVectorType::get(Type::getInt1Ty(Context), 128); - case MVT::v256i1: - return FixedVectorType::get(Type::getInt1Ty(Context), 256); - case MVT::v512i1: - return FixedVectorType::get(Type::getInt1Ty(Context), 512); - case MVT::v1024i1: - return FixedVectorType::get(Type::getInt1Ty(Context), 1024); - case MVT::v2048i1: - return FixedVectorType::get(Type::getInt1Ty(Context), 2048); - case MVT::v128i2: - return FixedVectorType::get(Type::getIntNTy(Context, 2), 128); - case MVT::v256i2: - return FixedVectorType::get(Type::getIntNTy(Context, 2), 256); - case MVT::v64i4: - return FixedVectorType::get(Type::getIntNTy(Context, 4), 64); - case MVT::v128i4: 
- return FixedVectorType::get(Type::getIntNTy(Context, 4), 128); - case MVT::v1i8: - return FixedVectorType::get(Type::getInt8Ty(Context), 1); - case MVT::v2i8: - return FixedVectorType::get(Type::getInt8Ty(Context), 2); - case MVT::v4i8: - return FixedVectorType::get(Type::getInt8Ty(Context), 4); - case MVT::v8i8: - return FixedVectorType::get(Type::getInt8Ty(Context), 8); - case MVT::v16i8: - return FixedVectorType::get(Type::getInt8Ty(Context), 16); - case MVT::v32i8: - return FixedVectorType::get(Type::getInt8Ty(Context), 32); - case MVT::v64i8: - return FixedVectorType::get(Type::getInt8Ty(Context), 64); - case MVT::v128i8: - return FixedVectorType::get(Type::getInt8Ty(Context), 128); - case MVT::v256i8: - return FixedVectorType::get(Type::getInt8Ty(Context), 256); - case MVT::v512i8: - return FixedVectorType::get(Type::getInt8Ty(Context), 512); - case MVT::v1024i8: - return FixedVectorType::get(Type::getInt8Ty(Context), 1024); - case MVT::v1i16: - return FixedVectorType::get(Type::getInt16Ty(Context), 1); - case MVT::v2i16: - return FixedVectorType::get(Type::getInt16Ty(Context), 2); - case MVT::v3i16: - return FixedVectorType::get(Type::getInt16Ty(Context), 3); - case MVT::v4i16: - return FixedVectorType::get(Type::getInt16Ty(Context), 4); - case MVT::v8i16: - return FixedVectorType::get(Type::getInt16Ty(Context), 8); - case MVT::v16i16: - return FixedVectorType::get(Type::getInt16Ty(Context), 16); - case MVT::v32i16: - return FixedVectorType::get(Type::getInt16Ty(Context), 32); - case MVT::v64i16: - return FixedVectorType::get(Type::getInt16Ty(Context), 64); - case MVT::v128i16: - return FixedVectorType::get(Type::getInt16Ty(Context), 128); - case MVT::v256i16: - return FixedVectorType::get(Type::getInt16Ty(Context), 256); - case MVT::v512i16: - return FixedVectorType::get(Type::getInt16Ty(Context), 512); - case MVT::v1i32: - return FixedVectorType::get(Type::getInt32Ty(Context), 1); - case MVT::v2i32: - return FixedVectorType::get(Type::getInt32Ty(Context), 2); - case MVT::v3i32: - return FixedVectorType::get(Type::getInt32Ty(Context), 3); - case MVT::v4i32: - return FixedVectorType::get(Type::getInt32Ty(Context), 4); - case MVT::v5i32: - return FixedVectorType::get(Type::getInt32Ty(Context), 5); - case MVT::v6i32: - return FixedVectorType::get(Type::getInt32Ty(Context), 6); - case MVT::v7i32: - return FixedVectorType::get(Type::getInt32Ty(Context), 7); - case MVT::v8i32: - return FixedVectorType::get(Type::getInt32Ty(Context), 8); - case MVT::v9i32: - return FixedVectorType::get(Type::getInt32Ty(Context), 9); - case MVT::v10i32: - return FixedVectorType::get(Type::getInt32Ty(Context), 10); - case MVT::v11i32: - return FixedVectorType::get(Type::getInt32Ty(Context), 11); - case MVT::v12i32: - return FixedVectorType::get(Type::getInt32Ty(Context), 12); - case MVT::v16i32: - return FixedVectorType::get(Type::getInt32Ty(Context), 16); - case MVT::v32i32: - return FixedVectorType::get(Type::getInt32Ty(Context), 32); - case MVT::v64i32: - return FixedVectorType::get(Type::getInt32Ty(Context), 64); - case MVT::v128i32: - return FixedVectorType::get(Type::getInt32Ty(Context), 128); - case MVT::v256i32: - return FixedVectorType::get(Type::getInt32Ty(Context), 256); - case MVT::v512i32: - return FixedVectorType::get(Type::getInt32Ty(Context), 512); - case MVT::v1024i32: - return FixedVectorType::get(Type::getInt32Ty(Context), 1024); - case MVT::v2048i32: - return FixedVectorType::get(Type::getInt32Ty(Context), 2048); - case MVT::v1i64: - return 
FixedVectorType::get(Type::getInt64Ty(Context), 1); - case MVT::v2i64: - return FixedVectorType::get(Type::getInt64Ty(Context), 2); - case MVT::v3i64: - return FixedVectorType::get(Type::getInt64Ty(Context), 3); - case MVT::v4i64: - return FixedVectorType::get(Type::getInt64Ty(Context), 4); - case MVT::v8i64: - return FixedVectorType::get(Type::getInt64Ty(Context), 8); - case MVT::v16i64: - return FixedVectorType::get(Type::getInt64Ty(Context), 16); - case MVT::v32i64: - return FixedVectorType::get(Type::getInt64Ty(Context), 32); - case MVT::v64i64: - return FixedVectorType::get(Type::getInt64Ty(Context), 64); - case MVT::v128i64: - return FixedVectorType::get(Type::getInt64Ty(Context), 128); - case MVT::v256i64: - return FixedVectorType::get(Type::getInt64Ty(Context), 256); - case MVT::v1i128: - return FixedVectorType::get(Type::getInt128Ty(Context), 1); - case MVT::v1f16: - return FixedVectorType::get(Type::getHalfTy(Context), 1); - case MVT::v2f16: - return FixedVectorType::get(Type::getHalfTy(Context), 2); - case MVT::v3f16: - return FixedVectorType::get(Type::getHalfTy(Context), 3); - case MVT::v4f16: - return FixedVectorType::get(Type::getHalfTy(Context), 4); - case MVT::v8f16: - return FixedVectorType::get(Type::getHalfTy(Context), 8); - case MVT::v16f16: - return FixedVectorType::get(Type::getHalfTy(Context), 16); - case MVT::v32f16: - return FixedVectorType::get(Type::getHalfTy(Context), 32); - case MVT::v64f16: - return FixedVectorType::get(Type::getHalfTy(Context), 64); - case MVT::v128f16: - return FixedVectorType::get(Type::getHalfTy(Context), 128); - case MVT::v256f16: - return FixedVectorType::get(Type::getHalfTy(Context), 256); - case MVT::v512f16: - return FixedVectorType::get(Type::getHalfTy(Context), 512); - case MVT::v2bf16: - return FixedVectorType::get(Type::getBFloatTy(Context), 2); - case MVT::v3bf16: - return FixedVectorType::get(Type::getBFloatTy(Context), 3); - case MVT::v4bf16: - return FixedVectorType::get(Type::getBFloatTy(Context), 4); - case MVT::v8bf16: - return FixedVectorType::get(Type::getBFloatTy(Context), 8); - case MVT::v16bf16: - return FixedVectorType::get(Type::getBFloatTy(Context), 16); - case MVT::v32bf16: - return FixedVectorType::get(Type::getBFloatTy(Context), 32); - case MVT::v64bf16: - return FixedVectorType::get(Type::getBFloatTy(Context), 64); - case MVT::v128bf16: - return FixedVectorType::get(Type::getBFloatTy(Context), 128); - case MVT::v1f32: - return FixedVectorType::get(Type::getFloatTy(Context), 1); - case MVT::v2f32: - return FixedVectorType::get(Type::getFloatTy(Context), 2); - case MVT::v3f32: - return FixedVectorType::get(Type::getFloatTy(Context), 3); - case MVT::v4f32: - return FixedVectorType::get(Type::getFloatTy(Context), 4); - case MVT::v5f32: - return FixedVectorType::get(Type::getFloatTy(Context), 5); - case MVT::v6f32: - return FixedVectorType::get(Type::getFloatTy(Context), 6); - case MVT::v7f32: - return FixedVectorType::get(Type::getFloatTy(Context), 7); - case MVT::v8f32: - return FixedVectorType::get(Type::getFloatTy(Context), 8); - case MVT::v9f32: - return FixedVectorType::get(Type::getFloatTy(Context), 9); - case MVT::v10f32: - return FixedVectorType::get(Type::getFloatTy(Context), 10); - case MVT::v11f32: - return FixedVectorType::get(Type::getFloatTy(Context), 11); - case MVT::v12f32: - return FixedVectorType::get(Type::getFloatTy(Context), 12); - case MVT::v16f32: - return FixedVectorType::get(Type::getFloatTy(Context), 16); - case MVT::v32f32: - return FixedVectorType::get(Type::getFloatTy(Context), 32); - 
case MVT::v64f32: - return FixedVectorType::get(Type::getFloatTy(Context), 64); - case MVT::v128f32: - return FixedVectorType::get(Type::getFloatTy(Context), 128); - case MVT::v256f32: - return FixedVectorType::get(Type::getFloatTy(Context), 256); - case MVT::v512f32: - return FixedVectorType::get(Type::getFloatTy(Context), 512); - case MVT::v1024f32: - return FixedVectorType::get(Type::getFloatTy(Context), 1024); - case MVT::v2048f32: - return FixedVectorType::get(Type::getFloatTy(Context), 2048); - case MVT::v1f64: - return FixedVectorType::get(Type::getDoubleTy(Context), 1); - case MVT::v2f64: - return FixedVectorType::get(Type::getDoubleTy(Context), 2); - case MVT::v3f64: - return FixedVectorType::get(Type::getDoubleTy(Context), 3); - case MVT::v4f64: - return FixedVectorType::get(Type::getDoubleTy(Context), 4); - case MVT::v8f64: - return FixedVectorType::get(Type::getDoubleTy(Context), 8); - case MVT::v16f64: - return FixedVectorType::get(Type::getDoubleTy(Context), 16); - case MVT::v32f64: - return FixedVectorType::get(Type::getDoubleTy(Context), 32); - case MVT::v64f64: - return FixedVectorType::get(Type::getDoubleTy(Context), 64); - case MVT::v128f64: - return FixedVectorType::get(Type::getDoubleTy(Context), 128); - case MVT::v256f64: - return FixedVectorType::get(Type::getDoubleTy(Context), 256); - case MVT::nxv1i1: - return ScalableVectorType::get(Type::getInt1Ty(Context), 1); - case MVT::nxv2i1: - return ScalableVectorType::get(Type::getInt1Ty(Context), 2); - case MVT::nxv4i1: - return ScalableVectorType::get(Type::getInt1Ty(Context), 4); - case MVT::nxv8i1: - return ScalableVectorType::get(Type::getInt1Ty(Context), 8); - case MVT::nxv16i1: - return ScalableVectorType::get(Type::getInt1Ty(Context), 16); - case MVT::nxv32i1: - return ScalableVectorType::get(Type::getInt1Ty(Context), 32); - case MVT::nxv64i1: - return ScalableVectorType::get(Type::getInt1Ty(Context), 64); - case MVT::nxv1i8: - return ScalableVectorType::get(Type::getInt8Ty(Context), 1); - case MVT::nxv2i8: - return ScalableVectorType::get(Type::getInt8Ty(Context), 2); - case MVT::nxv4i8: - return ScalableVectorType::get(Type::getInt8Ty(Context), 4); - case MVT::nxv8i8: - return ScalableVectorType::get(Type::getInt8Ty(Context), 8); - case MVT::nxv16i8: - return ScalableVectorType::get(Type::getInt8Ty(Context), 16); - case MVT::nxv32i8: - return ScalableVectorType::get(Type::getInt8Ty(Context), 32); - case MVT::nxv64i8: - return ScalableVectorType::get(Type::getInt8Ty(Context), 64); - case MVT::nxv1i16: - return ScalableVectorType::get(Type::getInt16Ty(Context), 1); - case MVT::nxv2i16: - return ScalableVectorType::get(Type::getInt16Ty(Context), 2); - case MVT::nxv4i16: - return ScalableVectorType::get(Type::getInt16Ty(Context), 4); - case MVT::nxv8i16: - return ScalableVectorType::get(Type::getInt16Ty(Context), 8); - case MVT::nxv16i16: - return ScalableVectorType::get(Type::getInt16Ty(Context), 16); - case MVT::nxv32i16: - return ScalableVectorType::get(Type::getInt16Ty(Context), 32); - case MVT::nxv1i32: - return ScalableVectorType::get(Type::getInt32Ty(Context), 1); - case MVT::nxv2i32: - return ScalableVectorType::get(Type::getInt32Ty(Context), 2); - case MVT::nxv4i32: - return ScalableVectorType::get(Type::getInt32Ty(Context), 4); - case MVT::nxv8i32: - return ScalableVectorType::get(Type::getInt32Ty(Context), 8); - case MVT::nxv16i32: - return ScalableVectorType::get(Type::getInt32Ty(Context), 16); - case MVT::nxv32i32: - return ScalableVectorType::get(Type::getInt32Ty(Context), 32); - case MVT::nxv1i64: - 
return ScalableVectorType::get(Type::getInt64Ty(Context), 1); - case MVT::nxv2i64: - return ScalableVectorType::get(Type::getInt64Ty(Context), 2); - case MVT::nxv4i64: - return ScalableVectorType::get(Type::getInt64Ty(Context), 4); - case MVT::nxv8i64: - return ScalableVectorType::get(Type::getInt64Ty(Context), 8); - case MVT::nxv16i64: - return ScalableVectorType::get(Type::getInt64Ty(Context), 16); - case MVT::nxv32i64: - return ScalableVectorType::get(Type::getInt64Ty(Context), 32); - case MVT::nxv1f16: - return ScalableVectorType::get(Type::getHalfTy(Context), 1); - case MVT::nxv2f16: - return ScalableVectorType::get(Type::getHalfTy(Context), 2); - case MVT::nxv4f16: - return ScalableVectorType::get(Type::getHalfTy(Context), 4); - case MVT::nxv8f16: - return ScalableVectorType::get(Type::getHalfTy(Context), 8); - case MVT::nxv16f16: - return ScalableVectorType::get(Type::getHalfTy(Context), 16); - case MVT::nxv32f16: - return ScalableVectorType::get(Type::getHalfTy(Context), 32); - case MVT::nxv1bf16: - return ScalableVectorType::get(Type::getBFloatTy(Context), 1); - case MVT::nxv2bf16: - return ScalableVectorType::get(Type::getBFloatTy(Context), 2); - case MVT::nxv4bf16: - return ScalableVectorType::get(Type::getBFloatTy(Context), 4); - case MVT::nxv8bf16: - return ScalableVectorType::get(Type::getBFloatTy(Context), 8); - case MVT::nxv16bf16: - return ScalableVectorType::get(Type::getBFloatTy(Context), 16); - case MVT::nxv32bf16: - return ScalableVectorType::get(Type::getBFloatTy(Context), 32); - case MVT::nxv1f32: - return ScalableVectorType::get(Type::getFloatTy(Context), 1); - case MVT::nxv2f32: - return ScalableVectorType::get(Type::getFloatTy(Context), 2); - case MVT::nxv4f32: - return ScalableVectorType::get(Type::getFloatTy(Context), 4); - case MVT::nxv8f32: - return ScalableVectorType::get(Type::getFloatTy(Context), 8); - case MVT::nxv16f32: - return ScalableVectorType::get(Type::getFloatTy(Context), 16); - case MVT::nxv1f64: - return ScalableVectorType::get(Type::getDoubleTy(Context), 1); - case MVT::nxv2f64: - return ScalableVectorType::get(Type::getDoubleTy(Context), 2); - case MVT::nxv4f64: - return ScalableVectorType::get(Type::getDoubleTy(Context), 4); - case MVT::nxv8f64: - return ScalableVectorType::get(Type::getDoubleTy(Context), 8); case MVT::Metadata: return Type::getMetadataTy(Context); +#define GET_VT_EVT(Ty, EVT) case MVT::Ty: return EVT; +#include "llvm/CodeGen/GenVT.inc" +#undef GET_VT_EVT } // clang-format on } -/// Return the value type corresponding to the specified type. This returns all -/// pointers as MVT::iPTR. If HandleUnknown is true, unknown types are returned -/// as Other, otherwise they are invalid. +/// Return the value type corresponding to the specified type. +/// If HandleUnknown is true, unknown types are returned as Other, otherwise +/// they are invalid. +/// NB: This includes pointer types, which require a DataLayout to convert +/// to a concrete value type. 
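// [Editor's note, illustrative only, not part of this diff:] with the
// Type::PointerTyID case removed below, MVT::getVT() no longer maps pointers
// to MVT::iPTR; a caller that can see pointer types is expected to go through
// a DataLayout-aware query instead, e.g. (assuming a TargetLowering &TLI and
// const DataLayout &DL are in scope):
//   MVT PtrVT = TLI.getPointerTy(DL);    // concrete integer VT, e.g. i64
//   EVT VT = TLI.getValueType(DL, Ty);   // also handles pointer types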
MVT MVT::getVT(Type *Ty, bool HandleUnknown){ assert(Ty != nullptr && "Invalid type"); switch (Ty->getTypeID()) { @@ -607,7 +256,6 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){ case Type::X86_AMXTyID: return MVT(MVT::x86amx); case Type::FP128TyID: return MVT(MVT::f128); case Type::PPC_FP128TyID: return MVT(MVT::ppcf128); - case Type::PointerTyID: return MVT(MVT::iPTR); case Type::FixedVectorTyID: case Type::ScalableVectorTyID: { VectorType *VTy = cast<VectorType>(Ty); @@ -618,13 +266,17 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){ } } -/// getEVT - Return the value type corresponding to the specified type. This -/// returns all pointers as MVT::iPTR. If HandleUnknown is true, unknown types -/// are returned as Other, otherwise they are invalid. +/// getEVT - Return the value type corresponding to the specified type. +/// If HandleUnknown is true, unknown types are returned as Other, otherwise +/// they are invalid. +/// NB: This includes pointer types, which require a DataLayout to convert +/// to a concrete value type. EVT EVT::getEVT(Type *Ty, bool HandleUnknown){ switch (Ty->getTypeID()) { default: return MVT::getVT(Ty, HandleUnknown); + case Type::TokenTyID: + return MVT::Untyped; case Type::IntegerTyID: return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth()); case Type::FixedVectorTyID: diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp index 48f4ee29fbe9..4acc4f845291 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp @@ -16,9 +16,9 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/VirtRegMap.h" -#include "LiveDebugVariables.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveDebugVariables.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveStacks.h" @@ -228,8 +228,8 @@ char &llvm::VirtRegRewriterID = VirtRegRewriter::ID; INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter", "Virtual Register Rewriter", false, false) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) INITIALIZE_PASS_DEPENDENCY(LiveStacks) INITIALIZE_PASS_DEPENDENCY(VirtRegMap) @@ -238,10 +238,10 @@ INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter", void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequired<LiveIntervals>(); - AU.addPreserved<LiveIntervals>(); - AU.addRequired<SlotIndexes>(); - AU.addPreserved<SlotIndexes>(); + AU.addRequired<LiveIntervalsWrapperPass>(); + AU.addPreserved<LiveIntervalsWrapperPass>(); + AU.addRequired<SlotIndexesWrapperPass>(); + AU.addPreserved<SlotIndexesWrapperPass>(); AU.addRequired<LiveDebugVariables>(); AU.addRequired<LiveStacks>(); AU.addPreserved<LiveStacks>(); @@ -258,8 +258,8 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) { TRI = MF->getSubtarget().getRegisterInfo(); TII = MF->getSubtarget().getInstrInfo(); MRI = &MF->getRegInfo(); - Indexes = &getAnalysis<SlotIndexes>(); - LIS = &getAnalysis<LiveIntervals>(); + Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI(); + LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); VRM = &getAnalysis<VirtRegMap>(); DebugVars = 
&getAnalysis<LiveDebugVariables>(); LLVM_DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp index 1a9e1ba869c3..7514d49fb18a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp @@ -84,6 +84,7 @@ #include "llvm/IR/EHPersonalities.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicsWebAssembly.h" +#include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -252,12 +253,11 @@ bool WasmEHPrepareImpl::prepareEHPads(Function &F) { M.getOrInsertGlobal("__wasm_lpad_context", LPadContextTy)); LPadContextGV->setThreadLocalMode(GlobalValue::GeneralDynamicTLSModel); - LPadIndexField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 0, - "lpad_index_gep"); - LSDAField = - IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 1, "lsda_gep"); - SelectorField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 2, - "selector_gep"); + LPadIndexField = LPadContextGV; + LSDAField = IRB.CreateConstInBoundsGEP2_32(LPadContextTy, LPadContextGV, 0, 1, + "lsda_gep"); + SelectorField = IRB.CreateConstInBoundsGEP2_32(LPadContextTy, LPadContextGV, + 0, 2, "selector_gep"); // wasm.landingpad.index() intrinsic, which is to specify landingpad index LPadIndexF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_landingpad_index); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp index 95976c218c2f..c58c67b70fe3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/EHPersonalities.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -138,7 +139,7 @@ bool WinEHPrepareImpl::runOnFunction(Function &Fn) { if (!isScopedEHPersonality(Personality)) return false; - DL = &Fn.getParent()->getDataLayout(); + DL = &Fn.getDataLayout(); return prepareExplicitEH(Fn); } @@ -1234,10 +1235,10 @@ AllocaInst *WinEHPrepareImpl::insertPHILoads(PHINode *PN, Function &F) { // that will dominate all uses. SpillSlot = new AllocaInst(PN->getType(), DL->getAllocaAddrSpace(), nullptr, Twine(PN->getName(), ".wineh.spillslot"), - &F.getEntryBlock().front()); + F.getEntryBlock().begin()); Value *V = new LoadInst(PN->getType(), SpillSlot, Twine(PN->getName(), ".wineh.reload"), - &*PHIBlock->getFirstInsertionPt()); + PHIBlock->getFirstInsertionPt()); PN->replaceAllUsesWith(V); return SpillSlot; } @@ -1309,7 +1310,7 @@ void WinEHPrepareImpl::insertPHIStore( } // Otherwise, insert the store at the end of the basic block. 
- new StoreInst(PredVal, SpillSlot, PredBlock->getTerminator()); + new StoreInst(PredVal, SpillSlot, PredBlock->getTerminator()->getIterator()); } void WinEHPrepareImpl::replaceUseWithLoad( @@ -1319,7 +1320,7 @@ void WinEHPrepareImpl::replaceUseWithLoad( if (!SpillSlot) SpillSlot = new AllocaInst(V->getType(), DL->getAllocaAddrSpace(), nullptr, Twine(V->getName(), ".wineh.spillslot"), - &F.getEntryBlock().front()); + F.getEntryBlock().begin()); auto *UsingInst = cast<Instruction>(U.getUser()); if (auto *UsingPHI = dyn_cast<PHINode>(UsingInst)) { @@ -1376,16 +1377,16 @@ void WinEHPrepareImpl::replaceUseWithLoad( Value *&Load = Loads[IncomingBlock]; // Insert the load into the predecessor block if (!Load) - Load = new LoadInst(V->getType(), SpillSlot, - Twine(V->getName(), ".wineh.reload"), - /*isVolatile=*/false, IncomingBlock->getTerminator()); + Load = new LoadInst( + V->getType(), SpillSlot, Twine(V->getName(), ".wineh.reload"), + /*isVolatile=*/false, IncomingBlock->getTerminator()->getIterator()); U.set(Load); } else { // Reload right before the old use. auto *Load = new LoadInst(V->getType(), SpillSlot, Twine(V->getName(), ".wineh.reload"), - /*isVolatile=*/false, UsingInst); + /*isVolatile=*/false, UsingInst->getIterator()); U.set(Load); } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WindowScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WindowScheduler.cpp new file mode 100644 index 000000000000..0777480499e5 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/WindowScheduler.cpp @@ -0,0 +1,702 @@ +//======----------- WindowScheduler.cpp - window scheduler -------------======// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// An implementation of the Window Scheduling software pipelining algorithm. +// +// The fundamental concept of the window scheduling algorithm involves folding +// the original MBB at a specific position, followed by list scheduling on the +// folded MIs. The optimal scheduling result is then chosen from various folding +// positions as the final scheduling outcome. +// +// The primary challenge in this algorithm lies in generating the folded MIs and +// establishing their dependencies. We have innovatively employed a new MBB, +// created by copying the original MBB three times, known as TripleMBB. This +// TripleMBB enables the convenient implementation of MI folding and dependency +// establishment. To facilitate the algorithm's implementation, we have also +// devised data structures such as OriMIs, TriMIs, TriToOri, and OriToCycle. +// +// Another challenge in the algorithm is the scheduling of phis. Semantically, +// it is difficult to place the phis in the window and perform list scheduling. +// Therefore, we schedule these phis separately after each list scheduling. +// +// The provided implementation is designed for use before the Register Allocator +// (RA). If the target requires implementation after RA, it is recommended to +// reimplement analyseII(), schedulePhi(), and expand(). Additionally, +// target-specific logic can be added in initialize(), preProcess(), and +// postProcess(). +// +// Lastly, it is worth mentioning that getSearchIndexes() is an important +// function. We have experimented with more complex heuristics on a downstream +// target and achieved favorable results.
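// [Editor's illustrative aside, not upstream text:] for intuition, given a
// loop body <A, B, C, D>, folding at position 2 means list scheduling the
// reordered body <C, D, A, B>, where A and B now belong to the next
// iteration; run() below simply repeats this for several fold positions and
// keeps the candidate with the smallest initiation interval (II).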
+// +//===----------------------------------------------------------------------===// +#include "llvm/CodeGen/WindowScheduler.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachinePipeliner.h" +#include "llvm/CodeGen/ModuloSchedule.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/TimeProfiler.h" + +using namespace llvm; + +#define DEBUG_TYPE "pipeliner" + +namespace { +STATISTIC(NumTryWindowSchedule, + "Number of loops that we attempt to use window scheduling"); +STATISTIC(NumTryWindowSearch, + "Number of times that we run list scheduling in the window scheduling"); +STATISTIC(NumWindowSchedule, + "Number of loops that we successfully use window scheduling"); +STATISTIC(NumFailAnalyseII, + "Window scheduling abort due to the failure of the II analysis"); + +cl::opt<unsigned> + WindowSearchNum("window-search-num", + cl::desc("The number of searches per loop in the window " + "algorithm. 0 means no search number limit."), + cl::Hidden, cl::init(6)); + +cl::opt<unsigned> WindowSearchRatio( + "window-search-ratio", + cl::desc("The ratio of searches per loop in the window algorithm. 100 " + "means search all positions in the loop, while 0 means not " + "performing any search."), + cl::Hidden, cl::init(40)); + +cl::opt<unsigned> WindowIICoeff( + "window-ii-coeff", + cl::desc( + "The coefficient used when initializing II in the window algorithm."), + cl::Hidden, cl::init(5)); + +cl::opt<unsigned> WindowRegionLimit( + "window-region-limit", + cl::desc( + "The lower limit of the scheduling region in the window algorithm."), + cl::Hidden, cl::init(3)); + +cl::opt<unsigned> WindowDiffLimit( + "window-diff-limit", + cl::desc("The lower limit of the difference between best II and base II in " + "the window algorithm. If the difference is smaller than " + "this lower limit, window scheduling will not be performed."), + cl::Hidden, cl::init(2)); +} // namespace + +// WindowIILimit serves as an indicator of abnormal scheduling results and could +// potentially be referenced by the derived target window scheduler. +cl::opt<unsigned> + WindowIILimit("window-ii-limit", + cl::desc("The upper limit of II in the window algorithm."), + cl::Hidden, cl::init(1000)); + +WindowScheduler::WindowScheduler(MachineSchedContext *C, MachineLoop &ML) + : Context(C), MF(C->MF), MBB(ML.getHeader()), Loop(ML), + Subtarget(&MF->getSubtarget()), TII(Subtarget->getInstrInfo()), + TRI(Subtarget->getRegisterInfo()), MRI(&MF->getRegInfo()) { + TripleDAG = std::unique_ptr<ScheduleDAGInstrs>( + createMachineScheduler(/*OnlyBuildGraph=*/true)); +} + +bool WindowScheduler::run() { + if (!initialize()) { + LLVM_DEBUG(dbgs() << "The WindowScheduler failed to initialize!\n"); + return false; + } + // The window algorithm is time-consuming, and its compilation time should be + // taken into consideration. + TimeTraceScope Scope("WindowSearch"); + ++NumTryWindowSchedule; + // Performing the relevant processing before window scheduling. + preProcess(); + // The main window scheduling begins. + std::unique_ptr<ScheduleDAGInstrs> SchedDAG(createMachineScheduler()); + auto SearchIndexes = getSearchIndexes(WindowSearchNum, WindowSearchRatio); + for (unsigned Idx : SearchIndexes) { + OriToCycle.clear(); + ++NumTryWindowSearch; + // The scheduling starts with a non-phi instruction, so SchedPhiNum needs to + // be added to Idx.
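// [Editor's worked example, assuming a block with two phis:] with
// SchedPhiNum == 2 and a search index Idx == 3, Offset becomes 5, so the
// window below starts at the 6th instruction of the triple MBB and spans
// SchedInstrNum non-phi MIs.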
+ unsigned Offset = Idx + SchedPhiNum; + auto Range = getScheduleRange(Offset, SchedInstrNum); + SchedDAG->startBlock(MBB); + SchedDAG->enterRegion(MBB, Range.begin(), Range.end(), SchedInstrNum); + SchedDAG->schedule(); + LLVM_DEBUG(SchedDAG->dump()); + unsigned II = analyseII(*SchedDAG, Offset); + if (II == WindowIILimit) { + restoreTripleMBB(); + LLVM_DEBUG(dbgs() << "Can't find a valid II. Keep searching...\n"); + ++NumFailAnalyseII; + continue; + } + schedulePhi(Offset, II); + updateScheduleResult(Offset, II); + restoreTripleMBB(); + LLVM_DEBUG(dbgs() << "Current window Offset is " << Offset << " and II is " + << II << ".\n"); + } + // Performing the relevant processing after window scheduling. + postProcess(); + // Check whether the scheduling result is valid. + if (!isScheduleValid()) { + LLVM_DEBUG(dbgs() << "Window scheduling is not needed!\n"); + return false; + } + LLVM_DEBUG(dbgs() << "\nBest window offset is " << BestOffset + << " and Best II is " << BestII << ".\n"); + // Expand the scheduling result to prologue, kernel, and epilogue. + expand(); + ++NumWindowSchedule; + return true; +} + +ScheduleDAGInstrs * +WindowScheduler::createMachineScheduler(bool OnlyBuildGraph) { + return OnlyBuildGraph + ? new ScheduleDAGMI( + Context, std::make_unique<PostGenericScheduler>(Context), + true) + : Context->PassConfig->createMachineScheduler(Context); +} + +bool WindowScheduler::initialize() { + if (!Subtarget->enableWindowScheduler()) { + LLVM_DEBUG(dbgs() << "Target disables the window scheduling!\n"); + return false; + } + // Initialize the member variables used by the window algorithm. + OriMIs.clear(); + TriMIs.clear(); + TriToOri.clear(); + OriToCycle.clear(); + SchedResult.clear(); + SchedPhiNum = 0; + SchedInstrNum = 0; + BestII = UINT_MAX; + BestOffset = 0; + BaseII = 0; + // List scheduling used in the window algorithm depends on LiveIntervals. + if (!Context->LIS) { + LLVM_DEBUG(dbgs() << "There is no LiveIntervals information!\n"); + return false; + } + // Check each MI in MBB. + SmallSet<Register, 8> PrevDefs; + SmallSet<Register, 8> PrevUses; + auto IsLoopCarried = [&](MachineInstr &Phi) { + // Two cases are checked here: (1) the virtual register defined by the + // preceding phi is used by the succeeding phi; (2) the preceding phi uses + // the virtual register defined by the succeeding phi.
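// [Editor's illustrative example of the rejected pattern:]
//   %a = phi i32 [ %x, %entry ], [ %b, %loop ]
//   %b = phi i32 [ %y, %entry ], [ %a, %loop ]
// Each phi consumes the value the other defines, so one of the two checks
// below fires when the second phi is visited.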
+ if (PrevUses.count(Phi.getOperand(0).getReg())) + return true; + PrevDefs.insert(Phi.getOperand(0).getReg()); + for (unsigned I = 1, E = Phi.getNumOperands(); I != E; I += 2) { + if (PrevDefs.count(Phi.getOperand(I).getReg())) + return true; + PrevUses.insert(Phi.getOperand(I).getReg()); + } + return false; + }; + auto PLI = TII->analyzeLoopForPipelining(MBB); + for (auto &MI : *MBB) { + if (MI.isMetaInstruction() || MI.isTerminator()) + continue; + if (MI.isPHI()) { + if (IsLoopCarried(MI)) { + LLVM_DEBUG(dbgs() << "Loop carried phis are not supported yet!\n"); + return false; + } + ++SchedPhiNum; + ++BestOffset; + } else + ++SchedInstrNum; + if (TII->isSchedulingBoundary(MI, MBB, *MF)) { + LLVM_DEBUG( + dbgs() << "Boundary MI is not allowed in window scheduling!\n"); + return false; + } + if (PLI->shouldIgnoreForPipelining(&MI)) { + LLVM_DEBUG(dbgs() << "Special MI defined by target is not allowed in " + "window scheduling!\n"); + return false; + } + for (auto &Def : MI.all_defs()) + if (Def.isReg() && Def.getReg().isPhysical()) + return false; + } + if (SchedInstrNum <= WindowRegionLimit) { + LLVM_DEBUG(dbgs() << "There are too few MIs in the window region!\n"); + return false; + } + return true; +} + +void WindowScheduler::preProcess() { + // Prior to window scheduling, it's necessary to backup the original MBB, + // generate a new TripleMBB, and build a TripleDAG based on the TripleMBB. + backupMBB(); + generateTripleMBB(); + TripleDAG->startBlock(MBB); + TripleDAG->enterRegion( + MBB, MBB->begin(), MBB->getFirstTerminator(), + std::distance(MBB->begin(), MBB->getFirstTerminator())); + TripleDAG->buildSchedGraph(Context->AA); +} + +void WindowScheduler::postProcess() { + // After window scheduling, it's necessary to clear the TripleDAG and restore + // to the original MBB. + TripleDAG->exitRegion(); + TripleDAG->finishBlock(); + restoreMBB(); +} + +void WindowScheduler::backupMBB() { + for (auto &MI : MBB->instrs()) + OriMIs.push_back(&MI); + // Remove MIs and the corresponding live intervals. + for (auto &MI : make_early_inc_range(*MBB)) { + Context->LIS->getSlotIndexes()->removeMachineInstrFromMaps(MI, true); + MBB->remove(&MI); + } +} + +void WindowScheduler::restoreMBB() { + // Erase MIs and the corresponding live intervals. + for (auto &MI : make_early_inc_range(*MBB)) { + Context->LIS->getSlotIndexes()->removeMachineInstrFromMaps(MI, true); + MI.eraseFromParent(); + } + // Restore MBB to the state before window scheduling. + for (auto *MI : OriMIs) + MBB->push_back(MI); + updateLiveIntervals(); +} + +void WindowScheduler::generateTripleMBB() { + const unsigned DuplicateNum = 3; + TriMIs.clear(); + TriToOri.clear(); + assert(OriMIs.size() > 0 && "The Original MIs were not backed up!"); + // Step 1: Performing the first copy of MBB instructions, excluding + // terminators. At the same time, we back up the anti-register of phis. + // DefPairs hold the old and new define register pairs. + DenseMap<Register, Register> DefPairs; + for (auto *MI : OriMIs) { + if (MI->isMetaInstruction() || MI->isTerminator()) + continue; + if (MI->isPHI()) + if (Register AntiReg = getAntiRegister(MI)) + DefPairs[MI->getOperand(0).getReg()] = AntiReg; + auto *NewMI = MF->CloneMachineInstr(MI); + MBB->push_back(NewMI); + TriMIs.push_back(NewMI); + TriToOri[NewMI] = MI; + } + // Step 2: Performing the remaining two copies of MBB instructions excluding + // phis, and the last one contains terminators. At the same time, registers + // are updated accordingly. 
+ for (size_t Cnt = 1; Cnt < DuplicateNum; ++Cnt) { + for (auto *MI : OriMIs) { + if (MI->isPHI() || MI->isMetaInstruction() || + (MI->isTerminator() && Cnt < DuplicateNum - 1)) + continue; + auto *NewMI = MF->CloneMachineInstr(MI); + DenseMap<Register, Register> NewDefs; + // New defines are updated. + for (auto MO : NewMI->all_defs()) + if (MO.isReg() && MO.getReg().isVirtual()) { + Register NewDef = + MRI->createVirtualRegister(MRI->getRegClass(MO.getReg())); + NewMI->substituteRegister(MO.getReg(), NewDef, 0, *TRI); + NewDefs[MO.getReg()] = NewDef; + } + // New uses are updated. + for (auto DefRegPair : DefPairs) + if (NewMI->readsRegister(DefRegPair.first, TRI)) { + Register NewUse = DefRegPair.second; + // Note the update process for '%1 -> %9' in '%10 = sub i32 %9, %3': + // + // BB.3: DefPairs + // ================================== + // %1 = phi i32 [%2, %BB.1], [%7, %BB.3] (%1,%7) + // ... + // ================================== + // ... + // %4 = sub i32 %1, %3 + // ... + // %7 = add i32 %5, %6 + // ... + // ---------------------------------- + // ... + // %8 = sub i32 %7, %3 (%1,%7),(%4,%8) + // ... + // %9 = add i32 %5, %6 (%1,%7),(%4,%8),(%7,%9) + // ... + // ---------------------------------- + // ... + // %10 = sub i32 %9, %3 (%1,%7),(%4,%10),(%7,%9) + // ... ^ + // %11 = add i32 %5, %6 (%1,%7),(%4,%10),(%7,%11) + // ... + // ================================== + // < Terminators > + // ================================== + if (DefPairs.count(NewUse)) + NewUse = DefPairs[NewUse]; + NewMI->substituteRegister(DefRegPair.first, NewUse, 0, *TRI); + } + // DefPairs is updated last. + for (auto &NewDef : NewDefs) + DefPairs[NewDef.first] = NewDef.second; + MBB->push_back(NewMI); + TriMIs.push_back(NewMI); + TriToOri[NewMI] = MI; + } + } + // Step 3: The registers used by phis are updated to the registers generated + // in the third copy of MBB. + // In the previous example, the old phi is: + // %1 = phi i32 [%2, %BB.1], [%7, %BB.3] + // The new phi is: + // %1 = phi i32 [%2, %BB.1], [%11, %BB.3] + for (auto &Phi : MBB->phis()) { + for (auto DefRegPair : DefPairs) + if (Phi.readsRegister(DefRegPair.first, TRI)) + Phi.substituteRegister(DefRegPair.first, DefRegPair.second, 0, *TRI); + } + updateLiveIntervals(); +} + +void WindowScheduler::restoreTripleMBB() { + // After list scheduling, the MBB is restored in one traversal. + for (size_t I = 0; I < TriMIs.size(); ++I) { + auto *MI = TriMIs[I]; + auto OldPos = MBB->begin(); + std::advance(OldPos, I); + auto CurPos = MI->getIterator(); + if (CurPos != OldPos) { + MBB->splice(OldPos, MBB, CurPos); + Context->LIS->handleMove(*MI, /*UpdateFlags=*/false); + } + } +} + +SmallVector<unsigned> WindowScheduler::getSearchIndexes(unsigned SearchNum, + unsigned SearchRatio) { + // We use SearchRatio to get the index range, and then evenly get the indexes + // according to the SearchNum. This is a simple heuristic. Depending on the + // characteristics of the target, more complex algorithms can be used for both + // performance and compilation time. + assert(SearchRatio <= 100 && "SearchRatio should be equal or less than 100!"); + unsigned MaxIdx = SchedInstrNum * SearchRatio / 100; + unsigned Step = SearchNum > 0 && SearchNum <= MaxIdx ? MaxIdx / SearchNum : 1; + SmallVector<unsigned> SearchIndexes; + for (unsigned Idx = 0; Idx < MaxIdx; Idx += Step) + SearchIndexes.push_back(Idx); + return SearchIndexes; +} + +int WindowScheduler::getEstimatedII(ScheduleDAGInstrs &DAG) { + // Sometimes MaxDepth is 0, so it should be limited to the minimum of 1.
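// [Editor's worked example, using the default window-ii-coeff of 5 defined
// earlier in this file:] if the deepest DAG chain reaches depth 3 at an
// SUnit with latency 2, MaxDepth becomes max(3 + 2, 1) = 5 and the initial
// II estimate is 5 * 5 = 25, a deliberately loose budget for
// calculateMaxCycle().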
+ unsigned MaxDepth = 1; + for (auto &SU : DAG.SUnits) + MaxDepth = std::max(SU.getDepth() + SU.Latency, MaxDepth); + return MaxDepth * WindowIICoeff; +} + +int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG, + unsigned Offset) { + int InitII = getEstimatedII(DAG); + ResourceManager RM(Subtarget, &DAG); + RM.init(InitII); + // ResourceManager and DAG are used to calculate the maximum cycle for the + // scheduled MIs. Since MIs in the Region have already been scheduled, the + // emit cycles can be estimated in order here. + int CurCycle = 0; + auto Range = getScheduleRange(Offset, SchedInstrNum); + for (auto &MI : Range) { + auto *SU = DAG.getSUnit(&MI); + int ExpectCycle = CurCycle; + // The predecessors of current MI determine its earliest issue cycle. + for (auto &Pred : SU->Preds) { + if (Pred.isWeak()) + continue; + auto *PredMI = Pred.getSUnit()->getInstr(); + int PredCycle = getOriCycle(PredMI); + ExpectCycle = std::max(ExpectCycle, PredCycle + (int)Pred.getLatency()); + } + // ResourceManager can be used to detect resource conflicts between the + // current MI and the previously inserted MIs. + while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) { + ++CurCycle; + if (CurCycle == (int)WindowIILimit) + return CurCycle; + } + RM.reserveResources(*SU, CurCycle); + OriToCycle[getOriMI(&MI)] = CurCycle; + LLVM_DEBUG(dbgs() << "\tCycle " << CurCycle << " [S." + << getOriStage(getOriMI(&MI), Offset) << "]: " << MI); + } + LLVM_DEBUG(dbgs() << "MaxCycle is " << CurCycle << ".\n"); + return CurCycle; +} + +// By utilizing TripleDAG, we can easily establish dependencies between A and B. +// Based on the MaxCycle and the issue cycle of A and B, we can determine +// whether it is necessary to add a stall cycle. This is because, without +// inserting the stall cycle, the latency constraint between A and B cannot be +// satisfied. The details are as follows: +// +// New MBB: +// ======================================== +// < Phis > +// ======================================== (sliding direction) +// MBB copy 1 | +// V +// +// ~~~~~~~~~~~~~~~~~~~|~~~~~~~~~~~~~~~~~~~~ ----schedule window----- +// | | +// ===================V==================== | +// MBB copy 2 < MI B > | +// | +// < MI A > V +// ~~~~~~~~~~~~~~~~~~~:~~~~~~~~~~~~~~~~~~~~ ------------------------ +// : +// ===================V==================== +// MBB copy 3 < MI B'> +// +// +// +// +// ======================================== +// < Terminators > +// ======================================== +int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) { + int MaxStallCycle = 0; + auto Range = getScheduleRange(Offset, SchedInstrNum); + for (auto &MI : Range) { + auto *SU = TripleDAG->getSUnit(&MI); + int DefCycle = getOriCycle(&MI); + for (auto &Succ : SU->Succs) { + if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU) + continue; + // If the expected cycle does not exceed MaxCycle, no check is needed. + if (DefCycle + (int)Succ.getLatency() <= MaxCycle) + continue; + // If the cycle of the scheduled MI A is less than that of the scheduled + // MI B, the scheduling will fail because the lifetime of the + // corresponding register exceeds II. + auto *SuccMI = Succ.getSUnit()->getInstr(); + int UseCycle = getOriCycle(SuccMI); + if (DefCycle < UseCycle) + return WindowIILimit; + // Get the stall cycle introduced by the register between two trips. 
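// [Editor's worked example:] with DefCycle = 7, Succ latency = 4,
// MaxCycle = 9 and UseCycle = 1, the def is only ready 11 - 9 = 2 cycles
// into the next trip but is consumed at cycle 1, so
// StallCycle = 7 + 4 - 9 - 1 = 1 extra cycle must be inserted.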
+ int StallCycle = DefCycle + (int)Succ.getLatency() - MaxCycle - UseCycle; + MaxStallCycle = std::max(MaxStallCycle, StallCycle); + } + } + LLVM_DEBUG(dbgs() << "MaxStallCycle is " << MaxStallCycle << ".\n"); + return MaxStallCycle; +} + +unsigned WindowScheduler::analyseII(ScheduleDAGInstrs &DAG, unsigned Offset) { + LLVM_DEBUG(dbgs() << "Start analyzing II:\n"); + int MaxCycle = calculateMaxCycle(DAG, Offset); + if (MaxCycle == (int)WindowIILimit) + return MaxCycle; + int StallCycle = calculateStallCycle(Offset, MaxCycle); + if (StallCycle == (int)WindowIILimit) + return StallCycle; + // The value of II is equal to the maximum execution cycle plus 1. + return MaxCycle + StallCycle + 1; +} + +void WindowScheduler::schedulePhi(int Offset, unsigned &II) { + LLVM_DEBUG(dbgs() << "Start scheduling Phis:\n"); + for (auto &Phi : MBB->phis()) { + int LateCycle = INT_MAX; + auto *SU = TripleDAG->getSUnit(&Phi); + for (auto &Succ : SU->Succs) { + // Phi doesn't have any Anti successors. + if (Succ.getKind() != SDep::Data) + continue; + // Phi is scheduled before the successor of stage 0. The issue cycle of + // phi is the latest cycle in this interval. + auto *SuccMI = Succ.getSUnit()->getInstr(); + int Cycle = getOriCycle(SuccMI); + if (getOriStage(getOriMI(SuccMI), Offset) == 0) + LateCycle = std::min(LateCycle, Cycle); + } + // The anti-dependency of phi needs to be handled separately in the same way. + if (Register AntiReg = getAntiRegister(&Phi)) { + auto *AntiMI = MRI->getVRegDef(AntiReg); + // AntiReg may be defined outside the kernel MBB. + if (AntiMI->getParent() == MBB) { + auto AntiCycle = getOriCycle(AntiMI); + if (getOriStage(getOriMI(AntiMI), Offset) == 0) + LateCycle = std::min(LateCycle, AntiCycle); + } + } + // If there is no limit to the late cycle, a default value is given. + if (LateCycle == INT_MAX) + LateCycle = (int)(II - 1); + LLVM_DEBUG(dbgs() << "\tCycle range [0, " << LateCycle << "] " << Phi); + // The issue cycle of phi is set to the latest cycle in the interval. + auto *OriPhi = getOriMI(&Phi); + OriToCycle[OriPhi] = LateCycle; + } +} + +DenseMap<MachineInstr *, int> WindowScheduler::getIssueOrder(unsigned Offset, + unsigned II) { + // At each issue cycle, phi is placed before MIs in stage 0. So the simplest + // way is to put phi at the beginning of the current cycle. + DenseMap<int, SmallVector<MachineInstr *>> CycleToMIs; + auto Range = getScheduleRange(Offset, SchedInstrNum); + for (auto &Phi : MBB->phis()) + CycleToMIs[getOriCycle(&Phi)].push_back(getOriMI(&Phi)); + for (auto &MI : Range) + CycleToMIs[getOriCycle(&MI)].push_back(getOriMI(&MI)); + // Each MI is assigned a separate ordered Id, which is used as a sort marker + // in the following expand process. + DenseMap<MachineInstr *, int> IssueOrder; + int Id = 0; + for (int Cycle = 0; Cycle < (int)II; ++Cycle) { + if (!CycleToMIs.count(Cycle)) + continue; + for (auto *MI : CycleToMIs[Cycle]) + IssueOrder[MI] = Id++; + } + return IssueOrder; +} + +void WindowScheduler::updateScheduleResult(unsigned Offset, unsigned II) { + // At the first update, Offset is equal to SchedPhiNum. At this time, only + // BestII, BestOffset, and BaseII need to be updated. + if (Offset == SchedPhiNum) { + BestII = II; + BestOffset = SchedPhiNum; + BaseII = II; + return; + } + // The update will only continue if the II is smaller than BestII and the II + // is sufficiently small.
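// [Editor's worked example, using the default window-diff-limit of 2:] with
// BaseII = 20 and BestII = 20, a candidate II = 19 is rejected because
// 19 + 2 > 20, while II = 18 passes both checks below and becomes the new
// BestII.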
+ if ((II >= BestII) || (II + WindowDiffLimit > BaseII)) + return; + BestII = II; + BestOffset = Offset; + // Record the result of the current list scheduling, noting that each MI is + // stored unordered in SchedResult. + SchedResult.clear(); + auto IssueOrder = getIssueOrder(Offset, II); + for (auto &Pair : OriToCycle) { + assert(IssueOrder.count(Pair.first) && "Cannot find original MI!"); + SchedResult.push_back(std::make_tuple(Pair.first, Pair.second, + getOriStage(Pair.first, Offset), + IssueOrder[Pair.first])); + } +} + +void WindowScheduler::expand() { + // The MIs in the SchedResult are sorted by the issue order ID. + llvm::stable_sort(SchedResult, + [](const std::tuple<MachineInstr *, int, int, int> &A, + const std::tuple<MachineInstr *, int, int, int> &B) { + return std::get<3>(A) < std::get<3>(B); + }); + // Use the scheduling infrastructure for expansion, noting that InstrChanges + // is not supported here. + DenseMap<MachineInstr *, int> Cycles, Stages; + std::vector<MachineInstr *> OrderedInsts; + for (auto &Info : SchedResult) { + auto *MI = std::get<0>(Info); + OrderedInsts.push_back(MI); + Cycles[MI] = std::get<1>(Info); + Stages[MI] = std::get<2>(Info); + LLVM_DEBUG(dbgs() << "\tCycle " << Cycles[MI] << " [S." << Stages[MI] + << "]: " << *MI); + } + ModuloSchedule MS(*MF, &Loop, std::move(OrderedInsts), std::move(Cycles), + std::move(Stages)); + ModuloScheduleExpander MSE(*MF, MS, *Context->LIS, + ModuloScheduleExpander::InstrChangesTy()); + MSE.expand(); + MSE.cleanup(); +} + +void WindowScheduler::updateLiveIntervals() { + SmallVector<Register, 128> UsedRegs; + for (MachineInstr &MI : *MBB) + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || MO.getReg() == 0) + continue; + Register Reg = MO.getReg(); + if (!is_contained(UsedRegs, Reg)) + UsedRegs.push_back(Reg); + } + Context->LIS->repairIntervalsInRange(MBB, MBB->begin(), MBB->end(), UsedRegs); +} + +iterator_range<MachineBasicBlock::iterator> +WindowScheduler::getScheduleRange(unsigned Offset, unsigned Num) { + auto RegionBegin = MBB->begin(); + std::advance(RegionBegin, Offset); + auto RegionEnd = RegionBegin; + std::advance(RegionEnd, Num); + return make_range(RegionBegin, RegionEnd); +} + +int WindowScheduler::getOriCycle(MachineInstr *NewMI) { + assert(TriToOri.count(NewMI) && "Cannot find original MI!"); + auto *OriMI = TriToOri[NewMI]; + assert(OriToCycle.count(OriMI) && "Cannot find schedule cycle!"); + return OriToCycle[OriMI]; +} + +MachineInstr *WindowScheduler::getOriMI(MachineInstr *NewMI) { + assert(TriToOri.count(NewMI) && "Cannot find original MI!"); + return TriToOri[NewMI]; +} + +unsigned WindowScheduler::getOriStage(MachineInstr *OriMI, unsigned Offset) { + assert(llvm::find(OriMIs, OriMI) != OriMIs.end() && + "Cannot find OriMI in OriMIs!"); + // If there is no instruction fold, all MI stages are 0. + if (Offset == SchedPhiNum) + return 0; + // For those MIs with an ID less than the Offset, their stages are set to 0, + // while the rest are set to 1. + unsigned Id = 0; + for (auto *MI : OriMIs) { + if (MI->isMetaInstruction()) + continue; + if (MI == OriMI) + break; + ++Id; + } + return Id >= (size_t)Offset ? 
1 : 0; +} + +Register WindowScheduler::getAntiRegister(MachineInstr *Phi) { + assert(Phi->isPHI() && "Expecting PHI!"); + Register AntiReg; + for (auto MO : Phi->uses()) { + if (MO.isReg()) + AntiReg = MO.getReg(); + else if (MO.isMBB() && MO.getMBB() == MBB) + return AntiReg; + } + return 0; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp index d40725838c94..d7cc5d5c2b41 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp @@ -52,8 +52,8 @@ struct XRayInstrumentation : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addPreserved<MachineLoopInfo>(); - AU.addPreserved<MachineDominatorTree>(); + AU.addPreserved<MachineLoopInfoWrapperPass>(); + AU.addPreserved<MachineDominatorTreeWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -170,7 +170,9 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { if (!IgnoreLoops) { // Get MachineDominatorTree or compute it on the fly if it's unavailable - auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>(); + auto *MDTWrapper = + getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>(); + auto *MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr; MachineDominatorTree ComputedMDT; if (!MDT) { ComputedMDT.getBase().recalculate(MF); @@ -178,10 +180,11 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { } // Get MachineLoopInfo or compute it on the fly if it's unavailable - auto *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); + auto *MLIWrapper = getAnalysisIfAvailable<MachineLoopInfoWrapperPass>(); + auto *MLI = MLIWrapper ? &MLIWrapper->getLI() : nullptr; MachineLoopInfo ComputedMLI; if (!MLI) { - ComputedMLI.getBase().analyze(MDT->getBase()); + ComputedMLI.analyze(MDT->getBase()); MLI = &ComputedMLI; } @@ -264,6 +267,6 @@ char XRayInstrumentation::ID = 0; char &llvm::XRayInstrumentationID = XRayInstrumentation::ID; INITIALIZE_PASS_BEGIN(XRayInstrumentation, "xray-instrumentation", "Insert XRay ops", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_END(XRayInstrumentation, "xray-instrumentation", "Insert XRay ops", false, false) |
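One idiom recurs throughout this merge (VirtRegMap.cpp and XRayInstrumentation.cpp above, among many earlier hunks): legacy passes now require the *WrapperPass shims and unwrap them at the point of use. A minimal sketch of the updated idiom inside a legacy MachineFunctionPass, using only wrapper classes and accessors that appear in the hunks above:

  // Declare the dependency on the wrapper pass:
  AU.addRequired<LiveIntervalsWrapperPass>();
  // Unwrap the analysis result where it is used:
  LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
  // Optional analyses follow the same shape:
  auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
  MachineDominatorTree *MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;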