diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2020-07-31 21:22:58 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2020-07-31 21:22:58 +0000 |
commit | 5ffd83dbcc34f10e07f6d3e968ae6365869615f4 (patch) | |
tree | 0e9f5cf729dde39f949698fddef45a34e2bc7f44 /contrib/llvm-project/llvm/lib/CodeGen | |
parent | 1799696096df87b52968b8996d00c91e0a5de8d9 (diff) | |
parent | cfca06d7963fa0909f90483b42a6d7d194d01e08 (diff) | |
download | src-5ffd83dbcc34f10e07f6d3e968ae6365869615f4.tar.gz src-5ffd83dbcc34f10e07f6d3e968ae6365869615f4.zip |
Merge llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp
master 2e10b7a39b9, the last commit before the llvmorg-12-init tag, from
which release/11.x was branched.
Note that for now, I rolled back all our local changes to make merging
easier, and I will reapply the still-relevant ones after updating to
11.0.0-rc1.
Notes
Notes:
svn path=/projects/clang1100-import/; revision=363742
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen')
224 files changed, 20917 insertions, 11903 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index f64b775a8b77..acf8553f7205 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -15,7 +15,6 @@ #include "AggressiveAntiDepBreaker.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -28,7 +27,6 @@ #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CommandLine.h" @@ -36,10 +34,7 @@ #include "llvm/Support/MachineValueType.h" #include "llvm/Support/raw_ostream.h" #include <cassert> -#include <map> -#include <set> #include <utility> -#include <vector> using namespace llvm; @@ -1011,3 +1006,9 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( return Broken; } + +AntiDepBreaker *llvm::createAggressiveAntiDepBreaker( + MachineFunction &MFi, const RegisterClassInfo &RCI, + TargetSubtargetInfo::RegClassVector &CriticalPathRCs) { + return new AggressiveAntiDepBreaker(MFi, RCI, CriticalPathRCs); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h index 0cf2e6d78f7f..419cb7626945 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -16,8 +16,8 @@ #ifndef LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H #define LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H -#include "AntiDepBreaker.h" #include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/AntiDepBreaker.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Compiler.h" #include <map> diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h b/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h index 9247dd844936..fa0690ab4ea5 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h @@ -17,8 +17,9 @@ #define LLVM_LIB_CODEGEN_ALLOCATIONORDER_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCRegister.h" namespace llvm { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp index 1632895fe5fa..7da28ffec85c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/GlobalStatus.h" using namespace llvm; @@ -312,8 +313,8 @@ static const Value *getNoopInput(const Value *V, DataBits = std::min((uint64_t)DataBits, I->getType()->getPrimitiveSizeInBits().getFixedSize()); NoopInput = Op; - } else if (auto CS = ImmutableCallSite(I)) { - const Value *ReturnedOp = CS.getReturnedArgOperand(); + } else if (auto *CB = dyn_cast<CallBase>(I)) { + const Value *ReturnedOp = CB->getReturnedArgOperand(); if (ReturnedOp && isNoopBitcast(ReturnedOp->getType(), 
I->getType(), TLI)) NoopInput = ReturnedOp; } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) { @@ -395,7 +396,7 @@ static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal, /// For an aggregate type, determine whether a given index is within bounds or /// not. -static bool indexReallyValid(CompositeType *T, unsigned Idx) { +static bool indexReallyValid(Type *T, unsigned Idx) { if (ArrayType *AT = dyn_cast<ArrayType>(T)) return Idx < AT->getNumElements(); @@ -419,7 +420,7 @@ static bool indexReallyValid(CompositeType *T, unsigned Idx) { /// function again on a finished iterator will repeatedly return /// false. SubTypes.back()->getTypeAtIndex(Path.back()) is either an empty /// aggregate or a non-aggregate -static bool advanceToNextLeafType(SmallVectorImpl<CompositeType *> &SubTypes, +static bool advanceToNextLeafType(SmallVectorImpl<Type *> &SubTypes, SmallVectorImpl<unsigned> &Path) { // First march back up the tree until we can successfully increment one of the // coordinates in Path. @@ -435,16 +436,16 @@ static bool advanceToNextLeafType(SmallVectorImpl<CompositeType *> &SubTypes, // We know there's *some* valid leaf now, so march back down the tree picking // out the left-most element at each node. ++Path.back(); - Type *DeeperType = SubTypes.back()->getTypeAtIndex(Path.back()); + Type *DeeperType = + ExtractValueInst::getIndexedType(SubTypes.back(), Path.back()); while (DeeperType->isAggregateType()) { - CompositeType *CT = cast<CompositeType>(DeeperType); - if (!indexReallyValid(CT, 0)) + if (!indexReallyValid(DeeperType, 0)) return true; - SubTypes.push_back(CT); + SubTypes.push_back(DeeperType); Path.push_back(0); - DeeperType = CT->getTypeAtIndex(0U); + DeeperType = ExtractValueInst::getIndexedType(DeeperType, 0); } return true; @@ -460,17 +461,15 @@ static bool advanceToNextLeafType(SmallVectorImpl<CompositeType *> &SubTypes, /// For example, if Next was {[0 x i64], {{}, i32, {}}, i32} then we would setup /// Path as [1, 1] and SubTypes as [Next, {{}, i32, {}}] to represent the first /// i32 in that type. -static bool firstRealType(Type *Next, - SmallVectorImpl<CompositeType *> &SubTypes, +static bool firstRealType(Type *Next, SmallVectorImpl<Type *> &SubTypes, SmallVectorImpl<unsigned> &Path) { // First initialise the iterator components to the first "leaf" node // (i.e. node with no valid sub-type at any index, so {} does count as a leaf // despite nominally being an aggregate). - while (Next->isAggregateType() && - indexReallyValid(cast<CompositeType>(Next), 0)) { - SubTypes.push_back(cast<CompositeType>(Next)); + while (Type *FirstInner = ExtractValueInst::getIndexedType(Next, 0)) { + SubTypes.push_back(Next); Path.push_back(0); - Next = cast<CompositeType>(Next)->getTypeAtIndex(0U); + Next = FirstInner; } // If there's no Path now, Next was originally scalar already (or empty @@ -480,7 +479,8 @@ static bool firstRealType(Type *Next, // Otherwise, use normal iteration to keep looking through the tree until we // find a non-aggregate type. - while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType()) { + while (ExtractValueInst::getIndexedType(SubTypes.back(), Path.back()) + ->isAggregateType()) { if (!advanceToNextLeafType(SubTypes, Path)) return false; } @@ -490,14 +490,15 @@ static bool firstRealType(Type *Next, /// Set the iterator data-structures to the next non-empty, non-aggregate /// subtype. 
-static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes, +static bool nextRealType(SmallVectorImpl<Type *> &SubTypes, SmallVectorImpl<unsigned> &Path) { do { if (!advanceToNextLeafType(SubTypes, Path)) return false; assert(!Path.empty() && "found a leaf but didn't set the path?"); - } while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType()); + } while (ExtractValueInst::getIndexedType(SubTypes.back(), Path.back()) + ->isAggregateType()); return true; } @@ -509,9 +510,8 @@ static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes, /// between it and the return. /// /// This function only tests target-independent requirements. -bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) { - const Instruction *I = CS.getInstruction(); - const BasicBlock *ExitBB = I->getParent(); +bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM) { + const BasicBlock *ExitBB = Call.getParent(); const Instruction *Term = ExitBB->getTerminator(); const ReturnInst *Ret = dyn_cast<ReturnInst>(Term); @@ -525,33 +525,32 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) { // been fully understood. if (!Ret && ((!TM.Options.GuaranteedTailCallOpt && - CS.getCallingConv() != CallingConv::Tail) || !isa<UnreachableInst>(Term))) + Call.getCallingConv() != CallingConv::Tail) || !isa<UnreachableInst>(Term))) return false; // If I will have a chain, make sure no other instruction that will have a // chain interposes between I and the return. - if (I->mayHaveSideEffects() || I->mayReadFromMemory() || - !isSafeToSpeculativelyExecute(I)) - for (BasicBlock::const_iterator BBI = std::prev(ExitBB->end(), 2);; --BBI) { - if (&*BBI == I) - break; - // Debug info intrinsics do not get in the way of tail call optimization. - if (isa<DbgInfoIntrinsic>(BBI)) + // Check for all calls including speculatable functions. + for (BasicBlock::const_iterator BBI = std::prev(ExitBB->end(), 2);; --BBI) { + if (&*BBI == &Call) + break; + // Debug info intrinsics do not get in the way of tail call optimization. + if (isa<DbgInfoIntrinsic>(BBI)) + continue; + // A lifetime end or assume intrinsic should not stop tail call + // optimization. + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI)) + if (II->getIntrinsicID() == Intrinsic::lifetime_end || + II->getIntrinsicID() == Intrinsic::assume) continue; - // A lifetime end or assume intrinsic should not stop tail call - // optimization. 
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI)) - if (II->getIntrinsicID() == Intrinsic::lifetime_end || - II->getIntrinsicID() == Intrinsic::assume) - continue; - if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || - !isSafeToSpeculativelyExecute(&*BBI)) - return false; - } + if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || + !isSafeToSpeculativelyExecute(&*BBI)) + return false; + } const Function *F = ExitBB->getParent(); return returnTypeIsEligibleForTailCall( - F, I, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering()); + F, &Call, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering()); } bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I, @@ -669,7 +668,7 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F, } SmallVector<unsigned, 4> RetPath, CallPath; - SmallVector<CompositeType *, 4> RetSubTypes, CallSubTypes; + SmallVector<Type *, 4> RetSubTypes, CallSubTypes; bool RetEmpty = !firstRealType(RetVal->getType(), RetSubTypes, RetPath); bool CallEmpty = !firstRealType(CallVal->getType(), CallSubTypes, CallPath); @@ -692,7 +691,8 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F, // We've exhausted the values produced by the tail call instruction, the // rest are essentially undef. The type doesn't really matter, but we need // *something*. - Type *SlotType = RetSubTypes.back()->getTypeAtIndex(RetPath.back()); + Type *SlotType = + ExtractValueInst::getIndexedType(RetSubTypes.back(), RetPath.back()); CallVal = UndefValue::get(SlotType); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AntiDepBreaker.h b/contrib/llvm-project/llvm/lib/CodeGen/AntiDepBreaker.h deleted file mode 100644 index b11148595136..000000000000 --- a/contrib/llvm-project/llvm/lib/CodeGen/AntiDepBreaker.h +++ /dev/null @@ -1,87 +0,0 @@ -//===- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the AntiDepBreaker class, which implements -// anti-dependence breaking heuristics for post-register-allocation scheduling. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H -#define LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H - -#include "llvm/ADT/iterator_range.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Support/Compiler.h" -#include <cassert> -#include <utility> -#include <vector> - -namespace llvm { - -/// This class works in conjunction with the post-RA scheduler to rename -/// registers to break register anti-dependencies (WAR hazards). -class LLVM_LIBRARY_VISIBILITY AntiDepBreaker { -public: - using DbgValueVector = - std::vector<std::pair<MachineInstr *, MachineInstr *>>; - - virtual ~AntiDepBreaker(); - - /// Initialize anti-dep breaking for a new basic block. - virtual void StartBlock(MachineBasicBlock *BB) = 0; - - /// Identifiy anti-dependencies within a basic-block region and break them by - /// renaming registers. Return the number of anti-dependencies broken. 
- virtual unsigned BreakAntiDependencies(const std::vector<SUnit> &SUnits, - MachineBasicBlock::iterator Begin, - MachineBasicBlock::iterator End, - unsigned InsertPosIndex, - DbgValueVector &DbgValues) = 0; - - /// Update liveness information to account for the current - /// instruction, which will not be scheduled. - virtual void Observe(MachineInstr &MI, unsigned Count, - unsigned InsertPosIndex) = 0; - - /// Finish anti-dep breaking for a basic block. - virtual void FinishBlock() = 0; - - /// Update DBG_VALUE if dependency breaker is updating - /// other machine instruction to use NewReg. - void UpdateDbgValue(MachineInstr &MI, unsigned OldReg, unsigned NewReg) { - assert(MI.isDebugValue() && "MI is not DBG_VALUE!"); - if (MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == OldReg) - MI.getOperand(0).setReg(NewReg); - } - - /// Update all DBG_VALUE instructions that may be affected by the dependency - /// breaker's update of ParentMI to use NewReg. - void UpdateDbgValues(const DbgValueVector &DbgValues, MachineInstr *ParentMI, - unsigned OldReg, unsigned NewReg) { - // The following code is dependent on the order in which the DbgValues are - // constructed in ScheduleDAGInstrs::buildSchedGraph. - MachineInstr *PrevDbgMI = nullptr; - for (const auto &DV : make_range(DbgValues.crbegin(), DbgValues.crend())) { - MachineInstr *PrevMI = DV.second; - if ((PrevMI == ParentMI) || (PrevMI == PrevDbgMI)) { - MachineInstr *DbgMI = DV.first; - UpdateDbgValue(*DbgMI, OldReg, NewReg); - PrevDbgMI = DbgMI; - } else if (PrevDbgMI) { - break; // If no match and already found a DBG_VALUE, we're done. - } - } - } -}; - -} // end namespace llvm - -#endif // LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp index f6ef85a5b78f..b634b24377fe 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -46,12 +46,12 @@ void ARMException::beginFunction(const MachineFunction *MF) { if (MoveType == AsmPrinter::CFI_M_Debug) { if (!hasEmittedCFISections) { if (Asm->needsOnlyDebugCFIMoves()) - Asm->OutStreamer->EmitCFISections(false, true); + Asm->OutStreamer->emitCFISections(false, true); hasEmittedCFISections = true; } shouldEmitCFI = true; - Asm->OutStreamer->EmitCFIStartProc(false); + Asm->OutStreamer->emitCFIStartProc(false); } } @@ -75,7 +75,7 @@ void ARMException::endFunction(const MachineFunction *MF) { // Emit references to personality. if (Per) { MCSymbol *PerSym = Asm->getSymbol(Per); - Asm->OutStreamer->EmitSymbolAttribute(PerSym, MCSA_Global); + Asm->OutStreamer->emitSymbolAttribute(PerSym, MCSA_Global); ATS.emitPersonality(PerSym); } @@ -109,10 +109,10 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding, for (const GlobalValue *GV : reverse(TypeInfos)) { if (VerboseAsm) Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--)); - Asm->EmitTTypeReference(GV, TTypeEncoding); + Asm->emitTTypeReference(GV, TTypeEncoding); } - Asm->OutStreamer->EmitLabel(TTBaseLabel); + Asm->OutStreamer->emitLabel(TTBaseLabel); // Emit the Exception Specifications. if (VerboseAsm && !FilterIds.empty()) { @@ -129,7 +129,7 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding, Asm->OutStreamer->AddComment("FilterInfo " + Twine(Entry)); } - Asm->EmitTTypeReference((TypeID == 0 ? nullptr : TypeInfos[TypeID - 1]), + Asm->emitTTypeReference((TypeID == 0 ? 
nullptr : TypeInfos[TypeID - 1]), TTypeEncoding); } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp index b1b7921ea976..dea0227f7578 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -271,7 +271,7 @@ void AccelTableWriter::emitOffsets(const MCSymbol *Base) const { continue; PrevHash = HashValue; Asm->OutStreamer->AddComment("Offset in Bucket " + Twine(i)); - Asm->EmitLabelDifference(Hash->Sym, Base, sizeof(uint32_t)); + Asm->emitLabelDifference(Hash->Sym, Base, sizeof(uint32_t)); } } } @@ -337,7 +337,7 @@ void AppleAccelTableWriter::emitData() const { PrevHash != Hash->HashValue) Asm->emitInt32(0); // Remember to emit the label for our offset. - Asm->OutStreamer->EmitLabel(Hash->Sym); + Asm->OutStreamer->emitLabel(Hash->Sym); Asm->OutStreamer->AddComment(Hash->Name.getString()); Asm->emitDwarfStringOffset(Hash->Name); Asm->OutStreamer->AddComment("Num DIEs"); @@ -368,9 +368,9 @@ void Dwarf5AccelTableWriter<DataT>::Header::emit( AsmPrinter *Asm = Ctx.Asm; Asm->OutStreamer->AddComment("Header: unit length"); - Asm->EmitLabelDifference(Ctx.ContributionEnd, Ctx.ContributionStart, + Asm->emitLabelDifference(Ctx.ContributionEnd, Ctx.ContributionStart, sizeof(uint32_t)); - Asm->OutStreamer->EmitLabel(Ctx.ContributionStart); + Asm->OutStreamer->emitLabel(Ctx.ContributionStart); Asm->OutStreamer->AddComment("Header: version"); Asm->emitInt16(Version); Asm->OutStreamer->AddComment("Header: padding"); @@ -386,12 +386,12 @@ void Dwarf5AccelTableWriter<DataT>::Header::emit( Asm->OutStreamer->AddComment("Header: name count"); Asm->emitInt32(NameCount); Asm->OutStreamer->AddComment("Header: abbreviation table size"); - Asm->EmitLabelDifference(Ctx.AbbrevEnd, Ctx.AbbrevStart, sizeof(uint32_t)); + Asm->emitLabelDifference(Ctx.AbbrevEnd, Ctx.AbbrevStart, sizeof(uint32_t)); Asm->OutStreamer->AddComment("Header: augmentation string size"); assert(AugmentationStringSize % 4 == 0); Asm->emitInt32(AugmentationStringSize); Asm->OutStreamer->AddComment("Header: augmentation string"); - Asm->OutStreamer->EmitBytes({AugmentationString, AugmentationStringSize}); + Asm->OutStreamer->emitBytes({AugmentationString, AugmentationStringSize}); } template <typename DataT> @@ -453,23 +453,23 @@ void Dwarf5AccelTableWriter<DataT>::emitStringOffsets() const { template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emitAbbrevs() const { - Asm->OutStreamer->EmitLabel(AbbrevStart); + Asm->OutStreamer->emitLabel(AbbrevStart); for (const auto &Abbrev : Abbreviations) { Asm->OutStreamer->AddComment("Abbrev code"); assert(Abbrev.first != 0); - Asm->EmitULEB128(Abbrev.first); + Asm->emitULEB128(Abbrev.first); Asm->OutStreamer->AddComment(dwarf::TagString(Abbrev.first)); - Asm->EmitULEB128(Abbrev.first); + Asm->emitULEB128(Abbrev.first); for (const auto &AttrEnc : Abbrev.second) { - Asm->EmitULEB128(AttrEnc.Index, dwarf::IndexString(AttrEnc.Index).data()); - Asm->EmitULEB128(AttrEnc.Form, + Asm->emitULEB128(AttrEnc.Index, dwarf::IndexString(AttrEnc.Index).data()); + Asm->emitULEB128(AttrEnc.Form, dwarf::FormEncodingString(AttrEnc.Form).data()); } - Asm->EmitULEB128(0, "End of abbrev"); - Asm->EmitULEB128(0, "End of abbrev"); + Asm->emitULEB128(0, "End of abbrev"); + Asm->emitULEB128(0, "End of abbrev"); } - Asm->EmitULEB128(0, "End of abbrev list"); - Asm->OutStreamer->EmitLabel(AbbrevEnd); + Asm->emitULEB128(0, "End of abbrev list"); + 
Asm->OutStreamer->emitLabel(AbbrevEnd); } template <typename DataT> @@ -478,13 +478,13 @@ void Dwarf5AccelTableWriter<DataT>::emitEntry(const DataT &Entry) const { assert(AbbrevIt != Abbreviations.end() && "Why wasn't this abbrev generated?"); - Asm->EmitULEB128(AbbrevIt->first, "Abbreviation code"); + Asm->emitULEB128(AbbrevIt->first, "Abbreviation code"); for (const auto &AttrEnc : AbbrevIt->second) { Asm->OutStreamer->AddComment(dwarf::IndexString(AttrEnc.Index)); switch (AttrEnc.Index) { case dwarf::DW_IDX_compile_unit: { DIEInteger ID(getCUIndexForEntry(Entry)); - ID.EmitValue(Asm, AttrEnc.Form); + ID.emitValue(Asm, AttrEnc.Form); break; } case dwarf::DW_IDX_die_offset: @@ -498,11 +498,11 @@ void Dwarf5AccelTableWriter<DataT>::emitEntry(const DataT &Entry) const { } template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emitData() const { - Asm->OutStreamer->EmitLabel(EntryPool); + Asm->OutStreamer->emitLabel(EntryPool); for (auto &Bucket : Contents.getBuckets()) { for (auto *Hash : Bucket) { // Remember to emit the label for our offset. - Asm->OutStreamer->EmitLabel(Hash->Sym); + Asm->OutStreamer->emitLabel(Hash->Sym); for (const auto *Value : Hash->Values) emitEntry(*static_cast<const DataT *>(Value)); Asm->OutStreamer->AddComment("End of list: " + Hash->Name.getString()); @@ -537,8 +537,8 @@ template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emit() const { emitOffsets(EntryPool); emitAbbrevs(); emitData(); - Asm->OutStreamer->EmitValueToAlignment(4, 0); - Asm->OutStreamer->EmitLabel(ContributionEnd); + Asm->OutStreamer->emitValueToAlignment(4, 0); + Asm->OutStreamer->emitLabel(ContributionEnd); } void llvm::emitAppleAccelTableImpl(AsmPrinter *Asm, AccelTableBase &Contents, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp index f11c7de5ed8a..883aaf5aefc4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -30,9 +30,9 @@ MCSymbol *AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) { MCSymbol *EndLabel = Asm.createTempSymbol(Prefix + "end"); Asm.OutStreamer->AddComment("Length of contribution"); - Asm.EmitLabelDifference(EndLabel, BeginLabel, + Asm.emitLabelDifference(EndLabel, BeginLabel, 4); // TODO: Support DWARF64 format. - Asm.OutStreamer->EmitLabel(BeginLabel); + Asm.OutStreamer->emitLabel(BeginLabel); Asm.OutStreamer->AddComment("DWARF version number"); Asm.emitInt16(Asm.getDwarfVersion()); Asm.OutStreamer->AddComment("Address size"); @@ -58,7 +58,7 @@ void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) { // Define the symbol that marks the start of the contribution. // It is referenced via DW_AT_addr_base. 
- Asm.OutStreamer->EmitLabel(AddressTableBaseSym); + Asm.OutStreamer->emitLabel(AddressTableBaseSym); // Order the address pool entries by ID SmallVector<const MCExpr *, 64> Entries(Pool.size()); @@ -70,8 +70,8 @@ void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) { : MCSymbolRefExpr::create(I.first, Asm.OutContext); for (const MCExpr *Entry : Entries) - Asm.OutStreamer->EmitValue(Entry, Asm.getDataLayout().getPointerSize()); + Asm.OutStreamer->emitValue(Entry, Asm.getDataLayout().getPointerSize()); if (EndLabel) - Asm.OutStreamer->EmitLabel(EndLabel); + Asm.OutStreamer->emitLabel(EndLabel); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 20cd9da31fbd..f8f7b74baf91 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -31,16 +31,13 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/GCStrategy.h" -#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -55,7 +52,6 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" -#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -81,7 +77,6 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" -#include "llvm/IR/RemarkStreamer.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/MC/MCAsmInfo.h" @@ -106,6 +101,7 @@ #include "llvm/Pass.h" #include "llvm/Remarks/Remark.h" #include "llvm/Remarks/RemarkFormat.h" +#include "llvm/Remarks/RemarkStreamer.h" #include "llvm/Remarks/RemarkStringTable.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -161,11 +157,11 @@ static gcp_map_type &getGCMap(void *&P) { /// getGVAlignment - Return the alignment to use for the specified global /// value. This rounds up to the preferred alignment if possible and legal. -Align AsmPrinter::getGVAlignment(const GlobalValue *GV, const DataLayout &DL, +Align AsmPrinter::getGVAlignment(const GlobalObject *GV, const DataLayout &DL, Align InAlign) { Align Alignment; if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) - Alignment = Align(DL.getPreferredAlignment(GVar)); + Alignment = DL.getPreferredAlign(GVar); // If InAlign is specified, round it to it. 
if (InAlign > Alignment) @@ -231,7 +227,7 @@ const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const { } void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) { - S.EmitInstruction(Inst, getSubtargetInfo()); + S.emitInstruction(Inst, getSubtargetInfo()); } void AsmPrinter::emitInitialRawDwarfLocDirective(const MachineFunction &MF) { @@ -248,11 +244,8 @@ const MCSection *AsmPrinter::getCurrentSection() const { void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired<MachineModuleInfoWrapperPass>(); AU.addRequired<MachineOptimizationRemarkEmitterPass>(); AU.addRequired<GCModuleInfo>(); - AU.addRequired<LazyMachineBlockFrequencyInfoPass>(); - AU.addRequired<ProfileSummaryInfoWrapperPass>(); } bool AsmPrinter::doInitialization(Module &M) { @@ -277,16 +270,16 @@ bool AsmPrinter::doInitialization(Module &M) { // use the directive, where it would need the same conditionalization // anyway. const Triple &Target = TM.getTargetTriple(); - OutStreamer->EmitVersionForTarget(Target, M.getSDKVersion()); + OutStreamer->emitVersionForTarget(Target, M.getSDKVersion()); // Allow the target to emit any magic that it wants at the start of the file. - EmitStartOfAsmFile(M); + emitStartOfAsmFile(M); // Very minimal debug info. It is ignored if we emit actual debug info. If we // don't, this at least helps the user find where a global came from. if (MAI->hasSingleParameterDotFile()) { // .file "foo.c" - OutStreamer->EmitFileDirective( + OutStreamer->emitFileDirective( llvm::sys::path::filename(M.getSourceFileName())); } @@ -305,21 +298,21 @@ bool AsmPrinter::doInitialization(Module &M) { TM.getTargetFeatureString())); OutStreamer->AddComment("Start of file scope inline assembly"); OutStreamer->AddBlankLine(); - EmitInlineAsm(M.getModuleInlineAsm()+"\n", + emitInlineAsm(M.getModuleInlineAsm() + "\n", OutContext.getSubtargetCopy(*STI), TM.Options.MCOptions); OutStreamer->AddComment("End of file scope inline assembly"); OutStreamer->AddBlankLine(); } if (MAI->doesSupportDebugInformation()) { - bool EmitCodeView = MMI->getModule()->getCodeViewFlag(); + bool EmitCodeView = M.getCodeViewFlag(); if (EmitCodeView && TM.getTargetTriple().isOSWindows()) { Handlers.emplace_back(std::make_unique<CodeViewDebug>(this), DbgTimerName, DbgTimerDescription, CodeViewLineTablesGroupName, CodeViewLineTablesGroupDescription); } - if (!EmitCodeView || MMI->getModule()->getDwarfVersion()) { + if (!EmitCodeView || M.getDwarfVersion()) { DD = new DwarfDebug(this, &M); DD->beginModule(); Handlers.emplace_back(std::unique_ptr<DwarfDebug>(DD), DbgTimerName, @@ -382,8 +375,7 @@ bool AsmPrinter::doInitialization(Module &M) { DWARFGroupDescription); // Emit tables for any value of cfguard flag (i.e. cfguard=1 or cfguard=2). 
- if (mdconst::extract_or_null<ConstantInt>( - MMI->getModule()->getModuleFlag("cfguard"))) + if (mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("cfguard"))) Handlers.emplace_back(std::make_unique<WinCFGuard>(this), CFGuardName, CFGuardDescription, DWARFGroupName, DWARFGroupDescription); @@ -397,7 +389,7 @@ static bool canBeHidden(const GlobalValue *GV, const MCAsmInfo &MAI) { return GV->canBeOmittedFromSymbolTable(); } -void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { +void AsmPrinter::emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { GlobalValue::LinkageTypes Linkage = GV->getLinkage(); switch (Linkage) { case GlobalValue::CommonLinkage: @@ -407,35 +399,31 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { case GlobalValue::WeakODRLinkage: if (MAI->hasWeakDefDirective()) { // .globl _foo - OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global); + OutStreamer->emitSymbolAttribute(GVSym, MCSA_Global); if (!canBeHidden(GV, *MAI)) // .weak_definition _foo - OutStreamer->EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); + OutStreamer->emitSymbolAttribute(GVSym, MCSA_WeakDefinition); else - OutStreamer->EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate); - } else if (MAI->hasLinkOnceDirective()) { + OutStreamer->emitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate); + } else if (MAI->avoidWeakIfComdat() && GV->hasComdat()) { // .globl _foo - OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global); + OutStreamer->emitSymbolAttribute(GVSym, MCSA_Global); //NOTE: linkonce is handled by the section the symbol was assigned to. } else { // .weak _foo - OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Weak); + OutStreamer->emitSymbolAttribute(GVSym, MCSA_Weak); } return; case GlobalValue::ExternalLinkage: - // If external, declare as a global symbol: .globl _foo - OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global); + OutStreamer->emitSymbolAttribute(GVSym, MCSA_Global); return; case GlobalValue::PrivateLinkage: - return; case GlobalValue::InternalLinkage: - if (MAI->hasDotLGloblDirective()) - OutStreamer->EmitSymbolAttribute(GVSym, MCSA_LGlobal); return; - case GlobalValue::AppendingLinkage: - case GlobalValue::AvailableExternallyLinkage: case GlobalValue::ExternalWeakLinkage: + case GlobalValue::AvailableExternallyLinkage: + case GlobalValue::AppendingLinkage: llvm_unreachable("Should never emit this"); } llvm_unreachable("Unknown linkage type!"); @@ -450,8 +438,27 @@ MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const { return TM.getSymbol(GV); } +MCSymbol *AsmPrinter::getSymbolPreferLocal(const GlobalValue &GV) const { + // On ELF, use .Lfoo$local if GV is a non-interposable GlobalObject with an + // exact definion (intersection of GlobalValue::hasExactDefinition() and + // !isInterposable()). These linkages include: external, appending, internal, + // private. It may be profitable to use a local alias for external. The + // assembler would otherwise be conservative and assume a global default + // visibility symbol can be interposable, even if the code generator already + // assumed it. 
+ if (TM.getTargetTriple().isOSBinFormatELF() && GV.canBenefitFromLocalAlias()) { + const Module &M = *GV.getParent(); + if (TM.getRelocationModel() != Reloc::Static && + M.getPIELevel() == PIELevel::Default) + if (GV.isDSOLocal() || (TM.getTargetTriple().isX86() && + GV.getParent()->noSemanticInterposition())) + return getSymbolWithGlobalValueBase(&GV, "$local"); + } + return TM.getSymbol(&GV); +} + /// EmitGlobalVariable - Emit the specified global variable to the .s file. -void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { +void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { bool IsEmuTLSVar = TM.useEmulatedTLS() && GV->isThreadLocal(); assert(!(IsEmuTLSVar && GV->hasCommonLinkage()) && "No emulated TLS variables in the common section"); @@ -463,7 +470,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (GV->hasInitializer()) { // Check to see if this is a special global used by LLVM, if so, emit it. - if (EmitSpecialLLVMGlobal(GV)) + if (emitSpecialLLVMGlobal(GV)) return; // Skip the emission of global equivalents. The symbol can be emitted later @@ -486,7 +493,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // getOrCreateEmuTLSControlSym only creates the symbol with name and default // attributes. // GV's or GVSym's attributes will be used for the EmittedSym. - EmitVisibility(EmittedSym, GV->getVisibility(), !GV->isDeclaration()); + emitVisibility(EmittedSym, GV->getVisibility(), !GV->isDeclaration()); if (!GV->hasInitializer()) // External globals require no extra code. return; @@ -497,7 +504,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { "' is already defined"); if (MAI->hasDotTypeDotSizeDirective()) - OutStreamer->EmitSymbolAttribute(EmittedSym, MCSA_ELF_TypeObject); + OutStreamer->emitSymbolAttribute(EmittedSym, MCSA_ELF_TypeObject); SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); @@ -522,7 +529,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // .comm _foo, 42, 4 const bool SupportsAlignment = getObjFileLowering().getCommDirectiveSupportsAlignment(); - OutStreamer->EmitCommonSymbol(GVSym, Size, + OutStreamer->emitCommonSymbol(GVSym, Size, SupportsAlignment ? Alignment.value() : 0); return; } @@ -536,9 +543,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { TheSection->isVirtualSection()) { if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined. - EmitLinkage(GV, GVSym); + emitLinkage(GV, GVSym); // .zerofill __DATA, __bss, _foo, 400, 5 - OutStreamer->EmitZerofill(TheSection, GVSym, Size, Alignment.value()); + OutStreamer->emitZerofill(TheSection, GVSym, Size, Alignment.value()); return; } @@ -557,16 +564,16 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Prefer to simply fall back to .local / .comm in this case. if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) { // .lcomm _foo, 42 - OutStreamer->EmitLocalCommonSymbol(GVSym, Size, Alignment.value()); + OutStreamer->emitLocalCommonSymbol(GVSym, Size, Alignment.value()); return; } // .local _foo - OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Local); + OutStreamer->emitSymbolAttribute(GVSym, MCSA_Local); // .comm _foo, 42, 4 const bool SupportsAlignment = getObjFileLowering().getCommDirectiveSupportsAlignment(); - OutStreamer->EmitCommonSymbol(GVSym, Size, + OutStreamer->emitCommonSymbol(GVSym, Size, SupportsAlignment ? 
Alignment.value() : 0); return; } @@ -588,14 +595,14 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (GVKind.isThreadBSS()) { TheSection = getObjFileLowering().getTLSBSSSection(); - OutStreamer->EmitTBSSSymbol(TheSection, MangSym, Size, Alignment.value()); + OutStreamer->emitTBSSSymbol(TheSection, MangSym, Size, Alignment.value()); } else if (GVKind.isThreadData()) { OutStreamer->SwitchSection(TheSection); - EmitAlignment(Alignment, GV); - OutStreamer->EmitLabel(MangSym); + emitAlignment(Alignment, GV); + OutStreamer->emitLabel(MangSym); - EmitGlobalConstant(GV->getParent()->getDataLayout(), + emitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer()); } @@ -606,18 +613,18 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer->SwitchSection(TLVSect); // Emit the linkage here. - EmitLinkage(GV, GVSym); - OutStreamer->EmitLabel(GVSym); + emitLinkage(GV, GVSym); + OutStreamer->emitLabel(GVSym); // Three pointers in size: // - __tlv_bootstrap - used to make sure support exists // - spare pointer, used when mapped by the runtime // - pointer to mangled symbol above with initializer unsigned PtrSize = DL.getPointerTypeSize(GV->getType()); - OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), + OutStreamer->emitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), PtrSize); - OutStreamer->EmitIntValue(0, PtrSize); - OutStreamer->EmitSymbolValue(MangSym, PtrSize); + OutStreamer->emitIntValue(0, PtrSize); + OutStreamer->emitSymbolValue(MangSym, PtrSize); OutStreamer->AddBlankLine(); return; @@ -627,12 +634,15 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer->SwitchSection(TheSection); - EmitLinkage(GV, EmittedInitSym); - EmitAlignment(Alignment, GV); + emitLinkage(GV, EmittedInitSym); + emitAlignment(Alignment, GV); - OutStreamer->EmitLabel(EmittedInitSym); + OutStreamer->emitLabel(EmittedInitSym); + MCSymbol *LocalAlias = getSymbolPreferLocal(*GV); + if (LocalAlias != EmittedInitSym) + OutStreamer->emitLabel(LocalAlias); - EmitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer()); + emitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer()); if (MAI->hasDotTypeDotSizeDirective()) // .size foo, 42 @@ -646,13 +656,15 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { /// /// \p Value - The value to emit. /// \p Size - The size of the integer (in bytes) to emit. -void AsmPrinter::EmitDebugValue(const MCExpr *Value, unsigned Size) const { - OutStreamer->EmitValue(Value, Size); +void AsmPrinter::emitDebugValue(const MCExpr *Value, unsigned Size) const { + OutStreamer->emitValue(Value, Size); } +void AsmPrinter::emitFunctionHeaderComment() {} + /// EmitFunctionHeader - This method emits the header for the current /// function. -void AsmPrinter::EmitFunctionHeader() { +void AsmPrinter::emitFunctionHeader() { const Function &F = MF->getFunction(); if (isVerbose()) @@ -661,29 +673,32 @@ void AsmPrinter::EmitFunctionHeader() { << GlobalValue::dropLLVMManglingEscape(F.getName()) << '\n'; // Print out constants referenced by the function - EmitConstantPool(); + emitConstantPool(); // Print the 'header' of function. 
- OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(&F, TM)); - EmitVisibility(CurrentFnSym, F.getVisibility()); + MF->setSection(getObjFileLowering().SectionForGlobal(&F, TM)); + OutStreamer->SwitchSection(MF->getSection()); - if (MAI->needsFunctionDescriptors() && - F.getLinkage() != GlobalValue::InternalLinkage) - EmitLinkage(&F, CurrentFnDescSym); + if (!MAI->hasVisibilityOnlyWithLinkage()) + emitVisibility(CurrentFnSym, F.getVisibility()); - EmitLinkage(&F, CurrentFnSym); + if (MAI->needsFunctionDescriptors()) + emitLinkage(&F, CurrentFnDescSym); + + emitLinkage(&F, CurrentFnSym); if (MAI->hasFunctionAlignment()) - EmitAlignment(MF->getAlignment(), &F); + emitAlignment(MF->getAlignment(), &F); if (MAI->hasDotTypeDotSizeDirective()) - OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction); + OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction); if (F.hasFnAttribute(Attribute::Cold)) - OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_Cold); + OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_Cold); if (isVerbose()) { F.printAsOperand(OutStreamer->GetCommentOS(), /*PrintType=*/false, F.getParent()); + emitFunctionHeaderComment(); OutStreamer->GetCommentOS() << '\n'; } @@ -695,14 +710,14 @@ void AsmPrinter::EmitFunctionHeader() { // and use the .alt_entry attribute to mark the function's real entry point // as an alternative entry point to the prefix-data symbol. MCSymbol *PrefixSym = OutContext.createLinkerPrivateTempSymbol(); - OutStreamer->EmitLabel(PrefixSym); + OutStreamer->emitLabel(PrefixSym); - EmitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData()); + emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData()); // Emit an .alt_entry directive for the actual function symbol. - OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_AltEntry); + OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_AltEntry); } else { - EmitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData()); + emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData()); } } @@ -719,7 +734,7 @@ void AsmPrinter::EmitFunctionHeader() { if (PatchableFunctionPrefix) { CurrentPatchableFunctionEntrySym = OutContext.createLinkerPrivateTempSymbol(); - OutStreamer->EmitLabel(CurrentPatchableFunctionEntrySym); + OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym); emitNops(PatchableFunctionPrefix); } else if (PatchableFunctionEntry) { // May be reassigned when emitting the body, to reference the label after @@ -728,32 +743,24 @@ void AsmPrinter::EmitFunctionHeader() { } // Emit the function descriptor. This is a virtual function to allow targets - // to emit their specific function descriptor. + // to emit their specific function descriptor. Right now it is only used by + // the AIX target. The PowerPC 64-bit V1 ELF target also uses function + // descriptors and should be converted to use this hook as well. if (MAI->needsFunctionDescriptors()) - EmitFunctionDescriptor(); + emitFunctionDescriptor(); // Emit the CurrentFnSym. This is a virtual function to allow targets to do // their wild and crazy things as required. - EmitFunctionEntryLabel(); - - // If the function had address-taken blocks that got deleted, then we have - // references to the dangling symbols. Emit them at the start of the function - // so that we don't get references to undefined symbols. 
- std::vector<MCSymbol*> DeadBlockSyms; - MMI->takeDeletedSymbolsForFunction(&F, DeadBlockSyms); - for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) { - OutStreamer->AddComment("Address taken block that was later removed"); - OutStreamer->EmitLabel(DeadBlockSyms[i]); - } + emitFunctionEntryLabel(); if (CurrentFnBegin) { if (MAI->useAssignmentForEHBegin()) { MCSymbol *CurPos = OutContext.createTempSymbol(); - OutStreamer->EmitLabel(CurPos); - OutStreamer->EmitAssignment(CurrentFnBegin, + OutStreamer->emitLabel(CurPos); + OutStreamer->emitAssignment(CurrentFnBegin, MCSymbolRefExpr::create(CurPos, OutContext)); } else { - OutStreamer->EmitLabel(CurrentFnBegin); + OutStreamer->emitLabel(CurrentFnBegin); } } @@ -766,12 +773,12 @@ void AsmPrinter::EmitFunctionHeader() { // Emit the prologue data. if (F.hasPrologueData()) - EmitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData()); + emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData()); } /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the /// function. This can be overridden by targets as required to do custom stuff. -void AsmPrinter::EmitFunctionEntryLabel() { +void AsmPrinter::emitFunctionEntryLabel() { CurrentFnSym->redefineIfPossible(); // The function label could have already been emitted if two symbols end up @@ -783,7 +790,13 @@ void AsmPrinter::EmitFunctionEntryLabel() { report_fatal_error("'" + Twine(CurrentFnSym->getName()) + "' label emitted multiple times to assembly file"); - return OutStreamer->EmitLabel(CurrentFnSym); + OutStreamer->emitLabel(CurrentFnSym); + + if (TM.getTargetTriple().isOSBinFormatELF()) { + MCSymbol *Sym = getSymbolPreferLocal(MF->getFunction()); + if (Sym != CurrentFnSym) + OutStreamer->emitLabel(Sym); + } } /// emitComments - Pretty-print comments for instructions. @@ -863,7 +876,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { OS << " <- "; // The second operand is only an offset if it's an immediate. - bool MemLoc = MI->getOperand(0).isReg() && MI->getOperand(1).isImm(); + bool MemLoc = MI->isIndirectDebugValue(); int64_t Offset = MemLoc ? MI->getOperand(1).getImm() : 0; const DIExpression *Expr = MI->getDebugExpression(); if (Expr->getNumElements()) { @@ -882,11 +895,11 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { } // Register or immediate value. Register 0 means undef. - if (MI->getOperand(0).isFPImm()) { - APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); - if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) { + if (MI->getDebugOperand(0).isFPImm()) { + APFloat APF = APFloat(MI->getDebugOperand(0).getFPImm()->getValueAPF()); + if (MI->getDebugOperand(0).getFPImm()->getType()->isFloatTy()) { OS << (double)APF.convertToFloat(); - } else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) { + } else if (MI->getDebugOperand(0).getFPImm()->getType()->isDoubleTy()) { OS << APF.convertToDouble(); } else { // There is no good way to print long double. 
Convert a copy to @@ -896,23 +909,23 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { &ignored); OS << "(long double) " << APF.convertToDouble(); } - } else if (MI->getOperand(0).isImm()) { - OS << MI->getOperand(0).getImm(); - } else if (MI->getOperand(0).isCImm()) { - MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/); - } else if (MI->getOperand(0).isTargetIndex()) { - auto Op = MI->getOperand(0); + } else if (MI->getDebugOperand(0).isImm()) { + OS << MI->getDebugOperand(0).getImm(); + } else if (MI->getDebugOperand(0).isCImm()) { + MI->getDebugOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/); + } else if (MI->getDebugOperand(0).isTargetIndex()) { + auto Op = MI->getDebugOperand(0); OS << "!target-index(" << Op.getIndex() << "," << Op.getOffset() << ")"; return true; } else { - unsigned Reg; - if (MI->getOperand(0).isReg()) { - Reg = MI->getOperand(0).getReg(); + Register Reg; + if (MI->getDebugOperand(0).isReg()) { + Reg = MI->getDebugOperand(0).getReg(); } else { - assert(MI->getOperand(0).isFI() && "Unknown operand type"); + assert(MI->getDebugOperand(0).isFI() && "Unknown operand type"); const TargetFrameLowering *TFI = AP.MF->getSubtarget().getFrameLowering(); - Offset += TFI->getFrameIndexReference(*AP.MF, - MI->getOperand(0).getIndex(), Reg); + Offset += TFI->getFrameIndexReference( + *AP.MF, MI->getDebugOperand(0).getIndex(), Reg); MemLoc = true; } if (Reg == 0) { @@ -1006,7 +1019,7 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) { int FrameOffset = MI.getOperand(1).getImm(); // Emit a symbol assignment. - OutStreamer->EmitAssignment(FrameAllocSym, + OutStreamer->emitAssignment(FrameAllocSym, MCConstantExpr::create(FrameOffset, OutContext)); } @@ -1029,15 +1042,15 @@ void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) { const MCSymbol *FunctionSymbol = getFunctionBegin(); uint64_t StackSize = FrameInfo.getStackSize(); - OutStreamer->EmitSymbolValue(FunctionSymbol, TM.getProgramPointerSize()); - OutStreamer->EmitULEB128IntValue(StackSize); + OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize()); + OutStreamer->emitULEB128IntValue(StackSize); OutStreamer->PopSection(); } -static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF, - MachineModuleInfo *MMI) { - if (!MF.getLandingPads().empty() || MF.hasEHFunclets() || MMI->hasDebugInfo()) +static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF) { + MachineModuleInfo &MMI = MF.getMMI(); + if (!MF.getLandingPads().empty() || MF.hasEHFunclets() || MMI.hasDebugInfo()) return true; // We might emit an EH table that uses function begin and end labels even if @@ -1050,11 +1063,11 @@ static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF, /// EmitFunctionBody - This method emits the body and trailer for a /// function. -void AsmPrinter::EmitFunctionBody() { - EmitFunctionHeader(); +void AsmPrinter::emitFunctionBody() { + emitFunctionHeader(); // Emit target-specific gunk before the function body. - EmitFunctionBodyStart(); + emitFunctionBodyStart(); bool ShouldPrintDebugScopes = MMI->hasDebugInfo(); @@ -1079,9 +1092,10 @@ void AsmPrinter::EmitFunctionBody() { // Print out code for the function. bool HasAnyRealCode = false; int NumInstsInFunction = 0; + for (auto &MBB : *MF) { // Print a label for the basic block. - EmitBasicBlockStart(MBB); + emitBasicBlockStart(MBB); for (auto &MI : MBB) { // Print the assembly for the instruction. 
if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() && @@ -1092,7 +1106,7 @@ void AsmPrinter::EmitFunctionBody() { // If there is a pre-instruction symbol, emit a label for it here. if (MCSymbol *S = MI.getPreInstrSymbol()) - OutStreamer->EmitLabel(S); + OutStreamer->emitLabel(S); if (ShouldPrintDebugScopes) { for (const HandlerInfo &HI : Handlers) { @@ -1116,22 +1130,22 @@ void AsmPrinter::EmitFunctionBody() { case TargetOpcode::ANNOTATION_LABEL: case TargetOpcode::EH_LABEL: case TargetOpcode::GC_LABEL: - OutStreamer->EmitLabel(MI.getOperand(0).getMCSymbol()); + OutStreamer->emitLabel(MI.getOperand(0).getMCSymbol()); break; case TargetOpcode::INLINEASM: case TargetOpcode::INLINEASM_BR: - EmitInlineAsm(&MI); + emitInlineAsm(&MI); break; case TargetOpcode::DBG_VALUE: if (isVerbose()) { if (!emitDebugValueComment(&MI, *this)) - EmitInstruction(&MI); + emitInstruction(&MI); } break; case TargetOpcode::DBG_LABEL: if (isVerbose()) { if (!emitDebugLabelComment(&MI, *this)) - EmitInstruction(&MI); + emitInstruction(&MI); } break; case TargetOpcode::IMPLICIT_DEF: @@ -1141,13 +1155,13 @@ void AsmPrinter::EmitFunctionBody() { if (isVerbose()) emitKill(&MI, *this); break; default: - EmitInstruction(&MI); + emitInstruction(&MI); break; } // If there is a post-instruction symbol, emit a label for it here. if (MCSymbol *S = MI.getPostInstrSymbol()) - OutStreamer->EmitLabel(S); + OutStreamer->emitLabel(S); if (ShouldPrintDebugScopes) { for (const HandlerInfo &HI : Handlers) { @@ -1159,7 +1173,44 @@ void AsmPrinter::EmitFunctionBody() { } } - EmitBasicBlockEnd(MBB); + // We need a temporary symbol for the end of this basic block, if either we + // have BBLabels enabled and we want to emit size directive for the BBs, or + // if this basic blocks marks the end of a section (except the section + // containing the entry basic block as the end symbol for that section is + // CurrentFnEnd). + MCSymbol *CurrentBBEnd = nullptr; + if ((MAI->hasDotTypeDotSizeDirective() && MF->hasBBLabels()) || + (MBB.isEndSection() && !MBB.sameSection(&MF->front()))) { + CurrentBBEnd = OutContext.createTempSymbol(); + OutStreamer->emitLabel(CurrentBBEnd); + } + + // Helper for emitting the size directive associated with a basic block + // symbol. + auto emitELFSizeDirective = [&](MCSymbol *SymForSize) { + assert(CurrentBBEnd && "Basicblock end symbol not set!"); + const MCExpr *SizeExp = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(CurrentBBEnd, OutContext), + MCSymbolRefExpr::create(SymForSize, OutContext), OutContext); + OutStreamer->emitELFSize(SymForSize, SizeExp); + }; + + // Emit size directive for the size of each basic block, if BBLabels is + // enabled. + if (MAI->hasDotTypeDotSizeDirective() && MF->hasBBLabels()) + emitELFSizeDirective(MBB.getSymbol()); + + // Emit size directive for the size of each basic block section once we + // get to the end of that section. + if (MBB.isEndSection()) { + if (!MBB.sameSection(&MF->front())) { + if (MAI->hasDotTypeDotSizeDirective()) + emitELFSizeDirective(CurrentSectionBeginSym); + MBBSectionRanges[MBB.getSectionIDNum()] = + MBBSectionRange{CurrentSectionBeginSym, CurrentBBEnd}; + } + } + emitBasicBlockEnd(MBB); } EmittedInsts += NumInstsInFunction; @@ -1192,6 +1243,9 @@ void AsmPrinter::EmitFunctionBody() { } } + // Switch to the original section in case basic block sections was used. 
+ OutStreamer->SwitchSection(MF->getSection()); + const Function &F = MF->getFunction(); for (const auto &BB : F) { if (!BB.hasAddressTaken()) @@ -1200,17 +1254,17 @@ void AsmPrinter::EmitFunctionBody() { if (Sym->isDefined()) continue; OutStreamer->AddComment("Address of block that was removed by CodeGen"); - OutStreamer->EmitLabel(Sym); + OutStreamer->emitLabel(Sym); } // Emit target-specific gunk after the function body. - EmitFunctionBodyEnd(); + emitFunctionBodyEnd(); - if (needFuncLabelsForEHOrDebugInfo(*MF, MMI) || + if (needFuncLabelsForEHOrDebugInfo(*MF) || MAI->hasDotTypeDotSizeDirective()) { // Create a symbol for the end of function. CurrentFnEnd = createTempSymbol("func_end"); - OutStreamer->EmitLabel(CurrentFnEnd); + OutStreamer->emitLabel(CurrentFnEnd); } // If the target wants a .size directive for the size of the function, emit @@ -1230,8 +1284,11 @@ void AsmPrinter::EmitFunctionBody() { HI.Handler->markFunctionEnd(); } + MBBSectionRanges[MF->front().getSectionIDNum()] = + MBBSectionRange{CurrentFnBegin, CurrentFnEnd}; + // Print out jump tables referenced by the function. - EmitJumpTableInfo(); + emitJumpTableInfo(); // Emit post-function debug and/or EH information. for (const HandlerInfo &HI : Handlers) { @@ -1327,7 +1384,7 @@ void AsmPrinter::emitGlobalGOTEquivs() { GlobalGOTEquivs.clear(); for (auto *GV : FailedCandidates) - EmitGlobalVariable(GV); + emitGlobalVariable(GV); } void AsmPrinter::emitGlobalIndirectSymbol(Module &M, @@ -1335,9 +1392,9 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M, MCSymbol *Name = getSymbol(&GIS); if (GIS.hasExternalLinkage() || !MAI->getWeakRefDirective()) - OutStreamer->EmitSymbolAttribute(Name, MCSA_Global); + OutStreamer->emitSymbolAttribute(Name, MCSA_Global); else if (GIS.hasWeakLinkage() || GIS.hasLinkOnceLinkage()) - OutStreamer->EmitSymbolAttribute(Name, MCSA_WeakReference); + OutStreamer->emitSymbolAttribute(Name, MCSA_WeakReference); else assert(GIS.hasLocalLinkage() && "Invalid alias or ifunc linkage"); @@ -1354,19 +1411,22 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M, // Set the symbol type to function if the alias has a function type. // This affects codegen when the aliasee is not a function. if (IsFunction) - OutStreamer->EmitSymbolAttribute(Name, isa<GlobalIFunc>(GIS) + OutStreamer->emitSymbolAttribute(Name, isa<GlobalIFunc>(GIS) ? MCSA_ELF_TypeIndFunction : MCSA_ELF_TypeFunction); - EmitVisibility(Name, GIS.getVisibility()); + emitVisibility(Name, GIS.getVisibility()); const MCExpr *Expr = lowerConstant(GIS.getIndirectSymbol()); if (isa<GlobalAlias>(&GIS) && MAI->hasAltEntry() && isa<MCBinaryExpr>(Expr)) - OutStreamer->EmitSymbolAttribute(Name, MCSA_AltEntry); + OutStreamer->emitSymbolAttribute(Name, MCSA_AltEntry); // Emit the directives as assignments aka .set: - OutStreamer->EmitAssignment(Name, Expr); + OutStreamer->emitAssignment(Name, Expr); + MCSymbol *LocalAlias = getSymbolPreferLocal(GIS); + if (LocalAlias != Name) + OutStreamer->emitAssignment(LocalAlias, Expr); if (auto *GA = dyn_cast<GlobalAlias>(&GIS)) { // If the aliasee does not correspond to a symbol in the output, i.e. 
the @@ -1384,7 +1444,7 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M, } } -void AsmPrinter::emitRemarksSection(RemarkStreamer &RS) { +void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) { if (!RS.needsSection()) return; @@ -1409,7 +1469,7 @@ void AsmPrinter::emitRemarksSection(RemarkStreamer &RS) { OutContext.getObjectFileInfo()->getRemarksSection(); OutStreamer->SwitchSection(RemarksSection); - OutStreamer->EmitBinaryData(OS.str()); + OutStreamer->emitBinaryData(OS.str()); } bool AsmPrinter::doFinalization(Module &M) { @@ -1426,31 +1486,51 @@ bool AsmPrinter::doFinalization(Module &M) { // Emit global variables. for (const auto &G : M.globals()) - EmitGlobalVariable(&G); + emitGlobalVariable(&G); // Emit remaining GOT equivalent globals. emitGlobalGOTEquivs(); - // Emit visibility info for declarations + const TargetLoweringObjectFile &TLOF = getObjFileLowering(); + + // Emit linkage(XCOFF) and visibility info for declarations for (const Function &F : M) { if (!F.isDeclarationForLinker()) continue; - GlobalValue::VisibilityTypes V = F.getVisibility(); - if (V == GlobalValue::DefaultVisibility) - continue; MCSymbol *Name = getSymbol(&F); - EmitVisibility(Name, V, false); + // Function getSymbol gives us the function descriptor symbol for XCOFF. + + if (!TM.getTargetTriple().isOSBinFormatXCOFF()) { + GlobalValue::VisibilityTypes V = F.getVisibility(); + if (V == GlobalValue::DefaultVisibility) + continue; + + emitVisibility(Name, V, false); + continue; + } + + if (F.isIntrinsic()) + continue; + + // Handle the XCOFF case. + // Variable `Name` is the function descriptor symbol (see above). Get the + // function entry point symbol. + MCSymbol *FnEntryPointSym = TLOF.getFunctionEntryPointSymbol(&F, TM); + if (cast<MCSymbolXCOFF>(FnEntryPointSym)->hasRepresentedCsectSet()) + // Emit linkage for the function entry point. + emitLinkage(&F, FnEntryPointSym); + + // Emit linkage for the function descriptor. + emitLinkage(&F, Name); } // Emit the remarks section contents. // FIXME: Figure out when is the safest time to emit this section. It should // not come after debug info. 
- if (RemarkStreamer *RS = M.getContext().getRemarkStreamer()) + if (remarks::RemarkStreamer *RS = M.getContext().getMainRemarkStreamer()) emitRemarksSection(*RS); - const TargetLoweringObjectFile &TLOF = getObjFileLowering(); - TLOF.emitModuleMetadata(*OutStreamer, M); if (TM.getTargetTriple().isOSBinFormatELF()) { @@ -1462,10 +1542,10 @@ bool AsmPrinter::doFinalization(Module &M) { OutStreamer->SwitchSection(TLOF.getDataSection()); const DataLayout &DL = M.getDataLayout(); - EmitAlignment(Align(DL.getPointerSize())); + emitAlignment(Align(DL.getPointerSize())); for (const auto &Stub : Stubs) { - OutStreamer->EmitLabel(Stub.first); - OutStreamer->EmitSymbolValue(Stub.second.getPointer(), + OutStreamer->emitLabel(Stub.first); + OutStreamer->emitSymbolValue(Stub.second.getPointer(), DL.getPointerSize()); } } @@ -1489,10 +1569,10 @@ bool AsmPrinter::doFinalization(Module &M) { COFF::IMAGE_SCN_LNK_COMDAT, SectionKind::getReadOnly(), Stub.first->getName(), COFF::IMAGE_COMDAT_SELECT_ANY)); - EmitAlignment(Align(DL.getPointerSize())); - OutStreamer->EmitSymbolAttribute(Stub.first, MCSA_Global); - OutStreamer->EmitLabel(Stub.first); - OutStreamer->EmitSymbolValue(Stub.second.getPointer(), + emitAlignment(Align(DL.getPointerSize())); + OutStreamer->emitSymbolAttribute(Stub.first, MCSA_Global); + OutStreamer->emitLabel(Stub.first); + OutStreamer->emitSymbolValue(Stub.second.getPointer(), DL.getPointerSize()); } } @@ -1518,7 +1598,7 @@ bool AsmPrinter::doFinalization(Module &M) { for (const auto &GO : M.global_objects()) { if (!GO.hasExternalWeakLinkage()) continue; - OutStreamer->EmitSymbolAttribute(getSymbol(&GO), MCSA_WeakReference); + OutStreamer->emitSymbolAttribute(getSymbol(&GO), MCSA_WeakReference); } } @@ -1549,25 +1629,25 @@ bool AsmPrinter::doFinalization(Module &M) { MP->finishAssembly(M, *MI, *this); // Emit llvm.ident metadata in an '.ident' directive. - EmitModuleIdents(M); + emitModuleIdents(M); // Emit bytes for llvm.commandline metadata. - EmitModuleCommandLines(M); + emitModuleCommandLines(M); // Emit __morestack address if needed for indirect calls. if (MMI->usesMorestackAddr()) { - unsigned Align = 1; + Align Alignment(1); MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant( getDataLayout(), SectionKind::getReadOnly(), - /*C=*/nullptr, Align); + /*C=*/nullptr, Alignment); OutStreamer->SwitchSection(ReadOnlySection); MCSymbol *AddrSymbol = OutContext.getOrCreateSymbol(StringRef("__morestack_addr")); - OutStreamer->EmitLabel(AddrSymbol); + OutStreamer->emitLabel(AddrSymbol); unsigned PtrSize = MAI->getCodePointerSize(); - OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("__morestack"), + OutStreamer->emitSymbolValue(GetExternalSymbolSymbol("__morestack"), PtrSize); } @@ -1599,7 +1679,7 @@ bool AsmPrinter::doFinalization(Module &M) { OS.flush(); if (!Flags.empty()) { OutStreamer->SwitchSection(TLOF.getDrectveSection()); - OutStreamer->EmitBytes(Flags); + OutStreamer->emitBytes(Flags); } Flags.clear(); } @@ -1625,7 +1705,7 @@ bool AsmPrinter::doFinalization(Module &M) { if (!Flags.empty()) { OutStreamer->SwitchSection(TLOF.getDrectveSection()); - OutStreamer->EmitBytes(Flags); + OutStreamer->emitBytes(Flags); } Flags.clear(); } @@ -1635,12 +1715,12 @@ bool AsmPrinter::doFinalization(Module &M) { if (TM.Options.EmitAddrsig) { // Emit address-significance attributes for all globals. 
- OutStreamer->EmitAddrsig(); + OutStreamer->emitAddrsig(); for (const GlobalValue &GV : M.global_values()) if (!GV.use_empty() && !GV.isThreadLocal() && !GV.hasDLLImportStorageClass() && !GV.getName().startswith("llvm.") && !GV.hasAtLeastLocalUnnamedAddr()) - OutStreamer->EmitAddrsigSym(getSymbol(&GV)); + OutStreamer->emitAddrsigSym(getSymbol(&GV)); } // Emit symbol partition specifications (ELF only). @@ -1651,11 +1731,12 @@ bool AsmPrinter::doFinalization(Module &M) { GV.getVisibility() != GlobalValue::DefaultVisibility) continue; - OutStreamer->SwitchSection(OutContext.getELFSection( - ".llvm_sympart", ELF::SHT_LLVM_SYMPART, 0, 0, "", ++UniqueID)); - OutStreamer->EmitBytes(GV.getPartition()); - OutStreamer->EmitZeros(1); - OutStreamer->EmitValue( + OutStreamer->SwitchSection( + OutContext.getELFSection(".llvm_sympart", ELF::SHT_LLVM_SYMPART, 0, 0, + "", ++UniqueID, nullptr)); + OutStreamer->emitBytes(GV.getPartition()); + OutStreamer->emitZeros(1); + OutStreamer->emitValue( MCSymbolRefExpr::create(getSymbol(&GV), OutContext), MAI->getCodePointerSize()); } @@ -1663,7 +1744,7 @@ bool AsmPrinter::doFinalization(Module &M) { // Allow the target to emit any magic that it wants at the end of the file, // after everything else has gone out. - EmitEndOfAsmFile(M); + emitEndOfAsmFile(M); MMI = nullptr; @@ -1686,30 +1767,31 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { const Function &F = MF.getFunction(); // Get the function symbol. - if (MAI->needsFunctionDescriptors()) { - assert(TM.getTargetTriple().isOSAIX() && "Function descriptor is only" - " supported on AIX."); + if (!MAI->needsFunctionDescriptors()) { + CurrentFnSym = getSymbol(&MF.getFunction()); + } else { + assert(TM.getTargetTriple().isOSAIX() && + "Only AIX uses the function descriptor hooks."); + // AIX is unique here in that the name of the symbol emitted for the + // function body does not have the same name as the source function's + // C-linkage name. assert(CurrentFnDescSym && "The function descriptor symbol needs to be" - " initalized first."); + " initalized first."); // Get the function entry point symbol. - CurrentFnSym = - OutContext.getOrCreateSymbol("." + CurrentFnDescSym->getName()); - - MCSectionXCOFF *FnEntryPointSec = - cast<MCSectionXCOFF>(getObjFileLowering().SectionForGlobal(&F, TM)); - // Set the containing csect. - cast<MCSymbolXCOFF>(CurrentFnSym)->setContainingCsect(FnEntryPointSec); - } else { - CurrentFnSym = getSymbol(&MF.getFunction()); + CurrentFnSym = getObjFileLowering().getFunctionEntryPointSymbol(&F, TM); } CurrentFnSymForSize = CurrentFnSym; CurrentFnBegin = nullptr; + CurrentSectionBeginSym = nullptr; + MBBSectionRanges.clear(); CurExceptionSym = nullptr; bool NeedsLocalForSize = MAI->needsLocalForSize(); if (F.hasFnAttribute("patchable-function-entry") || - needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize || + F.hasFnAttribute("function-instrument") || + F.hasFnAttribute("xray-instruction-threshold") || + needFuncLabelsForEHOrDebugInfo(MF) || NeedsLocalForSize || MF.getTarget().Options.EmitStackSizeSection) { CurrentFnBegin = createTempSymbol("func_begin"); if (NeedsLocalForSize) @@ -1717,13 +1799,6 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { } ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); - PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); - MBFI = (PSI && PSI->hasProfileSummary()) ? - // ORE conditionally computes MBFI. If available, use it, otherwise - // request it. - (ORE->getBFI() ? 
ORE->getBFI() : - &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()) : - nullptr; } namespace { @@ -1731,10 +1806,10 @@ namespace { // Keep track the alignment, constpool entries per Section. struct SectionCPs { MCSection *S; - unsigned Alignment; + Align Alignment; SmallVector<unsigned, 4> CPEs; - SectionCPs(MCSection *s, unsigned a) : S(s), Alignment(a) {} + SectionCPs(MCSection *s, Align a) : S(s), Alignment(a) {} }; } // end anonymous namespace @@ -1743,7 +1818,7 @@ namespace { /// representations of the constants in the constant pool MCP. This is /// used to print out constants which have been "spilled to memory" by /// the code generator. -void AsmPrinter::EmitConstantPool() { +void AsmPrinter::emitConstantPool() { const MachineConstantPool *MCP = MF->getConstantPool(); const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants(); if (CP.empty()) return; @@ -1753,7 +1828,7 @@ void AsmPrinter::EmitConstantPool() { SmallVector<SectionCPs, 4> CPSections; for (unsigned i = 0, e = CP.size(); i != e; ++i) { const MachineConstantPoolEntry &CPE = CP[i]; - unsigned Align = CPE.getAlignment(); + Align Alignment = CPE.getAlign(); SectionKind Kind = CPE.getSectionKind(&getDataLayout()); @@ -1761,8 +1836,8 @@ void AsmPrinter::EmitConstantPool() { if (!CPE.isMachineConstantPoolEntry()) C = CPE.Val.ConstVal; - MCSection *S = getObjFileLowering().getSectionForConstant(getDataLayout(), - Kind, C, Align); + MCSection *S = getObjFileLowering().getSectionForConstant( + getDataLayout(), Kind, C, Alignment); // The number of sections are small, just do a linear search from the // last section to the first. @@ -1776,11 +1851,11 @@ void AsmPrinter::EmitConstantPool() { } if (!Found) { SecIdx = CPSections.size(); - CPSections.push_back(SectionCPs(S, Align)); + CPSections.push_back(SectionCPs(S, Alignment)); } - if (Align > CPSections[SecIdx].Alignment) - CPSections[SecIdx].Alignment = Align; + if (Alignment > CPSections[SecIdx].Alignment) + CPSections[SecIdx].Alignment = Alignment; CPSections[SecIdx].CPEs.push_back(i); } @@ -1794,14 +1869,9 @@ void AsmPrinter::EmitConstantPool() { if (!Sym->isUndefined()) continue; - if (TM.getTargetTriple().isOSBinFormatXCOFF()) { - cast<MCSymbolXCOFF>(Sym)->setContainingCsect( - cast<MCSectionXCOFF>(CPSections[i].S)); - } - if (CurSection != CPSections[i].S) { OutStreamer->SwitchSection(CPSections[i].S); - EmitAlignment(Align(CPSections[i].Alignment)); + emitAlignment(Align(CPSections[i].Alignment)); CurSection = CPSections[i].S; Offset = 0; } @@ -1809,25 +1879,24 @@ void AsmPrinter::EmitConstantPool() { MachineConstantPoolEntry CPE = CP[CPI]; // Emit inter-object padding for alignment. - unsigned AlignMask = CPE.getAlignment() - 1; - unsigned NewOffset = (Offset + AlignMask) & ~AlignMask; - OutStreamer->EmitZeros(NewOffset - Offset); + unsigned NewOffset = alignTo(Offset, CPE.getAlign()); + OutStreamer->emitZeros(NewOffset - Offset); Type *Ty = CPE.getType(); Offset = NewOffset + getDataLayout().getTypeAllocSize(Ty); - OutStreamer->EmitLabel(Sym); + OutStreamer->emitLabel(Sym); if (CPE.isMachineConstantPoolEntry()) - EmitMachineConstantPoolValue(CPE.Val.MachineCPVal); + emitMachineConstantPoolValue(CPE.Val.MachineCPVal); else - EmitGlobalConstant(getDataLayout(), CPE.Val.ConstVal); + emitGlobalConstant(getDataLayout(), CPE.Val.ConstVal); } } } -/// EmitJumpTableInfo - Print assembly representations of the jump tables used -/// by the current function to the current output stream. 
-void AsmPrinter::EmitJumpTableInfo() { +// Print assembly representations of the jump tables used by the current +// function. +void AsmPrinter::emitJumpTableInfo() { const DataLayout &DL = MF->getDataLayout(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); if (!MJTI) return; @@ -1848,12 +1917,12 @@ void AsmPrinter::EmitJumpTableInfo() { OutStreamer->SwitchSection(ReadOnlySection); } - EmitAlignment(Align(MJTI->getEntryAlignment(DL))); + emitAlignment(Align(MJTI->getEntryAlignment(DL))); // Jump tables in code sections are marked with a data_region directive // where that's supported. if (!JTInDiffSection) - OutStreamer->EmitDataRegion(MCDR_DataRegionJT32); + OutStreamer->emitDataRegion(MCDR_DataRegionJT32); for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) { const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; @@ -1876,7 +1945,7 @@ void AsmPrinter::EmitJumpTableInfo() { // .set LJTSet, LBB32-base const MCExpr *LHS = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext); - OutStreamer->EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()), + OutStreamer->emitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()), MCBinaryExpr::createSub(LHS, Base, OutContext)); } @@ -1890,25 +1959,21 @@ void AsmPrinter::EmitJumpTableInfo() { // FIXME: This doesn't have to have any specific name, just any randomly // named and numbered local label started with 'l' would work. Simplify // GetJTISymbol. - OutStreamer->EmitLabel(GetJTISymbol(JTI, true)); + OutStreamer->emitLabel(GetJTISymbol(JTI, true)); MCSymbol* JTISymbol = GetJTISymbol(JTI); - if (TM.getTargetTriple().isOSBinFormatXCOFF()) { - cast<MCSymbolXCOFF>(JTISymbol)->setContainingCsect( - cast<MCSectionXCOFF>(TLOF.getSectionForJumpTable(F, TM))); - } - OutStreamer->EmitLabel(JTISymbol); + OutStreamer->emitLabel(JTISymbol); for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) - EmitJumpTableEntry(MJTI, JTBBs[ii], JTI); + emitJumpTableEntry(MJTI, JTBBs[ii], JTI); } if (!JTInDiffSection) - OutStreamer->EmitDataRegion(MCDR_DataRegionEnd); + OutStreamer->emitDataRegion(MCDR_DataRegionEnd); } /// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the /// current stream. -void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, +void AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned UID) const { assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block"); @@ -1930,7 +1995,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, // with a relocation as gp-relative, e.g.: // .gprel32 LBB123 MCSymbol *MBBSym = MBB->getSymbol(); - OutStreamer->EmitGPRel32Value(MCSymbolRefExpr::create(MBBSym, OutContext)); + OutStreamer->emitGPRel32Value(MCSymbolRefExpr::create(MBBSym, OutContext)); return; } @@ -1939,7 +2004,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, // with a relocation as gp-relative, e.g.: // .gpdword LBB123 MCSymbol *MBBSym = MBB->getSymbol(); - OutStreamer->EmitGPRel64Value(MCSymbolRefExpr::create(MBBSym, OutContext)); + OutStreamer->emitGPRel64Value(MCSymbolRefExpr::create(MBBSym, OutContext)); return; } @@ -1967,16 +2032,16 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, assert(Value && "Unknown entry kind!"); unsigned EntrySize = MJTI->getEntrySize(getDataLayout()); - OutStreamer->EmitValue(Value, EntrySize); + OutStreamer->emitValue(Value, EntrySize); } /// EmitSpecialLLVMGlobal - Check to see if the specified global is a /// special global used by LLVM. 
If so, emit it and return true, otherwise /// do nothing and return false. -bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { +bool AsmPrinter::emitSpecialLLVMGlobal(const GlobalVariable *GV) { if (GV->getName() == "llvm.used") { if (MAI->hasNoDeadStrip()) // No need to emit this at all. - EmitLLVMUsedList(cast<ConstantArray>(GV->getInitializer())); + emitLLVMUsedList(cast<ConstantArray>(GV->getInitializer())); return true; } @@ -1990,14 +2055,14 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { assert(GV->hasInitializer() && "Not a special LLVM global!"); if (GV->getName() == "llvm.global_ctors") { - EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(), + emitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(), /* isCtor */ true); return true; } if (GV->getName() == "llvm.global_dtors") { - EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(), + emitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(), /* isCtor */ false); return true; @@ -2008,13 +2073,13 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { /// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each /// global in the specified llvm.used list. -void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) { +void AsmPrinter::emitLLVMUsedList(const ConstantArray *InitList) { // Should be an array of 'i8*'. for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { const GlobalValue *GV = dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts()); if (GV) - OutStreamer->EmitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip); + OutStreamer->emitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip); } } @@ -2032,27 +2097,16 @@ struct Structor { /// EmitXXStructorList - Emit the ctor or dtor list taking into account the init /// priority. -void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List, +void AsmPrinter::emitXXStructorList(const DataLayout &DL, const Constant *List, bool isCtor) { // Should be an array of '{ i32, void ()*, i8* }' structs. The first value is the // init priority. if (!isa<ConstantArray>(List)) return; - // Sanity check the structors list. - const ConstantArray *InitList = dyn_cast<ConstantArray>(List); - if (!InitList) return; // Not an array! - StructType *ETy = dyn_cast<StructType>(InitList->getType()->getElementType()); - if (!ETy || ETy->getNumElements() != 3 || - !isa<IntegerType>(ETy->getTypeAtIndex(0U)) || - !isa<PointerType>(ETy->getTypeAtIndex(1U)) || - !isa<PointerType>(ETy->getTypeAtIndex(2U))) - return; // Not (int, ptr, ptr). - // Gather the structors in a form that's convenient for sorting by priority. SmallVector<Structor, 8> Structors; - for (Value *O : InitList->operands()) { - ConstantStruct *CS = dyn_cast<ConstantStruct>(O); - if (!CS) continue; // Malformed. + for (Value *O : cast<ConstantArray>(List)->operands()) { + auto *CS = cast<ConstantStruct>(O); if (CS->getOperand(1)->isNullValue()) break; // Found a null terminator, skip the rest. 
ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0)); @@ -2090,12 +2144,12 @@ void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List, : Obj.getStaticDtorSection(S.Priority, KeySym)); OutStreamer->SwitchSection(OutputSection); if (OutStreamer->getCurrentSection() != OutStreamer->getPreviousSection()) - EmitAlignment(Align); - EmitXXStructor(DL, S.Func); + emitAlignment(Align); + emitXXStructor(DL, S.Func); } } -void AsmPrinter::EmitModuleIdents(Module &M) { +void AsmPrinter::emitModuleIdents(Module &M) { if (!MAI->hasIdentDirective()) return; @@ -2105,12 +2159,12 @@ void AsmPrinter::EmitModuleIdents(Module &M) { assert(N->getNumOperands() == 1 && "llvm.ident metadata entry can have only one operand"); const MDString *S = cast<MDString>(N->getOperand(0)); - OutStreamer->EmitIdent(S->getString()); + OutStreamer->emitIdent(S->getString()); } } } -void AsmPrinter::EmitModuleCommandLines(Module &M) { +void AsmPrinter::emitModuleCommandLines(Module &M) { MCSection *CommandLine = getObjFileLowering().getSectionForCommandLines(); if (!CommandLine) return; @@ -2121,14 +2175,14 @@ void AsmPrinter::EmitModuleCommandLines(Module &M) { OutStreamer->PushSection(); OutStreamer->SwitchSection(CommandLine); - OutStreamer->EmitZeros(1); + OutStreamer->emitZeros(1); for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { const MDNode *N = NMD->getOperand(i); assert(N->getNumOperands() == 1 && "llvm.commandline metadata entry can have only one operand"); const MDString *S = cast<MDString>(N->getOperand(0)); - OutStreamer->EmitBytes(S->getString()); - OutStreamer->EmitZeros(1); + OutStreamer->emitBytes(S->getString()); + OutStreamer->emitZeros(1); } OutStreamer->PopSection(); } @@ -2139,29 +2193,23 @@ void AsmPrinter::EmitModuleCommandLines(Module &M) { /// Emit a byte directive and value. /// -void AsmPrinter::emitInt8(int Value) const { - OutStreamer->EmitIntValue(Value, 1); -} +void AsmPrinter::emitInt8(int Value) const { OutStreamer->emitInt8(Value); } /// Emit a short directive and value. -void AsmPrinter::emitInt16(int Value) const { - OutStreamer->EmitIntValue(Value, 2); -} +void AsmPrinter::emitInt16(int Value) const { OutStreamer->emitInt16(Value); } /// Emit a long directive and value. -void AsmPrinter::emitInt32(int Value) const { - OutStreamer->EmitIntValue(Value, 4); -} +void AsmPrinter::emitInt32(int Value) const { OutStreamer->emitInt32(Value); } /// Emit a long long directive and value. void AsmPrinter::emitInt64(uint64_t Value) const { - OutStreamer->EmitIntValue(Value, 8); + OutStreamer->emitInt64(Value); } /// Emit something like ".long Hi-Lo" where the size in bytes of the directive /// is specified by Size and Hi/Lo specify the labels. This implicitly uses /// .set if it avoids relocations. -void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, +void AsmPrinter::emitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Size) const { OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, Size); } @@ -2169,13 +2217,13 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, /// EmitLabelPlusOffset - Emit something like ".long Label+Offset" /// where the size in bytes of the directive is specified by Size and Label /// specifies the label. This implicitly uses .set if it is available. 
-void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, +void AsmPrinter::emitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, unsigned Size, bool IsSectionRelative) const { if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) { OutStreamer->EmitCOFFSecRel32(Label, Offset); if (Size > 4) - OutStreamer->EmitZeros(Size - 4); + OutStreamer->emitZeros(Size - 4); return; } @@ -2185,7 +2233,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, Expr = MCBinaryExpr::createAdd( Expr, MCConstantExpr::create(Offset, OutContext), OutContext); - OutStreamer->EmitValue(Expr, Size); + OutStreamer->emitValue(Expr, Size); } //===----------------------------------------------------------------------===// @@ -2194,17 +2242,17 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, // two boundary. If a global value is specified, and if that global has // an explicit alignment requested, it will override the alignment request // if required for correctness. -void AsmPrinter::EmitAlignment(Align Alignment, const GlobalObject *GV) const { +void AsmPrinter::emitAlignment(Align Alignment, const GlobalObject *GV) const { if (GV) Alignment = getGVAlignment(GV, GV->getParent()->getDataLayout(), Alignment); - if (Alignment == Align::None()) + if (Alignment == Align(1)) return; // 1-byte aligned: no need to emit alignment. if (getCurrentSection()->getKind().isText()) - OutStreamer->EmitCodeAlignment(Alignment.value()); + OutStreamer->emitCodeAlignment(Alignment.value()); else - OutStreamer->EmitValueToAlignment(Alignment.value()); + OutStreamer->emitValueToAlignment(Alignment.value()); } //===----------------------------------------------------------------------===// @@ -2232,23 +2280,22 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { } switch (CE->getOpcode()) { - default: + default: { // If the code isn't optimized, there may be outstanding folding // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. - if (Constant *C = ConstantFoldConstant(CE, getDataLayout())) - if (C != CE) - return lowerConstant(C); + Constant *C = ConstantFoldConstant(CE, getDataLayout()); + if (C != CE) + return lowerConstant(C); // Otherwise report the problem to the user. - { - std::string S; - raw_string_ostream OS(S); - OS << "Unsupported expression in static initializer: "; - CE->printAsOperand(OS, /*PrintType=*/false, - !MF ? nullptr : MF->getFunction().getParent()); - report_fatal_error(OS.str()); - } + std::string S; + raw_string_ostream OS(S); + OS << "Unsupported expression in static initializer: "; + CE->printAsOperand(OS, /*PrintType=*/false, + !MF ? nullptr : MF->getFunction().getParent()); + report_fatal_error(OS.str()); + } case Instruction::GetElementPtr: { // Generate a symbolic expression for the byte address APInt OffsetAI(getDataLayout().getPointerTypeSizeInBits(CE->getType()), 0); @@ -2434,7 +2481,7 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL, // If this can be emitted with .ascii/.asciz, emit it as such. if (CDS->isString()) - return AP.OutStreamer->EmitBytes(CDS->getAsString()); + return AP.OutStreamer->emitBytes(CDS->getAsString()); // Otherwise, emit the values in successive locations. 
unsigned ElementByteSize = CDS->getElementByteSize(); @@ -2443,7 +2490,7 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL, if (AP.isVerbose()) AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n", CDS->getElementAsInteger(i)); - AP.OutStreamer->EmitIntValue(CDS->getElementAsInteger(i), + AP.OutStreamer->emitIntValue(CDS->getElementAsInteger(i), ElementByteSize); } } else { @@ -2453,11 +2500,11 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL, } unsigned Size = DL.getTypeAllocSize(CDS->getType()); - unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) * - CDS->getNumElements(); + unsigned EmittedSize = + DL.getTypeAllocSize(CDS->getElementType()) * CDS->getNumElements(); assert(EmittedSize <= Size && "Size cannot be less than EmittedSize!"); if (unsigned Padding = Size - EmittedSize) - AP.OutStreamer->EmitZeros(Padding); + AP.OutStreamer->emitZeros(Padding); } static void emitGlobalConstantArray(const DataLayout &DL, @@ -2488,7 +2535,7 @@ static void emitGlobalConstantVector(const DataLayout &DL, unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) * CV->getType()->getNumElements(); if (unsigned Padding = Size - EmittedSize) - AP.OutStreamer->EmitZeros(Padding); + AP.OutStreamer->emitZeros(Padding); } static void emitGlobalConstantStruct(const DataLayout &DL, @@ -2513,7 +2560,7 @@ static void emitGlobalConstantStruct(const DataLayout &DL, // Insert padding - this may include padding to increase the size of the // current field up to the ABI size (if the struct is not packed) as well // as padding to ensure that the next field starts at the right offset. - AP.OutStreamer->EmitZeros(PadSize); + AP.OutStreamer->emitZeros(PadSize); } assert(SizeSoFar == Layout->getSizeInBytes() && "Layout of constant struct may be incorrect!"); @@ -2545,22 +2592,22 @@ static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP) { int Chunk = API.getNumWords() - 1; if (TrailingBytes) - AP.OutStreamer->EmitIntValue(p[Chunk--], TrailingBytes); + AP.OutStreamer->emitIntValueInHexWithPadding(p[Chunk--], TrailingBytes); for (; Chunk >= 0; --Chunk) - AP.OutStreamer->EmitIntValue(p[Chunk], sizeof(uint64_t)); + AP.OutStreamer->emitIntValueInHexWithPadding(p[Chunk], sizeof(uint64_t)); } else { unsigned Chunk; for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk) - AP.OutStreamer->EmitIntValue(p[Chunk], sizeof(uint64_t)); + AP.OutStreamer->emitIntValueInHexWithPadding(p[Chunk], sizeof(uint64_t)); if (TrailingBytes) - AP.OutStreamer->EmitIntValue(p[Chunk], TrailingBytes); + AP.OutStreamer->emitIntValueInHexWithPadding(p[Chunk], TrailingBytes); } // Emit the tail padding for the long double. const DataLayout &DL = AP.getDataLayout(); - AP.OutStreamer->EmitZeros(DL.getTypeAllocSize(ET) - DL.getTypeStoreSize(ET)); + AP.OutStreamer->emitZeros(DL.getTypeAllocSize(ET) - DL.getTypeStoreSize(ET)); } static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { @@ -2591,9 +2638,10 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { // [chunk1][chunk2] ... [chunkN]. // The most significant chunk is chunkN and it should be emitted first. // However, due to the alignment issue chunkN contains useless bits. - // Realign the chunks so that they contain only useless information: + // Realign the chunks so that they contain only useful information: // ExtraBits 0 1 (BitWidth / 64) - 1 // chu[nk1 chu][nk2 chu] ... 
[nkN-1 chunkN] + ExtraBitsSize = alignTo(ExtraBitsSize, 8); ExtraBits = Realigned.getRawData()[0] & (((uint64_t)-1) >> (64 - ExtraBitsSize)); Realigned.lshrInPlace(ExtraBitsSize); @@ -2607,19 +2655,19 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { const uint64_t *RawData = Realigned.getRawData(); for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { uint64_t Val = DL.isBigEndian() ? RawData[e - i - 1] : RawData[i]; - AP.OutStreamer->EmitIntValue(Val, 8); + AP.OutStreamer->emitIntValue(Val, 8); } if (ExtraBitsSize) { // Emit the extra bits after the 64-bits chunks. // Emit a directive that fills the expected size. - uint64_t Size = AP.getDataLayout().getTypeAllocSize(CI->getType()); + uint64_t Size = AP.getDataLayout().getTypeStoreSize(CI->getType()); Size -= (BitWidth / 64) * 8; assert(Size && Size * 8 >= ExtraBitsSize && (ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize))) == ExtraBits && "Directive too small for extra bits."); - AP.OutStreamer->EmitIntValue(ExtraBits, Size); + AP.OutStreamer->emitIntValue(ExtraBits, Size); } } @@ -2726,30 +2774,32 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV, BaseCV = dyn_cast<Constant>(CV->user_back()); if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) - return AP.OutStreamer->EmitZeros(Size); + return AP.OutStreamer->emitZeros(Size); if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - switch (Size) { - case 1: - case 2: - case 4: - case 8: + const uint64_t StoreSize = DL.getTypeStoreSize(CV->getType()); + + if (StoreSize < 8) { if (AP.isVerbose()) AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n", CI->getZExtValue()); - AP.OutStreamer->EmitIntValue(CI->getZExtValue(), Size); - return; - default: + AP.OutStreamer->emitIntValue(CI->getZExtValue(), StoreSize); + } else { emitGlobalConstantLargeInt(CI, AP); - return; } + + // Emit tail padding if needed + if (Size != StoreSize) + AP.OutStreamer->emitZeros(Size - StoreSize); + + return; } if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) return emitGlobalConstantFP(CFP, AP); if (isa<ConstantPointerNull>(CV)) { - AP.OutStreamer->EmitIntValue(0, Size); + AP.OutStreamer->emitIntValue(0, Size); return; } @@ -2773,7 +2823,7 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV, // to emit the value in chunks. Try to constant fold the value and emit it // that way. Constant *New = ConstantFoldConstant(CE, DL); - if (New && New != CE) + if (New != CE) return emitGlobalConstantImpl(DL, New, AP); } } @@ -2791,22 +2841,22 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV, if (AP.getObjFileLowering().supportIndirectSymViaGOTPCRel()) handleIndirectSymViaGOTPCRel(AP, &ME, BaseCV, Offset); - AP.OutStreamer->EmitValue(ME, Size); + AP.OutStreamer->emitValue(ME, Size); } /// EmitGlobalConstant - Print a general LLVM constant to the .s file. -void AsmPrinter::EmitGlobalConstant(const DataLayout &DL, const Constant *CV) { +void AsmPrinter::emitGlobalConstant(const DataLayout &DL, const Constant *CV) { uint64_t Size = DL.getTypeAllocSize(CV->getType()); if (Size) emitGlobalConstantImpl(DL, CV, *this); else if (MAI->hasSubsectionsViaSymbols()) { // If the global has zero size, emit a single byte so that two labels don't // look like they are at the same location. 
- OutStreamer->EmitIntValue(0, 1); + OutStreamer->emitIntValue(0, 1); } } -void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { +void AsmPrinter::emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { // Target doesn't support this yet! llvm_unreachable("Target does not support EmitMachineConstantPoolValue"); } @@ -2850,12 +2900,13 @@ MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { const DataLayout &DL = MF->getDataLayout(); SectionKind Kind = CPE.getSectionKind(&DL); const Constant *C = CPE.Val.ConstVal; - unsigned Align = CPE.Alignment; + Align Alignment = CPE.Alignment; if (const MCSectionCOFF *S = dyn_cast<MCSectionCOFF>( - getObjFileLowering().getSectionForConstant(DL, Kind, C, Align))) { + getObjFileLowering().getSectionForConstant(DL, Kind, C, + Alignment))) { if (MCSymbol *Sym = S->getCOMDATSymbol()) { if (Sym->isUndefined()) - OutStreamer->EmitSymbolAttribute(Sym, MCSA_Global); + OutStreamer->emitSymbolAttribute(Sym, MCSA_Global); return Sym; } } @@ -2957,10 +3008,10 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, PrintChildLoopComment(OS, Loop, AP.getFunctionNumber()); } -/// EmitBasicBlockStart - This method prints the label for the specified +/// emitBasicBlockStart - This method prints the label for the specified /// MachineBasicBlock, an alignment (if present) and a comment describing /// it if appropriate. -void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) { +void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { // End the previous funclet and start a new one. if (MBB.isEHFuncletEntry()) { for (const HandlerInfo &HI : Handlers) { @@ -2971,8 +3022,8 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) { // Emit an alignment directive for this block, if needed. const Align Alignment = MBB.getAlignment(); - if (Alignment != Align::None()) - EmitAlignment(Alignment); + if (Alignment != Align(1)) + emitAlignment(Alignment); // If the block has its address taken, emit any labels that were used to // reference the block. It is possible that there is more than one label @@ -2987,7 +3038,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) { // their corresponding BB's address taken in IR if (BB->hasAddressTaken()) for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB)) - OutStreamer->EmitLabel(Sym); + OutStreamer->emitLabel(Sym); } // Print some verbose block comments. @@ -3004,25 +3055,44 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) { emitBasicBlockLoopComments(MBB, MLI, *this); } - // Print the main label for the block. if (MBB.pred_empty() || - (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry() && - !MBB.hasLabelMustBeEmitted())) { + (!MF->hasBBLabels() && isBlockOnlyReachableByFallthrough(&MBB) && + !MBB.isEHFuncletEntry() && !MBB.hasLabelMustBeEmitted())) { if (isVerbose()) { // NOTE: Want this comment at start of line, don't emit with AddComment. OutStreamer->emitRawComment(" %bb." + Twine(MBB.getNumber()) + ":", false); } } else { - if (isVerbose() && MBB.hasLabelMustBeEmitted()) + if (isVerbose() && MBB.hasLabelMustBeEmitted()) { OutStreamer->AddComment("Label of block must be emitted"); - OutStreamer->EmitLabel(MBB.getSymbol()); + } + auto *BBSymbol = MBB.getSymbol(); + // Switch to a new section if this basic block must begin a section. 
+ if (MBB.isBeginSection()) { + OutStreamer->SwitchSection( + getObjFileLowering().getSectionForMachineBasicBlock(MF->getFunction(), + MBB, TM)); + CurrentSectionBeginSym = BBSymbol; + } + OutStreamer->emitLabel(BBSymbol); + // With BB sections, each basic block must handle CFI information on its own + // if it begins a section. + if (MBB.isBeginSection()) + for (const HandlerInfo &HI : Handlers) + HI.Handler->beginBasicBlock(MBB); } } -void AsmPrinter::EmitBasicBlockEnd(const MachineBasicBlock &MBB) {} +void AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) { + // Check if CFI information needs to be updated for this MBB with basic block + // sections. + if (MBB.isEndSection()) + for (const HandlerInfo &HI : Handlers) + HI.Handler->endBasicBlock(MBB); +} -void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility, +void AsmPrinter::emitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition) const { MCSymbolAttr Attr = MCSA_Invalid; @@ -3040,7 +3110,7 @@ void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility, } if (Attr != MCSA_Invalid) - OutStreamer->EmitSymbolAttribute(Sym, Attr); + OutStreamer->emitSymbolAttribute(Sym, Attr); } /// isBlockOnlyReachableByFallthough - Return true if the basic block has @@ -3048,6 +3118,10 @@ void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility, /// the predecessor and this block is a fall-through. bool AsmPrinter:: isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { + // With BasicBlock Sections, beginning of the section is not a fallthrough. + if (MBB->isBeginSection()) + return false; + // If this is a landing pad, it isn't a fall through. If it has no preds, // then nothing falls through to it. if (MBB->isEHPad() || MBB->pred_empty()) @@ -3097,11 +3171,10 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) { auto Name = S.getName(); - for (GCMetadataPrinterRegistry::iterator - I = GCMetadataPrinterRegistry::begin(), - E = GCMetadataPrinterRegistry::end(); I != E; ++I) - if (Name == I->getName()) { - std::unique_ptr<GCMetadataPrinter> GMP = I->instantiate(); + for (const GCMetadataPrinterRegistry::entry &GCMetaPrinter : + GCMetadataPrinterRegistry::entries()) + if (Name == GCMetaPrinter.getName()) { + std::unique_ptr<GCMetadataPrinter> GMP = GCMetaPrinter.instantiate(); GMP->S = &S; auto IterBool = GCMap.insert(std::make_pair(&S, std::move(GMP))); return IterBool.first->second.get(); @@ -3139,18 +3212,15 @@ void AsmPrinterHandler::markFunctionEnd() {} // In the binary's "xray_instr_map" section, an array of these function entries // describes each instrumentation point. When XRay patches your code, the index // into this table will be given to your handler as a patch point identifier. 
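[Editor's sketch] For orientation, the instrumentation-map record that the emitter below produces works out, on a 64-bit target, to 32 bytes: two word-sized address fields (or, under the PC-relative scheme added further down, offset fields), three metadata bytes, and 13 bytes of padding. A hypothetical C++ mirror of that layout — field names are my own choosing, not LLVM's:

    #include <cstdint>

    // Illustrative only: mirrors the bytes written by XRayFunctionEntry::emit()
    // plus the two leading word-sized values emitted by its caller.
    struct XRaySledEntry64 {
      uint64_t Address;         // sled location (absolute, or PC-relative offset)
      uint64_t Function;        // owning function (absolute, or PC-relative offset)
      uint8_t Kind;             // function-entry, function-exit, tail-call, ...
      uint8_t AlwaysInstrument; // bypass the runtime's instrumentation threshold
      uint8_t Version;          // sled record version
      uint8_t Padding[13];      // pad the record out to 4 * word size
    };
    static_assert(sizeof(XRaySledEntry64) == 32, "entry is 4 words on 64-bit");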
-void AsmPrinter::XRayFunctionEntry::emit(int Bytes, MCStreamer *Out, - const MCSymbol *CurrentFnSym) const { - Out->EmitSymbolValue(Sled, Bytes); - Out->EmitSymbolValue(CurrentFnSym, Bytes); +void AsmPrinter::XRayFunctionEntry::emit(int Bytes, MCStreamer *Out) const { auto Kind8 = static_cast<uint8_t>(Kind); - Out->EmitBinaryData(StringRef(reinterpret_cast<const char *>(&Kind8), 1)); - Out->EmitBinaryData( + Out->emitBinaryData(StringRef(reinterpret_cast<const char *>(&Kind8), 1)); + Out->emitBinaryData( StringRef(reinterpret_cast<const char *>(&AlwaysInstrument), 1)); - Out->EmitBinaryData(StringRef(reinterpret_cast<const char *>(&Version), 1)); + Out->emitBinaryData(StringRef(reinterpret_cast<const char *>(&Version), 1)); auto Padding = (4 * Bytes) - ((2 * Bytes) + 3); assert(Padding >= 0 && "Instrumentation map entry > 4 * Word Size"); - Out->EmitZeros(Padding); + Out->emitZeros(Padding); } void AsmPrinter::emitXRayTable() { @@ -3161,28 +3231,34 @@ void AsmPrinter::emitXRayTable() { const Function &F = MF->getFunction(); MCSection *InstMap = nullptr; MCSection *FnSledIndex = nullptr; - if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) { - auto Associated = dyn_cast<MCSymbolELF>(CurrentFnSym); - assert(Associated != nullptr); - auto Flags = ELF::SHF_WRITE | ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER; - std::string GroupName; + const Triple &TT = TM.getTargetTriple(); + // Use PC-relative addresses on all targets except MIPS (MIPS64 cannot use + // PC-relative addresses because R_MIPS_PC64 does not exist). + bool PCRel = !TT.isMIPS(); + if (TT.isOSBinFormatELF()) { + auto LinkedToSym = cast<MCSymbolELF>(CurrentFnSym); + auto Flags = ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER; + if (!PCRel) + Flags |= ELF::SHF_WRITE; + StringRef GroupName; if (F.hasComdat()) { Flags |= ELF::SHF_GROUP; GroupName = F.getComdat()->getName(); } - - auto UniqueID = ++XRayFnUniqueID; - InstMap = - OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, Flags, 0, - GroupName, UniqueID, Associated); - FnSledIndex = - OutContext.getELFSection("xray_fn_idx", ELF::SHT_PROGBITS, Flags, 0, - GroupName, UniqueID, Associated); + InstMap = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, + Flags, 0, GroupName, + MCSection::NonUniqueID, LinkedToSym); + + if (!TM.Options.XRayOmitFunctionIndex) + FnSledIndex = OutContext.getELFSection( + "xray_fn_idx", ELF::SHT_PROGBITS, Flags | ELF::SHF_WRITE, 0, + GroupName, MCSection::NonUniqueID, LinkedToSym); } else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) { InstMap = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, SectionKind::getReadOnlyWithRel()); - FnSledIndex = OutContext.getMachOSection("__DATA", "xray_fn_idx", 0, - SectionKind::getReadOnlyWithRel()); + if (!TM.Options.XRayOmitFunctionIndex) + FnSledIndex = OutContext.getMachOSection( + "__DATA", "xray_fn_idx", 0, SectionKind::getReadOnlyWithRel()); } else { llvm_unreachable("Unsupported target"); } @@ -3192,23 +3268,46 @@ void AsmPrinter::emitXRayTable() { // Now we switch to the instrumentation map section. Because this is done // per-function, we are able to create an index entry that will represent the // range of sleds associated with a function. 
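[Editor's sketch] With the PC-relative form emitted below, each stored word is the distance from the field's own address to its target (the sled, or the start of the function), so a consumer recovers the absolute address by adding the field's address back. A minimal sketch of that decoding, assuming the section has been mapped into memory; the helper name is made up:

    #include <cstdint>
    #include <cstring>

    // Resolve one PC-relative field: the stored value is (Target - FieldAddress).
    static uintptr_t resolvePCRel(const uint8_t *FieldAddr) {
      int64_t Delta;
      std::memcpy(&Delta, FieldAddr, sizeof(Delta)); // field may be unaligned
      return reinterpret_cast<uintptr_t>(FieldAddr) + Delta;
    }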
+ auto &Ctx = OutContext; MCSymbol *SledsStart = OutContext.createTempSymbol("xray_sleds_start", true); OutStreamer->SwitchSection(InstMap); - OutStreamer->EmitLabel(SledsStart); - for (const auto &Sled : Sleds) - Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym); + OutStreamer->emitLabel(SledsStart); + for (const auto &Sled : Sleds) { + if (PCRel) { + MCSymbol *Dot = Ctx.createTempSymbol(); + OutStreamer->emitLabel(Dot); + OutStreamer->emitValueImpl( + MCBinaryExpr::createSub(MCSymbolRefExpr::create(Sled.Sled, Ctx), + MCSymbolRefExpr::create(Dot, Ctx), Ctx), + WordSizeBytes); + OutStreamer->emitValueImpl( + MCBinaryExpr::createSub( + MCSymbolRefExpr::create(CurrentFnBegin, Ctx), + MCBinaryExpr::createAdd( + MCSymbolRefExpr::create(Dot, Ctx), + MCConstantExpr::create(WordSizeBytes, Ctx), Ctx), + Ctx), + WordSizeBytes); + } else { + OutStreamer->emitSymbolValue(Sled.Sled, WordSizeBytes); + OutStreamer->emitSymbolValue(CurrentFnSym, WordSizeBytes); + } + Sled.emit(WordSizeBytes, OutStreamer.get()); + } MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_sleds_end", true); - OutStreamer->EmitLabel(SledsEnd); + OutStreamer->emitLabel(SledsEnd); // We then emit a single entry in the index per function. We use the symbols // that bound the instrumentation map as the range for a specific function. // Each entry here will be 2 * word size aligned, as we're writing down two // pointers. This should work for both 32-bit and 64-bit platforms. - OutStreamer->SwitchSection(FnSledIndex); - OutStreamer->EmitCodeAlignment(2 * WordSizeBytes); - OutStreamer->EmitSymbolValue(SledsStart, WordSizeBytes, false); - OutStreamer->EmitSymbolValue(SledsEnd, WordSizeBytes, false); - OutStreamer->SwitchSection(PrevSection); + if (FnSledIndex) { + OutStreamer->SwitchSection(FnSledIndex); + OutStreamer->emitCodeAlignment(2 * WordSizeBytes); + OutStreamer->emitSymbolValue(SledsStart, WordSizeBytes, false); + OutStreamer->emitSymbolValue(SledsEnd, WordSizeBytes, false); + OutStreamer->SwitchSection(PrevSection); + } Sleds.clear(); } @@ -3239,31 +3338,24 @@ void AsmPrinter::emitPatchableFunctionEntries() { const unsigned PointerSize = getPointerSize(); if (TM.getTargetTriple().isOSBinFormatELF()) { auto Flags = ELF::SHF_WRITE | ELF::SHF_ALLOC; + const MCSymbolELF *LinkedToSym = nullptr; + StringRef GroupName; - // As of binutils 2.33, GNU as does not support section flag "o" or linkage - // field "unique". Use SHF_LINK_ORDER if we are using the integrated - // assembler. + // GNU as < 2.35 did not support section flag 'o'. Use SHF_LINK_ORDER only + // if we are using the integrated assembler. 
if (MAI->useIntegratedAssembler()) { Flags |= ELF::SHF_LINK_ORDER; - std::string GroupName; if (F.hasComdat()) { Flags |= ELF::SHF_GROUP; GroupName = F.getComdat()->getName(); } - MCSection *Section = getObjFileLowering().SectionForGlobal(&F, TM); - unsigned UniqueID = - PatchableFunctionEntryID - .try_emplace(Section, PatchableFunctionEntryID.size()) - .first->second; - OutStreamer->SwitchSection(OutContext.getELFSection( - "__patchable_function_entries", ELF::SHT_PROGBITS, Flags, 0, - GroupName, UniqueID, cast<MCSymbolELF>(CurrentFnSym))); - } else { - OutStreamer->SwitchSection(OutContext.getELFSection( - "__patchable_function_entries", ELF::SHT_PROGBITS, Flags)); + LinkedToSym = cast<MCSymbolELF>(CurrentFnSym); } - EmitAlignment(Align(PointerSize)); - OutStreamer->EmitSymbolValue(CurrentPatchableFunctionEntrySym, PointerSize); + OutStreamer->SwitchSection(OutContext.getELFSection( + "__patchable_function_entries", ELF::SHT_PROGBITS, Flags, 0, GroupName, + MCSection::NonUniqueID, LinkedToSym)); + emitAlignment(Align(PointerSize)); + OutStreamer->emitSymbolValue(CurrentPatchableFunctionEntrySym, PointerSize); } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 992e44d95306..d81a9be26d39 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -36,22 +36,23 @@ using namespace llvm; //===----------------------------------------------------------------------===// /// EmitSLEB128 - emit the specified signed leb128 value. -void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const { +void AsmPrinter::emitSLEB128(int64_t Value, const char *Desc) const { if (isVerbose() && Desc) OutStreamer->AddComment(Desc); - OutStreamer->EmitSLEB128IntValue(Value); + OutStreamer->emitSLEB128IntValue(Value); } -void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, unsigned PadTo) const { +void AsmPrinter::emitULEB128(uint64_t Value, const char *Desc, + unsigned PadTo) const { if (isVerbose() && Desc) OutStreamer->AddComment(Desc); - OutStreamer->EmitULEB128IntValue(Value, PadTo); + OutStreamer->emitULEB128IntValue(Value, PadTo); } /// Emit something like ".uleb128 Hi-Lo". -void AsmPrinter::EmitLabelDifferenceAsULEB128(const MCSymbol *Hi, +void AsmPrinter::emitLabelDifferenceAsULEB128(const MCSymbol *Hi, const MCSymbol *Lo) const { OutStreamer->emitAbsoluteSymbolDiffAsULEB128(Hi, Lo); } @@ -105,7 +106,7 @@ static const char *DecodeDWARFEncoding(unsigned Encoding) { /// encoding. If verbose assembly output is enabled, we output comments /// describing the encoding. Desc is an optional string saying what the /// encoding is specifying (e.g. "LSDA"). -void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const { +void AsmPrinter::emitEncodingByte(unsigned Val, const char *Desc) const { if (isVerbose()) { if (Desc) OutStreamer->AddComment(Twine(Desc) + " Encoding = " + @@ -114,7 +115,7 @@ void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const { OutStreamer->AddComment(Twine("Encoding = ") + DecodeDWARFEncoding(Val)); } - OutStreamer->EmitIntValue(Val, 1); + OutStreamer->emitIntValue(Val, 1); } /// GetSizeOfEncodedValue - Return the size of the encoding in bytes. 
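[Editor's sketch] The renamed emitSLEB128/emitULEB128 helpers above write DWARF's variable-length integers: seven payload bits per byte, least-significant group first, with the top bit marking continuation. A self-contained sketch of the unsigned encoding, independent of LLVM's own encodeULEB128 from Support/LEB128.h:

    #include <cstdint>
    #include <vector>

    static std::vector<uint8_t> encodeULEB128(uint64_t Value) {
      std::vector<uint8_t> Bytes;
      do {
        uint8_t Byte = Value & 0x7f; // low seven bits
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80;              // more bytes follow
        Bytes.push_back(Byte);
      } while (Value != 0);
      return Bytes;                  // e.g. 624485 encodes as e5 8e 26
    }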
@@ -136,16 +137,16 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { } } -void AsmPrinter::EmitTTypeReference(const GlobalValue *GV, +void AsmPrinter::emitTTypeReference(const GlobalValue *GV, unsigned Encoding) const { if (GV) { const TargetLoweringObjectFile &TLOF = getObjFileLowering(); const MCExpr *Exp = TLOF.getTTypeGlobalReference(GV, Encoding, TM, MMI, *OutStreamer); - OutStreamer->EmitValue(Exp, GetSizeOfEncodedValue(Encoding)); + OutStreamer->emitValue(Exp, GetSizeOfEncodedValue(Encoding)); } else - OutStreamer->EmitIntValue(0, GetSizeOfEncodedValue(Encoding)); + OutStreamer->emitIntValue(0, GetSizeOfEncodedValue(Encoding)); } void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label, @@ -159,13 +160,13 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label, // If the format uses relocations with dwarf, refer to the symbol directly. if (MAI->doesDwarfUseRelocationsAcrossSections()) { - OutStreamer->EmitSymbolValue(Label, 4); + OutStreamer->emitSymbolValue(Label, 4); return; } } // Otherwise, emit it as a label difference from the start of the section. - EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4); + emitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4); } void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntry S) const { @@ -179,27 +180,26 @@ void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntry S) const { emitInt32(S.Offset); } -void AsmPrinter::EmitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const { - EmitLabelPlusOffset(Label, Offset, MAI->getCodePointerSize()); +void AsmPrinter::emitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const { + // TODO: Support DWARF64 + emitLabelPlusOffset(Label, Offset, 4); } -void AsmPrinter::EmitCallSiteOffset(const MCSymbol *Hi, - const MCSymbol *Lo, +void AsmPrinter::emitCallSiteOffset(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Encoding) const { // The least significant 3 bits specify the width of the encoding if ((Encoding & 0x7) == dwarf::DW_EH_PE_uleb128) - EmitLabelDifferenceAsULEB128(Hi, Lo); + emitLabelDifferenceAsULEB128(Hi, Lo); else - EmitLabelDifference(Hi, Lo, GetSizeOfEncodedValue(Encoding)); + emitLabelDifference(Hi, Lo, GetSizeOfEncodedValue(Encoding)); } -void AsmPrinter::EmitCallSiteValue(uint64_t Value, - unsigned Encoding) const { +void AsmPrinter::emitCallSiteValue(uint64_t Value, unsigned Encoding) const { // The least significant 3 bits specify the width of the encoding if ((Encoding & 0x7) == dwarf::DW_EH_PE_uleb128) - EmitULEB128(Value); + emitULEB128(Value); else - OutStreamer->EmitIntValue(Value, GetSizeOfEncodedValue(Encoding)); + OutStreamer->emitIntValue(Value, GetSizeOfEncodedValue(Encoding)); } //===----------------------------------------------------------------------===// @@ -211,40 +211,43 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { default: llvm_unreachable("Unexpected instruction"); case MCCFIInstruction::OpDefCfaOffset: - OutStreamer->EmitCFIDefCfaOffset(Inst.getOffset()); + OutStreamer->emitCFIDefCfaOffset(Inst.getOffset()); break; case MCCFIInstruction::OpAdjustCfaOffset: - OutStreamer->EmitCFIAdjustCfaOffset(Inst.getOffset()); + OutStreamer->emitCFIAdjustCfaOffset(Inst.getOffset()); break; case MCCFIInstruction::OpDefCfa: - OutStreamer->EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset()); + OutStreamer->emitCFIDefCfa(Inst.getRegister(), Inst.getOffset()); break; case MCCFIInstruction::OpDefCfaRegister: - OutStreamer->EmitCFIDefCfaRegister(Inst.getRegister()); + 
OutStreamer->emitCFIDefCfaRegister(Inst.getRegister()); break; case MCCFIInstruction::OpOffset: - OutStreamer->EmitCFIOffset(Inst.getRegister(), Inst.getOffset()); + OutStreamer->emitCFIOffset(Inst.getRegister(), Inst.getOffset()); break; case MCCFIInstruction::OpRegister: - OutStreamer->EmitCFIRegister(Inst.getRegister(), Inst.getRegister2()); + OutStreamer->emitCFIRegister(Inst.getRegister(), Inst.getRegister2()); break; case MCCFIInstruction::OpWindowSave: - OutStreamer->EmitCFIWindowSave(); + OutStreamer->emitCFIWindowSave(); break; case MCCFIInstruction::OpNegateRAState: - OutStreamer->EmitCFINegateRAState(); + OutStreamer->emitCFINegateRAState(); break; case MCCFIInstruction::OpSameValue: - OutStreamer->EmitCFISameValue(Inst.getRegister()); + OutStreamer->emitCFISameValue(Inst.getRegister()); break; case MCCFIInstruction::OpGnuArgsSize: - OutStreamer->EmitCFIGnuArgsSize(Inst.getOffset()); + OutStreamer->emitCFIGnuArgsSize(Inst.getOffset()); break; case MCCFIInstruction::OpEscape: - OutStreamer->EmitCFIEscape(Inst.getValues()); + OutStreamer->emitCFIEscape(Inst.getValues()); break; case MCCFIInstruction::OpRestore: - OutStreamer->EmitCFIRestore(Inst.getRegister()); + OutStreamer->emitCFIRestore(Inst.getRegister()); + break; + case MCCFIInstruction::OpUndefined: + OutStreamer->emitCFIUndefined(Inst.getRegister()); break; } } @@ -256,7 +259,7 @@ void AsmPrinter::emitDwarfDIE(const DIE &Die) const { Twine::utohexstr(Die.getOffset()) + ":0x" + Twine::utohexstr(Die.getSize()) + " " + dwarf::TagString(Die.getTag())); - EmitULEB128(Die.getAbbrevNumber()); + emitULEB128(Die.getAbbrevNumber()); // Emit the DIE attribute values. for (const auto &V : Die.values()) { @@ -271,7 +274,7 @@ void AsmPrinter::emitDwarfDIE(const DIE &Die) const { } // Emit an attribute using the defined form. - V.EmitValue(this); + V.emitValue(this); } // Emit the DIE children if any. @@ -286,7 +289,7 @@ void AsmPrinter::emitDwarfDIE(const DIE &Die) const { void AsmPrinter::emitDwarfAbbrev(const DIEAbbrev &Abbrev) const { // Emit the abbreviations code (base 1 index.) - EmitULEB128(Abbrev.getNumber(), "Abbreviation Code"); + emitULEB128(Abbrev.getNumber(), "Abbreviation Code"); // Emit the abbreviations data. Abbrev.Emit(this); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index c631cc5360b8..538107cecd8b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -106,7 +106,7 @@ unsigned AsmPrinter::addInlineAsmDiagBuffer(StringRef AsmStr, /// EmitInlineAsm - Emit a blob of inline asm to the output streamer. -void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, +void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, const MCTargetOptions &MCOptions, const MDNode *LocMDNode, InlineAsm::AsmDialect Dialect) const { @@ -127,7 +127,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, if (!MCAI->useIntegratedAssembler() && !OutStreamer->isIntegratedAssemblerRequired()) { emitInlineAsmStart(); - OutStreamer->EmitRawText(Str); + OutStreamer->emitRawText(Str); emitInlineAsmEnd(STI, nullptr); return; } @@ -489,9 +489,9 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, OS << '\n' << (char)0; // null terminate string. 
} -/// EmitInlineAsm - This method formats and emits the specified machine -/// instruction that is an inline asm. -void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { +/// This method formats and emits the specified machine instruction that is an +/// inline asm. +void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms"); // Count the number of register definitions to find the asm string. @@ -584,7 +584,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { SrcMgr.PrintMessage(Loc, SourceMgr::DK_Note, Note); } - EmitInlineAsm(OS.str(), getSubtargetInfo(), TM.Options.MCOptions, LocMD, + emitInlineAsm(OS.str(), getSubtargetInfo(), TM.Options.MCOptions, LocMD, MI->getInlineAsmDialect()); // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't @@ -592,7 +592,6 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { OutStreamer->emitRawComment(MAI->getInlineAsmEnd()); } - /// PrintSpecial - Print information related to the specified machine instr /// that is independent of the operand, and may be independent of the instr /// itself. This can be useful for portably encoding the comment character diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h index 09f7496cd4ef..90929a217368 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -30,8 +30,9 @@ class ByteStreamer { public: // For now we're just handling the calls we need for dwarf emission/hashing. virtual void EmitInt8(uint8_t Byte, const Twine &Comment = "") = 0; - virtual void EmitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0; - virtual void EmitULEB128(uint64_t DWord, const Twine &Comment = "", unsigned PadTo = 0) = 0; + virtual void emitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0; + virtual void emitULEB128(uint64_t DWord, const Twine &Comment = "", + unsigned PadTo = 0) = 0; }; class APByteStreamer final : public ByteStreamer { @@ -44,13 +45,14 @@ public: AP.OutStreamer->AddComment(Comment); AP.emitInt8(Byte); } - void EmitSLEB128(uint64_t DWord, const Twine &Comment) override { + void emitSLEB128(uint64_t DWord, const Twine &Comment) override { AP.OutStreamer->AddComment(Comment); - AP.EmitSLEB128(DWord); + AP.emitSLEB128(DWord); } - void EmitULEB128(uint64_t DWord, const Twine &Comment, unsigned PadTo) override { + void emitULEB128(uint64_t DWord, const Twine &Comment, + unsigned PadTo) override { AP.OutStreamer->AddComment(Comment); - AP.EmitULEB128(DWord); + AP.emitULEB128(DWord, nullptr, PadTo); } }; @@ -62,10 +64,11 @@ class HashingByteStreamer final : public ByteStreamer { void EmitInt8(uint8_t Byte, const Twine &Comment) override { Hash.update(Byte); } - void EmitSLEB128(uint64_t DWord, const Twine &Comment) override { + void emitSLEB128(uint64_t DWord, const Twine &Comment) override { Hash.addSLEB128(DWord); } - void EmitULEB128(uint64_t DWord, const Twine &Comment, unsigned PadTo) override { + void emitULEB128(uint64_t DWord, const Twine &Comment, + unsigned PadTo) override { Hash.addULEB128(DWord); } }; @@ -90,7 +93,7 @@ public: if (GenerateComments) Comments.push_back(Comment.str()); } - void EmitSLEB128(uint64_t DWord, const Twine &Comment) override { + void emitSLEB128(uint64_t DWord, const Twine &Comment) override { raw_svector_ostream OSE(Buffer); unsigned Length = encodeSLEB128(DWord, 
OSE); if (GenerateComments) { @@ -102,7 +105,8 @@ public: } } - void EmitULEB128(uint64_t DWord, const Twine &Comment, unsigned PadTo) override { + void emitULEB128(uint64_t DWord, const Twine &Comment, + unsigned PadTo) override { raw_svector_ostream OSE(Buffer); unsigned Length = encodeULEB128(DWord, OSE, PadTo); if (GenerateComments) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 62ad356e7f8f..3f053c7a38c7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -101,27 +101,27 @@ public: CVMCAdapter(MCStreamer &OS, TypeCollection &TypeTable) : OS(&OS), TypeTable(TypeTable) {} - void EmitBytes(StringRef Data) { OS->EmitBytes(Data); } + void emitBytes(StringRef Data) override { OS->emitBytes(Data); } - void EmitIntValue(uint64_t Value, unsigned Size) { - OS->EmitIntValueInHex(Value, Size); + void emitIntValue(uint64_t Value, unsigned Size) override { + OS->emitIntValueInHex(Value, Size); } - void EmitBinaryData(StringRef Data) { OS->EmitBinaryData(Data); } + void emitBinaryData(StringRef Data) override { OS->emitBinaryData(Data); } - void AddComment(const Twine &T) { OS->AddComment(T); } + void AddComment(const Twine &T) override { OS->AddComment(T); } - void AddRawComment(const Twine &T) { OS->emitRawComment(T); } + void AddRawComment(const Twine &T) override { OS->emitRawComment(T); } - bool isVerboseAsm() { return OS->isVerboseAsm(); } + bool isVerboseAsm() override { return OS->isVerboseAsm(); } - std::string getTypeName(TypeIndex TI) { + std::string getTypeName(TypeIndex TI) override { std::string TypeName; if (!TI.isNoneType()) { if (TI.isSimple()) - TypeName = TypeIndex::simpleTypeName(TI); + TypeName = std::string(TypeIndex::simpleTypeName(TI)); else - TypeName = TypeTable.getTypeName(TI); + TypeName = std::string(TypeTable.getTypeName(TI)); } return TypeName; } @@ -183,7 +183,7 @@ StringRef CodeViewDebug::getFullFilepath(const DIFile *File) { if (Dir.startswith("/") || Filename.startswith("/")) { if (llvm::sys::path::is_absolute(Filename, llvm::sys::path::Style::posix)) return Filename; - Filepath = Dir; + Filepath = std::string(Dir); if (Dir.back() != '/') Filepath += '/'; Filepath += Filename; @@ -195,7 +195,7 @@ StringRef CodeViewDebug::getFullFilepath(const DIFile *File) { // that would increase the IR size and probably not needed for other users. // For now, just concatenate and canonicalize the path here. 
if (Filename.find(':') == 1) - Filepath = Filename; + Filepath = std::string(Filename); else Filepath = (Dir + "\\" + Filename).str(); @@ -250,8 +250,15 @@ unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) { ChecksumAsBytes = ArrayRef<uint8_t>( reinterpret_cast<const uint8_t *>(CKMem), Checksum.size()); switch (F->getChecksum()->Kind) { - case DIFile::CSK_MD5: CSKind = FileChecksumKind::MD5; break; - case DIFile::CSK_SHA1: CSKind = FileChecksumKind::SHA1; break; + case DIFile::CSK_MD5: + CSKind = FileChecksumKind::MD5; + break; + case DIFile::CSK_SHA1: + CSKind = FileChecksumKind::SHA1; + break; + case DIFile::CSK_SHA256: + CSKind = FileChecksumKind::SHA256; + break; } } bool Success = OS.EmitCVFileDirective(NextId, FullPath, ChecksumAsBytes, @@ -303,12 +310,19 @@ static StringRef getPrettyScopeName(const DIScope *Scope) { return StringRef(); } -static const DISubprogram *getQualifiedNameComponents( +const DISubprogram *CodeViewDebug::collectParentScopeNames( const DIScope *Scope, SmallVectorImpl<StringRef> &QualifiedNameComponents) { const DISubprogram *ClosestSubprogram = nullptr; while (Scope != nullptr) { if (ClosestSubprogram == nullptr) ClosestSubprogram = dyn_cast<DISubprogram>(Scope); + + // If a type appears in a scope chain, make sure it gets emitted. The + // frontend will be responsible for deciding if this should be a forward + // declaration or a complete type. + if (const auto *Ty = dyn_cast<DICompositeType>(Scope)) + DeferredCompleteTypes.push_back(Ty); + StringRef ScopeName = getPrettyScopeName(Scope); if (!ScopeName.empty()) QualifiedNameComponents.push_back(ScopeName); @@ -317,24 +331,18 @@ static const DISubprogram *getQualifiedNameComponents( return ClosestSubprogram; } -static std::string getQualifiedName(ArrayRef<StringRef> QualifiedNameComponents, +static std::string formatNestedName(ArrayRef<StringRef> QualifiedNameComponents, StringRef TypeName) { std::string FullyQualifiedName; for (StringRef QualifiedNameComponent : llvm::reverse(QualifiedNameComponents)) { - FullyQualifiedName.append(QualifiedNameComponent); + FullyQualifiedName.append(std::string(QualifiedNameComponent)); FullyQualifiedName.append("::"); } - FullyQualifiedName.append(TypeName); + FullyQualifiedName.append(std::string(TypeName)); return FullyQualifiedName; } -static std::string getFullyQualifiedName(const DIScope *Scope, StringRef Name) { - SmallVector<StringRef, 5> QualifiedNameComponents; - getQualifiedNameComponents(Scope, QualifiedNameComponents); - return getQualifiedName(QualifiedNameComponents, Name); -} - struct CodeViewDebug::TypeLoweringScope { TypeLoweringScope(CodeViewDebug &CVD) : CVD(CVD) { ++CVD.TypeEmissionLevel; } ~TypeLoweringScope() { @@ -347,7 +355,18 @@ struct CodeViewDebug::TypeLoweringScope { CodeViewDebug &CVD; }; -static std::string getFullyQualifiedName(const DIScope *Ty) { +std::string CodeViewDebug::getFullyQualifiedName(const DIScope *Scope, + StringRef Name) { + // Ensure types in the scope chain are emitted as soon as possible. + // This can create otherwise a situation where S_UDTs are emitted while + // looping in emitDebugInfoForUDTs. 
+ TypeLoweringScope S(*this); + SmallVector<StringRef, 5> QualifiedNameComponents; + collectParentScopeNames(Scope, QualifiedNameComponents); + return formatNestedName(QualifiedNameComponents, Name); +} + +std::string CodeViewDebug::getFullyQualifiedName(const DIScope *Ty) { const DIScope *Scope = Ty->getScope(); return getFullyQualifiedName(Scope, getPrettyScopeName(Ty)); } @@ -418,10 +437,11 @@ getFunctionOptions(const DISubroutineType *Ty, ReturnTy = TypeArray[0]; } - if (auto *ReturnDCTy = dyn_cast_or_null<DICompositeType>(ReturnTy)) { - if (isNonTrivial(ReturnDCTy)) + // Add CxxReturnUdt option to functions that return nontrivial record types + // or methods that return record types. + if (auto *ReturnDCTy = dyn_cast_or_null<DICompositeType>(ReturnTy)) + if (isNonTrivial(ReturnDCTy) || ClassTy) FO |= FunctionOptions::CxxReturnUdt; - } // DISubroutineType is unnamed. Use DISubprogram's i.e. SPName in comparison. if (ClassTy && isNonTrivial(ClassTy) && SPName == ClassTy->getName()) { @@ -543,15 +563,15 @@ void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL, addLocIfNotPresent(CurFn->ChildSites, Loc); } - OS.EmitCVLocDirective(FuncId, FileId, DL.getLine(), DL.getCol(), + OS.emitCVLocDirective(FuncId, FileId, DL.getLine(), DL.getCol(), /*PrologueEnd=*/false, /*IsStmt=*/false, DL->getFilename(), SMLoc()); } void CodeViewDebug::emitCodeViewMagicVersion() { - OS.EmitValueToAlignment(4); + OS.emitValueToAlignment(4); OS.AddComment("Debug section magic"); - OS.EmitIntValue(COFF::DEBUG_SECTION_MAGIC, 4); + OS.emitInt32(COFF::DEBUG_SECTION_MAGIC); } void CodeViewDebug::endModule() { @@ -600,11 +620,11 @@ void CodeViewDebug::endModule() { // This subsection holds a file index to offset in string table table. OS.AddComment("File index to string table offset subsection"); - OS.EmitCVFileChecksumsDirective(); + OS.emitCVFileChecksumsDirective(); // This subsection holds the string table. OS.AddComment("String table"); - OS.EmitCVStringTableDirective(); + OS.emitCVStringTableDirective(); // Emit S_BUILDINFO, which points to LF_BUILDINFO. Put this in its own symbol // subsection in the generic .debug$S section at the end. There is no @@ -631,7 +651,7 @@ emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S, SmallString<32> NullTerminatedString( S.take_front(MaxRecordLength - MaxFixedRecordLength - 1)); NullTerminatedString.push_back('\0'); - OS.EmitBytes(NullTerminatedString); + OS.emitBytes(NullTerminatedString); } void CodeViewDebug::emitTypeInformation() { @@ -674,13 +694,13 @@ void CodeViewDebug::emitTypeGlobalHashes() { // hardcoded to version 0, SHA1. OS.SwitchSection(Asm->getObjFileLowering().getCOFFGlobalTypeHashesSection()); - OS.EmitValueToAlignment(4); + OS.emitValueToAlignment(4); OS.AddComment("Magic"); - OS.EmitIntValue(COFF::DEBUG_HASHES_SECTION_MAGIC, 4); + OS.emitInt32(COFF::DEBUG_HASHES_SECTION_MAGIC); OS.AddComment("Section Version"); - OS.EmitIntValue(0, 2); + OS.emitInt16(0); OS.AddComment("Hash Algorithm"); - OS.EmitIntValue(uint16_t(GlobalTypeHashAlg::SHA1_8), 2); + OS.emitInt16(uint16_t(GlobalTypeHashAlg::SHA1_8)); TypeIndex TI(TypeIndex::FirstNonSimpleIndex); for (const auto &GHR : TypeTable.hashes()) { @@ -696,7 +716,7 @@ void CodeViewDebug::emitTypeGlobalHashes() { assert(GHR.Hash.size() == 8); StringRef S(reinterpret_cast<const char *>(GHR.Hash.data()), GHR.Hash.size()); - OS.EmitBinaryData(S); + OS.emitBinaryData(S); } } @@ -775,16 +795,16 @@ void CodeViewDebug::emitCompilerInformation() { // TODO: Figure out which other flags need to be set. 
OS.AddComment("Flags and language"); - OS.EmitIntValue(Flags, 4); + OS.emitInt32(Flags); OS.AddComment("CPUType"); - OS.EmitIntValue(static_cast<uint64_t>(TheCPU), 2); + OS.emitInt16(static_cast<uint64_t>(TheCPU)); StringRef CompilerVersion = CU->getProducer(); Version FrontVer = parseVersion(CompilerVersion); OS.AddComment("Frontend version"); for (int N = 0; N < 4; ++N) - OS.EmitIntValue(FrontVer.Part[N], 2); + OS.emitInt16(FrontVer.Part[N]); // Some Microsoft tools, like Binscope, expect a backend version number of at // least 8.something, so we'll coerce the LLVM version into a form that @@ -797,7 +817,7 @@ void CodeViewDebug::emitCompilerInformation() { Version BackVer = {{ Major, 0, 0, 0 }}; OS.AddComment("Backend version"); for (int N = 0; N < 4; ++N) - OS.EmitIntValue(BackVer.Part[N], 2); + OS.emitInt16(BackVer.Part[N]); OS.AddComment("Null-terminated compiler version string"); emitNullTerminatedSymbolName(OS, CompilerVersion); @@ -841,7 +861,7 @@ void CodeViewDebug::emitBuildInfo() { MCSymbol *BISubsecEnd = beginCVSubsection(DebugSubsectionKind::Symbols); MCSymbol *BIEnd = beginSymbolRecord(SymbolKind::S_BUILDINFO); OS.AddComment("LF_BUILDINFO index"); - OS.EmitIntValue(BuildInfoIndex.getIndex(), 4); + OS.emitInt32(BuildInfoIndex.getIndex()); endSymbolRecord(BIEnd); endCVSubsection(BISubsecEnd); } @@ -858,7 +878,7 @@ void CodeViewDebug::emitInlineeLinesSubsection() { // for instance, will display a warning that the breakpoints are not valid if // the pdb does not match the source. OS.AddComment("Inlinee lines signature"); - OS.EmitIntValue(unsigned(InlineeLinesSignature::Normal), 4); + OS.emitInt32(unsigned(InlineeLinesSignature::Normal)); for (const DISubprogram *SP : InlinedSubprograms) { assert(TypeIndices.count({SP, nullptr})); @@ -870,11 +890,11 @@ void CodeViewDebug::emitInlineeLinesSubsection() { SP->getFilename() + Twine(':') + Twine(SP->getLine())); OS.AddBlankLine(); OS.AddComment("Type index of inlined function"); - OS.EmitIntValue(InlineeIdx.getIndex(), 4); + OS.emitInt32(InlineeIdx.getIndex()); OS.AddComment("Offset into filechecksum table"); - OS.EmitCVFileChecksumOffsetDirective(FileId); + OS.emitCVFileChecksumOffsetDirective(FileId); OS.AddComment("Starting line number"); - OS.EmitIntValue(SP->getLine(), 4); + OS.emitInt32(SP->getLine()); } endCVSubsection(InlineEnd); @@ -890,16 +910,16 @@ void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI, MCSymbol *InlineEnd = beginSymbolRecord(SymbolKind::S_INLINESITE); OS.AddComment("PtrParent"); - OS.EmitIntValue(0, 4); + OS.emitInt32(0); OS.AddComment("PtrEnd"); - OS.EmitIntValue(0, 4); + OS.emitInt32(0); OS.AddComment("Inlinee type index"); - OS.EmitIntValue(InlineeIdx.getIndex(), 4); + OS.emitInt32(InlineeIdx.getIndex()); unsigned FileId = maybeRecordFile(Site.Inlinee->getFile()); unsigned StartLineNum = Site.Inlinee->getLine(); - OS.EmitCVInlineLinetableDirective(Site.SiteFuncId, FileId, StartLineNum, + OS.emitCVInlineLinetableDirective(Site.SiteFuncId, FileId, StartLineNum, FI.Begin, FI.End); endSymbolRecord(InlineEnd); @@ -943,7 +963,8 @@ void CodeViewDebug::switchToDebugSectionForSymbol(const MCSymbol *GVSym) { void CodeViewDebug::emitDebugInfoForThunk(const Function *GV, FunctionInfo &FI, const MCSymbol *Fn) { - std::string FuncName = GlobalValue::dropLLVMManglingEscape(GV->getName()); + std::string FuncName = + std::string(GlobalValue::dropLLVMManglingEscape(GV->getName())); const ThunkOrdinal ordinal = ThunkOrdinal::Standard; // Only supported kind. 
OS.AddComment("Symbol subsection for " + Twine(FuncName)); @@ -952,11 +973,11 @@ void CodeViewDebug::emitDebugInfoForThunk(const Function *GV, // Emit S_THUNK32 MCSymbol *ThunkRecordEnd = beginSymbolRecord(SymbolKind::S_THUNK32); OS.AddComment("PtrParent"); - OS.EmitIntValue(0, 4); + OS.emitInt32(0); OS.AddComment("PtrEnd"); - OS.EmitIntValue(0, 4); + OS.emitInt32(0); OS.AddComment("PtrNext"); - OS.EmitIntValue(0, 4); + OS.emitInt32(0); OS.AddComment("Thunk section relative address"); OS.EmitCOFFSecRel32(Fn, /*Offset=*/0); OS.AddComment("Thunk section index"); @@ -964,7 +985,7 @@ void CodeViewDebug::emitDebugInfoForThunk(const Function *GV, OS.AddComment("Code size"); OS.emitAbsoluteSymbolDiff(FI.End, Fn, 2); OS.AddComment("Ordinal"); - OS.EmitIntValue(unsigned(ordinal), 1); + OS.emitInt8(unsigned(ordinal)); OS.AddComment("Function name"); emitNullTerminatedSymbolName(OS, FuncName); // Additional fields specific to the thunk ordinal would go here. @@ -1006,7 +1027,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, // If our DISubprogram name is empty, use the mangled name. if (FuncName.empty()) - FuncName = GlobalValue::dropLLVMManglingEscape(GV->getName()); + FuncName = std::string(GlobalValue::dropLLVMManglingEscape(GV->getName())); // Emit FPO data, but only on 32-bit x86. No other platforms use it. if (Triple(MMI->getModule()->getTargetTriple()).getArch() == Triple::x86) @@ -1022,27 +1043,27 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, // These fields are filled in by tools like CVPACK which run after the fact. OS.AddComment("PtrParent"); - OS.EmitIntValue(0, 4); + OS.emitInt32(0); OS.AddComment("PtrEnd"); - OS.EmitIntValue(0, 4); + OS.emitInt32(0); OS.AddComment("PtrNext"); - OS.EmitIntValue(0, 4); + OS.emitInt32(0); // This is the important bit that tells the debugger where the function // code is located and what's its size: OS.AddComment("Code size"); OS.emitAbsoluteSymbolDiff(FI.End, Fn, 4); OS.AddComment("Offset after prologue"); - OS.EmitIntValue(0, 4); + OS.emitInt32(0); OS.AddComment("Offset before epilogue"); - OS.EmitIntValue(0, 4); + OS.emitInt32(0); OS.AddComment("Function type index"); - OS.EmitIntValue(getFuncIdForSubprogram(GV->getSubprogram()).getIndex(), 4); + OS.emitInt32(getFuncIdForSubprogram(GV->getSubprogram()).getIndex()); OS.AddComment("Function section relative address"); OS.EmitCOFFSecRel32(Fn, /*Offset=*/0); OS.AddComment("Function section index"); OS.EmitCOFFSectionIndex(Fn); OS.AddComment("Flags"); - OS.EmitIntValue(0, 1); + OS.emitInt8(0); // Emit the function display name as a null-terminated string. OS.AddComment("Function name"); // Truncate the name so we won't overflow the record length field. @@ -1052,19 +1073,19 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, MCSymbol *FrameProcEnd = beginSymbolRecord(SymbolKind::S_FRAMEPROC); // Subtract out the CSR size since MSVC excludes that and we include it. 
OS.AddComment("FrameSize"); - OS.EmitIntValue(FI.FrameSize - FI.CSRSize, 4); + OS.emitInt32(FI.FrameSize - FI.CSRSize); OS.AddComment("Padding"); - OS.EmitIntValue(0, 4); + OS.emitInt32(0); OS.AddComment("Offset of padding"); - OS.EmitIntValue(0, 4); + OS.emitInt32(0); OS.AddComment("Bytes of callee saved registers"); - OS.EmitIntValue(FI.CSRSize, 4); + OS.emitInt32(FI.CSRSize); OS.AddComment("Exception handler offset"); - OS.EmitIntValue(0, 4); + OS.emitInt32(0); OS.AddComment("Exception handler section"); - OS.EmitIntValue(0, 2); + OS.emitInt16(0); OS.AddComment("Flags (defines frame register)"); - OS.EmitIntValue(uint32_t(FI.FrameProcOpts), 4); + OS.emitInt32(uint32_t(FI.FrameProcOpts)); endSymbolRecord(FrameProcEnd); emitLocalVariableList(FI, FI.Locals); @@ -1088,13 +1109,13 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, OS.EmitCOFFSecRel32(Label, /*Offset=*/0); // FIXME: Make sure we don't overflow the max record size. OS.EmitCOFFSectionIndex(Label); - OS.EmitIntValue(Strs->getNumOperands(), 2); + OS.emitInt16(Strs->getNumOperands()); for (Metadata *MD : Strs->operands()) { // MDStrings are null terminated, so we can do EmitBytes and get the // nice .asciz directive. StringRef Str = cast<MDString>(MD)->getString(); assert(Str.data()[Str.size()] == '\0' && "non-nullterminated MDString"); - OS.EmitBytes(StringRef(Str.data(), Str.size() + 1)); + OS.emitBytes(StringRef(Str.data(), Str.size() + 1)); } endSymbolRecord(AnnotEnd); } @@ -1111,7 +1132,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, OS.AddComment("Call instruction length"); OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 2); OS.AddComment("Type index"); - OS.EmitIntValue(getCompleteTypeIndex(DITy).getIndex(), 4); + OS.emitInt32(getCompleteTypeIndex(DITy).getIndex()); endSymbolRecord(HeapAllocEnd); } @@ -1124,7 +1145,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, endCVSubsection(SymbolsEnd); // We have an assembler directive that takes care of the whole line table. - OS.EmitCVLinetableDirective(FI.FuncId, Fn, FI.End); + OS.emitCVLinetableDirective(FI.FuncId, Fn, FI.End); } CodeViewDebug::LocalVarDefRange @@ -1173,7 +1194,7 @@ void CodeViewDebug::collectVariableInfoFromMFTable( } // Get the frame register used and the offset. 
- unsigned FrameReg = 0; + Register FrameReg; int FrameOffset = TFI->getFrameIndexReference(*Asm->MF, VI.Slot, FrameReg); uint16_t CVReg = TRI->getCodeViewRegNum(FrameReg); @@ -1468,12 +1489,12 @@ void CodeViewDebug::addToUDTs(const DIType *Ty) { if (!shouldEmitUdt(Ty)) return; - SmallVector<StringRef, 5> QualifiedNameComponents; + SmallVector<StringRef, 5> ParentScopeNames; const DISubprogram *ClosestSubprogram = - getQualifiedNameComponents(Ty->getScope(), QualifiedNameComponents); + collectParentScopeNames(Ty->getScope(), ParentScopeNames); std::string FullyQualifiedName = - getQualifiedName(QualifiedNameComponents, getPrettyScopeName(Ty)); + formatNestedName(ParentScopeNames, getPrettyScopeName(Ty)); if (ClosestSubprogram == nullptr) { GlobalUDTs.emplace_back(std::move(FullyQualifiedName), Ty); @@ -1571,7 +1592,7 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) { assert(Element->getTag() == dwarf::DW_TAG_subrange_type); const DISubrange *Subrange = cast<DISubrange>(Element); - assert(Subrange->getLowerBound() == 0 && + assert(!Subrange->getRawLowerBound() && "codeview doesn't support subranges with lower bounds"); int64_t Count = -1; if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt*>()) @@ -1767,11 +1788,12 @@ translatePtrToMemberRep(unsigned SizeInBytes, bool IsPMF, unsigned Flags) { TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty, PointerOptions PO) { assert(Ty->getTag() == dwarf::DW_TAG_ptr_to_member_type); + bool IsPMF = isa<DISubroutineType>(Ty->getBaseType()); TypeIndex ClassTI = getTypeIndex(Ty->getClassType()); - TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType(), Ty->getClassType()); + TypeIndex PointeeTI = + getTypeIndex(Ty->getBaseType(), IsPMF ? Ty->getClassType() : nullptr); PointerKind PK = getPointerSizeInBytes() == 8 ? PointerKind::Near64 : PointerKind::Near32; - bool IsPMF = isa<DISubroutineType>(Ty->getBaseType()); PointerMode PM = IsPMF ? PointerMode::PointerToMemberFunction : PointerMode::PointerToDataMember; @@ -2063,7 +2085,7 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) { // order, which is what MSVC does. if (auto *Enumerator = dyn_cast_or_null<DIEnumerator>(Element)) { EnumeratorRecord ER(MemberAccess::Public, - APSInt::getUnsigned(Enumerator->getValue()), + APSInt(Enumerator->getValue(), true), Enumerator->getName()); ContinuationBuilder.writeMemberType(ER); EnumeratorCount++; @@ -2248,7 +2270,7 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) { // MSVC appears to set this flag by searching any destructor or method with // FunctionOptions::Constructor among the emitted members. Clang AST has all - // the members, however special member functions are not yet emitted into + // the members, however special member functions are not yet emitted into // debug information. For now checking a class's non-triviality seems enough. // FIXME: not true for a nested unnamed struct. if (isNonTrivial(Ty)) @@ -2625,9 +2647,9 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, TypeIndex TI = Var.UseReferenceType ? getTypeIndexForReferenceTo(Var.DIVar->getType()) : getCompleteTypeIndex(Var.DIVar->getType()); - OS.EmitIntValue(TI.getIndex(), 4); + OS.emitInt32(TI.getIndex()); OS.AddComment("Flags"); - OS.EmitIntValue(static_cast<uint16_t>(Flags), 2); + OS.emitInt16(static_cast<uint16_t>(Flags)); // Truncate the name so we won't overflow the record length field. 
emitNullTerminatedSymbolName(OS, Var.DIVar->getName()); endSymbolRecord(LocalEnd); @@ -2660,7 +2682,7 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, : (EncFP == FI.EncodedLocalFramePtrReg))) { DefRangeFramePointerRelHeader DRHdr; DRHdr.Offset = Offset; - OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr); + OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr); } else { uint16_t RegRelFlags = 0; if (DefRange.IsSubfield) { @@ -2672,7 +2694,7 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, DRHdr.Register = Reg; DRHdr.Flags = RegRelFlags; DRHdr.BasePointerOffset = Offset; - OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr); + OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr); } } else { assert(DefRange.DataOffset == 0 && "unexpected offset into register"); @@ -2681,12 +2703,12 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, DRHdr.Register = DefRange.CVRegister; DRHdr.MayHaveNoName = 0; DRHdr.OffsetInParent = DefRange.StructOffset; - OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr); + OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr); } else { DefRangeRegisterHeader DRHdr; DRHdr.Register = DefRange.CVRegister; DRHdr.MayHaveNoName = 0; - OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr); + OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr); } } } @@ -2704,9 +2726,9 @@ void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block, const FunctionInfo& FI) { MCSymbol *RecordEnd = beginSymbolRecord(SymbolKind::S_BLOCK32); OS.AddComment("PtrParent"); - OS.EmitIntValue(0, 4); // PtrParent + OS.emitInt32(0); // PtrParent OS.AddComment("PtrEnd"); - OS.EmitIntValue(0, 4); // PtrEnd + OS.emitInt32(0); // PtrEnd OS.AddComment("Code size"); OS.emitAbsoluteSymbolDiff(Block.End, Block.Begin, 4); // Code Size OS.AddComment("Function section relative address"); @@ -2914,17 +2936,17 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) { MCSymbol *CodeViewDebug::beginCVSubsection(DebugSubsectionKind Kind) { MCSymbol *BeginLabel = MMI->getContext().createTempSymbol(), *EndLabel = MMI->getContext().createTempSymbol(); - OS.EmitIntValue(unsigned(Kind), 4); + OS.emitInt32(unsigned(Kind)); OS.AddComment("Subsection size"); OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 4); - OS.EmitLabel(BeginLabel); + OS.emitLabel(BeginLabel); return EndLabel; } void CodeViewDebug::endCVSubsection(MCSymbol *EndLabel) { - OS.EmitLabel(EndLabel); + OS.emitLabel(EndLabel); // Every subsection must be aligned to a 4-byte boundary. - OS.EmitValueToAlignment(4); + OS.emitValueToAlignment(4); } static StringRef getSymbolName(SymbolKind SymKind) { @@ -2939,10 +2961,10 @@ MCSymbol *CodeViewDebug::beginSymbolRecord(SymbolKind SymKind) { *EndLabel = MMI->getContext().createTempSymbol(); OS.AddComment("Record length"); OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 2); - OS.EmitLabel(BeginLabel); + OS.emitLabel(BeginLabel); if (OS.isVerboseAsm()) OS.AddComment("Record kind: " + getSymbolName(SymKind)); - OS.EmitIntValue(unsigned(SymKind), 2); + OS.emitInt16(unsigned(SymKind)); return EndLabel; } @@ -2951,27 +2973,31 @@ void CodeViewDebug::endSymbolRecord(MCSymbol *SymEnd) { // an extra copy of every symbol record in LLD. This increases object file // size by less than 1% in the clang build, and is compatible with the Visual // C++ linker. 
- OS.EmitValueToAlignment(4); - OS.EmitLabel(SymEnd); + OS.emitValueToAlignment(4); + OS.emitLabel(SymEnd); } void CodeViewDebug::emitEndSymbolRecord(SymbolKind EndKind) { OS.AddComment("Record length"); - OS.EmitIntValue(2, 2); + OS.emitInt16(2); if (OS.isVerboseAsm()) OS.AddComment("Record kind: " + getSymbolName(EndKind)); - OS.EmitIntValue(unsigned(EndKind), 2); // Record Kind + OS.emitInt16(uint16_t(EndKind)); // Record Kind } void CodeViewDebug::emitDebugInfoForUDTs( - ArrayRef<std::pair<std::string, const DIType *>> UDTs) { + const std::vector<std::pair<std::string, const DIType *>> &UDTs) { +#ifndef NDEBUG + size_t OriginalSize = UDTs.size(); +#endif for (const auto &UDT : UDTs) { const DIType *T = UDT.second; assert(shouldEmitUdt(T)); - MCSymbol *UDTRecordEnd = beginSymbolRecord(SymbolKind::S_UDT); OS.AddComment("Type"); - OS.EmitIntValue(getCompleteTypeIndex(T).getIndex(), 4); + OS.emitInt32(getCompleteTypeIndex(T).getIndex()); + assert(OriginalSize == UDTs.size() && + "getCompleteTypeIndex found new UDTs!"); emitNullTerminatedSymbolName(OS, UDT.first); endSymbolRecord(UDTRecordEnd); } @@ -3075,6 +3101,14 @@ void CodeViewDebug::emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals) { void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) { const DIGlobalVariable *DIGV = CVGV.DIGV; + + const DIScope *Scope = DIGV->getScope(); + // For static data members, get the scope from the declaration. + if (const auto *MemberDecl = dyn_cast_or_null<DIDerivedType>( + DIGV->getRawStaticDataMemberDeclaration())) + Scope = MemberDecl->getScope(); + std::string QualifiedName = getFullyQualifiedName(Scope, DIGV->getName()); + if (const GlobalVariable *GV = CVGV.GVInfo.dyn_cast<const GlobalVariable *>()) { // DataSym record, see SymbolRecord.h for more info. Thread local data @@ -3087,18 +3121,16 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) { : SymbolKind::S_GDATA32); MCSymbol *DataEnd = beginSymbolRecord(DataSym); OS.AddComment("Type"); - OS.EmitIntValue(getCompleteTypeIndex(DIGV->getType()).getIndex(), 4); + OS.emitInt32(getCompleteTypeIndex(DIGV->getType()).getIndex()); OS.AddComment("DataOffset"); OS.EmitCOFFSecRel32(GVSym, /*Offset=*/0); OS.AddComment("Segment"); OS.EmitCOFFSectionIndex(GVSym); OS.AddComment("Name"); const unsigned LengthOfDataRecord = 12; - emitNullTerminatedSymbolName(OS, DIGV->getName(), LengthOfDataRecord); + emitNullTerminatedSymbolName(OS, QualifiedName, LengthOfDataRecord); endSymbolRecord(DataEnd); } else { - // FIXME: Currently this only emits the global variables in the IR metadata. - // This should also emit enums and static data members. const DIExpression *DIE = CVGV.GVInfo.get<const DIExpression *>(); assert(DIE->isConstant() && "Global constant variables must contain a constant expression."); @@ -3106,7 +3138,7 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) { MCSymbol *SConstantEnd = beginSymbolRecord(SymbolKind::S_CONSTANT); OS.AddComment("Type"); - OS.EmitIntValue(getTypeIndex(DIGV->getType()).getIndex(), 4); + OS.emitInt32(getTypeIndex(DIGV->getType()).getIndex()); OS.AddComment("Value"); // Encoded integers shouldn't need more than 10 bytes. 
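As context for the beginSymbolRecord/endSymbolRecord hunks above: each CodeView symbol record is a 2-byte length (excluding the length field itself), a 2-byte record kind, a payload, then zero padding up to a 4-byte boundary. The real code never backpatches — it emits two temporary labels and lets MC resolve emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 2) — but a buffer-based sketch shows the same layout. RecordWriter and its members are hypothetical illustration names, not LLVM API.

// Standalone sketch of the record framing emitted by begin/endSymbolRecord.
#include <cstdint>
#include <vector>

class RecordWriter {
  std::vector<uint8_t> Buf;
  size_t LenPos = 0; // position of the pending 2-byte length field

public:
  void writeU16(uint16_t V) {
    Buf.push_back(uint8_t(V));
    Buf.push_back(uint8_t(V >> 8));
  }
  void writeU32(uint32_t V) {
    writeU16(uint16_t(V));
    writeU16(uint16_t(V >> 16));
  }
  void beginRecord(uint16_t Kind) {
    LenPos = Buf.size();
    writeU16(0);    // placeholder for "Record length"
    writeU16(Kind); // "Record kind"; counted in the length
  }
  void endRecord() {
    // Mirror emitValueToAlignment(4): padding is emitted before SymEnd in the
    // diff above, so the pad bytes count toward the record length.
    while (Buf.size() % 4)
      Buf.push_back(0);
    uint16_t Len = uint16_t(Buf.size() - LenPos - 2);
    Buf[LenPos] = uint8_t(Len);
    Buf[LenPos + 1] = uint8_t(Len >> 8);
  }
  const std::vector<uint8_t> &bytes() const { return Buf; }
};

For example, beginRecord(kind) followed by one writeU32 payload and endRecord() yields a length field of 6 (2 for the kind plus 4 for the payload), matching what the assembler computes from the label difference in the real emitter.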
@@ -3115,16 +3147,10 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) { CodeViewRecordIO IO(Writer); cantFail(IO.mapEncodedInteger(Val)); StringRef SRef((char *)data, Writer.getOffset()); - OS.EmitBinaryData(SRef); + OS.emitBinaryData(SRef); OS.AddComment("Name"); - const DIScope *Scope = DIGV->getScope(); - // For static data members, get the scope from the declaration. - if (const auto *MemberDecl = dyn_cast_or_null<DIDerivedType>( - DIGV->getRawStaticDataMemberDeclaration())) - Scope = MemberDecl->getScope(); - emitNullTerminatedSymbolName(OS, - getFullyQualifiedName(Scope, DIGV->getName())); + emitNullTerminatedSymbolName(OS, QualifiedName); endSymbolRecord(SConstantEnd); } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index b56b9047e1a9..82f0293874d0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -310,8 +310,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void emitDebugInfoForRetainedTypes(); - void - emitDebugInfoForUDTs(ArrayRef<std::pair<std::string, const DIType *>> UDTs); + void emitDebugInfoForUDTs( + const std::vector<std::pair<std::string, const DIType *>> &UDTs); void emitDebugInfoForGlobals(); void emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals); @@ -443,6 +443,15 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { codeview::TypeIndex TI, const DIType *ClassTy = nullptr); + /// Collect the names of parent scopes, innermost to outermost. Return the + /// innermost subprogram scope if present. Ensure that parent type scopes are + /// inserted into the type table. + const DISubprogram * + collectParentScopeNames(const DIScope *Scope, + SmallVectorImpl<StringRef> &ParentScopeNames); + std::string getFullyQualifiedName(const DIScope *Scope, StringRef Name); + std::string getFullyQualifiedName(const DIScope *Scope); + unsigned getPointerSizeInBytes(); protected: diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 84b86a71fa5f..edf82fbed650 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -67,17 +67,17 @@ void DIEAbbrev::Profile(FoldingSetNodeID &ID) const { /// void DIEAbbrev::Emit(const AsmPrinter *AP) const { // Emit its Dwarf tag type. - AP->EmitULEB128(Tag, dwarf::TagString(Tag).data()); + AP->emitULEB128(Tag, dwarf::TagString(Tag).data()); // Emit whether it has children DIEs. - AP->EmitULEB128((unsigned)Children, dwarf::ChildrenString(Children).data()); + AP->emitULEB128((unsigned)Children, dwarf::ChildrenString(Children).data()); // For each attribute description. for (unsigned i = 0, N = Data.size(); i < N; ++i) { const DIEAbbrevData &AttrData = Data[i]; // Emit attribute type. - AP->EmitULEB128(AttrData.getAttribute(), + AP->emitULEB128(AttrData.getAttribute(), dwarf::AttributeString(AttrData.getAttribute()).data()); // Emit form type. @@ -92,17 +92,17 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const { llvm_unreachable("Invalid form for specified DWARF version"); } #endif - AP->EmitULEB128(AttrData.getForm(), + AP->emitULEB128(AttrData.getForm(), dwarf::FormEncodingString(AttrData.getForm()).data()); // Emit value for DW_FORM_implicit_const. 
if (AttrData.getForm() == dwarf::DW_FORM_implicit_const) - AP->EmitSLEB128(AttrData.getValue()); + AP->emitSLEB128(AttrData.getValue()); } // Mark end of abbreviation. - AP->EmitULEB128(0, "EOM(1)"); - AP->EmitULEB128(0, "EOM(2)"); + AP->emitULEB128(0, "EOM(1)"); + AP->emitULEB128(0, "EOM(2)"); } LLVM_DUMP_METHOD @@ -325,13 +325,13 @@ DIEUnit::DIEUnit(uint16_t V, uint8_t A, dwarf::Tag UnitTag) "expected a unit TAG"); } -void DIEValue::EmitValue(const AsmPrinter *AP) const { +void DIEValue::emitValue(const AsmPrinter *AP) const { switch (Ty) { case isNone: llvm_unreachable("Expected valid DIEValue"); #define HANDLE_DIEVALUE(T) \ case is##T: \ - getDIE##T().EmitValue(AP, Form); \ + getDIE##T().emitValue(AP, Form); \ break; #include "llvm/CodeGen/DIEValue.def" } @@ -374,7 +374,7 @@ LLVM_DUMP_METHOD void DIEValue::dump() const { /// EmitValue - Emit integer of appropriate size. /// -void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { +void DIEInteger::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_implicit_const: case dwarf::DW_FORM_flag_present: @@ -409,7 +409,7 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_strp_sup: case dwarf::DW_FORM_addr: case dwarf::DW_FORM_ref_addr: - Asm->OutStreamer->EmitIntValue(Integer, SizeOf(Asm, Form)); + Asm->OutStreamer->emitIntValue(Integer, SizeOf(Asm, Form)); return; case dwarf::DW_FORM_GNU_str_index: case dwarf::DW_FORM_GNU_addr_index: @@ -418,10 +418,10 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_addrx: case dwarf::DW_FORM_rnglistx: case dwarf::DW_FORM_udata: - Asm->EmitULEB128(Integer); + Asm->emitULEB128(Integer); return; case dwarf::DW_FORM_sdata: - Asm->EmitSLEB128(Integer); + Asm->emitSLEB128(Integer); return; default: llvm_unreachable("DIE Value form not supported yet"); } @@ -465,8 +465,8 @@ void DIEInteger::print(raw_ostream &O) const { /// EmitValue - Emit expression value. /// -void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { - AP->EmitDebugValue(Expr, SizeOf(AP, Form)); +void DIEExpr::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { + AP->emitDebugValue(Expr, SizeOf(AP, Form)); } /// SizeOf - Determine size of expression value in bytes. @@ -487,12 +487,11 @@ void DIEExpr::print(raw_ostream &O) const { O << "Expr: " << *Expr; } /// EmitValue - Emit label value. /// -void DIELabel::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { - AP->EmitLabelReference(Label, SizeOf(AP, Form), - Form == dwarf::DW_FORM_strp || - Form == dwarf::DW_FORM_sec_offset || - Form == dwarf::DW_FORM_ref_addr || - Form == dwarf::DW_FORM_data4); +void DIELabel::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { + AP->emitLabelReference( + Label, SizeOf(AP, Form), + Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_sec_offset || + Form == dwarf::DW_FORM_ref_addr || Form == dwarf::DW_FORM_data4); } /// SizeOf - Determine size of label value in bytes. 
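The DIE.cpp hunks above rename AsmPrinter's EmitULEB128/EmitSLEB128 helpers to emitULEB128/emitSLEB128; the underlying encodings are the standard DWARF LEB128 forms. As a reference, here is a minimal standalone sketch of both encoders. This is illustration only: LLVM's real versions live in llvm/Support/LEB128.h (encodeULEB128/encodeSLEB128), write to a stream rather than a vector, and additionally support the PadTo parameter used by emitULEB128.

// Minimal LEB128 encoders for illustration; not the LLVM implementation.
#include <cstdint>
#include <vector>

static unsigned toULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  unsigned Count = 0;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // set the high bit: more bytes follow
    Out.push_back(Byte);
    ++Count;
  } while (Value != 0);
  return Count;
}

static unsigned toSLEB128(int64_t Value, std::vector<uint8_t> &Out) {
  unsigned Count = 0;
  bool More;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7; // arithmetic shift, so the sign is preserved
    // Stop once the remaining value is all sign bits and the emitted byte's
    // own sign bit (0x40) already agrees with it.
    More = !((Value == 0 && (Byte & 0x40) == 0) ||
             (Value == -1 && (Byte & 0x40) != 0));
    if (More)
      Byte |= 0x80;
    Out.push_back(Byte);
    ++Count;
  } while (More);
  return Count;
}

So 64 encodes as 0xC0 0x00 in SLEB128 (the extra byte disambiguates the sign), while -1 is the single byte 0x7F; DIEAbbrev::Emit above streams tags, attributes, and forms through exactly this kind of encoding.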
@@ -511,10 +510,10 @@ void DIELabel::print(raw_ostream &O) const { O << "Lbl: " << Label->getName(); } // DIEBaseTypeRef Implementation //===----------------------------------------------------------------------===// -void DIEBaseTypeRef::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { +void DIEBaseTypeRef::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { uint64_t Offset = CU->ExprRefedBaseTypes[Index].Die->getOffset(); assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit"); - AP->EmitULEB128(Offset, nullptr, ULEB128PadSize); + AP->emitULEB128(Offset, nullptr, ULEB128PadSize); } unsigned DIEBaseTypeRef::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { @@ -530,8 +529,8 @@ void DIEBaseTypeRef::print(raw_ostream &O) const { O << "BaseTypeRef: " << Index /// EmitValue - Emit delta value. /// -void DIEDelta::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { - AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form)); +void DIEDelta::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { + AP->emitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form)); } /// SizeOf - Determine size of delta value in bytes. @@ -554,7 +553,7 @@ void DIEDelta::print(raw_ostream &O) const { /// EmitValue - Emit string value. /// -void DIEString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { +void DIEString::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { // Index of string in symbol table. switch (Form) { case dwarf::DW_FORM_GNU_str_index: @@ -563,13 +562,13 @@ void DIEString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_strx2: case dwarf::DW_FORM_strx3: case dwarf::DW_FORM_strx4: - DIEInteger(S.getIndex()).EmitValue(AP, Form); + DIEInteger(S.getIndex()).emitValue(AP, Form); return; case dwarf::DW_FORM_strp: if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) - DIELabel(S.getSymbol()).EmitValue(AP, Form); + DIELabel(S.getSymbol()).emitValue(AP, Form); else - DIEInteger(S.getOffset()).EmitValue(AP, Form); + DIEInteger(S.getOffset()).emitValue(AP, Form); return; default: llvm_unreachable("Expected valid string form"); @@ -605,9 +604,9 @@ void DIEString::print(raw_ostream &O) const { //===----------------------------------------------------------------------===// // DIEInlineString Implementation //===----------------------------------------------------------------------===// -void DIEInlineString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { +void DIEInlineString::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_string) { - AP->OutStreamer->EmitBytes(S); + AP->OutStreamer->emitBytes(S); AP->emitInt8(0); return; } @@ -630,18 +629,18 @@ void DIEInlineString::print(raw_ostream &O) const { /// EmitValue - Emit debug information entry offset. 
/// -void DIEEntry::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { +void DIEEntry::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_ref1: case dwarf::DW_FORM_ref2: case dwarf::DW_FORM_ref4: case dwarf::DW_FORM_ref8: - AP->OutStreamer->EmitIntValue(Entry->getOffset(), SizeOf(AP, Form)); + AP->OutStreamer->emitIntValue(Entry->getOffset(), SizeOf(AP, Form)); return; case dwarf::DW_FORM_ref_udata: - AP->EmitULEB128(Entry->getOffset()); + AP->emitULEB128(Entry->getOffset()); return; case dwarf::DW_FORM_ref_addr: { @@ -649,11 +648,11 @@ void DIEEntry::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { unsigned Addr = Entry->getDebugSectionOffset(); if (const MCSymbol *SectionSym = Entry->getUnit()->getCrossSectionRelativeBaseAddress()) { - AP->EmitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true); + AP->emitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true); return; } - AP->OutStreamer->EmitIntValue(Addr, SizeOf(AP, Form)); + AP->OutStreamer->emitIntValue(Addr, SizeOf(AP, Form)); return; } default: @@ -711,7 +710,7 @@ unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const { /// EmitValue - Emit location data. /// -void DIELoc::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { +void DIELoc::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const { switch (Form) { default: llvm_unreachable("Improper form for block"); case dwarf::DW_FORM_block1: Asm->emitInt8(Size); break; @@ -719,11 +718,12 @@ void DIELoc::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_block4: Asm->emitInt32(Size); break; case dwarf::DW_FORM_block: case dwarf::DW_FORM_exprloc: - Asm->EmitULEB128(Size); break; + Asm->emitULEB128(Size); + break; } for (const auto &V : values()) - V.EmitValue(Asm); + V.emitValue(Asm); } /// SizeOf - Determine size of location data in bytes. @@ -762,19 +762,21 @@ unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const { /// EmitValue - Emit block data. /// -void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { +void DIEBlock::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const { switch (Form) { default: llvm_unreachable("Improper form for block"); case dwarf::DW_FORM_block1: Asm->emitInt8(Size); break; case dwarf::DW_FORM_block2: Asm->emitInt16(Size); break; case dwarf::DW_FORM_block4: Asm->emitInt32(Size); break; - case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break; + case dwarf::DW_FORM_block: + Asm->emitULEB128(Size); + break; case dwarf::DW_FORM_string: break; case dwarf::DW_FORM_data16: break; } for (const auto &V : values()) - V.EmitValue(Asm); + V.emitValue(Asm); } /// SizeOf - Determine size of block data in bytes. @@ -811,9 +813,9 @@ unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { /// EmitValue - Emit label value. 
/// -void DIELocList::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { +void DIELocList::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_loclistx) { - AP->EmitULEB128(Index); + AP->emitULEB128(Index); return; } DwarfDebug *DD = AP->getDwarfDebug(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index bfac8850a2a6..f26ef63eedec 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -17,10 +17,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/DIE.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" -#include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -224,8 +222,9 @@ void DIEHash::hashLocList(const DIELocList &LocList) { HashingByteStreamer Streamer(*this); DwarfDebug &DD = *AP->getDwarfDebug(); const DebugLocStream &Locs = DD.getDebugLocs(); - for (const auto &Entry : Locs.getEntries(Locs.getList(LocList.getValue()))) - DD.emitDebugLocEntry(Streamer, Entry, nullptr); + const DebugLocStream::List &List = Locs.getList(LocList.getValue()); + for (const DebugLocStream::Entry &Entry : Locs.getEntries(List)) + DD.emitDebugLocEntry(Streamer, Entry, List.CU); } // Hash an individual attribute \param Attr based on the type of attribute and @@ -361,7 +360,7 @@ void DIEHash::computeHash(const DIE &Die) { for (auto &C : Die.children()) { // 7.27 Step 7 // If C is a nested type entry or a member function entry, ... - if (isType(C.getTag()) || C.getTag() == dwarf::DW_TAG_subprogram) { + if (isType(C.getTag()) || (C.getTag() == dwarf::DW_TAG_subprogram && isType(C.getParent()->getTag()))) { StringRef Name = getDIEStringAttr(C, dwarf::DW_AT_name); // ... and has a DW_AT_name attribute if (!Name.empty()) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h index 2e49514c98be..1a69f6772873 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h @@ -20,7 +20,6 @@ namespace llvm { class AsmPrinter; -class CompileUnit; /// An object containing the capability of hashing and adding hash /// attributes onto a DIE. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp index 170fc8b6d49f..584b7614915d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -47,7 +47,8 @@ static Register isDescribedByReg(const MachineInstr &MI) { return 0; // If location of variable is described using a register (directly or // indirectly), this register is always a first operand. - return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : Register(); + return MI.getDebugOperand(0).isReg() ? 
MI.getDebugOperand(0).getReg() + : Register(); } bool DbgValueHistoryMap::startDbgValue(InlinedEntity Var, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 22f458e4b03e..880791a06d93 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -32,9 +32,9 @@ DbgVariableLocation::extractFromMachineInstruction( DbgVariableLocation Location; if (!Instruction.isDebugValue()) return None; - if (!Instruction.getOperand(0).isReg()) + if (!Instruction.getDebugOperand(0).isReg()) return None; - Location.Register = Instruction.getOperand(0).getReg(); + Location.Register = Instruction.getDebugOperand(0).getReg(); Location.FragmentInfo.reset(); // We only handle expressions generated by DIExpression::appendOffset, // which doesn't require a full stack machine. @@ -124,21 +124,6 @@ MCSymbol *DebugHandlerBase::getLabelAfterInsn(const MachineInstr *MI) { return LabelsAfterInsn.lookup(MI); } -// Return the function-local offset of an instruction. -const MCExpr * -DebugHandlerBase::getFunctionLocalOffsetAfterInsn(const MachineInstr *MI) { - MCContext &MC = Asm->OutContext; - - MCSymbol *Start = Asm->getFunctionBegin(); - const auto *StartRef = MCSymbolRefExpr::create(Start, MC); - - MCSymbol *AfterInsn = getLabelAfterInsn(MI); - assert(AfterInsn && "Expected label after instruction"); - const auto *AfterRef = MCSymbolRefExpr::create(AfterInsn, MC); - - return MCBinaryExpr::createSub(AfterRef, StartRef, MC); -} - /// If this type is derived from a base type then return base type size. uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) { assert(Ty); @@ -215,7 +200,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) { continue; auto IsDescribedByReg = [](const MachineInstr *MI) { - return MI->getOperand(0).isReg() && MI->getOperand(0).getReg(); + return MI->getDebugOperand(0).isReg() && MI->getDebugOperand(0).getReg(); }; // The first mention of a function argument gets the CurrentFnBegin label, @@ -297,7 +282,7 @@ void DebugHandlerBase::beginInstruction(const MachineInstr *MI) { if (!PrevLabel) { PrevLabel = MMI->getContext().createTempSymbol(); - Asm->OutStreamer->EmitLabel(PrevLabel); + Asm->OutStreamer->emitLabel(PrevLabel); } I->second = PrevLabel; } @@ -329,7 +314,7 @@ void DebugHandlerBase::endInstruction() { // We need a label after this instruction. 
if (!PrevLabel) { PrevLabel = MMI->getContext().createTempSymbol(); - Asm->OutStreamer->EmitLabel(PrevLabel); + Asm->OutStreamer->emitLabel(PrevLabel); } I->second = PrevLabel; } @@ -342,3 +327,17 @@ void DebugHandlerBase::endFunction(const MachineFunction *MF) { LabelsBeforeInsn.clear(); LabelsAfterInsn.clear(); } + +void DebugHandlerBase::beginBasicBlock(const MachineBasicBlock &MBB) { + if (!MBB.isBeginSection()) + return; + + PrevLabel = MBB.getSymbol(); +} + +void DebugHandlerBase::endBasicBlock(const MachineBasicBlock &MBB) { + if (!MBB.isEndSection()) + return; + + PrevLabel = nullptr; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index facbf22946e4..11ed1062f77e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -47,8 +47,8 @@ void DwarfCFIExceptionBase::markFunctionEnd() { } void DwarfCFIExceptionBase::endFragment() { - if (shouldEmitCFI) - Asm->OutStreamer->EmitCFIEndProc(); + if (shouldEmitCFI && !Asm->MF->hasBBSections()) + Asm->OutStreamer->emitCFIEndProc(); } DwarfCFIException::DwarfCFIException(AsmPrinter *A) @@ -133,13 +133,13 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB, if (!hasEmittedCFISections) { if (Asm->needsOnlyDebugCFIMoves()) - Asm->OutStreamer->EmitCFISections(false, true); + Asm->OutStreamer->emitCFISections(false, true); else if (Asm->TM.Options.ForceDwarfFrameSection) - Asm->OutStreamer->EmitCFISections(true, true); + Asm->OutStreamer->emitCFISections(true, true); hasEmittedCFISections = true; } - Asm->OutStreamer->EmitCFIStartProc(/*IsSimple=*/false); + Asm->OutStreamer->emitCFIStartProc(/*IsSimple=*/false); // Indicate personality routine, if any. if (!shouldEmitPersonality) @@ -157,11 +157,11 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB, const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(P, Asm->TM, MMI); - Asm->OutStreamer->EmitCFIPersonality(Sym, PerEncoding); + Asm->OutStreamer->emitCFIPersonality(Sym, PerEncoding); // Provide LSDA information. if (shouldEmitLSDA) - Asm->OutStreamer->EmitCFILsda(ESP(Asm), TLOF.getLSDAEncoding()); + Asm->OutStreamer->emitCFILsda(ESP(Asm), TLOF.getLSDAEncoding()); } /// endFunction - Gather and emit post-function exception information. 
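The DwarfCFIException hunks in this region teach the CFI emitter about basic-block sections: endFragment now skips emitCFIEndProc when the function has BB sections, and the next hunk adds beginBasicBlock/endBasicBlock hooks so each section gets its own CFI bracket. A toy model of the resulting output is sketched below; it assumes (as the hooks' callers appear to) that the begin/end hooks fire only at section boundaries. The block names and the two flags are hypothetical stand-ins for MachineBasicBlock state, not actual API.

// Toy model: one .cfi_startproc/.cfi_endproc pair per basic-block section.
#include <cstdio>
#include <vector>

struct Block {
  const char *Name;
  bool BeginsSection;
  bool EndsSection;
};

int main() {
  std::vector<Block> Blocks = {
      {"entry", true, false},  {"if.then", false, true}, // hot section
      {"cold.1", true, false}, {"cold.2", false, true},  // unlikely section
  };
  for (const Block &MBB : Blocks) {
    if (MBB.BeginsSection)
      std::puts("\t.cfi_startproc"); // cf. DwarfCFIException::beginBasicBlock
    std::printf("%s:\n\t...\n", MBB.Name);
    if (MBB.EndsSection)
      std::puts("\t.cfi_endproc");   // cf. DwarfCFIException::endBasicBlock
  }
}

This matches the DebugHandlerBase changes above as well, which reset PrevLabel at section begin/end so labels are never assumed to be reachable by offset from a different section.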
@@ -172,3 +172,12 @@ void DwarfCFIException::endFunction(const MachineFunction *MF) { emitExceptionTable(); } + +void DwarfCFIException::beginBasicBlock(const MachineBasicBlock &MBB) { + beginFragment(&MBB, getExceptionSym); +} + +void DwarfCFIException::endBasicBlock(const MachineBasicBlock &MBB) { + if (shouldEmitCFI) + Asm->OutStreamer->emitCFIEndProc(); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index e97bcd62e8c7..296c380ae550 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -37,6 +37,7 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolWasm.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Support/Casting.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -113,8 +114,9 @@ unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) { // extend .file to support this. unsigned CUID = Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID(); if (!File) - return Asm->OutStreamer->EmitDwarfFileDirective(0, "", "", None, None, CUID); - return Asm->OutStreamer->EmitDwarfFileDirective( + return Asm->OutStreamer->emitDwarfFileDirective(0, "", "", None, None, + CUID); + return Asm->OutStreamer->emitDwarfFileDirective( 0, File->getDirectory(), File->getFilename(), getMD5AsBytes(File), File->getSource(), CUID); } @@ -154,7 +156,8 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( DeclContext = GV->getScope(); // Add name and type. addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName()); - addType(*VariableDIE, GTy); + if (GTy) + addType(*VariableDIE, GTy); // Add scoping info. if (!GV->isLocalToUnit()) @@ -328,6 +331,8 @@ DIE *DwarfCompileUnit::getOrCreateCommonBlock( } void DwarfCompileUnit::addRange(RangeSpan Range) { + DD->insertSectionLabel(Range.Begin); + bool SameAsPrevCU = this == DD->getPrevCU(); DD->setPrevCU(this); // If we have no current ranges just add the range and return, otherwise, @@ -348,8 +353,6 @@ void DwarfCompileUnit::initStmtList() { if (CUNode->isDebugDirectivesOnly()) return; - // Define start line table label for each Compile Unit. - MCSymbol *LineTableStartSym; const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); if (DD->useSectionsAsReferences()) { LineTableStartSym = TLOF.getDwarfLineSection()->getBeginSymbol(); @@ -363,13 +366,14 @@ void DwarfCompileUnit::initStmtList() { // left in the skeleton CU and so not included. // The line table entries are not always emitted in assembly, so it // is not okay to use line_table_start here. 
- StmtListValue = addSectionLabel(getUnitDie(), dwarf::DW_AT_stmt_list, LineTableStartSym, TLOF.getDwarfLineSection()->getBeginSymbol()); } void DwarfCompileUnit::applyStmtList(DIE &D) { - D.addValue(DIEValueAllocator, *StmtListValue); + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + addSectionLabel(D, dwarf::DW_AT_stmt_list, LineTableStartSym, + TLOF.getDwarfLineSection()->getBeginSymbol()); } void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin, @@ -392,7 +396,14 @@ void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin, DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes()); - attachLowHighPC(*SPDie, Asm->getFunctionBegin(), Asm->getFunctionEnd()); + SmallVector<RangeSpan, 2> BB_List; + // If basic block sections are on, ranges for each basic block section has + // to be emitted separately. + for (const auto &R : Asm->MBBSectionRanges) + BB_List.push_back({R.second.BeginLabel, R.second.EndLabel}); + + attachRangesOrLowHighPC(*SPDie, BB_List); + if (DD->useAppleExtensionAttributes() && !DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim( *DD->getCurrentFunction())) @@ -400,15 +411,60 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { // Only include DW_AT_frame_base in full debug info if (!includeMinimalInlineScopes()) { - if (Asm->MF->getTarget().getTargetTriple().isNVPTX()) { + const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); + TargetFrameLowering::DwarfFrameBase FrameBase = + TFI->getDwarfFrameBase(*Asm->MF); + switch (FrameBase.Kind) { + case TargetFrameLowering::DwarfFrameBase::Register: { + if (Register::isPhysicalRegister(FrameBase.Location.Reg)) { + MachineLocation Location(FrameBase.Location.Reg); + addAddress(*SPDie, dwarf::DW_AT_frame_base, Location); + } + break; + } + case TargetFrameLowering::DwarfFrameBase::CFA: { DIELoc *Loc = new (DIEValueAllocator) DIELoc; addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_call_frame_cfa); addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc); - } else { - const TargetRegisterInfo *RI = Asm->MF->getSubtarget().getRegisterInfo(); - MachineLocation Location(RI->getFrameRegister(*Asm->MF)); - if (Register::isPhysicalRegister(Location.getReg())) - addAddress(*SPDie, dwarf::DW_AT_frame_base, Location); + break; + } + case TargetFrameLowering::DwarfFrameBase::WasmFrameBase: { + // FIXME: duplicated from Target/WebAssembly/WebAssembly.h + // don't want to depend on target specific headers in this code? + const unsigned TI_GLOBAL_RELOC = 3; + if (FrameBase.Location.WasmLoc.Kind == TI_GLOBAL_RELOC) { + // These need to be relocatable. + assert(FrameBase.Location.WasmLoc.Index == 0); // Only SP so far. + auto SPSym = cast<MCSymbolWasm>( + Asm->GetExternalSymbolSymbol("__stack_pointer")); + // FIXME: this repeats what WebAssemblyMCInstLower:: + // GetExternalSymbolSymbol does, since if there's no code that + // refers to this symbol, we have to set it here. + SPSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); + SPSym->setGlobalType(wasm::WasmGlobalType{ + uint8_t(Asm->getSubtargetInfo().getTargetTriple().getArch() == + Triple::wasm64 + ? 
wasm::WASM_TYPE_I64 + : wasm::WASM_TYPE_I32), + true}); + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_WASM_location); + addSInt(*Loc, dwarf::DW_FORM_sdata, FrameBase.Location.WasmLoc.Kind); + addLabel(*Loc, dwarf::DW_FORM_udata, SPSym); + DD->addArangeLabel(SymbolCU(this, SPSym)); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); + addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc); + } else { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + DIExpressionCursor Cursor({}); + DwarfExpr.addWasmLocation(FrameBase.Location.WasmLoc.Kind, + FrameBase.Location.WasmLoc.Index); + DwarfExpr.addExpression(std::move(Cursor)); + addBlock(*SPDie, dwarf::DW_AT_frame_base, DwarfExpr.finalize()); + } + break; + } } } @@ -521,9 +577,33 @@ void DwarfCompileUnit::attachRangesOrLowHighPC( DIE &Die, const SmallVectorImpl<InsnRange> &Ranges) { SmallVector<RangeSpan, 2> List; List.reserve(Ranges.size()); - for (const InsnRange &R : Ranges) - List.push_back( - {DD->getLabelBeforeInsn(R.first), DD->getLabelAfterInsn(R.second)}); + for (const InsnRange &R : Ranges) { + auto *BeginLabel = DD->getLabelBeforeInsn(R.first); + auto *EndLabel = DD->getLabelAfterInsn(R.second); + + const auto *BeginMBB = R.first->getParent(); + const auto *EndMBB = R.second->getParent(); + + const auto *MBB = BeginMBB; + // Basic block sections allows basic block subsets to be placed in unique + // sections. For each section, the begin and end label must be added to the + // list. If there is more than one range, debug ranges must be used. + // Otherwise, low/high PC can be used. + // FIXME: Debug Info Emission depends on block order and this assumes that + // the order of blocks will be frozen beyond this point. + do { + if (MBB->sameSection(EndMBB) || MBB->isEndSection()) { + auto MBBSectionRange = Asm->MBBSectionRanges[MBB->getSectionIDNum()]; + List.push_back( + {MBB->sameSection(BeginMBB) ? BeginLabel + : MBBSectionRange.BeginLabel, + MBB->sameSection(EndMBB) ? 
EndLabel : MBBSectionRange.EndLabel}); + } + if (MBB->sameSection(EndMBB)) + break; + MBB = MBB->getNextNode(); + } while (true); + } attachRangesOrLowHighPC(Die, std::move(List)); } @@ -654,7 +734,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, DIELoc *Loc = new (DIEValueAllocator) DIELoc; DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); for (auto &Fragment : DV.getFrameIndexExprs()) { - unsigned FrameReg = 0; + Register FrameReg; const DIExpression *Expr = Fragment.Expr; const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg); @@ -719,11 +799,22 @@ static SmallVector<const DIVariable *, 2> dependencies(DbgVariable *Var) { auto *Array = dyn_cast<DICompositeType>(Var->getType()); if (!Array || Array->getTag() != dwarf::DW_TAG_array_type) return Result; + if (auto *DLVar = Array->getDataLocation()) + Result.push_back(DLVar); for (auto *El : Array->getElements()) { if (auto *Subrange = dyn_cast<DISubrange>(El)) { - auto Count = Subrange->getCount(); - if (auto *Dependency = Count.dyn_cast<DIVariable *>()) - Result.push_back(Dependency); + if (auto Count = Subrange->getCount()) + if (auto *Dependency = Count.dyn_cast<DIVariable *>()) + Result.push_back(Dependency); + if (auto LB = Subrange->getLowerBound()) + if (auto *Dependency = LB.dyn_cast<DIVariable *>()) + Result.push_back(Dependency); + if (auto UB = Subrange->getUpperBound()) + if (auto *Dependency = UB.dyn_cast<DIVariable *>()) + Result.push_back(Dependency); + if (auto ST = Subrange->getStride()) + if (auto *Dependency = ST.dyn_cast<DIVariable *>()) + Result.push_back(Dependency); } } return Result; @@ -904,13 +995,12 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer); } -/// Whether to use the GNU analog for a DWARF5 tag, attribute, or location atom. 
-static bool useGNUAnalogForDwarf5Feature(DwarfDebug *DD) { +bool DwarfCompileUnit::useGNUAnalogForDwarf5Feature() const { return DD->getDwarfVersion() == 4 && DD->tuneForGDB(); } dwarf::Tag DwarfCompileUnit::getDwarf5OrGNUTag(dwarf::Tag Tag) const { - if (!useGNUAnalogForDwarf5Feature(DD)) + if (!useGNUAnalogForDwarf5Feature()) return Tag; switch (Tag) { case dwarf::DW_TAG_call_site: @@ -924,7 +1014,7 @@ dwarf::Tag DwarfCompileUnit::getDwarf5OrGNUTag(dwarf::Tag Tag) const { dwarf::Attribute DwarfCompileUnit::getDwarf5OrGNUAttr(dwarf::Attribute Attr) const { - if (!useGNUAnalogForDwarf5Feature(DD)) + if (!useGNUAnalogForDwarf5Feature()) return Attr; switch (Attr) { case dwarf::DW_AT_call_all_calls: @@ -933,7 +1023,7 @@ DwarfCompileUnit::getDwarf5OrGNUAttr(dwarf::Attribute Attr) const { return dwarf::DW_AT_GNU_call_site_target; case dwarf::DW_AT_call_origin: return dwarf::DW_AT_abstract_origin; - case dwarf::DW_AT_call_pc: + case dwarf::DW_AT_call_return_pc: return dwarf::DW_AT_low_pc; case dwarf::DW_AT_call_value: return dwarf::DW_AT_GNU_call_site_value; @@ -946,7 +1036,7 @@ DwarfCompileUnit::getDwarf5OrGNUAttr(dwarf::Attribute Attr) const { dwarf::LocationAtom DwarfCompileUnit::getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const { - if (!useGNUAnalogForDwarf5Feature(DD)) + if (!useGNUAnalogForDwarf5Feature()) return Loc; switch (Loc) { case dwarf::DW_OP_entry_value: @@ -956,9 +1046,12 @@ DwarfCompileUnit::getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const { } } -DIE &DwarfCompileUnit::constructCallSiteEntryDIE( - DIE &ScopeDIE, const DISubprogram *CalleeSP, bool IsTail, - const MCSymbol *PCAddr, const MCExpr *PCOffset, unsigned CallReg) { +DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE, + DIE *CalleeDIE, + bool IsTail, + const MCSymbol *PCAddr, + const MCSymbol *CallAddr, + unsigned CallReg) { // Insert a call site entry DIE within ScopeDIE. DIE &CallSiteDIE = createAndAddDIE(getDwarf5OrGNUTag(dwarf::DW_TAG_call_site), ScopeDIE, nullptr); @@ -968,24 +1061,41 @@ DIE &DwarfCompileUnit::constructCallSiteEntryDIE( addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target), MachineLocation(CallReg)); } else { - DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP); - assert(CalleeDIE && "Could not create DIE for call site entry origin"); + assert(CalleeDIE && "No DIE for call site entry origin"); addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin), *CalleeDIE); } - if (IsTail) + if (IsTail) { // Attach DW_AT_call_tail_call to tail calls for standards compliance. addFlag(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_tail_call)); + // Attach the address of the branch instruction to allow the debugger to + // show where the tail call occurred. This attribute has no GNU analog. + // + // GDB works backwards from non-standard usage of DW_AT_low_pc (in DWARF4 + // mode -- equivalently, in DWARF5 mode, DW_AT_call_return_pc) at tail-call + // site entries to figure out the PC of tail-calling branch instructions. + // This means it doesn't need the compiler to emit DW_AT_call_pc, so we + // don't emit it here. + // + // There's no need to tie non-GDB debuggers to this non-standardness, as it + // adds unnecessary complexity to the debugger. For non-GDB debuggers, emit + // the standard DW_AT_call_pc info. + if (!useGNUAnalogForDwarf5Feature()) + addLabelAddress(CallSiteDIE, dwarf::DW_AT_call_pc, CallAddr); + } + // Attach the return PC to allow the debugger to disambiguate call paths // from one function to another. 
- if (DD->getDwarfVersion() == 4 && DD->tuneForGDB()) { - assert(PCAddr && "Missing PC information for a call"); - addLabelAddress(CallSiteDIE, dwarf::DW_AT_low_pc, PCAddr); - } else if (!IsTail || DD->tuneForGDB()) { - assert(PCOffset && "Missing return PC information for a call"); - addAddressExpr(CallSiteDIE, dwarf::DW_AT_call_return_pc, PCOffset); + // + // The return PC is only really needed when the call /isn't/ a tail call, but + // GDB expects it in DWARF4 mode, even for tail calls (see the comment above + // the DW_AT_call_pc emission logic for an explanation). + if (!IsTail || useGNUAnalogForDwarf5Feature()) { + assert(PCAddr && "Missing return PC information for a call"); + addLabelAddress(CallSiteDIE, + getDwarf5OrGNUAttr(dwarf::DW_AT_call_return_pc), PCAddr); } return CallSiteDIE; @@ -1108,7 +1218,7 @@ void DwarfCompileUnit::emitHeader(bool UseOffsets) { // Don't bother labeling the .dwo unit, as its offset isn't used. if (!Skeleton && !DD->useSectionsAsReferences()) { LabelBegin = Asm->createTempSymbol("cu_begin"); - Asm->OutStreamer->EmitLabel(LabelBegin); + Asm->OutStreamer->emitLabel(LabelBegin); } dwarf::UnitType UT = Skeleton ? dwarf::DW_UT_split_compile @@ -1219,15 +1329,12 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); const DIExpression *DIExpr = DV.getSingleExpression(); DwarfExpr.addFragmentOffset(DIExpr); - if (Location.isIndirect()) - DwarfExpr.setMemoryLocationKind(); + DwarfExpr.setLocation(Location, DIExpr); DIExpressionCursor Cursor(DIExpr); - if (DIExpr->isEntryValue()) { - DwarfExpr.setEntryValueFlag(); + if (DIExpr->isEntryValue()) DwarfExpr.beginEntryValueExpression(Cursor); - } const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo(); if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg())) @@ -1285,12 +1392,6 @@ void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form, Die.addValue(DIEValueAllocator, (dwarf::Attribute)0, Form, DIEExpr(Expr)); } -void DwarfCompileUnit::addAddressExpr(DIE &Die, dwarf::Attribute Attribute, - const MCExpr *Expr) { - Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_addr, - DIEExpr(Expr)); -} - void DwarfCompileUnit::applySubprogramAttributesToDefinition( const DISubprogram *SP, DIE &SPDie) { auto *SPDecl = SP->getDeclaration(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 8491d078ed89..4ccd8c96dd0d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -47,9 +47,9 @@ class DwarfCompileUnit final : public DwarfUnit { unsigned UniqueID; bool HasRangeLists = false; - /// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding - /// the need to search for it in applyStmtList. - DIE::value_iterator StmtListValue; + /// The start of the unit line section, this is also + /// reused in appyStmtList. + MCSymbol *LineTableStartSym; /// Skeleton unit associated with this unit. DwarfCompileUnit *Skeleton = nullptr; @@ -123,6 +123,9 @@ public: /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE. void applyStmtList(DIE &D); + /// Get line table start symbol for this unit. + MCSymbol *getLineTableStartSym() const { return LineTableStartSym; } + /// A pair of GlobalVariable and DIExpression. 
struct GlobalExpr { const GlobalVariable *Var; @@ -230,6 +233,10 @@ public: void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); + /// Whether to use the GNU analog for a DWARF5 tag, attribute, or location + /// atom. Only applicable when emitting otherwise DWARF4-compliant debug info. + bool useGNUAnalogForDwarf5Feature() const; + /// This takes a DWARF 5 tag and returns it or a GNU analog. dwarf::Tag getDwarf5OrGNUTag(dwarf::Tag Tag) const; @@ -240,19 +247,17 @@ public: dwarf::LocationAtom getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const; /// Construct a call site entry DIE describing a call within \p Scope to a - /// callee described by \p CalleeSP. + /// callee described by \p CalleeDIE. + /// \p CalleeDIE is a declaration or definition subprogram DIE for the callee. + /// For indirect calls \p CalleeDIE is set to nullptr. /// \p IsTail specifies whether the call is a tail call. - /// \p PCAddr (used for GDB + DWARF 4 tuning) points to the PC value after - /// the call instruction. - /// \p PCOffset (used for cases other than GDB + DWARF 4 tuning) must be - /// non-zero for non-tail calls (in the case of non-gdb tuning, since for - /// GDB + DWARF 5 tuning we still generate PC info for tail calls) or be the - /// function-local offset to PC value after the call instruction. + /// \p PCAddr points to the PC value after the call instruction. + /// \p CallAddr points to the PC value at the call instruction (or is null). /// \p CallReg is a register location for an indirect call. For direct calls /// the \p CallReg is set to 0. - DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram *CalleeSP, - bool IsTail, const MCSymbol *PCAddr, - const MCExpr *PCOffset, unsigned CallReg); + DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, DIE *CalleeDIE, bool IsTail, + const MCSymbol *PCAddr, + const MCSymbol *CallAddr, unsigned CallReg); /// Construct call site parameter DIEs for the \p CallSiteDIE. The \p Params /// were collected by the \ref collectCallSiteParameters. /// Note: The order of parameters does not matter, since debuggers recognize @@ -340,9 +345,6 @@ public: /// Add a Dwarf expression attribute data and value. void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr); - /// Add an attribute containing an address expression to \p Die. 
- void addAddressExpr(DIE &Die, dwarf::Attribute Attribute, const MCExpr *Expr); - void applySubprogramAttributesToDefinition(const DISubprogram *SP, DIE &SPDie); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 6e643ad26410..45ed5256deb9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -95,6 +95,10 @@ static cl::opt<bool> UseDwarfRangesBaseAddressSpecifier( "use-dwarf-ranges-base-address-specifier", cl::Hidden, cl::desc("Use base address specifiers in debug_ranges"), cl::init(false)); +static cl::opt<bool> EmitDwarfDebugEntryValues( + "emit-debug-entry-values", cl::Hidden, + cl::desc("Emit the debug entry values"), cl::init(false)); + static cl::opt<bool> GenerateARangeSection("generate-arange-section", cl::Hidden, cl::desc("Generate dwarf aranges"), @@ -163,6 +167,11 @@ static cl::opt<LinkageNameOption> "Abstract subprograms")), cl::init(DefaultLinkageNames)); +static cl::opt<unsigned> LocationAnalysisSizeLimit( + "singlevarlocation-input-bb-limit", + cl::desc("Maximum block size to analyze for single-location variables"), + cl::init(30000), cl::Hidden); + static const char *const DWARFGroupName = "dwarf"; static const char *const DWARFGroupDescription = "DWARF Emission"; static const char *const DbgTimerName = "writer"; @@ -176,11 +185,11 @@ void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) { } void DebugLocDwarfExpression::emitSigned(int64_t Value) { - getActiveStreamer().EmitSLEB128(Value, Twine(Value)); + getActiveStreamer().emitSLEB128(Value, Twine(Value)); } void DebugLocDwarfExpression::emitUnsigned(uint64_t Value) { - getActiveStreamer().EmitULEB128(Value, Twine(Value)); + getActiveStreamer().emitULEB128(Value, Twine(Value)); } void DebugLocDwarfExpression::emitData1(uint8_t Value) { @@ -189,7 +198,7 @@ void DebugLocDwarfExpression::emitData1(uint8_t Value) { void DebugLocDwarfExpression::emitBaseTypeRef(uint64_t Idx) { assert(Idx < (1ULL << (ULEB128PadSize * 7)) && "Idx wont fit"); - getActiveStreamer().EmitULEB128(Idx, Twine(Idx), ULEB128PadSize); + getActiveStreamer().emitULEB128(Idx, Twine(Idx), ULEB128PadSize); } bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, @@ -232,26 +241,26 @@ const DIType *DbgVariable::getType() const { static DbgValueLoc getDebugLocValue(const MachineInstr *MI) { const DIExpression *Expr = MI->getDebugExpression(); assert(MI->getNumOperands() == 4); - if (MI->getOperand(0).isReg()) { - auto RegOp = MI->getOperand(0); - auto Op1 = MI->getOperand(1); + if (MI->getDebugOperand(0).isReg()) { + auto RegOp = MI->getDebugOperand(0); + auto Op1 = MI->getDebugOffset(); // If the second operand is an immediate, this is a // register-indirect address. 
assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset"); MachineLocation MLoc(RegOp.getReg(), Op1.isImm()); return DbgValueLoc(Expr, MLoc); } - if (MI->getOperand(0).isTargetIndex()) { - auto Op = MI->getOperand(0); + if (MI->getDebugOperand(0).isTargetIndex()) { + auto Op = MI->getDebugOperand(0); return DbgValueLoc(Expr, TargetIndexLocation(Op.getIndex(), Op.getOffset())); } - if (MI->getOperand(0).isImm()) - return DbgValueLoc(Expr, MI->getOperand(0).getImm()); - if (MI->getOperand(0).isFPImm()) - return DbgValueLoc(Expr, MI->getOperand(0).getFPImm()); - if (MI->getOperand(0).isCImm()) - return DbgValueLoc(Expr, MI->getOperand(0).getCImm()); + if (MI->getDebugOperand(0).isImm()) + return DbgValueLoc(Expr, MI->getDebugOperand(0).getImm()); + if (MI->getDebugOperand(0).isFPImm()) + return DbgValueLoc(Expr, MI->getDebugOperand(0).getFPImm()); + if (MI->getDebugOperand(0).isCImm()) + return DbgValueLoc(Expr, MI->getDebugOperand(0).getCImm()); llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!"); } @@ -419,6 +428,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) // a monolithic string offsets table without any header. UseSegmentedStringOffsetsTable = DwarfVersion >= 5; + // Emit call-site-param debug info for GDB and LLDB, if the target supports + // the debug entry values feature. It can also be enabled explicitly. + EmitDebugEntryValues = (Asm->TM.Options.ShouldEmitDebugEntryValues() && + (tuneForGDB() || tuneForLLDB())) || + EmitDwarfDebugEntryValues; + Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion); } @@ -540,11 +555,222 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, } } +DIE &DwarfDebug::constructSubprogramDefinitionDIE(const DISubprogram *SP) { + DICompileUnit *Unit = SP->getUnit(); + assert(SP->isDefinition() && "Subprogram not a definition"); + assert(Unit && "Subprogram definition without parent unit"); + auto &CU = getOrCreateDwarfCompileUnit(Unit); + return *CU.getOrCreateSubprogramDIE(SP); +} + +/// Represents a parameter whose call site value can be described by applying a +/// debug expression to a register in the forwarded register worklist. +struct FwdRegParamInfo { + /// The described parameter register. + unsigned ParamReg; + + /// Debug expression that has been built up when walking through the + /// instruction chain that produces the parameter's value. + const DIExpression *Expr; +}; + +/// Register worklist for finding call site values. +using FwdRegWorklist = MapVector<unsigned, SmallVector<FwdRegParamInfo, 2>>; + +/// Append the expression \p Addition to \p Original and return the result. +static const DIExpression *combineDIExpressions(const DIExpression *Original, + const DIExpression *Addition) { + std::vector<uint64_t> Elts = Addition->getElements().vec(); + // Avoid multiple DW_OP_stack_values. + if (Original->isImplicit() && Addition->isImplicit()) + erase_if(Elts, [](uint64_t Op) { return Op == dwarf::DW_OP_stack_value; }); + const DIExpression *CombinedExpr = + (Elts.size() > 0) ? DIExpression::append(Original, Elts) : Original; + return CombinedExpr; +} + +/// Emit call site parameter entries that are described by the given value and +/// debug expression. 
+template <typename ValT> +static void finishCallSiteParams(ValT Val, const DIExpression *Expr, + ArrayRef<FwdRegParamInfo> DescribedParams, + ParamSet &Params) { + for (auto Param : DescribedParams) { + bool ShouldCombineExpressions = Expr && Param.Expr->getNumElements() > 0; + + // TODO: Entry value operations can currently not be combined with any + // other expressions, so we can't emit call site entries in those cases. + if (ShouldCombineExpressions && Expr->isEntryValue()) + continue; + + // If a parameter's call site value is produced by a chain of + // instructions we may have already created an expression for the + // parameter when walking through the instructions. Append that to the + // base expression. + const DIExpression *CombinedExpr = + ShouldCombineExpressions ? combineDIExpressions(Expr, Param.Expr) + : Expr; + assert((!CombinedExpr || CombinedExpr->isValid()) && + "Combined debug expression is invalid"); + + DbgValueLoc DbgLocVal(CombinedExpr, Val); + DbgCallSiteParam CSParm(Param.ParamReg, DbgLocVal); + Params.push_back(CSParm); + ++NumCSParams; + } +} + +/// Add \p Reg to the worklist, if it's not already present, and mark that the +/// given parameter registers' values can (potentially) be described using +/// that register and an debug expression. +static void addToFwdRegWorklist(FwdRegWorklist &Worklist, unsigned Reg, + const DIExpression *Expr, + ArrayRef<FwdRegParamInfo> ParamsToAdd) { + auto I = Worklist.insert({Reg, {}}); + auto &ParamsForFwdReg = I.first->second; + for (auto Param : ParamsToAdd) { + assert(none_of(ParamsForFwdReg, + [Param](const FwdRegParamInfo &D) { + return D.ParamReg == Param.ParamReg; + }) && + "Same parameter described twice by forwarding reg"); + + // If a parameter's call site value is produced by a chain of + // instructions we may have already created an expression for the + // parameter when walking through the instructions. Append that to the + // new expression. + const DIExpression *CombinedExpr = combineDIExpressions(Expr, Param.Expr); + ParamsForFwdReg.push_back({Param.ParamReg, CombinedExpr}); + } +} + +/// Interpret values loaded into registers by \p CurMI. +static void interpretValues(const MachineInstr *CurMI, + FwdRegWorklist &ForwardedRegWorklist, + ParamSet &Params) { + + const MachineFunction *MF = CurMI->getMF(); + const DIExpression *EmptyExpr = + DIExpression::get(MF->getFunction().getContext(), {}); + const auto &TRI = *MF->getSubtarget().getRegisterInfo(); + const auto &TII = *MF->getSubtarget().getInstrInfo(); + const auto &TLI = *MF->getSubtarget().getTargetLowering(); + + // If an instruction defines more than one item in the worklist, we may run + // into situations where a worklist register's value is (potentially) + // described by the previous value of another register that is also defined + // by that instruction. + // + // This can for example occur in cases like this: + // + // $r1 = mov 123 + // $r0, $r1 = mvrr $r1, 456 + // call @foo, $r0, $r1 + // + // When describing $r1's value for the mvrr instruction, we need to make sure + // that we don't finalize an entry value for $r0, as that is dependent on the + // previous value of $r1 (123 rather than 456). + // + // In order to not have to distinguish between those cases when finalizing + // entry values, we simply postpone adding new parameter registers to the + // worklist, by first keeping them in this temporary container until the + // instruction has been handled. 
+ FwdRegWorklist TmpWorklistItems; + + // If the MI is an instruction defining one or more parameters' forwarding + // registers, add those defines. + auto getForwardingRegsDefinedByMI = [&](const MachineInstr &MI, + SmallSetVector<unsigned, 4> &Defs) { + if (MI.isDebugInstr()) + return; + + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg() && MO.isDef() && + Register::isPhysicalRegister(MO.getReg())) { + for (auto FwdReg : ForwardedRegWorklist) + if (TRI.regsOverlap(FwdReg.first, MO.getReg())) + Defs.insert(FwdReg.first); + } + } + }; + + // Set of worklist registers that are defined by this instruction. + SmallSetVector<unsigned, 4> FwdRegDefs; + + getForwardingRegsDefinedByMI(*CurMI, FwdRegDefs); + if (FwdRegDefs.empty()) + return; + + for (auto ParamFwdReg : FwdRegDefs) { + if (auto ParamValue = TII.describeLoadedValue(*CurMI, ParamFwdReg)) { + if (ParamValue->first.isImm()) { + int64_t Val = ParamValue->first.getImm(); + finishCallSiteParams(Val, ParamValue->second, + ForwardedRegWorklist[ParamFwdReg], Params); + } else if (ParamValue->first.isReg()) { + Register RegLoc = ParamValue->first.getReg(); + unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); + Register FP = TRI.getFrameRegister(*MF); + bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP); + if (TRI.isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) { + MachineLocation MLoc(RegLoc, /*IsIndirect=*/IsSPorFP); + finishCallSiteParams(MLoc, ParamValue->second, + ForwardedRegWorklist[ParamFwdReg], Params); + } else { + // ParamFwdReg was described by the non-callee saved register + // RegLoc. Mark that the call site values for the parameters are + // dependent on that register instead of ParamFwdReg. Since RegLoc + // may be a register that will be handled in this iteration, we + // postpone adding the items to the worklist, and instead keep them + // in a temporary container. + addToFwdRegWorklist(TmpWorklistItems, RegLoc, ParamValue->second, + ForwardedRegWorklist[ParamFwdReg]); + } + } + } + } + + // Remove all registers that this instruction defines from the worklist. + for (auto ParamFwdReg : FwdRegDefs) + ForwardedRegWorklist.erase(ParamFwdReg); + + // Now that we are done handling this instruction, add items from the + // temporary worklist to the real one. + for (auto New : TmpWorklistItems) + addToFwdRegWorklist(ForwardedRegWorklist, New.first, EmptyExpr, New.second); + TmpWorklistItems.clear(); +} + +static bool interpretNextInstr(const MachineInstr *CurMI, + FwdRegWorklist &ForwardedRegWorklist, + ParamSet &Params) { + // Skip bundle headers. + if (CurMI->isBundle()) + return true; + + // If the next instruction is a call we can not interpret parameter's + // forwarding registers or we finished the interpretation of all + // parameters. + if (CurMI->isCall()) + return false; + + if (ForwardedRegWorklist.empty()) + return false; + + // Avoid NOP description. + if (CurMI->getNumOperands() == 0) + return true; + + interpretValues(CurMI, ForwardedRegWorklist, Params); + + return true; +} + /// Try to interpret values loaded into registers that forward parameters /// for \p CallMI. Store parameters with interpreted value into \p Params. 
static void collectCallSiteParameters(const MachineInstr *CallMI, ParamSet &Params) { - auto *MF = CallMI->getMF(); + const MachineFunction *MF = CallMI->getMF(); auto CalleesMap = MF->getCallSitesInfo(); auto CallFwdRegsInfo = CalleesMap.find(CallMI); @@ -552,18 +778,21 @@ static void collectCallSiteParameters(const MachineInstr *CallMI, if (CallFwdRegsInfo == CalleesMap.end()) return; - auto *MBB = CallMI->getParent(); - const auto &TRI = MF->getSubtarget().getRegisterInfo(); - const auto &TII = MF->getSubtarget().getInstrInfo(); - const auto &TLI = MF->getSubtarget().getTargetLowering(); + const MachineBasicBlock *MBB = CallMI->getParent(); // Skip the call instruction. auto I = std::next(CallMI->getReverseIterator()); - DenseSet<unsigned> ForwardedRegWorklist; + FwdRegWorklist ForwardedRegWorklist; + + const DIExpression *EmptyExpr = + DIExpression::get(MF->getFunction().getContext(), {}); + // Add all the forwarding registers into the ForwardedRegWorklist. for (auto ArgReg : CallFwdRegsInfo->second) { - bool InsertedReg = ForwardedRegWorklist.insert(ArgReg.Reg).second; + bool InsertedReg = + ForwardedRegWorklist.insert({ArgReg.Reg, {{ArgReg.Reg, EmptyExpr}}}) + .second; assert(InsertedReg && "Single register used to forward two arguments?"); (void)InsertedReg; } @@ -573,107 +802,29 @@ static void collectCallSiteParameters(const MachineInstr *CallMI, // the describeLoadedValue()). For those remaining arguments in the working // list, for which we do not describe a loaded value by // the describeLoadedValue(), we try to generate an entry value expression - // for their call site value desctipion, if the call is within the entry MBB. - // The RegsForEntryValues maps a forwarding register into the register holding - // the entry value. + // for their call site value description, if the call is within the entry MBB. // TODO: Handle situations when call site parameter value can be described - // as the entry value within basic blocks other then the first one. + // as the entry value within basic blocks other than the first one. bool ShouldTryEmitEntryVals = MBB->getIterator() == MF->begin(); - DenseMap<unsigned, unsigned> RegsForEntryValues; - // If the MI is an instruction defining one or more parameters' forwarding - // registers, add those defines. We can currently only describe forwarded - // registers that are explicitly defined, but keep track of implicit defines - // also to remove those registers from the work list. - auto getForwardingRegsDefinedByMI = [&](const MachineInstr &MI, - SmallVectorImpl<unsigned> &Explicit, - SmallVectorImpl<unsigned> &Implicit) { - if (MI.isDebugInstr()) + // Search for a loading value in forwarding registers inside call delay slot. + if (CallMI->hasDelaySlot()) { + auto Suc = std::next(CallMI->getIterator()); + // Only one-instruction delay slot is supported. + auto BundleEnd = llvm::getBundleEnd(CallMI->getIterator()); + (void)BundleEnd; + assert(std::next(Suc) == BundleEnd && + "More than one instruction in call delay slot"); + // Try to interpret value loaded by instruction. 
+ if (!interpretNextInstr(&*Suc, ForwardedRegWorklist, Params)) return; - - for (const MachineOperand &MO : MI.operands()) { - if (MO.isReg() && MO.isDef() && - Register::isPhysicalRegister(MO.getReg())) { - for (auto FwdReg : ForwardedRegWorklist) { - if (TRI->regsOverlap(FwdReg, MO.getReg())) { - if (MO.isImplicit()) - Implicit.push_back(FwdReg); - else - Explicit.push_back(FwdReg); - } - } - } - } - }; - - auto finishCallSiteParam = [&](DbgValueLoc DbgLocVal, unsigned Reg) { - unsigned FwdReg = Reg; - if (ShouldTryEmitEntryVals) { - auto EntryValReg = RegsForEntryValues.find(Reg); - if (EntryValReg != RegsForEntryValues.end()) - FwdReg = EntryValReg->second; - } - - DbgCallSiteParam CSParm(FwdReg, DbgLocVal); - Params.push_back(CSParm); - ++NumCSParams; - }; + } // Search for a loading value in forwarding registers. for (; I != MBB->rend(); ++I) { - // Skip bundle headers. - if (I->isBundle()) - continue; - - // If the next instruction is a call we can not interpret parameter's - // forwarding registers or we finished the interpretation of all parameters. - if (I->isCall()) + // Try to interpret values loaded by instruction. + if (!interpretNextInstr(&*I, ForwardedRegWorklist, Params)) return; - - if (ForwardedRegWorklist.empty()) - return; - - SmallVector<unsigned, 4> ExplicitFwdRegDefs; - SmallVector<unsigned, 4> ImplicitFwdRegDefs; - getForwardingRegsDefinedByMI(*I, ExplicitFwdRegDefs, ImplicitFwdRegDefs); - if (ExplicitFwdRegDefs.empty() && ImplicitFwdRegDefs.empty()) - continue; - - // If the MI clobbers more then one forwarding register we must remove - // all of them from the working list. - for (auto Reg : concat<unsigned>(ExplicitFwdRegDefs, ImplicitFwdRegDefs)) - ForwardedRegWorklist.erase(Reg); - - for (auto ParamFwdReg : ExplicitFwdRegDefs) { - if (auto ParamValue = TII->describeLoadedValue(*I, ParamFwdReg)) { - if (ParamValue->first.isImm()) { - int64_t Val = ParamValue->first.getImm(); - DbgValueLoc DbgLocVal(ParamValue->second, Val); - finishCallSiteParam(DbgLocVal, ParamFwdReg); - } else if (ParamValue->first.isReg()) { - Register RegLoc = ParamValue->first.getReg(); - // TODO: For now, there is no use of describing the value loaded into the - // register that is also the source registers (e.g. $r0 = add $r0, x). - if (ParamFwdReg == RegLoc) - continue; - - unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); - Register FP = TRI->getFrameRegister(*MF); - bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP); - if (TRI->isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) { - DbgValueLoc DbgLocVal(ParamValue->second, - MachineLocation(RegLoc, - /*IsIndirect=*/IsSPorFP)); - finishCallSiteParam(DbgLocVal, ParamFwdReg); - // TODO: Add support for entry value plus an expression. - } else if (ShouldTryEmitEntryVals && - ParamValue->second->getNumElements() == 0) { - ForwardedRegWorklist.insert(RegLoc); - RegsForEntryValues[RegLoc] = ParamFwdReg; - } - } - } - } } // Emit the call site parameter's value as an entry value. 
@@ -682,15 +833,8 @@ static void collectCallSiteParameters(const MachineInstr *CallMI, DIExpression *EntryExpr = DIExpression::get( MF->getFunction().getContext(), {dwarf::DW_OP_LLVM_entry_value, 1}); for (auto RegEntry : ForwardedRegWorklist) { - unsigned FwdReg = RegEntry; - auto EntryValReg = RegsForEntryValues.find(RegEntry); - if (EntryValReg != RegsForEntryValues.end()) - FwdReg = EntryValReg->second; - - DbgValueLoc DbgLocVal(EntryExpr, MachineLocation(RegEntry)); - DbgCallSiteParam CSParm(FwdReg, DbgLocVal); - Params.push_back(CSParm); - ++NumCSParams; + MachineLocation MLoc(RegEntry.first); + finishCallSiteParams(MLoc, EntryExpr, RegEntry.second, Params); } } } @@ -711,7 +855,25 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); assert(TII && "TargetInstrInfo not found: cannot label tail calls"); - bool ApplyGNUExtensions = getDwarfVersion() == 4 && tuneForGDB(); + + // Delay slot support check. + auto delaySlotSupported = [&](const MachineInstr &MI) { + if (!MI.isBundledWithSucc()) + return false; + auto Suc = std::next(MI.getIterator()); + auto CallInstrBundle = getBundleStart(MI.getIterator()); + (void)CallInstrBundle; + auto DelaySlotBundle = getBundleStart(Suc); + (void)DelaySlotBundle; + // Ensure that label after call is following delay slot instruction. + // Ex. CALL_INSTRUCTION { + // DELAY_SLOT_INSTRUCTION } + // LABEL_AFTER_CALL + assert(getLabelAfterInsn(&*CallInstrBundle) == + getLabelAfterInsn(&*DelaySlotBundle) && + "Call and its successor instruction don't have same label after."); + return true; + }; // Emit call site entries for each call or tail call in the function. for (const MachineBasicBlock &MBB : MF) { @@ -724,11 +886,16 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, // Skip instructions which aren't calls. Both calls and tail-calling jump // instructions (e.g TAILJMPd64) are classified correctly here. - if (!MI.isCall()) + if (!MI.isCandidateForCallSiteEntry()) continue; - // TODO: Add support for targets with delay slots (see: beginInstruction). - if (MI.hasDelaySlot()) + // Skip instructions marked as frame setup, as they are not interesting to + // the user. + if (MI.getFlag(MachineInstr::FrameSetup)) + continue; + + // Check if delay slot support is enabled. + if (MI.hasDelaySlot() && !delaySlotSupported(*&MI)) return; // If this is a direct call, find the callee's subprogram. @@ -739,7 +906,7 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, continue; unsigned CallReg = 0; - const DISubprogram *CalleeSP = nullptr; + DIE *CalleeDIE = nullptr; const Function *CalleeDecl = nullptr; if (CalleeOp.isReg()) { CallReg = CalleeOp.getReg(); @@ -749,7 +916,19 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, CalleeDecl = dyn_cast<Function>(CalleeOp.getGlobal()); if (!CalleeDecl || !CalleeDecl->getSubprogram()) continue; - CalleeSP = CalleeDecl->getSubprogram(); + const DISubprogram *CalleeSP = CalleeDecl->getSubprogram(); + + if (CalleeSP->isDefinition()) { + // Ensure that a subprogram DIE for the callee is available in the + // appropriate CU. + CalleeDIE = &constructSubprogramDefinitionDIE(CalleeSP); + } else { + // Create the declaration DIE if it is missing. This is required to + // support compilation of old bitcode with an incomplete list of + // retained metadata. 
+ CalleeDIE = CU.getOrCreateSubprogramDIE(CalleeSP); + } + assert(CalleeDIE && "Must have a DIE for the callee"); } // TODO: Omit call site entries for runtime calls (objc_msgSend, etc). @@ -762,25 +941,21 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, const MachineInstr *TopLevelCallMI = MI.isInsideBundle() ? &*getBundleStart(MI.getIterator()) : &MI; - // For tail calls, for non-gdb tuning, no return PC information is needed. - // For regular calls (and tail calls in GDB tuning), the return PC - // is needed to disambiguate paths in the call graph which could lead to - // some target function. - const MCExpr *PCOffset = - (IsTail && !tuneForGDB()) - ? nullptr - : getFunctionLocalOffsetAfterInsn(TopLevelCallMI); - - // Return address of a call-like instruction for a normal call or a - // jump-like instruction for a tail call. This is needed for - // GDB + DWARF 4 tuning. + // For non-tail calls, the return PC is needed to disambiguate paths in + // the call graph which could lead to some target function. For tail + // calls, no return PC information is needed, unless tuning for GDB in + // DWARF4 mode in which case we fake a return PC for compatibility. const MCSymbol *PCAddr = - ApplyGNUExtensions + (!IsTail || CU.useGNUAnalogForDwarf5Feature()) ? const_cast<MCSymbol *>(getLabelAfterInsn(TopLevelCallMI)) : nullptr; - assert((IsTail || PCOffset || PCAddr) && - "Call without return PC information"); + // For tail calls, it's necessary to record the address of the branch + // instruction so that the debugger can show where the tail call occurred. + const MCSymbol *CallAddr = + IsTail ? getLabelBeforeInsn(TopLevelCallMI) : nullptr; + + assert((IsTail || PCAddr) && "Non-tail call without return PC"); LLVM_DEBUG(dbgs() << "CallSiteEntry: " << MF.getName() << " -> " << (CalleeDecl ? CalleeDecl->getName() @@ -789,13 +964,11 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, ->getName(CallReg))) << (IsTail ? " [IsTail]" : "") << "\n"); - DIE &CallSiteDIE = - CU.constructCallSiteEntryDIE(ScopeDIE, CalleeSP, IsTail, PCAddr, - PCOffset, CallReg); + DIE &CallSiteDIE = CU.constructCallSiteEntryDIE( + ScopeDIE, CalleeDIE, IsTail, PCAddr, CallAddr, CallReg); - // GDB and LLDB support call site parameter debug info. - if (Asm->TM.Options.EnableDebugEntryValues && - (tuneForGDB() || tuneForLLDB())) { + // Optionally emit call-site-param debug info. + if (emitDebugEntryValues()) { ParamSet Params; // Try to interpret values of call site parameters. collectCallSiteParameters(&MI, Params); @@ -828,6 +1001,12 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit, NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, DIUnit->getSourceLanguage()); NewCU.addString(Die, dwarf::DW_AT_name, FN); + StringRef SysRoot = DIUnit->getSysRoot(); + if (!SysRoot.empty()) + NewCU.addString(Die, dwarf::DW_AT_LLVM_sysroot, SysRoot); + StringRef SDK = DIUnit->getSDK(); + if (!SDK.empty()) + NewCU.addString(Die, dwarf::DW_AT_APPLE_sdk, SDK); // Add DW_str_offsets_base to the unit DIE, except for split units. if (useSegmentedStringOffsetsTable() && !useSplitDwarf()) @@ -840,7 +1019,6 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit, // skeleton CU and so we don't need to duplicate it here. 
if (!CompilationDir.empty()) NewCU.addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); - addGnuPubAttributes(NewCU, Die); } @@ -905,11 +1083,6 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection()); } - // Create DIEs for function declarations used for call site debug info. - for (auto Scope : DIUnit->getRetainedTypes()) - if (auto *SP = dyn_cast_or_null<DISubprogram>(Scope)) - NewCU.getOrCreateSubprogramDIE(SP); - CUMap.insert({DIUnit, &NewCU}); CUDieMap.insert({&NewCU.getUnitDie(), &NewCU}); return NewCU; @@ -1161,8 +1334,7 @@ void DwarfDebug::finalizeModuleInfo() { // We don't keep track of which addresses are used in which CU so this // is a bit pessimistic under LTO. - if ((!AddrPool.isEmpty() || TheCU.hasRangeLists()) && - (getDwarfVersion() >= 5 || HasSplitUnit)) + if ((HasSplitUnit || getDwarfVersion() >= 5) && !AddrPool.isEmpty()) U.addAddrTableBase(); if (getDwarfVersion() >= 5) { @@ -1178,18 +1350,31 @@ void DwarfDebug::finalizeModuleInfo() { } auto *CUNode = cast<DICompileUnit>(P.first); - // If compile Unit has macros, emit "DW_AT_macro_info" attribute. + // If compile Unit has macros, emit "DW_AT_macro_info/DW_AT_macros" + // attribute. if (CUNode->getMacros()) { - if (useSplitDwarf()) - TheCU.addSectionDelta(TheCU.getUnitDie(), dwarf::DW_AT_macro_info, + if (getDwarfVersion() >= 5) { + if (useSplitDwarf()) + TheCU.addSectionDelta( + TheCU.getUnitDie(), dwarf::DW_AT_macros, U.getMacroLabelBegin(), + TLOF.getDwarfMacroDWOSection()->getBeginSymbol()); + else + U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macros, U.getMacroLabelBegin(), - TLOF.getDwarfMacinfoDWOSection()->getBeginSymbol()); - else - U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macro_info, - U.getMacroLabelBegin(), - TLOF.getDwarfMacinfoSection()->getBeginSymbol()); + TLOF.getDwarfMacroSection()->getBeginSymbol()); + } else { + if (useSplitDwarf()) + TheCU.addSectionDelta( + TheCU.getUnitDie(), dwarf::DW_AT_macro_info, + U.getMacroLabelBegin(), + TLOF.getDwarfMacinfoDWOSection()->getBeginSymbol()); + else + U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macro_info, + U.getMacroLabelBegin(), + TLOF.getDwarfMacinfoSection()->getBeginSymbol()); + } + } } - } // Emit all frontend-produced Skeleton CUs, i.e., Clang modules. for (auto *CUNode : MMI->getModule()->debug_compile_units()) @@ -1221,8 +1406,6 @@ void DwarfDebug::endModule() { // Finalize the debug info for the module. finalizeModuleInfo(); - emitDebugStr(); - if (useSplitDwarf()) // Emit debug_loc.dwo/debug_loclists.dwo section. emitDebugLocDWO(); @@ -1247,9 +1430,11 @@ void DwarfDebug::endModule() { // Emit info into a debug macinfo.dwo section. emitDebugMacinfoDWO(); else - // Emit info into a debug macinfo section. + // Emit info into a debug macinfo/macro section. emitDebugMacinfo(); + emitDebugStr(); + if (useSplitDwarf()) { emitDebugStrDWO(); emitDebugInfoDWO(); @@ -1308,6 +1493,7 @@ void DwarfDebug::ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU, void DwarfDebug::collectVariableInfoFromMFTable( DwarfCompileUnit &TheCU, DenseSet<InlinedEntity> &Processed) { SmallDenseMap<InlinedEntity, DbgVariable *> MFVars; + LLVM_DEBUG(dbgs() << "DwarfDebug: collecting variables from MF side table\n"); for (const auto &VI : Asm->MF->getVariableDbgInfo()) { if (!VI.Var) continue; @@ -1319,13 +1505,18 @@ void DwarfDebug::collectVariableInfoFromMFTable( LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc); // If variable scope is not found then skip this variable. 
- if (!Scope) + if (!Scope) { + LLVM_DEBUG(dbgs() << "Dropping debug info for " << VI.Var->getName() + << ", no variable scope found\n"); continue; + } ensureAbstractEntityIsCreatedIfScoped(TheCU, Var.first, Scope->getScopeNode()); auto RegVar = std::make_unique<DbgVariable>( cast<DILocalVariable>(Var.first), Var.second); RegVar->initializeMMI(VI.Expr, VI.Slot); + LLVM_DEBUG(dbgs() << "Created DbgVariable for " << VI.Var->getName() + << "\n"); if (DbgVariable *DbgVar = MFVars.lookup(Var)) DbgVar->addMMIEntry(*RegVar); else if (InfoHolder.addScopeVariable(Scope, RegVar.get())) { @@ -1353,11 +1544,20 @@ static bool validThroughout(LexicalScopes &LScopes, if (LSRange.size() == 0) return false; + // Determine if the DBG_VALUE is valid at the beginning of its lexical block. const MachineInstr *LScopeBegin = LSRange.front().first; // Early exit if the lexical scope begins outside of the current block. if (LScopeBegin->getParent() != MBB) return false; + + // If there are instructions belonging to our scope in another block, and + // we're not a constant (see DWARF2 comment below), then we can't be + // validThroughout. + const MachineInstr *LScopeEnd = LSRange.back().second; + if (RangeEnd && LScopeEnd->getParent() != MBB) + return false; + MachineBasicBlock::const_reverse_iterator Pred(DbgValue); for (++Pred; Pred != MBB->rend(); ++Pred) { if (Pred->getFlag(MachineInstr::FrameSetup)) @@ -1378,19 +1578,35 @@ static bool validThroughout(LexicalScopes &LScopes, if (!RangeEnd) return true; - // Fail if there are instructions belonging to our scope in another block. - const MachineInstr *LScopeEnd = LSRange.back().second; - if (LScopeEnd->getParent() != MBB) - return false; - // Single, constant DBG_VALUEs in the prologue are promoted to be live // throughout the function. This is a hack, presumably for DWARF v2 and not // necessarily correct. It would be much better to use a dbg.declare instead // if we know the constant is live throughout the scope. - if (DbgValue->getOperand(0).isImm() && MBB->pred_empty()) + if (DbgValue->getDebugOperand(0).isImm() && MBB->pred_empty()) return true; - return false; + // Now check for situations where an "open-ended" DBG_VALUE isn't enough to + // determine eligibility for a single location, e.g. nested scopes, inlined + // functions. + // FIXME: For now we just handle a simple (but common) case where the scope + // is contained in MBB. We could be smarter here. + // + // At this point we know that our scope ends in MBB. So, if RangeEnd exists + // outside of the block we can ignore it; the location is just leaking outside + // its scope. + assert(LScopeEnd->getParent() == MBB && "Scope ends outside MBB"); + if (RangeEnd->getParent() != DbgValue->getParent()) + return true; + + // The location range and variable's enclosing scope are both contained within + // MBB, test if location terminates before end of scope. + for (auto I = RangeEnd->getIterator(); I != MBB->end(); ++I) + if (&*I == LScopeEnd) + return false; + + // There's a single location which starts at the scope start, and ends at or + // after the scope end. 
+ return true; } /// Build the location list for all DBG_VALUEs in the function that @@ -1426,8 +1642,10 @@ static bool validThroughout(LexicalScopes &LScopes, // [1-3) [(reg0, fragment 0, 32), (reg1, fragment 32, 32)] // [3-4) [(reg1, fragment 32, 32), (123, fragment 64, 32)] // [4-) [(@g, fragment 0, 96)] -bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, - const DbgValueHistoryMap::Entries &Entries) { +bool DwarfDebug::buildLocationList( + SmallVectorImpl<DebugLocEntry> &DebugLoc, + const DbgValueHistoryMap::Entries &Entries, + DenseSet<const MachineBasicBlock *> &VeryLargeBlocks) { using OpenRange = std::pair<DbgValueHistoryMap::EntryIndex, DbgValueLoc>; SmallVector<OpenRange, 4> OpenRanges; @@ -1453,7 +1671,8 @@ bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, const MCSymbol *EndLabel; if (std::next(EI) == Entries.end()) { - EndLabel = Asm->getFunctionEnd(); + const MachineBasicBlock &EndMBB = Asm->MF->back(); + EndLabel = Asm->MBBSectionRanges[EndMBB.getSectionIDNum()].EndLabel; if (EI->isClobber()) EndMI = EI->getInstr(); } @@ -1522,8 +1741,14 @@ bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, DebugLoc.pop_back(); } - return DebugLoc.size() == 1 && isSafeForSingleLocation && - validThroughout(LScopes, StartDebugMI, EndMI); + // If there's a single entry, safe for a single location, and not part of + // an over-sized basic block, then ask validThroughout whether this + // location can be represented as a single variable location. + if (DebugLoc.size() != 1 || !isSafeForSingleLocation) + return false; + if (VeryLargeBlocks.count(StartDebugMI->getParent())) + return false; + return validThroughout(LScopes, StartDebugMI, EndMI); } DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU, @@ -1555,6 +1780,13 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU, // Grab the variable info that was squirreled away in the MMI side-table. collectVariableInfoFromMFTable(TheCU, Processed); + // Identify blocks that are unreasonably sized, so that we can later + // skip lexical scope analysis over them. + DenseSet<const MachineBasicBlock *> VeryLargeBlocks; + for (const auto &MBB : *CurFn) + if (MBB.size() > LocationAnalysisSizeLimit) + VeryLargeBlocks.insert(&MBB); + for (const auto &I : DbgValues) { InlinedEntity IV = I.first; if (Processed.count(IV)) @@ -1591,7 +1823,8 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU, if (HistSize == 1 || SingleValueWithClobber) { const auto *End = SingleValueWithClobber ? HistoryMapEntries[1].getInstr() : nullptr; - if (validThroughout(LScopes, MInsn, End)) { + if (VeryLargeBlocks.count(MInsn->getParent()) == 0 && + validThroughout(LScopes, MInsn, End)) { RegVar->initializeDbgValue(MInsn); continue; } @@ -1606,7 +1839,8 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU, // Build the location list for this variable. SmallVector<DebugLocEntry, 8> Entries; - bool isValidSingleLocation = buildLocationList(Entries, HistoryMapEntries); + bool isValidSingleLocation = + buildLocationList(Entries, HistoryMapEntries, VeryLargeBlocks); // Check whether buildLocationList managed to merge all locations to one // that is valid throughout the variable's scope. If so, produce single @@ -1675,11 +1909,45 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU, // Process beginning of an instruction. 
void DwarfDebug::beginInstruction(const MachineInstr *MI) { + const MachineFunction &MF = *MI->getMF(); + const auto *SP = MF.getFunction().getSubprogram(); + bool NoDebug = + !SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug; + + // Delay slot support check. + auto delaySlotSupported = [](const MachineInstr &MI) { + if (!MI.isBundledWithSucc()) + return false; + auto Suc = std::next(MI.getIterator()); + (void)Suc; + // Ensure that delay slot instruction is successor of the call instruction. + // Ex. CALL_INSTRUCTION { + // DELAY_SLOT_INSTRUCTION } + assert(Suc->isBundledWithPred() && + "Call bundle instructions are out of order"); + return true; + }; + + // When describing calls, we need a label for the call instruction. + if (!NoDebug && SP->areAllCallsDescribed() && + MI->isCandidateForCallSiteEntry(MachineInstr::AnyInBundle) && + (!MI->hasDelaySlot() || delaySlotSupported(*MI))) { + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + bool IsTail = TII->isTailCall(*MI); + // For tail calls, we need the address of the branch instruction for + // DW_AT_call_pc. + if (IsTail) + requestLabelBeforeInsn(MI); + // For non-tail calls, we need the return address for the call for + // DW_AT_call_return_pc. Under GDB tuning, this information is needed for + // tail calls as well. + requestLabelAfterInsn(MI); + } + DebugHandlerBase::beginInstruction(MI); assert(CurMI); - const auto *SP = MI->getMF()->getFunction().getSubprogram(); - if (!SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) + if (NoDebug) return; // Check if source location changes, but ignore DBG_VALUE and CFI locations. @@ -1693,11 +1961,6 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { unsigned LastAsmLine = Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine(); - // Request a label after the call in order to emit AT_return_pc information - // in call site entries. TODO: Add support for targets with delay slots. - if (SP->areAllCallsDescribed() && MI->isCall() && !MI->hasDelaySlot()) - requestLabelAfterInsn(MI); - if (DL == PrevInstLoc) { // If we have an ongoing unspecified location, nothing to do here. if (!DL) @@ -1796,7 +2059,7 @@ static void recordSourceLine(AsmPrinter &Asm, unsigned Line, unsigned Col, FileNo = static_cast<DwarfCompileUnit &>(*DCUs[CUID]) .getOrCreateSourceID(Scope->getFile()); } - Asm.OutStreamer->EmitDwarfLocDirective(FileNo, Line, Col, Flags, 0, + Asm.OutStreamer->emitDwarfLocDirective(FileNo, Line, Col, Flags, 0, Discriminator, Fn); } @@ -1828,9 +2091,6 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) { if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) return; - SectionLabels.insert(std::make_pair(&Asm->getFunctionBegin()->getSection(), - Asm->getFunctionBegin())); - DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit()); // Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function @@ -1878,7 +2138,9 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) { collectEntityInfo(TheCU, SP, Processed); // Add the range of this function to the list of ranges for the CU. - TheCU.addRange({Asm->getFunctionBegin(), Asm->getFunctionEnd()}); + // With basic block sections, add ranges for all basic block sections. + for (const auto &R : Asm->MBBSectionRanges) + TheCU.addRange({R.second.BeginLabel, R.second.EndLabel}); // Under -gmlt, skip building the subprogram if there are no inlined // subroutines inside it. 
But with -fdebug-info-for-profiling, the subprogram @@ -2107,7 +2369,7 @@ void DwarfDebug::emitDebugPubSections() { void DwarfDebug::emitSectionReference(const DwarfCompileUnit &CU) { if (useSectionsAsReferences()) - Asm->EmitDwarfOffset(CU.getSection()->getBeginSymbol(), + Asm->emitDwarfOffset(CU.getSection()->getBeginSymbol(), CU.getDebugSectionOffset()); else Asm->emitDwarfSymbolReference(CU.getLabelBegin()); @@ -2123,9 +2385,9 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name, Asm->OutStreamer->AddComment("Length of Public " + Name + " Info"); MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin"); MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end"); - Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); + Asm->emitLabelDifference(EndLabel, BeginLabel, 4); - Asm->OutStreamer->EmitLabel(BeginLabel); + Asm->OutStreamer->emitLabel(BeginLabel); Asm->OutStreamer->AddComment("DWARF Version"); Asm->emitInt16(dwarf::DW_PUBNAMES_VERSION); @@ -2153,12 +2415,12 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name, } Asm->OutStreamer->AddComment("External Name"); - Asm->OutStreamer->EmitBytes(StringRef(Name, GI.getKeyLength() + 1)); + Asm->OutStreamer->emitBytes(StringRef(Name, GI.getKeyLength() + 1)); } Asm->OutStreamer->AddComment("End Mark"); Asm->emitInt32(0); - Asm->OutStreamer->EmitLabel(EndLabel); + Asm->OutStreamer->emitLabel(EndLabel); } /// Emit null-terminated strings into a debug str section. @@ -2189,7 +2451,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, DWARFDataExtractor Data(StringRef(DebugLocs.getBytes(Entry).data(), DebugLocs.getBytes(Entry).size()), Asm->getDataLayout().isLittleEndian(), PtrSize); - DWARFExpression Expr(Data, getDwarfVersion(), PtrSize); + DWARFExpression Expr(Data, PtrSize, Asm->OutContext.getDwarfFormat()); using Encoding = DWARFExpression::Operation::Encoding; uint64_t Offset = 0; @@ -2202,18 +2464,14 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, if (Op.getDescription().Op[I] == Encoding::SizeNA) continue; if (Op.getDescription().Op[I] == Encoding::BaseTypeRef) { - if (CU) { - uint64_t Offset = CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die->getOffset(); - assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit"); - Asm->EmitULEB128(Offset, nullptr, ULEB128PadSize); - } else { - // Emit a reference to the 'generic type'. - Asm->EmitULEB128(0, nullptr, ULEB128PadSize); - } - // Make sure comments stay aligned. - for (unsigned J = 0; J < ULEB128PadSize; ++J) - if (Comment != End) - Comment++; + uint64_t Offset = + CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die->getOffset(); + assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit"); + Streamer.emitULEB128(Offset, "", ULEB128PadSize); + // Make sure comments stay aligned. + for (unsigned J = 0; J < ULEB128PadSize; ++J) + if (Comment != End) + Comment++; } else { for (uint64_t J = Offset; J < Op.getOperandEndOffset(I); ++J) Streamer.EmitInt8(Data.getData()[J], Comment != End ? 
*(Comment++) : ""); @@ -2239,14 +2497,11 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, DwarfExpr.addUnsignedConstant(Value.getInt()); } else if (Value.isLocation()) { MachineLocation Location = Value.getLoc(); - if (Location.isIndirect()) - DwarfExpr.setMemoryLocationKind(); + DwarfExpr.setLocation(Location, DIExpr); DIExpressionCursor Cursor(DIExpr); - if (DIExpr->isEntryValue()) { - DwarfExpr.setEntryValueFlag(); + if (DIExpr->isEntryValue()) DwarfExpr.beginEntryValueExpression(Cursor); - } const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo(); if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg())) @@ -2256,7 +2511,7 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, TargetIndexLocation Loc = Value.getTargetIndexLocation(); // TODO TargetIndexLocation is a target-independent. Currently only the WebAssembly-specific // encoding is supported. - DwarfExpr.addWasmLocation(Loc.Index, Loc.Offset); + DwarfExpr.addWasmLocation(Loc.Index, static_cast<uint64_t>(Loc.Offset)); } else if (Value.isConstantFP()) { APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt(); DwarfExpr.addUnsignedConstant(RawBytes); @@ -2280,8 +2535,7 @@ void DebugLocEntry::finalize(const AsmPrinter &AP, assert(llvm::all_of(Values, [](DbgValueLoc P) { return P.isFragment(); }) && "all values are expected to be fragments"); - assert(std::is_sorted(Values.begin(), Values.end()) && - "fragments are expected to be sorted"); + assert(llvm::is_sorted(Values) && "fragments are expected to be sorted"); for (auto Fragment : Values) DwarfDebug::emitDebugLocValue(AP, BT, Fragment, DwarfExpr); @@ -2300,7 +2554,7 @@ void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry, // Emit the size. Asm->OutStreamer->AddComment("Loc expr size"); if (getDwarfVersion() >= 5) - Asm->EmitULEB128(DebugLocs.getBytes(Entry).size()); + Asm->emitULEB128(DebugLocs.getBytes(Entry).size()); else if (DebugLocs.getBytes(Entry).size() <= std::numeric_limits<uint16_t>::max()) Asm->emitInt16(DebugLocs.getBytes(Entry).size()); else { @@ -2314,41 +2568,19 @@ void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry, emitDebugLocEntry(Streamer, Entry, CU); } -// Emit the common part of the DWARF 5 range/locations list tables header. -static void emitListsTableHeaderStart(AsmPrinter *Asm, - MCSymbol *TableStart, - MCSymbol *TableEnd) { - // Build the table header, which starts with the length field. - Asm->OutStreamer->AddComment("Length"); - Asm->EmitLabelDifference(TableEnd, TableStart, 4); - Asm->OutStreamer->EmitLabel(TableStart); - // Version number (DWARF v5 and later). - Asm->OutStreamer->AddComment("Version"); - Asm->emitInt16(Asm->OutStreamer->getContext().getDwarfVersion()); - // Address size. - Asm->OutStreamer->AddComment("Address size"); - Asm->emitInt8(Asm->MAI->getCodePointerSize()); - // Segment selector size. - Asm->OutStreamer->AddComment("Segment selector size"); - Asm->emitInt8(0); -} - // Emit the header of a DWARF 5 range list table list table. Returns the symbol // that designates the end of the table for the caller to emit when the table is // complete. 
static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm, const DwarfFile &Holder) { - MCSymbol *TableStart = Asm->createTempSymbol("debug_rnglist_table_start"); - MCSymbol *TableEnd = Asm->createTempSymbol("debug_rnglist_table_end"); - emitListsTableHeaderStart(Asm, TableStart, TableEnd); + MCSymbol *TableEnd = mcdwarf::emitListsTableHeaderStart(*Asm->OutStreamer); Asm->OutStreamer->AddComment("Offset entry count"); Asm->emitInt32(Holder.getRangeLists().size()); - Asm->OutStreamer->EmitLabel(Holder.getRnglistsTableBaseSym()); + Asm->OutStreamer->emitLabel(Holder.getRnglistsTableBaseSym()); for (const RangeSpanList &List : Holder.getRangeLists()) - Asm->EmitLabelDifference(List.Label, Holder.getRnglistsTableBaseSym(), - 4); + Asm->emitLabelDifference(List.Label, Holder.getRnglistsTableBaseSym(), 4); return TableEnd; } @@ -2358,18 +2590,16 @@ static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm, // complete. static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm, const DwarfDebug &DD) { - MCSymbol *TableStart = Asm->createTempSymbol("debug_loclist_table_start"); - MCSymbol *TableEnd = Asm->createTempSymbol("debug_loclist_table_end"); - emitListsTableHeaderStart(Asm, TableStart, TableEnd); + MCSymbol *TableEnd = mcdwarf::emitListsTableHeaderStart(*Asm->OutStreamer); const auto &DebugLocs = DD.getDebugLocs(); Asm->OutStreamer->AddComment("Offset entry count"); Asm->emitInt32(DebugLocs.getLists().size()); - Asm->OutStreamer->EmitLabel(DebugLocs.getSym()); + Asm->OutStreamer->emitLabel(DebugLocs.getSym()); for (const auto &List : DebugLocs.getLists()) - Asm->EmitLabelDifference(List.Label, DebugLocs.getSym(), 4); + Asm->emitLabelDifference(List.Label, DebugLocs.getSym(), 4); return TableEnd; } @@ -2387,7 +2617,7 @@ static void emitRangeList( bool UseDwarf5 = DD.getDwarfVersion() >= 5; // Emit our symbol so we can find the beginning of the range. - Asm->OutStreamer->EmitLabel(Sym); + Asm->OutStreamer->emitLabel(Sym); // Gather all the ranges that apply to the same section so they can share // a base address entry. 
@@ -2406,9 +2636,9 @@ static void emitRangeList( if (!UseDwarf5) { Base = NewBase; BaseIsSet = true; - Asm->OutStreamer->EmitIntValue(-1, Size); + Asm->OutStreamer->emitIntValue(-1, Size); Asm->OutStreamer->AddComment(" base address"); - Asm->OutStreamer->EmitSymbolValue(Base, Size); + Asm->OutStreamer->emitSymbolValue(Base, Size); } else if (NewBase != Begin || P.second.size() > 1) { // Only use a base address if // * the existing pool address doesn't match (NewBase != Begin) @@ -2418,13 +2648,13 @@ static void emitRangeList( Asm->OutStreamer->AddComment(StringifyEnum(BaseAddressx)); Asm->emitInt8(BaseAddressx); Asm->OutStreamer->AddComment(" base address index"); - Asm->EmitULEB128(DD.getAddressPool().getIndex(Base)); + Asm->emitULEB128(DD.getAddressPool().getIndex(Base)); } } else if (BaseIsSet && !UseDwarf5) { BaseIsSet = false; assert(!Base); - Asm->OutStreamer->EmitIntValue(-1, Size); - Asm->OutStreamer->EmitIntValue(0, Size); + Asm->OutStreamer->emitIntValue(-1, Size); + Asm->OutStreamer->emitIntValue(0, Size); } for (const auto *RS : P.second) { @@ -2438,23 +2668,23 @@ static void emitRangeList( Asm->OutStreamer->AddComment(StringifyEnum(OffsetPair)); Asm->emitInt8(OffsetPair); Asm->OutStreamer->AddComment(" starting offset"); - Asm->EmitLabelDifferenceAsULEB128(Begin, Base); + Asm->emitLabelDifferenceAsULEB128(Begin, Base); Asm->OutStreamer->AddComment(" ending offset"); - Asm->EmitLabelDifferenceAsULEB128(End, Base); + Asm->emitLabelDifferenceAsULEB128(End, Base); } else { - Asm->EmitLabelDifference(Begin, Base, Size); - Asm->EmitLabelDifference(End, Base, Size); + Asm->emitLabelDifference(Begin, Base, Size); + Asm->emitLabelDifference(End, Base, Size); } } else if (UseDwarf5) { Asm->OutStreamer->AddComment(StringifyEnum(StartxLength)); Asm->emitInt8(StartxLength); Asm->OutStreamer->AddComment(" start index"); - Asm->EmitULEB128(DD.getAddressPool().getIndex(Begin)); + Asm->emitULEB128(DD.getAddressPool().getIndex(Begin)); Asm->OutStreamer->AddComment(" length"); - Asm->EmitLabelDifferenceAsULEB128(End, Begin); + Asm->emitLabelDifferenceAsULEB128(End, Begin); } else { - Asm->OutStreamer->EmitSymbolValue(Begin, Size); - Asm->OutStreamer->EmitSymbolValue(End, Size); + Asm->OutStreamer->emitSymbolValue(Begin, Size); + Asm->OutStreamer->emitSymbolValue(End, Size); } EmitPayload(*RS); } @@ -2465,8 +2695,8 @@ static void emitRangeList( Asm->emitInt8(EndOfList); } else { // Terminate the list with two 0 values. - Asm->OutStreamer->EmitIntValue(0, Size); - Asm->OutStreamer->EmitIntValue(0, Size); + Asm->OutStreamer->emitIntValue(0, Size); + Asm->OutStreamer->emitIntValue(0, Size); } } @@ -2496,7 +2726,7 @@ void DwarfDebug::emitDebugLocImpl(MCSection *Sec) { emitLocList(*this, Asm, List); if (TableEnd) - Asm->OutStreamer->EmitLabel(TableEnd); + Asm->OutStreamer->emitLabel(TableEnd); } // Emit locations into the .debug_loc/.debug_loclists section. @@ -2519,7 +2749,7 @@ void DwarfDebug::emitDebugLocDWO() { for (const auto &List : DebugLocs.getLists()) { Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfLocDWOSection()); - Asm->OutStreamer->EmitLabel(List.Label); + Asm->OutStreamer->emitLabel(List.Label); for (const auto &Entry : DebugLocs.getEntries(List)) { // GDB only supports startx_length in pre-standard split-DWARF. 
@@ -2527,14 +2757,15 @@ void DwarfDebug::emitDebugLocDWO() { // offset_pair, so the implementations can't really share much since they // need to use different representations) // * as of October 2018, at least - // Ideally/in v5, this could use SectionLabels to reuse existing addresses - // in the address pool to minimize object size/relocations. + // + // In v5 (see emitLocList), this uses SectionLabels to reuse existing + // addresses in the address pool to minimize object size/relocations. Asm->emitInt8(dwarf::DW_LLE_startx_length); unsigned idx = AddrPool.getIndex(Entry.Begin); - Asm->EmitULEB128(idx); + Asm->emitULEB128(idx); // Also the pre-standard encoding is slightly different, emitting this as // an address-length entry here, but its a ULEB128 in DWARFv5 loclists. - Asm->EmitLabelDifference(Entry.End, Entry.Begin, 4); + Asm->emitLabelDifference(Entry.End, Entry.Begin, 4); emitDebugLocEntryLocation(Entry, List.CU); } Asm->emitInt8(dwarf::DW_LLE_end_of_list); @@ -2679,11 +2910,11 @@ void DwarfDebug::emitDebugARanges() { Asm->OutStreamer->emitFill(Padding, 0xff); for (const ArangeSpan &Span : List) { - Asm->EmitLabelReference(Span.Start, PtrSize); + Asm->emitLabelReference(Span.Start, PtrSize); // Calculate the size as being from the span start to it's end. if (Span.End) { - Asm->EmitLabelDifference(Span.End, Span.Start, PtrSize); + Asm->emitLabelDifference(Span.End, Span.Start, PtrSize); } else { // For symbols without an end marker (e.g. common), we // write a single arange entry containing just that one symbol. @@ -2691,13 +2922,13 @@ void DwarfDebug::emitDebugARanges() { if (Size == 0) Size = 1; - Asm->OutStreamer->EmitIntValue(Size, PtrSize); + Asm->OutStreamer->emitIntValue(Size, PtrSize); } } Asm->OutStreamer->AddComment("ARange terminator"); - Asm->OutStreamer->EmitIntValue(0, PtrSize); - Asm->OutStreamer->EmitIntValue(0, PtrSize); + Asm->OutStreamer->emitIntValue(0, PtrSize); + Asm->OutStreamer->emitIntValue(0, PtrSize); } } @@ -2733,7 +2964,7 @@ void DwarfDebug::emitDebugRangesImpl(const DwarfFile &Holder, MCSection *Section emitRangeList(*this, Asm, List); if (TableEnd) - Asm->OutStreamer->EmitLabel(TableEnd); + Asm->OutStreamer->emitLabel(TableEnd); } /// Emit address ranges into the .debug_ranges section or into the DWARF v5 @@ -2752,6 +2983,27 @@ void DwarfDebug::emitDebugRangesDWO() { Asm->getObjFileLowering().getDwarfRnglistsDWOSection()); } +/// Emit the header of a DWARF 5 macro section. +static void emitMacroHeader(AsmPrinter *Asm, const DwarfDebug &DD, + const DwarfCompileUnit &CU) { + enum HeaderFlagMask { +#define HANDLE_MACRO_FLAG(ID, NAME) MACRO_FLAG_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" + }; + uint8_t Flags = 0; + Asm->OutStreamer->AddComment("Macro information version"); + Asm->emitInt16(5); + // We are setting Offset and line offset flags unconditionally here, + // since we're only supporting DWARF32 and line offset should be mostly + // present. + // FIXME: Add support for DWARF64. 
+ Flags |= MACRO_FLAG_DEBUG_LINE_OFFSET; + Asm->OutStreamer->AddComment("Flags: 32 bit, debug_line_offset present"); + Asm->emitInt8(Flags); + Asm->OutStreamer->AddComment("debug_line_offset"); + Asm->OutStreamer->emitSymbolValue(CU.getLineTableStartSym(), /*Size=*/4); +} + void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) { for (auto *MN : Nodes) { if (auto *M = dyn_cast<DIMacro>(MN)) @@ -2764,26 +3016,72 @@ void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) { } void DwarfDebug::emitMacro(DIMacro &M) { - Asm->EmitULEB128(M.getMacinfoType()); - Asm->EmitULEB128(M.getLine()); StringRef Name = M.getName(); StringRef Value = M.getValue(); - Asm->OutStreamer->EmitBytes(Name); - if (!Value.empty()) { - // There should be one space between macro name and macro value. - Asm->emitInt8(' '); - Asm->OutStreamer->EmitBytes(Value); + bool UseMacro = getDwarfVersion() >= 5; + + if (UseMacro) { + unsigned Type = M.getMacinfoType() == dwarf::DW_MACINFO_define + ? dwarf::DW_MACRO_define_strx + : dwarf::DW_MACRO_undef_strx; + Asm->OutStreamer->AddComment(dwarf::MacroString(Type)); + Asm->emitULEB128(Type); + Asm->OutStreamer->AddComment("Line Number"); + Asm->emitULEB128(M.getLine()); + Asm->OutStreamer->AddComment("Macro String"); + if (!Value.empty()) + Asm->emitULEB128(this->InfoHolder.getStringPool() + .getIndexedEntry(*Asm, (Name + " " + Value).str()) + .getIndex()); + else + // DW_MACRO_undef_strx doesn't have a value, so just emit the macro + // string. + Asm->emitULEB128(this->InfoHolder.getStringPool() + .getIndexedEntry(*Asm, (Name).str()) + .getIndex()); + } else { + Asm->OutStreamer->AddComment(dwarf::MacinfoString(M.getMacinfoType())); + Asm->emitULEB128(M.getMacinfoType()); + Asm->OutStreamer->AddComment("Line Number"); + Asm->emitULEB128(M.getLine()); + Asm->OutStreamer->AddComment("Macro String"); + Asm->OutStreamer->emitBytes(Name); + if (!Value.empty()) { + // There should be one space between macro name and macro value. + Asm->emitInt8(' '); + Asm->OutStreamer->AddComment("Macro Value="); + Asm->OutStreamer->emitBytes(Value); + } + Asm->emitInt8('\0'); } - Asm->emitInt8('\0'); +} + +void DwarfDebug::emitMacroFileImpl( + DIMacroFile &F, DwarfCompileUnit &U, unsigned StartFile, unsigned EndFile, + StringRef (*MacroFormToString)(unsigned Form)) { + + Asm->OutStreamer->AddComment(MacroFormToString(StartFile)); + Asm->emitULEB128(StartFile); + Asm->OutStreamer->AddComment("Line Number"); + Asm->emitULEB128(F.getLine()); + Asm->OutStreamer->AddComment("File Number"); + Asm->emitULEB128(U.getOrCreateSourceID(F.getFile())); + handleMacroNodes(F.getElements(), U); + Asm->OutStreamer->AddComment(MacroFormToString(EndFile)); + Asm->emitULEB128(EndFile); } void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) { + // DWARFv5 macro and DWARFv4 macinfo share some common encodings, + // so for readibility/uniformity, We are explicitly emitting those. 
assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file); - Asm->EmitULEB128(dwarf::DW_MACINFO_start_file); - Asm->EmitULEB128(F.getLine()); - Asm->EmitULEB128(U.getOrCreateSourceID(F.getFile())); - handleMacroNodes(F.getElements(), U); - Asm->EmitULEB128(dwarf::DW_MACINFO_end_file); + bool UseMacro = getDwarfVersion() >= 5; + if (UseMacro) + emitMacroFileImpl(F, U, dwarf::DW_MACRO_start_file, + dwarf::DW_MACRO_end_file, dwarf::MacroString); + else + emitMacroFileImpl(F, U, dwarf::DW_MACINFO_start_file, + dwarf::DW_MACINFO_end_file, dwarf::MacinfoString); } void DwarfDebug::emitDebugMacinfoImpl(MCSection *Section) { @@ -2796,20 +3094,28 @@ void DwarfDebug::emitDebugMacinfoImpl(MCSection *Section) { if (Macros.empty()) continue; Asm->OutStreamer->SwitchSection(Section); - Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin()); + Asm->OutStreamer->emitLabel(U.getMacroLabelBegin()); + if (getDwarfVersion() >= 5) + emitMacroHeader(Asm, *this, U); handleMacroNodes(Macros, U); Asm->OutStreamer->AddComment("End Of Macro List Mark"); Asm->emitInt8(0); } } -/// Emit macros into a debug macinfo section. +/// Emit macros into a debug macinfo/macro section. void DwarfDebug::emitDebugMacinfo() { - emitDebugMacinfoImpl(Asm->getObjFileLowering().getDwarfMacinfoSection()); + auto &ObjLower = Asm->getObjFileLowering(); + emitDebugMacinfoImpl(getDwarfVersion() >= 5 + ? ObjLower.getDwarfMacroSection() + : ObjLower.getDwarfMacinfoSection()); } void DwarfDebug::emitDebugMacinfoDWO() { - emitDebugMacinfoImpl(Asm->getObjFileLowering().getDwarfMacinfoDWOSection()); + auto &ObjLower = Asm->getObjFileLowering(); + emitDebugMacinfoImpl(getDwarfVersion() >= 5 + ? ObjLower.getDwarfMacroDWOSection() + : ObjLower.getDwarfMacinfoDWOSection()); } // DWARF5 Experimental Separate Dwarf emitters. @@ -2819,7 +3125,6 @@ void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, if (!CompilationDir.empty()) NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); - addGnuPubAttributes(*NewU, Die); SkeletonHolder.addUnit(std::move(NewU)); @@ -3073,3 +3378,8 @@ uint16_t DwarfDebug::getDwarfVersion() const { const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) { return SectionLabels.find(S)->second; } +void DwarfDebug::insertSectionLabel(const MCSymbol *S) { + if (SectionLabels.insert(std::make_pair(&S->getSection(), S)).second) + if (useSplitDwarf() || getDwarfVersion() >= 5) + AddrPool.getIndex(S); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index f90dd48458ea..ad2f2f3edd8e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -49,7 +49,6 @@ namespace llvm { class AsmPrinter; class ByteStreamer; -class DebugLocEntry; class DIE; class DwarfCompileUnit; class DwarfExpression; @@ -59,7 +58,6 @@ class LexicalScope; class MachineFunction; class MCSection; class MCSymbol; -class MDNode; class Module; //===----------------------------------------------------------------------===// @@ -327,7 +325,7 @@ class DwarfDebug : public DebugHandlerBase { const MachineFunction *CurFn = nullptr; /// If nonnull, stores the CU in which the previous subprogram was contained. - const DwarfCompileUnit *PrevCU; + const DwarfCompileUnit *PrevCU = nullptr; /// As an optimization, there is no need to emit an entry in the directory /// table for the same directory as DW_AT_comp_dir. 
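The emitMacroHeader/emitMacro hunks above switch DWARF v5 output to the .debug_macro form: a small fixed header followed by DW_MACRO_define_strx/undef_strx records that reference the string offsets table, instead of embedding "NAME VALUE" inline as v4 macinfo does. As a layout aid only, here is a hypothetical DWARF32 view of that 7-byte header; the real emitter writes the fields one at a time in target byte order, and LLVM's flag constant comes from Dwarf.def rather than the literal used here.

#include <cstdint>

#pragma pack(push, 1)
struct MacroHeader32 {      // .debug_macro header, DWARF32
  uint16_t Version;         // 5
  uint8_t Flags;            // bit 0: offset_size_flag (0 => DWARF32)
                            // bit 1: debug_line_offset_flag
  uint32_t DebugLineOffset; // offset of the CU's line table
};
#pragma pack(pop)

static_assert(sizeof(MacroHeader32) == 7, "2 + 1 + 4 bytes");

int main() {
  MacroHeader32 H{5, 0x2, 0}; // version 5, debug_line_offset present
  return (H.Flags & 0x2) ? 0 : 1;
}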
@@ -386,6 +384,11 @@ class DwarfDebug : public DebugHandlerBase { /// a monolithic sequence of string offsets. bool UseSegmentedStringOffsetsTable; + /// Enable production of call site parameters needed to print the debug entry + /// values. Useful for testing purposes when a debugger does not support the + /// feature yet. + bool EmitDebugEntryValues; + /// Separated Dwarf Variables /// In general these will all be for bits that are left in the /// original object file, rather than things that are meant @@ -442,6 +445,9 @@ class DwarfDebug : public DebugHandlerBase { /// Construct a DIE for this abstract scope. void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope); + /// Construct a DIE for the subprogram definition \p SP and return it. + DIE &constructSubprogramDefinitionDIE(const DISubprogram *SP); + /// Construct DIEs for call site entries describing the calls in \p MF. void constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU, DIE &ScopeDIE, const MachineFunction &MF); @@ -520,6 +526,9 @@ class DwarfDebug : public DebugHandlerBase { void emitDebugMacinfoImpl(MCSection *Section); void emitMacro(DIMacro &M); void emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U); + void emitMacroFileImpl(DIMacroFile &F, DwarfCompileUnit &U, + unsigned StartFile, unsigned EndFile, + StringRef (*MacroFormToString)(unsigned Form)); void handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U); /// DWARF 5 Experimental Split Dwarf Emitters @@ -583,8 +592,10 @@ class DwarfDebug : public DebugHandlerBase { /// function that describe the same variable. If the resulting /// list has only one entry that is valid for entire variable's /// scope return true. - bool buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, - const DbgValueHistoryMap::Entries &Entries); + bool buildLocationList( + SmallVectorImpl<DebugLocEntry> &DebugLoc, + const DbgValueHistoryMap::Entries &Entries, + DenseSet<const MachineBasicBlock *> &VeryLargeBlocks); /// Collect variable information from the side table maintained by MF. void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU, @@ -631,7 +642,6 @@ public: void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier, DIE &Die, const DICompositeType *CTy); - friend class NonTypeUnitContext; class NonTypeUnitContext { DwarfDebug *DD; decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction; @@ -705,6 +715,10 @@ public: return UseSegmentedStringOffsetsTable; } + bool emitDebugEntryValues() const { + return EmitDebugEntryValues; + } + bool shareAcrossDWOCUs() const; /// Returns the Dwarf Version. 
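The new EmitDebugEntryValues flag declared above gates DW_OP_entry_value emission (the mechanics live in the DwarfExpression changes further down in this diff, where finalizeEntryValue wraps a temporary buffer). A standalone sketch of the bytes such an expression boils down to, with opcode values taken from the DWARF v5 spec; the pre-standard GNU opcode is 0xf3 in place of 0xa3.

#include <cstdint>
#include <vector>

int main() {
  std::vector<uint8_t> Expr;
  Expr.push_back(0xa3); // DW_OP_entry_value
  Expr.push_back(0x01); // ULEB128 size of the nested block: one byte
  Expr.push_back(0x55); // DW_OP_reg5: value the register held on entry
  Expr.push_back(0x9f); // DW_OP_stack_value: describes a value, not a location
  return Expr.size() == 4 ? 0 : 1;
}

This also shows why the temporary buffering in finalizeEntryValue is needed: the size operand precedes the nested block, so the sub-expression must be measured before the opcode and size can be written out.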
@@ -765,6 +779,7 @@ public: void addSectionLabel(const MCSymbol *Sym); const MCSymbol *getSectionLabel(const MCSection *S); + void insertSectionLabel(const MCSymbol *S); static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, const DbgValueLoc &Value, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h index 24bbf58b91ec..c2956380438f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -66,6 +66,9 @@ public: void beginFragment(const MachineBasicBlock *MBB, ExceptionSymbolProvider ESP) override; + + void beginBasicBlock(const MachineBasicBlock &MBB) override; + void endBasicBlock(const MachineBasicBlock &MBB) override; }; class LLVM_LIBRARY_VISIBILITY ARMException : public DwarfCFIExceptionBase { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 310647f15a5e..d4762121d105 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -40,12 +40,12 @@ void DwarfExpression::emitConstu(uint64_t Value) { } void DwarfExpression::addReg(int DwarfReg, const char *Comment) { - assert(DwarfReg >= 0 && "invalid negative dwarf register number"); - assert((isUnknownLocation() || isRegisterLocation()) && - "location description already locked down"); - LocationKind = Register; - if (DwarfReg < 32) { - emitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment); + assert(DwarfReg >= 0 && "invalid negative dwarf register number"); + assert((isUnknownLocation() || isRegisterLocation()) && + "location description already locked down"); + LocationKind = Register; + if (DwarfReg < 32) { + emitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment); } else { emitOp(dwarf::DW_OP_regx, Comment); emitUnsigned(DwarfReg); @@ -100,7 +100,7 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, unsigned MachineReg, unsigned MaxSize) { if (!llvm::Register::isPhysicalRegister(MachineReg)) { if (isFrameRegister(TRI, MachineReg)) { - DwarfRegs.push_back({-1, 0, nullptr}); + DwarfRegs.push_back(Register::createRegister(-1, nullptr)); return true; } return false; @@ -110,7 +110,7 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, // If this is a valid register number, emit it. if (Reg >= 0) { - DwarfRegs.push_back({Reg, 0, nullptr}); + DwarfRegs.push_back(Register::createRegister(Reg, nullptr)); return true; } @@ -122,7 +122,7 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, unsigned Idx = TRI.getSubRegIndex(*SR, MachineReg); unsigned Size = TRI.getSubRegIdxSize(Idx); unsigned RegOffset = TRI.getSubRegIdxOffset(Idx); - DwarfRegs.push_back({Reg, 0, "super-register"}); + DwarfRegs.push_back(Register::createRegister(Reg, "super-register")); // Use a DW_OP_bit_piece to describe the sub-register. setSubRegisterPiece(Size, RegOffset); return true; @@ -149,8 +149,8 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, if (Reg < 0) continue; - // Intersection between the bits we already emitted and the bits - // covered by this subregister. + // Used to build the intersection between the bits we already + // emitted and the bits covered by this subregister. 
SmallBitVector CurSubReg(RegSize, false); CurSubReg.set(Offset, Offset + Size); @@ -159,10 +159,13 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, if (Offset < MaxSize && CurSubReg.test(Coverage)) { // Emit a piece for any gap in the coverage. if (Offset > CurPos) - DwarfRegs.push_back( - {-1, Offset - CurPos, "no DWARF register encoding"}); - DwarfRegs.push_back( - {Reg, std::min<unsigned>(Size, MaxSize - Offset), "sub-register"}); + DwarfRegs.push_back(Register::createSubRegister( + -1, Offset - CurPos, "no DWARF register encoding")); + if (Offset == 0 && Size >= MaxSize) + DwarfRegs.push_back(Register::createRegister(Reg, "sub-register")); + else + DwarfRegs.push_back(Register::createSubRegister( + Reg, std::min<unsigned>(Size, MaxSize - Offset), "sub-register")); } // Mark it as emitted. Coverage.set(Offset, Offset + Size); @@ -173,7 +176,8 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, return false; // Found a partial or complete DWARF encoding. if (CurPos < RegSize) - DwarfRegs.push_back({-1, RegSize - CurPos, "no DWARF register encoding"}); + DwarfRegs.push_back(Register::createSubRegister( + -1, RegSize - CurPos, "no DWARF register encoding")); return true; } @@ -233,8 +237,17 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, // If the register can only be described by a complex expression (i.e., // multiple subregisters) it doesn't safely compose with another complex // expression. For example, it is not possible to apply a DW_OP_deref - // operation to multiple DW_OP_pieces. - if (HasComplexExpression && DwarfRegs.size() > 1) { + // operation to multiple DW_OP_pieces, since composite location descriptions + // do not push anything on the DWARF stack. + // + // DW_OP_entry_value operations can only hold a DWARF expression or a + // register location description, so we can't emit a single entry value + // covering a composite location description. In the future we may want to + // emit entry value operations for each register location in the composite + // location, but until that is supported do not emit anything. + if ((HasComplexExpression || IsEmittingEntryValue) && DwarfRegs.size() > 1) { + if (IsEmittingEntryValue) + cancelEntryValue(); DwarfRegs.clear(); LocationKind = Unknown; return false; @@ -244,18 +257,19 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, // a call site parameter expression and if that expression is just a register // location, emit it with addBReg and offset 0, because we should emit a DWARF // expression representing a value, rather than a location. 
- if (!isMemoryLocation() && !HasComplexExpression && (!isParameterValue() || - isEntryValue())) { + if (!isMemoryLocation() && !HasComplexExpression && + (!isParameterValue() || isEntryValue())) { for (auto &Reg : DwarfRegs) { if (Reg.DwarfRegNo >= 0) addReg(Reg.DwarfRegNo, Reg.Comment); - addOpPiece(Reg.Size); + addOpPiece(Reg.SubRegSize); } if (isEntryValue()) finalizeEntryValue(); - if (isEntryValue() && !isParameterValue() && DwarfVersion >= 4) + if (isEntryValue() && !isIndirect() && !isParameterValue() && + DwarfVersion >= 4) emitOp(dwarf::DW_OP_stack_value); DwarfRegs.clear(); @@ -276,7 +290,7 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, auto Reg = DwarfRegs[0]; bool FBReg = isFrameRegister(TRI, MachineReg); int SignedOffset = 0; - assert(Reg.Size == 0 && "subregister has same size as superregister"); + assert(!Reg.isSubRegister() && "full register expected"); // Pattern-match combinations for which more efficient representations exist. // [Reg, DW_OP_plus_uconst, Offset] --> [DW_OP_breg, Offset]. @@ -314,6 +328,25 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, return true; } +void DwarfExpression::setEntryValueFlags(const MachineLocation &Loc) { + LocationFlags |= EntryValue; + if (Loc.isIndirect()) + LocationFlags |= Indirect; +} + +void DwarfExpression::setLocation(const MachineLocation &Loc, + const DIExpression *DIExpr) { + if (Loc.isIndirect()) + // Do not treat entry value descriptions of indirect parameters as memory + // locations. This allows DwarfExpression::addReg() to add DW_OP_regN to an + // entry value description. + if (!DIExpr->isEntryValue()) + setMemoryLocationKind(); + + if (DIExpr->isEntryValue()) + setEntryValueFlags(Loc); +} + void DwarfExpression::beginEntryValueExpression( DIExpressionCursor &ExprCursor) { auto Op = ExprCursor.take(); @@ -325,7 +358,6 @@ void DwarfExpression::beginEntryValueExpression( assert(Op->getArg(0) == 1 && "Can currently only emit entry values covering a single operation"); - emitOp(CU.getDwarf5OrGNULocationAtom(dwarf::DW_OP_entry_value)); IsEmittingEntryValue = true; enableTemporaryBuffer(); } @@ -334,6 +366,8 @@ void DwarfExpression::finalizeEntryValue() { assert(IsEmittingEntryValue && "Entry value not open?"); disableTemporaryBuffer(); + emitOp(CU.getDwarf5OrGNULocationAtom(dwarf::DW_OP_entry_value)); + // Emit the entry value's size operand. unsigned Size = getTemporaryBufferSize(); emitUnsigned(Size); @@ -344,7 +378,35 @@ void DwarfExpression::finalizeEntryValue() { IsEmittingEntryValue = false; } -/// Assuming a well-formed expression, match "DW_OP_deref* DW_OP_LLVM_fragment?". +void DwarfExpression::cancelEntryValue() { + assert(IsEmittingEntryValue && "Entry value not open?"); + disableTemporaryBuffer(); + + // The temporary buffer can't be emptied, so for now just assert that nothing + // has been emitted to it. + assert(getTemporaryBufferSize() == 0 && + "Began emitting entry value block before cancelling entry value"); + + IsEmittingEntryValue = false; +} + +unsigned DwarfExpression::getOrCreateBaseType(unsigned BitSize, + dwarf::TypeKind Encoding) { + // Reuse the base_type if we already have one in this CU otherwise we + // create a new one. 
+ unsigned I = 0, E = CU.ExprRefedBaseTypes.size(); + for (; I != E; ++I) + if (CU.ExprRefedBaseTypes[I].BitSize == BitSize && + CU.ExprRefedBaseTypes[I].Encoding == Encoding) + break; + + if (I == E) + CU.ExprRefedBaseTypes.emplace_back(BitSize, Encoding); + return I; +} + +/// Assuming a well-formed expression, match "DW_OP_deref* +/// DW_OP_LLVM_fragment?". static bool isMemoryLocation(DIExpressionCursor ExprCursor) { while (ExprCursor) { auto Op = ExprCursor.take(); @@ -361,6 +423,10 @@ static bool isMemoryLocation(DIExpressionCursor ExprCursor) { void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, unsigned FragmentOffsetInBits) { + // Entry values can currently only cover the initial register location, + // and not any other parts of the following DWARF expression. + assert(!IsEmittingEntryValue && "Can't emit entry value around expression"); + // If we need to mask out a subregister, do it now, unless the next // operation would emit an OpPiece anyway. auto N = ExprCursor.peek(); @@ -431,6 +497,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, case dwarf::DW_OP_lit0: case dwarf::DW_OP_not: case dwarf::DW_OP_dup: + case dwarf::DW_OP_push_object_address: emitOp(OpNum); break; case dwarf::DW_OP_deref: @@ -451,24 +518,13 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, dwarf::TypeKind Encoding = static_cast<dwarf::TypeKind>(Op->getArg(1)); if (DwarfVersion >= 5) { emitOp(dwarf::DW_OP_convert); - // Reuse the base_type if we already have one in this CU otherwise we - // create a new one. - unsigned I = 0, E = CU.ExprRefedBaseTypes.size(); - for (; I != E; ++I) - if (CU.ExprRefedBaseTypes[I].BitSize == BitSize && - CU.ExprRefedBaseTypes[I].Encoding == Encoding) - break; - - if (I == E) - CU.ExprRefedBaseTypes.emplace_back(BitSize, Encoding); - // If targeting a location-list; simply emit the index into the raw // byte stream as ULEB128, DwarfDebug::emitDebugLocEntry has been // fitted with means to extract it later. // If targeting a inlined DW_AT_location; insert a DIEBaseTypeRef // (containing the index and a resolve mechanism during emit) into the // DIE value list. - emitBaseTypeRef(I); + emitBaseTypeRef(getOrCreateBaseType(BitSize, Encoding)); } else { if (PrevConvertOp && PrevConvertOp->getArg(0) < BitSize) { if (Encoding == dwarf::DW_ATE_signed) @@ -573,10 +629,10 @@ void DwarfExpression::emitLegacyZExt(unsigned FromBits) { emitOp(dwarf::DW_OP_and); } -void DwarfExpression::addWasmLocation(unsigned Index, int64_t Offset) { +void DwarfExpression::addWasmLocation(unsigned Index, uint64_t Offset) { assert(LocationKind == Implicit || LocationKind == Unknown); LocationKind = Implicit; emitOp(dwarf::DW_OP_WASM_location); emitUnsigned(Index); - emitSigned(Offset); + emitUnsigned(Offset); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index 46c07b1d5b6b..757b17511453 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -30,6 +30,7 @@ class APInt; class DwarfCompileUnit; class DIELoc; class TargetRegisterInfo; +class MachineLocation; /// Holds a DIExpression and keeps track of how many operands have been consumed /// so far. @@ -107,8 +108,21 @@ protected: /// Holds information about all subregisters comprising a register location. 
struct Register { int DwarfRegNo; - unsigned Size; + unsigned SubRegSize; const char *Comment; + + /// Create a full register, no extra DW_OP_piece operators necessary. + static Register createRegister(int RegNo, const char *Comment) { + return {RegNo, 0, Comment}; + } + + /// Create a subregister that needs a DW_OP_piece operator with SizeInBits. + static Register createSubRegister(int RegNo, unsigned SizeInBits, + const char *Comment) { + return {RegNo, SizeInBits, Comment}; + } + + bool isSubRegister() const { return SubRegSize; } }; /// Whether we are currently emitting an entry value operation. @@ -129,37 +143,31 @@ protected: /// The kind of location description being produced. enum { Unknown = 0, Register, Memory, Implicit }; - /// The flags of location description being produced. - enum { EntryValue = 1, CallSiteParamValue }; + /// Additional location flags which may be combined with any location kind. + /// Currently, entry values are not supported for the Memory location kind. + enum { EntryValue = 1 << 0, Indirect = 1 << 1, CallSiteParamValue = 1 << 2 }; unsigned LocationKind : 3; - unsigned LocationFlags : 2; + unsigned LocationFlags : 3; unsigned DwarfVersion : 4; public: - bool isUnknownLocation() const { - return LocationKind == Unknown; - } + /// Set the location (\p Loc) and \ref DIExpression (\p DIExpr) to describe. + void setLocation(const MachineLocation &Loc, const DIExpression *DIExpr); - bool isMemoryLocation() const { - return LocationKind == Memory; - } + bool isUnknownLocation() const { return LocationKind == Unknown; } - bool isRegisterLocation() const { - return LocationKind == Register; - } + bool isMemoryLocation() const { return LocationKind == Memory; } - bool isImplicitLocation() const { - return LocationKind == Implicit; - } + bool isRegisterLocation() const { return LocationKind == Register; } - bool isEntryValue() const { - return LocationFlags & EntryValue; - } + bool isImplicitLocation() const { return LocationKind == Implicit; } - bool isParameterValue() { - return LocationFlags & CallSiteParamValue; - } + bool isEntryValue() const { return LocationFlags & EntryValue; } + + bool isIndirect() const { return LocationFlags & Indirect; } + + bool isParameterValue() { return LocationFlags & CallSiteParamValue; } Optional<uint8_t> TagOffset; @@ -209,7 +217,8 @@ protected: /// Return whether the given machine register is the frame register in the /// current function. - virtual bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) = 0; + virtual bool isFrameRegister(const TargetRegisterInfo &TRI, + unsigned MachineReg) = 0; /// Emit a DW_OP_reg operation. Note that this is only legal inside a DWARF /// register location description. @@ -267,6 +276,9 @@ protected: /// DWARF block which has been emitted to the temporary buffer. void finalizeEntryValue(); + /// Cancel the emission of an entry value. + void cancelEntryValue(); + ~DwarfExpression() = default; public: @@ -294,14 +306,10 @@ public: } /// Lock this down to become an entry value location. - void setEntryValueFlag() { - LocationFlags |= EntryValue; - } + void setEntryValueFlags(const MachineLocation &Loc); /// Lock this down to become a call site parameter location. - void setCallSiteParamValueFlag() { - LocationFlags |= CallSiteParamValue; - } + void setCallSiteParamValueFlag() { LocationFlags |= CallSiteParamValue; } /// Emit a machine register location. 
As an optimization this may also consume /// the prefix of a DwarfExpression if a more efficient representation for @@ -323,6 +331,10 @@ public: /// any operands here. void beginEntryValueExpression(DIExpressionCursor &ExprCursor); + /// Return the index of a base type with the given properties and + /// create one if necessary. + unsigned getOrCreateBaseType(unsigned BitSize, dwarf::TypeKind Encoding); + /// Emit all remaining operations in the DIExpressionCursor. /// /// \param FragmentOffsetInBits If this is one fragment out of multiple @@ -340,7 +352,7 @@ public: /// Emit location information expressed via WebAssembly location + offset /// The Index is an identifier for locals, globals or operand stack. - void addWasmLocation(unsigned Index, int64_t Offset); + void addWasmLocation(unsigned Index, uint64_t Offset); }; /// DwarfExpression implementation for .debug_loc entries. @@ -374,6 +386,7 @@ class DebugLocDwarfExpression final : public DwarfExpression { bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) override; + public: DebugLocDwarfExpression(unsigned DwarfVersion, BufferByteStreamer &BS, DwarfCompileUnit &CU) @@ -403,6 +416,7 @@ class DIEDwarfExpression final : public DwarfExpression { bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) override; + public: DIEDwarfExpression(const AsmPrinter &AP, DwarfCompileUnit &CU, DIELoc &DIE); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp index e5c4db58f477..812e6383288f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -53,7 +53,7 @@ void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) { Asm->emitDwarfDIE(TheU->getUnitDie()); if (MCSymbol *EndLabel = TheU->getEndLabel()) - Asm->OutStreamer->EmitLabel(EndLabel); + Asm->OutStreamer->emitLabel(EndLabel); } // Compute the size and offset for each DIE. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp index 2a76dcb1b082..a43929d8e8f7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -71,7 +71,7 @@ void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm, // referenced by most unit headers via DW_AT_str_offsets_base. // Split units do not use the attribute. if (StartSym) - Asm.OutStreamer->EmitLabel(StartSym); + Asm.OutStreamer->emitLabel(StartSym); } void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection, @@ -100,12 +100,12 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection, // Emit a label for reference from debug information entries. if (ShouldCreateSymbols) - Asm.OutStreamer->EmitLabel(Entry->getValue().Symbol); + Asm.OutStreamer->emitLabel(Entry->getValue().Symbol); // Emit the string itself with a terminating null byte. 
Asm.OutStreamer->AddComment("string offset=" + Twine(Entry->getValue().Offset)); - Asm.OutStreamer->EmitBytes( + Asm.OutStreamer->emitBytes( StringRef(Entry->getKeyData(), Entry->getKeyLength() + 1)); } @@ -125,6 +125,6 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection, if (UseRelativeOffsets) Asm.emitDwarfStringOffset(Entry->getValue()); else - Asm.OutStreamer->EmitIntValue(Entry->getValue().Offset, size); + Asm.OutStreamer->emitIntValue(Entry->getValue().Offset, size); } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 53747aef77fd..e958f38e486b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -188,8 +188,9 @@ int64_t DwarfUnit::getDefaultLowerBound() const { /// Check whether the DIE for this MDNode can be shared across CUs. bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const { - // When the MDNode can be part of the type system, the DIE can be shared - // across CUs. + // When the MDNode can be part of the type system (this includes subprogram + // declarations *and* subprogram definitions, even local definitions), the + // DIE must be shared across CUs. // Combining type units and cross-CU DIE sharing is lower value (since // cross-CU DIE sharing is used in LTO and removes type redundancy at that // level already) but may be implementable for some value in projects @@ -197,9 +198,7 @@ bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const { // together. if (isDwoUnit() && !DD->shareAcrossDWOCUs()) return false; - return (isa<DIType>(D) || - (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) && - !DD->generateTypeUnits(); + return (isa<DIType>(D) || isa<DISubprogram>(D)) && !DD->generateTypeUnits(); } DIE *DwarfUnit::getDIE(const DINode *D) const { @@ -1046,6 +1045,8 @@ void DwarfUnit::constructTemplateTypeParameterDIE( addType(ParamDIE, TP->getType()); if (!TP->getName().empty()) addString(ParamDIE, dwarf::DW_AT_name, TP->getName()); + if (TP->isDefault() && (DD->getDwarfVersion() >= 5)) + addFlag(ParamDIE, dwarf::DW_AT_default_value); } void DwarfUnit::constructTemplateValueParameterDIE( @@ -1058,6 +1059,8 @@ void DwarfUnit::constructTemplateValueParameterDIE( addType(ParamDIE, VP->getType()); if (!VP->getName().empty()) addString(ParamDIE, dwarf::DW_AT_name, VP->getName()); + if (VP->isDefault() && (DD->getDwarfVersion() >= 5)) + addFlag(ParamDIE, dwarf::DW_AT_default_value); if (Metadata *Val = VP->getValue()) { if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val)) addConstantValue(ParamDIE, CI, VP->getType()); @@ -1123,8 +1126,13 @@ DIE *DwarfUnit::getOrCreateModule(const DIModule *M) { M->getConfigurationMacros()); if (!M->getIncludePath().empty()) addString(MDie, dwarf::DW_AT_LLVM_include_path, M->getIncludePath()); - if (!M->getSysRoot().empty()) - addString(MDie, dwarf::DW_AT_LLVM_sysroot, M->getSysRoot()); + if (!M->getAPINotesFile().empty()) + addString(MDie, dwarf::DW_AT_LLVM_apinotes, M->getAPINotesFile()); + if (M->getFile()) + addUInt(MDie, dwarf::DW_AT_decl_file, None, + getOrCreateSourceID(M->getFile())); + if (M->getLineNo()) + addUInt(MDie, dwarf::DW_AT_decl_line, None, M->getLineNo()); return &MDie; } @@ -1166,6 +1174,14 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE *DeclDie = nullptr; StringRef DeclLinkageName; if (auto *SPDecl = SP->getDeclaration()) { + DITypeRefArray DeclArgs, 
DefinitionArgs; + DeclArgs = SPDecl->getType()->getTypeArray(); + DefinitionArgs = SP->getType()->getTypeArray(); + + if (DeclArgs.size() && DefinitionArgs.size()) + if (DefinitionArgs[0] != NULL && DeclArgs[0] != DefinitionArgs[0]) + addType(SPDie, DefinitionArgs[0]); + DeclDie = getDIE(SPDecl); assert(DeclDie && "This DIE should've already been constructed when the " "definition DIE was created in " @@ -1333,20 +1349,40 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, // C/C++. The Count value is the number of elements. Values are 64 bit. If // Count == -1 then the array is unbounded and we do not emit // DW_AT_lower_bound and DW_AT_count attributes. - int64_t LowerBound = SR->getLowerBound(); int64_t DefaultLowerBound = getDefaultLowerBound(); int64_t Count = -1; if (auto *CI = SR->getCount().dyn_cast<ConstantInt*>()) Count = CI->getSExtValue(); - if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound) - addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound); + auto addBoundTypeEntry = [&](dwarf::Attribute Attr, + DISubrange::BoundType Bound) -> void { + if (auto *BV = Bound.dyn_cast<DIVariable *>()) { + if (auto *VarDIE = getDIE(BV)) + addDIEEntry(DW_Subrange, Attr, *VarDIE); + } else if (auto *BE = Bound.dyn_cast<DIExpression *>()) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc); + DwarfExpr.setMemoryLocationKind(); + DwarfExpr.addExpression(BE); + addBlock(DW_Subrange, Attr, DwarfExpr.finalize()); + } else if (auto *BI = Bound.dyn_cast<ConstantInt *>()) { + if (Attr != dwarf::DW_AT_lower_bound || DefaultLowerBound == -1 || + BI->getSExtValue() != DefaultLowerBound) + addSInt(DW_Subrange, Attr, dwarf::DW_FORM_sdata, BI->getSExtValue()); + } + }; + + addBoundTypeEntry(dwarf::DW_AT_lower_bound, SR->getLowerBound()); if (auto *CV = SR->getCount().dyn_cast<DIVariable*>()) { if (auto *CountVarDIE = getDIE(CV)) addDIEEntry(DW_Subrange, dwarf::DW_AT_count, *CountVarDIE); } else if (Count != -1) addUInt(DW_Subrange, dwarf::DW_AT_count, None, Count); + + addBoundTypeEntry(dwarf::DW_AT_upper_bound, SR->getUpperBound()); + + addBoundTypeEntry(dwarf::DW_AT_byte_stride, SR->getStride()); } DIE *DwarfUnit::getIndexTyDie() { @@ -1398,6 +1434,17 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) { CTy->getSizeInBits() / CHAR_BIT); } + if (DIVariable *Var = CTy->getDataLocation()) { + if (auto *VarDIE = getDIE(Var)) + addDIEEntry(Buffer, dwarf::DW_AT_data_location, *VarDIE); + } else if (DIExpression *Expr = CTy->getDataLocationExp()) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc); + DwarfExpr.setMemoryLocationKind(); + DwarfExpr.addExpression(Expr); + addBlock(Buffer, dwarf::DW_AT_data_location, DwarfExpr.finalize()); + } + // Emit the element type. addType(Buffer, CTy->getBaseType()); @@ -1438,8 +1485,7 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) { DIE &Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer); StringRef Name = Enum->getName(); addString(Enumerator, dwarf::DW_AT_name, Name); - auto Value = static_cast<uint64_t>(Enum->getValue()); - addConstantValue(Enumerator, IsUnsigned, Value); + addConstantValue(Enumerator, Enum->getValue(), IsUnsigned); if (IndexEnumerators) addGlobalName(Name, Enumerator, Context); } @@ -1623,8 +1669,8 @@ void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) { StringRef Prefix = isDwoUnit() ? 
"debug_info_dwo_" : "debug_info_"; MCSymbol *BeginLabel = Asm->createTempSymbol(Prefix + "start"); EndLabel = Asm->createTempSymbol(Prefix + "end"); - Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); - Asm->OutStreamer->EmitLabel(BeginLabel); + Asm->emitLabelDifference(EndLabel, BeginLabel, 4); + Asm->OutStreamer->emitLabel(BeginLabel); } else Asm->emitInt32(getHeaderSize() + getUnitDie().getSize()); @@ -1662,10 +1708,10 @@ void DwarfTypeUnit::emitHeader(bool UseOffsets) { DD->useSplitDwarf() ? dwarf::DW_UT_split_type : dwarf::DW_UT_type); Asm->OutStreamer->AddComment("Type Signature"); - Asm->OutStreamer->EmitIntValue(TypeSignature, sizeof(TypeSignature)); + Asm->OutStreamer->emitIntValue(TypeSignature, sizeof(TypeSignature)); Asm->OutStreamer->AddComment("Type DIE Offset"); // In a skeleton type unit there is no type DIE so emit a zero offset. - Asm->OutStreamer->EmitIntValue(Ty ? Ty->getOffset() : 0, + Asm->OutStreamer->emitIntValue(Ty ? Ty->getOffset() : 0, sizeof(Ty->getOffset())); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 46c52a1faf4b..34f3a34ed336 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -27,7 +27,6 @@ namespace llvm { -class MachineLocation; class MachineOperand; class ConstantInt; class ConstantFP; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 31dfaaac836e..99ee4567fa58 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -426,18 +426,18 @@ MCSymbol *EHStreamer::emitExceptionTable() { // EHABI). In this case LSDASection will be NULL. if (LSDASection) Asm->OutStreamer->SwitchSection(LSDASection); - Asm->EmitAlignment(Align(4)); + Asm->emitAlignment(Align(4)); // Emit the LSDA. MCSymbol *GCCETSym = Asm->OutContext.getOrCreateSymbol(Twine("GCC_except_table")+ Twine(Asm->getFunctionNumber())); - Asm->OutStreamer->EmitLabel(GCCETSym); - Asm->OutStreamer->EmitLabel(Asm->getCurExceptionSym()); + Asm->OutStreamer->emitLabel(GCCETSym); + Asm->OutStreamer->emitLabel(Asm->getCurExceptionSym()); // Emit the LSDA header. - Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); - Asm->EmitEncodingByte(TTypeEncoding, "@TType"); + Asm->emitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); + Asm->emitEncodingByte(TTypeEncoding, "@TType"); MCSymbol *TTBaseLabel = nullptr; if (HaveTTData) { @@ -447,8 +447,8 @@ MCSymbol *EHStreamer::emitExceptionTable() { // the type table. See PR35809 or GNU as bug 4029. MCSymbol *TTBaseRefLabel = Asm->createTempSymbol("ttbaseref"); TTBaseLabel = Asm->createTempSymbol("ttbase"); - Asm->EmitLabelDifferenceAsULEB128(TTBaseLabel, TTBaseRefLabel); - Asm->OutStreamer->EmitLabel(TTBaseRefLabel); + Asm->emitLabelDifferenceAsULEB128(TTBaseLabel, TTBaseRefLabel); + Asm->OutStreamer->emitLabel(TTBaseRefLabel); } bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); @@ -456,9 +456,9 @@ MCSymbol *EHStreamer::emitExceptionTable() { // Emit the landing pad call site table. 
MCSymbol *CstBeginLabel = Asm->createTempSymbol("cst_begin"); MCSymbol *CstEndLabel = Asm->createTempSymbol("cst_end"); - Asm->EmitEncodingByte(CallSiteEncoding, "Call site"); - Asm->EmitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel); - Asm->OutStreamer->EmitLabel(CstBeginLabel); + Asm->emitEncodingByte(CallSiteEncoding, "Call site"); + Asm->emitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel); + Asm->OutStreamer->emitLabel(CstBeginLabel); // SjLj / Wasm Exception handling if (IsSJLJ || IsWasm) { @@ -472,7 +472,7 @@ MCSymbol *EHStreamer::emitExceptionTable() { Asm->OutStreamer->AddComment(">> Call Site " + Twine(idx) + " <<"); Asm->OutStreamer->AddComment(" On exception at call site "+Twine(idx)); } - Asm->EmitULEB128(idx); + Asm->emitULEB128(idx); // Offset of the first associated action record, relative to the start of // the action table. This value is biased by 1 (1 indicates the start of @@ -484,7 +484,7 @@ MCSymbol *EHStreamer::emitExceptionTable() { Asm->OutStreamer->AddComment(" Action: " + Twine((S.Action - 1) / 2 + 1)); } - Asm->EmitULEB128(S.Action); + Asm->emitULEB128(S.Action); } } else { // Itanium LSDA exception handling @@ -524,23 +524,23 @@ MCSymbol *EHStreamer::emitExceptionTable() { // Offset of the call site relative to the start of the procedure. if (VerboseAsm) Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<"); - Asm->EmitCallSiteOffset(BeginLabel, EHFuncBeginSym, CallSiteEncoding); + Asm->emitCallSiteOffset(BeginLabel, EHFuncBeginSym, CallSiteEncoding); if (VerboseAsm) Asm->OutStreamer->AddComment(Twine(" Call between ") + BeginLabel->getName() + " and " + EndLabel->getName()); - Asm->EmitCallSiteOffset(EndLabel, BeginLabel, CallSiteEncoding); + Asm->emitCallSiteOffset(EndLabel, BeginLabel, CallSiteEncoding); // Offset of the landing pad relative to the start of the procedure. if (!S.LPad) { if (VerboseAsm) Asm->OutStreamer->AddComment(" has no landing pad"); - Asm->EmitCallSiteValue(0, CallSiteEncoding); + Asm->emitCallSiteValue(0, CallSiteEncoding); } else { if (VerboseAsm) Asm->OutStreamer->AddComment(Twine(" jumps to ") + S.LPad->LandingPadLabel->getName()); - Asm->EmitCallSiteOffset(S.LPad->LandingPadLabel, EHFuncBeginSym, + Asm->emitCallSiteOffset(S.LPad->LandingPadLabel, EHFuncBeginSym, CallSiteEncoding); } @@ -554,10 +554,10 @@ MCSymbol *EHStreamer::emitExceptionTable() { Asm->OutStreamer->AddComment(" On action: " + Twine((S.Action - 1) / 2 + 1)); } - Asm->EmitULEB128(S.Action); + Asm->emitULEB128(S.Action); } } - Asm->OutStreamer->EmitLabel(CstEndLabel); + Asm->OutStreamer->emitLabel(CstEndLabel); // Emit the Action Table. 
int Entry = 0; @@ -584,7 +584,7 @@ MCSymbol *EHStreamer::emitExceptionTable() { else Asm->OutStreamer->AddComment(" Cleanup"); } - Asm->EmitSLEB128(Action.ValueForTypeID); + Asm->emitSLEB128(Action.ValueForTypeID); // Action Record // @@ -598,15 +598,15 @@ MCSymbol *EHStreamer::emitExceptionTable() { Asm->OutStreamer->AddComment(" Continue to action "+Twine(NextAction)); } } - Asm->EmitSLEB128(Action.NextAction); + Asm->emitSLEB128(Action.NextAction); } if (HaveTTData) { - Asm->EmitAlignment(Align(4)); + Asm->emitAlignment(Align(4)); emitTypeInfos(TTypeEncoding, TTBaseLabel); } - Asm->EmitAlignment(Align(4)); + Asm->emitAlignment(Align(4)); return GCCETSym; } @@ -629,10 +629,10 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) { TypeInfos.rend())) { if (VerboseAsm) Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--)); - Asm->EmitTTypeReference(GV, TTypeEncoding); + Asm->emitTTypeReference(GV, TTypeEncoding); } - Asm->OutStreamer->EmitLabel(TTBaseLabel); + Asm->OutStreamer->emitLabel(TTBaseLabel); // Emit the Exception Specifications. if (VerboseAsm && !FilterIds.empty()) { @@ -649,6 +649,6 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) { Asm->OutStreamer->AddComment("FilterInfo " + Twine(Entry)); } - Asm->EmitULEB128(TypeID); + Asm->emitULEB128(TypeID); } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp index 3849644d1584..59a84e6f2d7b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -72,7 +72,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, **/ // Align to address width. - AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8)); + AP.emitAlignment(IntPtrSize == 4 ? Align(4) : Align(8)); // Emit PointCount. OS.AddComment("safe point count"); @@ -84,7 +84,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, // Emit the address of the safe point. OS.AddComment("safe point address"); MCSymbol *Label = PI->Label; - AP.EmitLabelPlusOffset(Label /*Hi*/, 0 /*Offset*/, 4 /*Size*/); + AP.emitLabelPlusOffset(Label /*Hi*/, 0 /*Offset*/, 4 /*Size*/); } // Stack information never change in safe points! Only print info from the diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index b4eda5fa8c58..8fa83f515910 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -66,8 +66,8 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) { MCSymbol *Sym = AP.OutContext.getOrCreateSymbol(TmpStr); - AP.OutStreamer->EmitSymbolAttribute(Sym, MCSA_Global); - AP.OutStreamer->EmitLabel(Sym); + AP.OutStreamer->emitSymbolAttribute(Sym, MCSA_Global); + AP.OutStreamer->emitLabel(Sym); } void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info, @@ -106,7 +106,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, EmitCamlGlobal(M, AP, "data_end"); // FIXME: Why does ocaml emit this?? 
- AP.OutStreamer->EmitIntValue(0, IntPtrSize); + AP.OutStreamer->emitIntValue(0, IntPtrSize); AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection()); EmitCamlGlobal(M, AP, "frametable"); @@ -129,7 +129,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, report_fatal_error(" Too much descriptor for ocaml GC"); } AP.emitInt16(NumDescriptors); - AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8)); + AP.emitAlignment(IntPtrSize == 4 ? Align(4) : Align(8)); for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(), IE = Info.funcinfo_end(); @@ -164,7 +164,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, Twine(LiveCount) + " >= 65536."); } - AP.OutStreamer->EmitSymbolValue(J->Label, IntPtrSize); + AP.OutStreamer->emitSymbolValue(J->Label, IntPtrSize); AP.emitInt16(FrameSize); AP.emitInt16(LiveCount); @@ -180,7 +180,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, AP.emitInt16(K->StackOffset); } - AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8)); + AP.emitAlignment(IntPtrSize == 4 ? Align(4) : Align(8)); } } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp index 444b0ed17b6d..baef4d2cc849 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp @@ -27,7 +27,7 @@ void WasmException::endModule() { Mangler::getNameWithPrefix(NameStr, "__cpp_exception", Asm->getDataLayout()); if (Asm->OutContext.lookupSymbol(NameStr)) { MCSymbol *ExceptionSym = Asm->GetExternalSymbolSymbol("__cpp_exception"); - Asm->OutStreamer->EmitLabel(ExceptionSym); + Asm->OutStreamer->emitLabel(ExceptionSym); } } @@ -58,7 +58,7 @@ void WasmException::endFunction(const MachineFunction *MF) { // end marker and set the size as the difference between the start end the end // marker. MCSymbol *LSDAEndLabel = Asm->createTempSymbol("GCC_except_table_end"); - Asm->OutStreamer->EmitLabel(LSDAEndLabel); + Asm->OutStreamer->emitLabel(LSDAEndLabel); MCContext &OutContext = Asm->OutStreamer->getContext(); const MCExpr *SizeExp = MCBinaryExpr::createSub( MCSymbolRefExpr::create(LSDAEndLabel, OutContext), diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp index 0398675577cd..cd8077e7d548 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -34,6 +34,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -203,11 +204,11 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB, // We want our funclet's entry point to be aligned such that no nops will be // present after the label. - Asm->EmitAlignment(std::max(Asm->MF->getAlignment(), MBB.getAlignment()), + Asm->emitAlignment(std::max(Asm->MF->getAlignment(), MBB.getAlignment()), &F); // Now that we've emitted the alignment directive, point at our funclet. - Asm->OutStreamer->EmitLabel(Sym); + Asm->OutStreamer->emitLabel(Sym); } // Mark 'Sym' as starting our funclet. 
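The EHStreamer hunks above route the LSDA action table through emitSLEB128: type filter IDs can be negative (negative means an exception-specification filter, zero a cleanup), so the signed LEB form is required, unlike the ULEB call-site fields. A minimal sketch of that codec, assuming the usual arithmetic right shift for signed integers:

#include <cstdint>
#include <vector>

static void emitSLEB128(std::vector<uint8_t> &Out, int64_t V) {
  bool More;
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7; // arithmetic shift preserves the sign bit
    More = !((V == 0 && !(Byte & 0x40)) || (V == -1 && (Byte & 0x40)));
    if (More)
      Byte |= 0x80; // continuation bit
    Out.push_back(Byte);
  } while (More);
}

int main() {
  std::vector<uint8_t> B;
  emitSLEB128(B, -1); // a filter ID: encodes as the single byte 0x7f
  emitSLEB128(B, 2);  // a small positive action offset: single byte 0x02
  return B.size() == 2 ? 0 : 1;
}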
@@ -276,7 +277,7 @@ void WinException::endFuncletImpl() { StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName()); MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol( Twine("$cppxdata$", FuncLinkageName)); - Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4); + Asm->OutStreamer->emitValue(create32bitRef(FuncInfoXData), 4); } else if (Per == EHPersonality::MSVC_Win64SEH && MF->hasEHFunclets() && !CurrentFuncletEntry->isEHFuncletEntry()) { // If this is the parent function in Win64 SEH, emit the LSDA immediately @@ -336,7 +337,7 @@ const MCExpr *WinException::getOffsetPlusOne(const MCSymbol *OffsetOf, int WinException::getFrameIndexOffset(int FrameIndex, const WinEHFuncInfo &FuncInfo) { const TargetFrameLowering &TFI = *Asm->MF->getSubtarget().getFrameLowering(); - unsigned UnusedReg; + Register UnusedReg; if (Asm->MAI->usesWindowsCFI()) { int Offset = TFI.getFrameIndexReferencePreferSP(*Asm->MF, FrameIndex, UnusedReg, @@ -566,7 +567,7 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) { Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); const MCExpr *MCOffset = MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx); - Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset); + Asm->OutStreamer->emitAssignment(ParentFrameOffset, MCOffset); } // Use the assembler to compute the number of table entries through label @@ -579,9 +580,9 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) { const MCExpr *EntrySize = MCConstantExpr::create(16, Ctx); const MCExpr *EntryCount = MCBinaryExpr::createDiv(LabelDiff, EntrySize, Ctx); AddComment("Number of call sites"); - OS.EmitValue(EntryCount, 4); + OS.emitValue(EntryCount, 4); - OS.EmitLabel(TableBegin); + OS.emitLabel(TableBegin); // Iterate over all the invoke try ranges. Unlike MSVC, LLVM currently only // models exceptions from invokes. LLVM also allows arbitrary reordering of @@ -609,7 +610,7 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) { LastEHState = StateChange.NewState; } - OS.EmitLabel(TableEnd); + OS.emitLabel(TableEnd); } void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo, @@ -641,14 +642,14 @@ void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo, } AddComment("LabelStart"); - OS.EmitValue(getLabel(BeginLabel), 4); + OS.emitValue(getLabel(BeginLabel), 4); AddComment("LabelEnd"); - OS.EmitValue(getLabel(EndLabel), 4); + OS.emitValue(getLabel(EndLabel), 4); AddComment(UME.IsFinally ? "FinallyFunclet" : UME.Filter ? "FilterFunction" : "CatchAll"); - OS.EmitValue(FilterOrFinally, 4); + OS.emitValue(FilterOrFinally, 4); AddComment(UME.IsFinally ? "Null" : "ExceptionHandler"); - OS.EmitValue(ExceptOrNull, 4); + OS.emitValue(ExceptOrNull, 4); assert(UME.ToState < State && "states should decrease"); State = UME.ToState; @@ -713,55 +714,55 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { // EHFlags & 1 -> Synchronous exceptions only, no async exceptions. // EHFlags & 2 -> ??? // EHFlags & 4 -> The function is noexcept(true), unwinding can't continue. 
- OS.EmitValueToAlignment(4); - OS.EmitLabel(FuncInfoXData); + OS.emitValueToAlignment(4); + OS.emitLabel(FuncInfoXData); AddComment("MagicNumber"); - OS.EmitIntValue(0x19930522, 4); + OS.emitInt32(0x19930522); AddComment("MaxState"); - OS.EmitIntValue(FuncInfo.CxxUnwindMap.size(), 4); + OS.emitInt32(FuncInfo.CxxUnwindMap.size()); AddComment("UnwindMap"); - OS.EmitValue(create32bitRef(UnwindMapXData), 4); + OS.emitValue(create32bitRef(UnwindMapXData), 4); AddComment("NumTryBlocks"); - OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4); + OS.emitInt32(FuncInfo.TryBlockMap.size()); AddComment("TryBlockMap"); - OS.EmitValue(create32bitRef(TryBlockMapXData), 4); + OS.emitValue(create32bitRef(TryBlockMapXData), 4); AddComment("IPMapEntries"); - OS.EmitIntValue(IPToStateTable.size(), 4); + OS.emitInt32(IPToStateTable.size()); AddComment("IPToStateXData"); - OS.EmitValue(create32bitRef(IPToStateXData), 4); + OS.emitValue(create32bitRef(IPToStateXData), 4); if (Asm->MAI->usesWindowsCFI()) { AddComment("UnwindHelp"); - OS.EmitIntValue(UnwindHelpOffset, 4); + OS.emitInt32(UnwindHelpOffset); } AddComment("ESTypeList"); - OS.EmitIntValue(0, 4); + OS.emitInt32(0); AddComment("EHFlags"); - OS.EmitIntValue(1, 4); + OS.emitInt32(1); // UnwindMapEntry { // int32_t ToState; // void (*Action)(); // }; if (UnwindMapXData) { - OS.EmitLabel(UnwindMapXData); + OS.emitLabel(UnwindMapXData); for (const CxxUnwindMapEntry &UME : FuncInfo.CxxUnwindMap) { MCSymbol *CleanupSym = getMCSymbolForMBB(Asm, UME.Cleanup.dyn_cast<MachineBasicBlock *>()); AddComment("ToState"); - OS.EmitIntValue(UME.ToState, 4); + OS.emitInt32(UME.ToState); AddComment("Action"); - OS.EmitValue(create32bitRef(CleanupSym), 4); + OS.emitValue(create32bitRef(CleanupSym), 4); } } @@ -773,7 +774,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { // HandlerType *HandlerArray; // }; if (TryBlockMapXData) { - OS.EmitLabel(TryBlockMapXData); + OS.emitLabel(TryBlockMapXData); SmallVector<MCSymbol *, 1> HandlerMaps; for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) { const WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; @@ -795,19 +796,19 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { "bad trymap interval"); AddComment("TryLow"); - OS.EmitIntValue(TBME.TryLow, 4); + OS.emitInt32(TBME.TryLow); AddComment("TryHigh"); - OS.EmitIntValue(TBME.TryHigh, 4); + OS.emitInt32(TBME.TryHigh); AddComment("CatchHigh"); - OS.EmitIntValue(TBME.CatchHigh, 4); + OS.emitInt32(TBME.CatchHigh); AddComment("NumCatches"); - OS.EmitIntValue(TBME.HandlerArray.size(), 4); + OS.emitInt32(TBME.HandlerArray.size()); AddComment("HandlerArray"); - OS.EmitValue(create32bitRef(HandlerMapXData), 4); + OS.emitValue(create32bitRef(HandlerMapXData), 4); } // All funclets use the same parent frame offset currently. @@ -829,7 +830,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { // void (*Handler)(); // int32_t ParentFrameOffset; // x64 and AArch64 only // }; - OS.EmitLabel(HandlerMapXData); + OS.emitLabel(HandlerMapXData); for (const WinEHHandlerType &HT : TBME.HandlerArray) { // Get the frame escape label with the offset of the catch object. 
If // the index is INT_MAX, then there is no catch object, and we should @@ -847,20 +848,20 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { getMCSymbolForMBB(Asm, HT.Handler.dyn_cast<MachineBasicBlock *>()); AddComment("Adjectives"); - OS.EmitIntValue(HT.Adjectives, 4); + OS.emitInt32(HT.Adjectives); AddComment("Type"); - OS.EmitValue(create32bitRef(HT.TypeDescriptor), 4); + OS.emitValue(create32bitRef(HT.TypeDescriptor), 4); AddComment("CatchObjOffset"); - OS.EmitValue(FrameAllocOffsetRef, 4); + OS.emitValue(FrameAllocOffsetRef, 4); AddComment("Handler"); - OS.EmitValue(create32bitRef(HandlerSym), 4); + OS.emitValue(create32bitRef(HandlerSym), 4); if (shouldEmitPersonality) { AddComment("ParentFrameOffset"); - OS.EmitIntValue(ParentFrameOffset, 4); + OS.emitInt32(ParentFrameOffset); } } } @@ -871,12 +872,12 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { // int32_t State; // }; if (IPToStateXData) { - OS.EmitLabel(IPToStateXData); + OS.emitLabel(IPToStateXData); for (auto &IPStatePair : IPToStateTable) { AddComment("IP"); - OS.EmitValue(IPStatePair.first, 4); + OS.emitValue(IPStatePair.first, 4); AddComment("ToState"); - OS.EmitIntValue(IPStatePair.second, 4); + OS.emitInt32(IPStatePair.second); } } } @@ -956,7 +957,7 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo, MCContext &Ctx = Asm->OutContext; MCSymbol *ParentFrameOffset = Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); - Asm->OutStreamer->EmitAssignment(ParentFrameOffset, + Asm->OutStreamer->emitAssignment(ParentFrameOffset, MCConstantExpr::create(Offset, Ctx)); } @@ -979,8 +980,8 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { // Emit the __ehtable label that we use for llvm.x86.seh.lsda. MCSymbol *LSDALabel = Asm->OutContext.getOrCreateLSDASymbol(FLinkageName); - OS.EmitValueToAlignment(4); - OS.EmitLabel(LSDALabel); + OS.emitValueToAlignment(4); + OS.emitLabel(LSDALabel); const auto *Per = cast<Function>(F.getPersonalityFn()->stripPointerCasts()); StringRef PerName = Per->getName(); @@ -1011,7 +1012,7 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { int GSCookieOffset = -2; const MachineFrameInfo &MFI = MF->getFrameInfo(); if (MFI.hasStackProtectorIndex()) { - unsigned UnusedReg; + Register UnusedReg; const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); int SSPIdx = MFI.getStackProtectorIndex(); GSCookieOffset = TFI->getFrameIndexReference(*MF, SSPIdx, UnusedReg); @@ -1021,20 +1022,20 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { // TODO(etienneb): Get rid of this value and change it for and assertion. int EHCookieOffset = 9999; if (FuncInfo.EHGuardFrameIndex != INT_MAX) { - unsigned UnusedReg; + Register UnusedReg; const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); int EHGuardIdx = FuncInfo.EHGuardFrameIndex; EHCookieOffset = TFI->getFrameIndexReference(*MF, EHGuardIdx, UnusedReg); } AddComment("GSCookieOffset"); - OS.EmitIntValue(GSCookieOffset, 4); + OS.emitInt32(GSCookieOffset); AddComment("GSCookieXOROffset"); - OS.EmitIntValue(0, 4); + OS.emitInt32(0); AddComment("EHCookieOffset"); - OS.EmitIntValue(EHCookieOffset, 4); + OS.emitInt32(EHCookieOffset); AddComment("EHCookieXOROffset"); - OS.EmitIntValue(0, 4); + OS.emitInt32(0); BaseState = -2; } @@ -1047,11 +1048,11 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { // _except_handler4 it's -2. Do that replacement here if necessary. 
int ToState = UME.ToState == -1 ? BaseState : UME.ToState; AddComment("ToState"); - OS.EmitIntValue(ToState, 4); + OS.emitInt32(ToState); AddComment(UME.IsFinally ? "Null" : "FilterFunction"); - OS.EmitValue(create32bitRef(UME.Filter), 4); + OS.emitValue(create32bitRef(UME.Filter), 4); AddComment(UME.IsFinally ? "FinallyFunclet" : "ExceptionHandler"); - OS.EmitValue(create32bitRef(ExceptOrFinally), 4); + OS.emitValue(create32bitRef(ExceptOrFinally), 4); } } @@ -1124,9 +1125,9 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) { // Write out a sentinel indicating the end of the standard (Windows) xdata // and the start of the additional (CLR) info. - OS.EmitIntValue(0xffffffff, 4); + OS.emitInt32(0xffffffff); // Write out the number of funclets - OS.EmitIntValue(NumStates, 4); + OS.emitInt32(NumStates); // Walk the machine blocks/instrs, computing and emitting a few things: // 1. Emit a list of the offsets to each handler entry, in lexical order. @@ -1164,7 +1165,7 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) { } // Emit the function/funclet end and, if this is a funclet (and not the // root function), record it in the EndSymbolMap. - OS.EmitValue(getOffset(EndSymbol, FuncBeginSym), 4); + OS.emitValue(getOffset(EndSymbol, FuncBeginSym), 4); if (FuncletState != NullState) { // Record the end of the handler. EndSymbolMap[FuncletState] = EndSymbol; @@ -1217,7 +1218,7 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) { } // Now emit the clause info, starting with the number of clauses. - OS.EmitIntValue(Clauses.size(), 4); + OS.emitInt32(Clauses.size()); for (ClrClause &Clause : Clauses) { // Emit a CORINFO_EH_CLAUSE : /* @@ -1299,18 +1300,18 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) { assert(Clause.EnclosingState > MinClauseMap[Clause.State]); Flags |= 8; } - OS.EmitIntValue(Flags, 4); + OS.emitInt32(Flags); // Write the clause start/end - OS.EmitValue(ClauseBegin, 4); - OS.EmitValue(ClauseEnd, 4); + OS.emitValue(ClauseBegin, 4); + OS.emitValue(ClauseEnd, 4); // Write out the handler start/end - OS.EmitValue(HandlerBegin, 4); - OS.EmitValue(HandlerEnd, 4); + OS.emitValue(HandlerBegin, 4); + OS.emitValue(HandlerEnd, 4); // Write out the type token or filter offset assert(Entry.HandlerType != ClrHandlerType::Filter && "NYI: filters"); - OS.EmitIntValue(Entry.TypeToken, 4); + OS.emitInt32(Entry.TypeToken); } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.h index dc5036302131..8bd5d1bc6d2a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.h @@ -16,12 +16,10 @@ #include "EHStreamer.h" namespace llvm { -class Function; class GlobalValue; class MachineFunction; class MCExpr; class MCSection; -class Value; struct WinEHFuncInfo; class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp index 37a50cde6391..a5030305435c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -89,7 +89,7 @@ namespace { AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind); AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI); - void expandPartwordCmpXchg(AtomicCmpXchgInst *I); + bool 
expandPartwordCmpXchg(AtomicCmpXchgInst *I); void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI); void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI); @@ -105,7 +105,7 @@ namespace { bool isIdempotentRMW(AtomicRMWInst *RMWI); bool simplifyIdempotentRMW(AtomicRMWInst *RMWI); - bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, unsigned Align, + bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand, Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering, AtomicOrdering Ordering2, @@ -152,47 +152,15 @@ static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) { return DL.getTypeStoreSize(CASI->getCompareOperand()->getType()); } -// Helper functions to retrieve the alignment of atomic instructions. -static unsigned getAtomicOpAlign(LoadInst *LI) { - unsigned Align = LI->getAlignment(); - // In the future, if this IR restriction is relaxed, we should - // return DataLayout::getABITypeAlignment when there's no align - // value. - assert(Align != 0 && "An atomic LoadInst always has an explicit alignment"); - return Align; -} - -static unsigned getAtomicOpAlign(StoreInst *SI) { - unsigned Align = SI->getAlignment(); - // In the future, if this IR restriction is relaxed, we should - // return DataLayout::getABITypeAlignment when there's no align - // value. - assert(Align != 0 && "An atomic StoreInst always has an explicit alignment"); - return Align; -} - -static unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) { - // TODO(PR27168): This instruction has no alignment attribute, but unlike the - // default alignment for load/store, the default here is to assume - // it has NATURAL alignment, not DataLayout-specified alignment. - const DataLayout &DL = RMWI->getModule()->getDataLayout(); - return DL.getTypeStoreSize(RMWI->getValOperand()->getType()); -} - -static unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) { - // TODO(PR27168): same comment as above. - const DataLayout &DL = CASI->getModule()->getDataLayout(); - return DL.getTypeStoreSize(CASI->getCompareOperand()->getType()); -} - // Determine if a particular atomic operation has a supported size, // and is of appropriate alignment, to be passed through for target // lowering. 
(Versus turning into a __atomic libcall) template <typename Inst> static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) { unsigned Size = getAtomicOpSize(I); - unsigned Align = getAtomicOpAlign(I); - return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8; + Align Alignment = I->getAlign(); + return Alignment >= Size && + Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8; } bool AtomicExpand::runOnFunction(Function &F) { @@ -383,7 +351,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { Value *NewAddr = Builder.CreateBitCast(Addr, PT); auto *NewLI = Builder.CreateLoad(NewTy, NewAddr); - NewLI->setAlignment(MaybeAlign(LI->getAlignment())); + NewLI->setAlignment(LI->getAlign()); NewLI->setVolatile(LI->isVolatile()); NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID()); LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n"); @@ -470,7 +438,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { Value *NewAddr = Builder.CreateBitCast(Addr, PT); StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr); - NewSI->setAlignment(MaybeAlign(SI->getAlignment())); + NewSI->setAlignment(SI->getAlign()); NewSI->setVolatile(SI->isVolatile()); NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID()); LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n"); @@ -570,8 +538,8 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; unsigned ValueSize = getAtomicOpSize(AI); if (ValueSize < MinCASSize) { - llvm_unreachable( - "MinCmpXchgSizeInBits not yet supported for LL/SC architectures."); + expandPartwordAtomicRMW(AI, + TargetLoweringBase::AtomicExpansionKind::LLSC); } else { auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) { return performAtomicOp(AI->getOperation(), Builder, Loaded, @@ -608,16 +576,43 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { namespace { -/// Result values from createMaskInstrs helper. struct PartwordMaskValues { - Type *WordType; - Type *ValueType; - Value *AlignedAddr; - Value *ShiftAmt; - Value *Mask; - Value *Inv_Mask; + // These three fields are guaranteed to be set by createMaskInstrs. + Type *WordType = nullptr; + Type *ValueType = nullptr; + Value *AlignedAddr = nullptr; + // The remaining fields can be null. + Value *ShiftAmt = nullptr; + Value *Mask = nullptr; + Value *Inv_Mask = nullptr; }; +LLVM_ATTRIBUTE_UNUSED +raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) { + auto PrintObj = [&O](auto *V) { + if (V) + O << *V; + else + O << "nullptr"; + O << '\n'; + }; + O << "PartwordMaskValues {\n"; + O << " WordType: "; + PrintObj(PMV.WordType); + O << " ValueType: "; + PrintObj(PMV.ValueType); + O << " AlignedAddr: "; + PrintObj(PMV.AlignedAddr); + O << " ShiftAmt: "; + PrintObj(PMV.ShiftAmt); + O << " Mask: "; + PrintObj(PMV.Mask); + O << " Inv_Mask: "; + PrintObj(PMV.Inv_Mask); + O << "}\n"; + return O; +} + } // end anonymous namespace /// This is a helper function which builds instructions to provide @@ -638,48 +633,74 @@ struct PartwordMaskValues { /// Inv_Mask: The inverse of Mask. 
static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I, Type *ValueType, Value *Addr, - unsigned WordSize) { - PartwordMaskValues Ret; + unsigned MinWordSize) { + PartwordMaskValues PMV; - BasicBlock *BB = I->getParent(); - Function *F = BB->getParent(); Module *M = I->getModule(); - - LLVMContext &Ctx = F->getContext(); + LLVMContext &Ctx = M->getContext(); const DataLayout &DL = M->getDataLayout(); - unsigned ValueSize = DL.getTypeStoreSize(ValueType); - assert(ValueSize < WordSize); + PMV.ValueType = ValueType; + PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8) + : ValueType; + if (PMV.ValueType == PMV.WordType) { + PMV.AlignedAddr = Addr; + return PMV; + } - Ret.ValueType = ValueType; - Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8); + assert(ValueSize < MinWordSize); Type *WordPtrType = - Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace()); + PMV.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace()); Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx)); - Ret.AlignedAddr = Builder.CreateIntToPtr( - Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType, + PMV.AlignedAddr = Builder.CreateIntToPtr( + Builder.CreateAnd(AddrInt, ~(uint64_t)(MinWordSize - 1)), WordPtrType, "AlignedAddr"); - Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB"); + Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB"); if (DL.isLittleEndian()) { // turn bytes into bits - Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3); + PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3); } else { // turn bytes into bits, and count from the other side. - Ret.ShiftAmt = - Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3); + PMV.ShiftAmt = Builder.CreateShl( + Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3); } - Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt"); - Ret.Mask = Builder.CreateShl( - ConstantInt::get(Ret.WordType, (1 << (ValueSize * 8)) - 1), Ret.ShiftAmt, + PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt"); + PMV.Mask = Builder.CreateShl( + ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt, "Mask"); - Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask"); + PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask"); + return PMV; +} + +static Value *extractMaskedValue(IRBuilder<> &Builder, Value *WideWord, + const PartwordMaskValues &PMV) { + assert(WideWord->getType() == PMV.WordType && "Widened type mismatch"); + if (PMV.WordType == PMV.ValueType) + return WideWord; + + Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted"); + Value *Trunc = Builder.CreateTrunc(Shift, PMV.ValueType, "extracted"); + return Trunc; +} - return Ret; +static Value *insertMaskedValue(IRBuilder<> &Builder, Value *WideWord, + Value *Updated, const PartwordMaskValues &PMV) { + assert(WideWord->getType() == PMV.WordType && "Widened type mismatch"); + assert(Updated->getType() == PMV.ValueType && "Value type mismatch"); + if (PMV.WordType == PMV.ValueType) + return Updated; + + Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended"); + Value *Shift = + Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true); + Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked"); + Value *Or = Builder.CreateOr(And, Shift, "inserted"); + return Or; } /// Emit IR to implement a masked version of a given atomicrmw @@ -719,13 +740,9 @@ static Value 
*performMaskedAtomicOp(AtomicRMWInst::BinOp Op, // Finally, comparison ops will operate on the full value, so // truncate down to the original size, and expand out again after // doing the operation. - Value *Loaded_Shiftdown = Builder.CreateTrunc( - Builder.CreateLShr(Loaded, PMV.ShiftAmt), PMV.ValueType); - Value *NewVal = performAtomicOp(Op, Builder, Loaded_Shiftdown, Inc); - Value *NewVal_Shiftup = Builder.CreateShl( - Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt); - Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask); - Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shiftup); + Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV); + Value *NewVal = performAtomicOp(Op, Builder, Loaded_Extract, Inc); + Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV); return FinalVal; } default: @@ -738,12 +755,10 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op, /// /// It will create an LL/SC or cmpxchg loop, as appropriate, the same /// way as a typical atomicrmw expansion. The only difference here is -/// that the operation inside of the loop must operate only upon a +/// that the operation inside of the loop may operate upon only a /// part of the value. void AtomicExpand::expandPartwordAtomicRMW( AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) { - assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg); - AtomicOrdering MemOpOrder = AI->getOrdering(); IRBuilder<> Builder(AI); @@ -761,13 +776,18 @@ void AtomicExpand::expandPartwordAtomicRMW( ValOperand_Shifted, AI->getValOperand(), PMV); }; - // TODO: When we're ready to support LLSC conversions too, use - // insertRMWLLSCLoop here for ExpansionKind==LLSC. - Value *OldResult = - insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder, - PerformPartwordOp, createCmpXchgInstFun); - Value *FinalOldResult = Builder.CreateTrunc( - Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType); + Value *OldResult; + if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) { + OldResult = + insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder, + PerformPartwordOp, createCmpXchgInstFun); + } else { + assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC); + OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr, + MemOpOrder, PerformPartwordOp); + } + + Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV); AI->replaceAllUsesWith(FinalOldResult); AI->eraseFromParent(); } @@ -800,14 +820,13 @@ AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) { AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, NewOperand, AI->getOrdering()); - Value *FinalOldResult = Builder.CreateTrunc( - Builder.CreateLShr(NewAI, PMV.ShiftAmt), PMV.ValueType); + Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV); AI->replaceAllUsesWith(FinalOldResult); AI->eraseFromParent(); return NewAI; } -void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { +bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { // The basic idea here is that we're expanding a cmpxchg of a // smaller memory size up to a word-sized cmpxchg. 
To do this, we // need to add a retry-loop for strong cmpxchg, so that @@ -923,14 +942,14 @@ void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { // partword.cmpxchg.end: Builder.SetInsertPoint(CI); - Value *FinalOldVal = Builder.CreateTrunc( - Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType); + Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV); Value *Res = UndefValue::get(CI->getType()); Res = Builder.CreateInsertValue(Res, FinalOldVal, 0); Res = Builder.CreateInsertValue(Res, Success, 1); CI->replaceAllUsesWith(Res); CI->eraseFromParent(); + return true; } void AtomicExpand::expandAtomicOpToLLSC( @@ -965,8 +984,7 @@ void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) { Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic( Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt, AI->getOrdering()); - Value *FinalOldResult = Builder.CreateTrunc( - Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType); + Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV); AI->replaceAllUsesWith(FinalOldResult); AI->eraseFromParent(); } @@ -987,9 +1005,7 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) { Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic( Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask, CI->getSuccessOrdering()); - Value *FinalOldVal = Builder.CreateTrunc( - Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType); - + Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV); Value *Res = UndefValue::get(CI->getType()); Res = Builder.CreateInsertValue(Res, FinalOldVal, 0); Value *Success = Builder.CreateICmpEQ( @@ -1126,24 +1142,28 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // // The full expansion we produce is: // [...] + // %aligned.addr = ... // cmpxchg.start: - // %unreleasedload = @load.linked(%addr) - // %should_store = icmp eq %unreleasedload, %desired - // br i1 %should_store, label %cmpxchg.fencedstore, + // %unreleasedload = @load.linked(%aligned.addr) + // %unreleasedload.extract = extract value from %unreleasedload + // %should_store = icmp eq %unreleasedload.extract, %desired + // br i1 %should_store, label %cmpxchg.releasingstore, // label %cmpxchg.nostore // cmpxchg.releasingstore: // fence? // br label cmpxchg.trystore // cmpxchg.trystore: - // %loaded.trystore = phi [%unreleasedload, %releasingstore], + // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore], // [%releasedload, %cmpxchg.releasedload] - // %stored = @store_conditional(%new, %addr) + // %updated.new = insert %new into %loaded.trystore + // %stored = @store_conditional(%updated.new, %aligned.addr) // %success = icmp eq i32 %stored, 0 // br i1 %success, label %cmpxchg.success, // label %cmpxchg.releasedload/%cmpxchg.failure // cmpxchg.releasedload: - // %releasedload = @load.linked(%addr) - // %should_store = icmp eq %releasedload, %desired + // %releasedload = @load.linked(%aligned.addr) + // %releasedload.extract = extract value from %releasedload + // %should_store = icmp eq %releasedload.extract, %desired // br i1 %should_store, label %cmpxchg.trystore, // label %cmpxchg.failure // cmpxchg.success: @@ -1159,9 +1179,10 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // fence? 
// br label %cmpxchg.end // cmpxchg.end: - // %loaded = phi [%loaded.nostore, %cmpxchg.failure], - // [%loaded.trystore, %cmpxchg.trystore] + // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure], + // [%loaded.trystore, %cmpxchg.trystore] // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure] + // %loaded = extract value from %loaded.exit // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 // [...] @@ -1187,13 +1208,20 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.SetInsertPoint(BB); if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier) TLI->emitLeadingFence(Builder, CI, SuccessOrder); + + PartwordMaskValues PMV = + createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr, + TLI->getMinCmpXchgSizeInBits() / 8); Builder.CreateBr(StartBB); // Start the main loop block now that we've taken care of the preliminaries. Builder.SetInsertPoint(StartBB); - Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + Value *UnreleasedLoad = + TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder); + Value *UnreleasedLoadExtract = + extractMaskedValue(Builder, UnreleasedLoad, PMV); Value *ShouldStore = Builder.CreateICmpEQ( - UnreleasedLoad, CI->getCompareOperand(), "should_store"); + UnreleasedLoadExtract, CI->getCompareOperand(), "should_store"); // If the cmpxchg doesn't actually need any ordering when it fails, we can // jump straight past that fence instruction (if it exists). @@ -1205,8 +1233,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.CreateBr(TryStoreBB); Builder.SetInsertPoint(TryStoreBB); - Value *StoreSuccess = TLI->emitStoreConditional( - Builder, CI->getNewValOperand(), Addr, MemOpOrder); + PHINode *LoadedTryStore = + Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore"); + LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB); + Value *NewValueInsert = + insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV); + Value *StoreSuccess = + TLI->emitStoreConditional(Builder, NewValueInsert, Addr, MemOpOrder); StoreSuccess = Builder.CreateICmpEQ( StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB; @@ -1216,13 +1249,16 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.SetInsertPoint(ReleasedLoadBB); Value *SecondLoad; if (HasReleasedLoadBB) { - SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); - ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(), - "should_store"); + SecondLoad = TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder); + Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV); + ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract, + CI->getCompareOperand(), "should_store"); // If the cmpxchg doesn't actually need any ordering when it fails, we can // jump straight past that fence instruction (if it exists). Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB); + // Update PHI node in TryStoreBB. 
+ LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB); } else Builder.CreateUnreachable(); @@ -1234,6 +1270,12 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.CreateBr(ExitBB); Builder.SetInsertPoint(NoStoreBB); + PHINode *LoadedNoStore = + Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore"); + LoadedNoStore->addIncoming(UnreleasedLoad, StartBB); + if (HasReleasedLoadBB) + LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB); + // In the failing case, where we don't execute the store-conditional, the // target might want to balance out the load-linked with a dedicated // instruction (e.g., on ARM, clearing the exclusive monitor). @@ -1241,6 +1283,11 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.CreateBr(FailureBB); Builder.SetInsertPoint(FailureBB); + PHINode *LoadedFailure = + Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure"); + LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB); + if (CI->isWeak()) + LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB); if (ShouldInsertFencesForAtomic) TLI->emitTrailingFence(Builder, CI, FailureOrder); Builder.CreateBr(ExitBB); @@ -1250,32 +1297,20 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate // PHI. Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2); + PHINode *LoadedExit = + Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit"); + LoadedExit->addIncoming(LoadedTryStore, SuccessBB); + LoadedExit->addIncoming(LoadedFailure, FailureBB); + PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success"); Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); - // Setup the builder so we can create any PHIs we need. - Value *Loaded; - if (!HasReleasedLoadBB) - Loaded = UnreleasedLoad; - else { - Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin()); - PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2); - TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB); - TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB); - - Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin()); - PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2); - NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB); - NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB); - - Builder.SetInsertPoint(ExitBB, ++ExitBB->begin()); - PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2); - ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB); - ExitLoaded->addIncoming(NoStoreLoaded, FailureBB); - - Loaded = ExitLoaded; - } + // This is the "exit value" from the cmpxchg expansion. It may be of + // a type wider than the one in the cmpxchg instruction. + Value *LoadedFull = LoadedExit; + + Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator())); + Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV); // Look for any users of the cmpxchg that are just comparing the loaded value // against the desired one, and replace them with the CFG-derived version. @@ -1377,7 +1412,7 @@ Value *AtomicExpand::insertRMWCmpXchgLoop( Builder.SetInsertPoint(BB); LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr); // Atomics require at least natural alignment. 
- InitLoaded->setAlignment(MaybeAlign(ResultTy->getPrimitiveSizeInBits() / 8)); + InitLoaded->setAlignment(Align(ResultTy->getPrimitiveSizeInBits() / 8)); Builder.CreateBr(LoopBB); // Start the main loop block now that we've taken care of the preliminaries. @@ -1414,11 +1449,9 @@ bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) { llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg"); case TargetLoweringBase::AtomicExpansionKind::None: if (ValueSize < MinCASSize) - expandPartwordCmpXchg(CI); + return expandPartwordCmpXchg(CI); return false; case TargetLoweringBase::AtomicExpansionKind::LLSC: { - assert(ValueSize >= MinCASSize && - "MinCmpXchgSizeInBits not yet supported for LL/SC expansions."); return expandAtomicCmpXchg(CI); } case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: @@ -1449,7 +1482,7 @@ bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, // must be one of the potentially-specialized sizes, and the value // type must actually exist in C on the target (otherwise, the // function wouldn't actually be defined.) -static bool canUseSizedAtomicCall(unsigned Size, unsigned Align, +static bool canUseSizedAtomicCall(unsigned Size, Align Alignment, const DataLayout &DL) { // TODO: "LargestSize" is an approximation for "largest type that // you can express in C". It seems to be the case that int128 is @@ -1459,7 +1492,7 @@ static bool canUseSizedAtomicCall(unsigned Size, unsigned Align, // really be some more reliable way in LLVM of determining integer // sizes which are valid in the target's C ABI... unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8; - return Align >= Size && + return Alignment >= Size && (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) && Size <= LargestSize; } @@ -1469,10 +1502,9 @@ void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) { RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2, RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16}; unsigned Size = getAtomicOpSize(I); - unsigned Align = getAtomicOpAlign(I); bool expanded = expandAtomicOpToLibcall( - I, Size, Align, I->getPointerOperand(), nullptr, nullptr, + I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); (void)expanded; assert(expanded && "expandAtomicOpToLibcall shouldn't fail tor Load"); @@ -1483,11 +1515,10 @@ void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) { RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2, RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16}; unsigned Size = getAtomicOpSize(I); - unsigned Align = getAtomicOpAlign(I); bool expanded = expandAtomicOpToLibcall( - I, Size, Align, I->getPointerOperand(), I->getValueOperand(), nullptr, - I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); + I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(), + nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); (void)expanded; assert(expanded && "expandAtomicOpToLibcall shouldn't fail tor Store"); } @@ -1498,10 +1529,9 @@ void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) { RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4, RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16}; unsigned Size = getAtomicOpSize(I); - unsigned Align = getAtomicOpAlign(I); bool expanded = expandAtomicOpToLibcall( - I, Size, Align, I->getPointerOperand(), I->getNewValOperand(), + I, Size, I->getAlign(), I->getPointerOperand(), 
I->getNewValOperand(), I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(), Libcalls); (void)expanded; @@ -1571,13 +1601,12 @@ void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) { ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation()); unsigned Size = getAtomicOpSize(I); - unsigned Align = getAtomicOpAlign(I); bool Success = false; if (!Libcalls.empty()) Success = expandAtomicOpToLibcall( - I, Size, Align, I->getPointerOperand(), I->getValOperand(), nullptr, - I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); + I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(), + nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); // The expansion failed: either there were no libcalls at all for // the operation (min/max), or there were only size-specialized @@ -1608,7 +1637,7 @@ void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) { // 'I' are extracted from the Instruction subclass by the // caller. Depending on the particular call, some will be null. bool AtomicExpand::expandAtomicOpToLibcall( - Instruction *I, unsigned Size, unsigned Align, Value *PointerOperand, + Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand, Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering, AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) { assert(Libcalls.size() == 6); @@ -1619,10 +1648,10 @@ bool AtomicExpand::expandAtomicOpToLibcall( IRBuilder<> Builder(I); IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front()); - bool UseSizedLibcall = canUseSizedAtomicCall(Size, Align, DL); + bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL); Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8); - unsigned AllocaAlignment = DL.getPrefTypeAlignment(SizedIntTy); + const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy); // TODO: the "order" argument type is "int", not int32. So // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints. @@ -1712,7 +1741,7 @@ bool AtomicExpand::expandAtomicOpToLibcall( // 'expected' argument, if present. if (CASExpected) { AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType()); - AllocaCASExpected->setAlignment(MaybeAlign(AllocaAlignment)); + AllocaCASExpected->setAlignment(AllocaAlignment); unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace(); AllocaCASExpected_i8 = @@ -1731,7 +1760,7 @@ bool AtomicExpand::expandAtomicOpToLibcall( Args.push_back(IntValue); } else { AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType()); - AllocaValue->setAlignment(MaybeAlign(AllocaAlignment)); + AllocaValue->setAlignment(AllocaAlignment); AllocaValue_i8 = Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx)); Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64); @@ -1743,7 +1772,7 @@ bool AtomicExpand::expandAtomicOpToLibcall( // 'ret' argument. 
if (!CASExpected && HasResult && !UseSizedLibcall) { AllocaResult = AllocaBuilder.CreateAlloca(I->getType()); - AllocaResult->setAlignment(MaybeAlign(AllocaAlignment)); + AllocaResult->setAlignment(AllocaAlignment); unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace(); AllocaResult_i8 = Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS)); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BBSectionsPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BBSectionsPrepare.cpp new file mode 100644 index 000000000000..a35c4d813acc --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/BBSectionsPrepare.cpp @@ -0,0 +1,457 @@ +//===-- BBSectionsPrepare.cpp ---=========---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// BBSectionsPrepare implementation. +// +// The purpose of this pass is to assign sections to basic blocks when +// -fbasic-block-sections= option is used. Further, with profile information +// only the subset of basic blocks with profiles are placed in separate sections +// and the rest are grouped in a cold section. The exception handling blocks are +// treated specially to ensure they are all in one seciton. +// +// Basic Block Sections +// ==================== +// +// With option, -fbasic-block-sections=list, every function may be split into +// clusters of basic blocks. Every cluster will be emitted into a separate +// section with its basic blocks sequenced in the given order. To get the +// optimized performance, the clusters must form an optimal BB layout for the +// function. Every cluster's section is labeled with a symbol to allow the +// linker to reorder the sections in any arbitrary sequence. A global order of +// these sections would encapsulate the function layout. +// +// There are a couple of challenges to be addressed: +// +// 1. The last basic block of every cluster should not have any implicit +// fallthrough to its next basic block, as it can be reordered by the linker. +// The compiler should make these fallthroughs explicit by adding +// unconditional jumps.. +// +// 2. All inter-cluster branch targets would now need to be resolved by the +// linker as they cannot be calculated during compile time. This is done +// using static relocations. Further, the compiler tries to use short branch +// instructions on some ISAs for small branch offsets. This is not possible +// for inter-cluster branches as the offset is not determined at compile +// time, and therefore, long branch instructions have to be used for those. +// +// 3. Debug Information (DebugInfo) and Call Frame Information (CFI) emission +// needs special handling with basic block sections. DebugInfo needs to be +// emitted with more relocations as basic block sections can break a +// function into potentially several disjoint pieces, and CFI needs to be +// emitted per cluster. This also bloats the object file and binary sizes. +// +// Basic Block Labels +// ================== +// +// With -fbasic-block-sections=labels, or when a basic block is placed in a +// unique section, it is labelled with a symbol. This allows easy mapping of +// virtual addresses from PMU profiles back to the corresponding basic blocks. 
+// Since the number of basic blocks is large, the labeling bloats the symbol +// table sizes and the string table sizes significantly. While the binary size +// does increase, it does not affect performance as the symbol table is not +// loaded in memory during run-time. The string table size bloat is kept very +// minimal using a unary naming scheme that uses string suffix compression. The +// basic blocks for function foo are named "a.BB.foo", "aa.BB.foo", ... This +// turns out to be very good for string table sizes and the bloat in the string +// table size for a very large binary is ~8 %. The naming also allows using +// the --symbol-ordering-file option in LLD to arbitrarily reorder the +// sections. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Target/TargetMachine.h" + +using llvm::SmallSet; +using llvm::SmallVector; +using llvm::StringMap; +using llvm::StringRef; +using namespace llvm; + +namespace { + +// This struct represents the cluster information for a machine basic block. +struct BBClusterInfo { + // MachineBasicBlock ID. + unsigned MBBNumber; + // Cluster ID this basic block belongs to. + unsigned ClusterID; + // Position of basic block within the cluster. + unsigned PositionInCluster; +}; + +using ProgramBBClusterInfoMapTy = StringMap<SmallVector<BBClusterInfo, 4>>; + +class BBSectionsPrepare : public MachineFunctionPass { +public: + static char ID; + + // This contains the basic-block-sections profile. + const MemoryBuffer *MBuf = nullptr; + + // This encapsulates the BB cluster information for the whole program. + // + // For every function name, it contains the cluster information for (all or + // some of) its basic blocks. The cluster information for every basic block + // includes its cluster ID along with the position of the basic block in that + // cluster. + ProgramBBClusterInfoMapTy ProgramBBClusterInfo; + + // Some functions have alias names. We use this map to find the main alias + // name for which we have mapping in ProgramBBClusterInfo. + StringMap<StringRef> FuncAliasMap; + + BBSectionsPrepare(const MemoryBuffer *Buf) + : MachineFunctionPass(ID), MBuf(Buf) { + initializeBBSectionsPreparePass(*PassRegistry::getPassRegistry()); + }; + + BBSectionsPrepare() : MachineFunctionPass(ID) { + initializeBBSectionsPreparePass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "Basic Block Sections Analysis"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// Read profiles of basic blocks if available here. + bool doInitialization(Module &M) override; + + /// Identify basic blocks that need separate sections and prepare to emit them + /// accordingly. 
+ bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // end anonymous namespace + +char BBSectionsPrepare::ID = 0; +INITIALIZE_PASS(BBSectionsPrepare, "bbsections-prepare", + "Prepares for basic block sections, by splitting functions " + "into clusters of basic blocks.", + false, false) + +// This function updates and optimizes the branching instructions of every basic +// block in a given function to account for changes in the layout. +static void updateBranches( + MachineFunction &MF, + const SmallVector<MachineBasicBlock *, 4> &PreLayoutFallThroughs) { + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + SmallVector<MachineOperand, 4> Cond; + for (auto &MBB : MF) { + auto NextMBBI = std::next(MBB.getIterator()); + auto *FTMBB = PreLayoutFallThroughs[MBB.getNumber()]; + // If this block had a fallthrough before we need an explicit unconditional + // branch to that block if either + // 1- the block ends a section, which means its next block may be + // reorderd by the linker, or + // 2- the fallthrough block is not adjacent to the block in the new + // order. + if (FTMBB && (MBB.isEndSection() || &*NextMBBI != FTMBB)) + TII->insertUnconditionalBranch(MBB, FTMBB, MBB.findBranchDebugLoc()); + + // We do not optimize branches for machine basic blocks ending sections, as + // their adjacent block might be reordered by the linker. + if (MBB.isEndSection()) + continue; + + // It might be possible to optimize branches by flipping the branch + // condition. + Cond.clear(); + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch. + if (TII->analyzeBranch(MBB, TBB, FBB, Cond)) + continue; + MBB.updateTerminator(FTMBB); + } +} + +// This function provides the BBCluster information associated with a function. +// Returns true if a valid association exists and false otherwise. +static bool getBBClusterInfoForFunction( + const MachineFunction &MF, const StringMap<StringRef> FuncAliasMap, + const ProgramBBClusterInfoMapTy &ProgramBBClusterInfo, + std::vector<Optional<BBClusterInfo>> &V) { + // Get the main alias name for the function. + auto FuncName = MF.getName(); + auto R = FuncAliasMap.find(FuncName); + StringRef AliasName = R == FuncAliasMap.end() ? FuncName : R->second; + + // Find the assoicated cluster information. + auto P = ProgramBBClusterInfo.find(AliasName); + if (P == ProgramBBClusterInfo.end()) + return false; + + if (P->second.empty()) { + // This indicates that sections are desired for all basic blocks of this + // function. We clear the BBClusterInfo vector to denote this. + V.clear(); + return true; + } + + V.resize(MF.getNumBlockIDs()); + for (auto bbClusterInfo : P->second) { + // Bail out if the cluster information contains invalid MBB numbers. + if (bbClusterInfo.MBBNumber >= MF.getNumBlockIDs()) + return false; + V[bbClusterInfo.MBBNumber] = bbClusterInfo; + } + return true; +} + +// This function sorts basic blocks according to the cluster's information. +// All explicitly specified clusters of basic blocks will be ordered +// accordingly. All non-specified BBs go into a separate "Cold" section. +// Additionally, if exception handling landing pads end up in more than one +// clusters, they are moved into a single "Exception" section. Eventually, +// clusters are ordered in increasing order of their IDs, with the "Exception" +// and "Cold" succeeding all other clusters. +// FuncBBClusterInfo represent the cluster information for basic blocks. If this +// is empty, it means unique sections for all basic blocks in the function. 
+static bool assignSectionsAndSortBasicBlocks( + MachineFunction &MF, + const std::vector<Optional<BBClusterInfo>> &FuncBBClusterInfo) { + assert(MF.hasBBSections() && "BB Sections is not set for function."); + // This variable stores the section ID of the cluster containing eh_pads (if + // all eh_pads are one cluster). If more than one cluster contain eh_pads, we + // set it equal to ExceptionSectionID. + Optional<MBBSectionID> EHPadsSectionID; + + for (auto &MBB : MF) { + // With the 'all' option, every basic block is placed in a unique section. + // With the 'list' option, every basic block is placed in a section + // associated with its cluster, unless we want individual unique sections + // for every basic block in this function (if FuncBBClusterInfo is empty). + if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All || + FuncBBClusterInfo.empty()) { + // If unique sections are desired for all basic blocks of the function, we + // set every basic block's section ID equal to its number (basic block + // id). This further ensures that basic blocks are ordered canonically. + MBB.setSectionID({static_cast<unsigned int>(MBB.getNumber())}); + } else if (FuncBBClusterInfo[MBB.getNumber()].hasValue()) + MBB.setSectionID(FuncBBClusterInfo[MBB.getNumber()]->ClusterID); + else { + // BB goes into the special cold section if it is not specified in the + // cluster info map. + MBB.setSectionID(MBBSectionID::ColdSectionID); + } + + if (MBB.isEHPad() && EHPadsSectionID != MBB.getSectionID() && + EHPadsSectionID != MBBSectionID::ExceptionSectionID) { + // If we already have one cluster containing eh_pads, this must be updated + // to ExceptionSectionID. Otherwise, we set it equal to the current + // section ID. + EHPadsSectionID = EHPadsSectionID.hasValue() + ? MBBSectionID::ExceptionSectionID + : MBB.getSectionID(); + } + } + + // If EHPads are in more than one section, this places all of them in the + // special exception section. + if (EHPadsSectionID == MBBSectionID::ExceptionSectionID) + for (auto &MBB : MF) + if (MBB.isEHPad()) + MBB.setSectionID(EHPadsSectionID.getValue()); + + SmallVector<MachineBasicBlock *, 4> PreLayoutFallThroughs( + MF.getNumBlockIDs()); + for (auto &MBB : MF) + PreLayoutFallThroughs[MBB.getNumber()] = MBB.getFallThrough(); + + // We make sure that the cluster including the entry basic block precedes all + // other clusters. + auto EntryBBSectionID = MF.front().getSectionID(); + + // Helper function for ordering BB sections as follows: + // * Entry section (section including the entry block). + // * Regular sections (in increasing order of their Number). + // ... + // * Exception section + // * Cold section + auto MBBSectionOrder = [EntryBBSectionID](const MBBSectionID &LHS, + const MBBSectionID &RHS) { + // We make sure that the section containing the entry block precedes all the + // other sections. + if (LHS == EntryBBSectionID || RHS == EntryBBSectionID) + return LHS == EntryBBSectionID; + return LHS.Type == RHS.Type ? LHS.Number < RHS.Number : LHS.Type < RHS.Type; + }; + + // We sort all basic blocks to make sure the basic blocks of every cluster are + // contiguous and ordered accordingly. Furthermore, clusters are ordered in + // increasing order of their section IDs, with the exception and the + // cold section placed at the end of the function. 
+ MF.sort([&](MachineBasicBlock &X, MachineBasicBlock &Y) { + auto XSectionID = X.getSectionID(); + auto YSectionID = Y.getSectionID(); + if (XSectionID != YSectionID) + return MBBSectionOrder(XSectionID, YSectionID); + // If the two basic block are in the same section, the order is decided by + // their position within the section. + if (XSectionID.Type == MBBSectionID::SectionType::Default) + return FuncBBClusterInfo[X.getNumber()]->PositionInCluster < + FuncBBClusterInfo[Y.getNumber()]->PositionInCluster; + return X.getNumber() < Y.getNumber(); + }); + + // Set IsBeginSection and IsEndSection according to the assigned section IDs. + MF.assignBeginEndSections(); + + // After reordering basic blocks, we must update basic block branches to + // insert explicit fallthrough branches when required and optimize branches + // when possible. + updateBranches(MF, PreLayoutFallThroughs); + + return true; +} + +bool BBSectionsPrepare::runOnMachineFunction(MachineFunction &MF) { + auto BBSectionsType = MF.getTarget().getBBSectionsType(); + assert(BBSectionsType != BasicBlockSection::None && + "BB Sections not enabled!"); + // Renumber blocks before sorting them for basic block sections. This is + // useful during sorting, basic blocks in the same section will retain the + // default order. This renumbering should also be done for basic block + // labels to match the profiles with the correct blocks. + MF.RenumberBlocks(); + + if (BBSectionsType == BasicBlockSection::Labels) { + MF.setBBSectionsType(BBSectionsType); + MF.createBBLabels(); + return true; + } + + std::vector<Optional<BBClusterInfo>> FuncBBClusterInfo; + if (BBSectionsType == BasicBlockSection::List && + !getBBClusterInfoForFunction(MF, FuncAliasMap, ProgramBBClusterInfo, + FuncBBClusterInfo)) + return true; + MF.setBBSectionsType(BBSectionsType); + MF.createBBLabels(); + assignSectionsAndSortBasicBlocks(MF, FuncBBClusterInfo); + return true; +} + +// Basic Block Sections can be enabled for a subset of machine basic blocks. +// This is done by passing a file containing names of functions for which basic +// block sections are desired. Additionally, machine basic block ids of the +// functions can also be specified for a finer granularity. Moreover, a cluster +// of basic blocks could be assigned to the same section. +// A file with basic block sections for all of function main and three blocks +// for function foo (of which 1 and 2 are placed in a cluster) looks like this: +// ---------------------------- +// list.txt: +// !main +// !foo +// !!1 2 +// !!4 +static Error getBBClusterInfo(const MemoryBuffer *MBuf, + ProgramBBClusterInfoMapTy &ProgramBBClusterInfo, + StringMap<StringRef> &FuncAliasMap) { + assert(MBuf); + line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#'); + + auto invalidProfileError = [&](auto Message) { + return make_error<StringError>( + Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " + + Twine(LineIt.line_number()) + ": " + Message), + inconvertibleErrorCode()); + }; + + auto FI = ProgramBBClusterInfo.end(); + + // Current cluster ID corresponding to this function. + unsigned CurrentCluster = 0; + // Current position in the current cluster. + unsigned CurrentPosition = 0; + + // Temporary set to ensure every basic block ID appears once in the clusters + // of a function. + SmallSet<unsigned, 4> FuncBBIDs; + + for (; !LineIt.is_at_eof(); ++LineIt) { + StringRef S(*LineIt); + if (S[0] == '@') + continue; + // Check for the leading "!" 
+ if (!S.consume_front("!") || S.empty()) + break; + // Check for second "!" which indicates a cluster of basic blocks. + if (S.consume_front("!")) { + if (FI == ProgramBBClusterInfo.end()) + return invalidProfileError( + "Cluster list does not follow a function name specifier."); + SmallVector<StringRef, 4> BBIndexes; + S.split(BBIndexes, ' '); + // Reset current cluster position. + CurrentPosition = 0; + for (auto BBIndexStr : BBIndexes) { + unsigned long long BBIndex; + if (getAsUnsignedInteger(BBIndexStr, 10, BBIndex)) + return invalidProfileError(Twine("Unsigned integer expected: '") + + BBIndexStr + "'."); + if (!FuncBBIDs.insert(BBIndex).second) + return invalidProfileError(Twine("Duplicate basic block id found '") + + BBIndexStr + "'."); + if (!BBIndex && CurrentPosition) + return invalidProfileError("Entry BB (0) does not begin a cluster."); + + FI->second.emplace_back(BBClusterInfo{ + ((unsigned)BBIndex), CurrentCluster, CurrentPosition++}); + } + CurrentCluster++; + } else { // This is a function name specifier. + // Function aliases are separated using '/'. We use the first function + // name for the cluster info mapping and delegate all other aliases to + // this one. + SmallVector<StringRef, 4> Aliases; + S.split(Aliases, '/'); + for (size_t i = 1; i < Aliases.size(); ++i) + FuncAliasMap.try_emplace(Aliases[i], Aliases.front()); + + // Prepare for parsing clusters of this function name. + // Start a new cluster map for this function name. + FI = ProgramBBClusterInfo.try_emplace(Aliases.front()).first; + CurrentCluster = 0; + FuncBBIDs.clear(); + } + } + return Error::success(); +} + +bool BBSectionsPrepare::doInitialization(Module &M) { + if (!MBuf) + return false; + if (auto Err = getBBClusterInfo(MBuf, ProgramBBClusterInfo, FuncAliasMap)) + report_fatal_error(std::move(Err)); + return false; +} + +void BBSectionsPrepare::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachineFunctionPass * +llvm::createBBSectionsPreparePass(const MemoryBuffer *Buf) { + return new BBSectionsPrepare(Buf); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp index 35964b2cdbda..c6d5aa37834f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp @@ -40,6 +40,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineSizeOpts.h" +#include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -129,15 +130,13 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { // HW that requires structurized CFG. bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && PassConfig->getEnableTailMerge(); - BranchFolder::MBFIWrapper MBBFreqInfo( + MBFIWrapper MBBFreqInfo( getAnalysis<MachineBlockFrequencyInfo>()); BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo, getAnalysis<MachineBranchProbabilityInfo>(), &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI()); - auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); - return Folder.OptimizeFunction( - MF, MF.getSubtarget().getInstrInfo(), MF.getSubtarget().getRegisterInfo(), - MMIWP ? 
&MMIWP->getMMI() : nullptr); + return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(), + MF.getSubtarget().getRegisterInfo()); } BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, @@ -170,7 +169,7 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { // Update call site info. std::for_each(MBB->begin(), MBB->end(), [MF](const MachineInstr &MI) { - if (MI.isCall(MachineInstr::IgnoreBundle)) + if (MI.shouldUpdateCallSiteInfo()) MF->eraseCallSiteInfo(&MI); }); // Remove the block. @@ -183,7 +182,6 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { bool BranchFolder::OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii, const TargetRegisterInfo *tri, - MachineModuleInfo *mmi, MachineLoopInfo *mli, bool AfterPlacement) { if (!tii) return false; @@ -193,7 +191,6 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, AfterBlockPlacement = AfterPlacement; TII = tii; TRI = tri; - MMI = mmi; MLI = mli; this->MRI = &MRI; @@ -201,14 +198,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, if (!UpdateLiveIns) MRI.invalidateLiveness(); - // Fix CFG. The later algorithms expect it to be right. bool MadeChange = false; - for (MachineBasicBlock &MBB : MF) { - MachineBasicBlock *TBB = nullptr, *FBB = nullptr; - SmallVector<MachineOperand, 4> Cond; - if (!TII->analyzeBranch(MBB, TBB, FBB, Cond, true)) - MadeChange |= MBB.CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); - } // Recalculate EH scope membership. EHScopeMembership = getEHScopeMembership(MF); @@ -354,6 +344,9 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, MBBI1->isInlineAsm()) { break; } + if (MBBI1->getFlag(MachineInstr::NoMerge) || + MBBI2->getFlag(MachineInstr::NoMerge)) + break; ++TailLen; I1 = MBBI1; I2 = MBBI2; @@ -501,42 +494,6 @@ BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const { #endif } -BlockFrequency -BranchFolder::MBFIWrapper::getBlockFreq(const MachineBasicBlock *MBB) const { - auto I = MergedBBFreq.find(MBB); - - if (I != MergedBBFreq.end()) - return I->second; - - return MBFI.getBlockFreq(MBB); -} - -void BranchFolder::MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB, - BlockFrequency F) { - MergedBBFreq[MBB] = F; -} - -raw_ostream & -BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS, - const MachineBasicBlock *MBB) const { - return MBFI.printBlockFreq(OS, getBlockFreq(MBB)); -} - -raw_ostream & -BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS, - const BlockFrequency Freq) const { - return MBFI.printBlockFreq(OS, Freq); -} - -void BranchFolder::MBFIWrapper::view(const Twine &Name, bool isSimple) { - MBFI.view(Name, isSimple); -} - -uint64_t -BranchFolder::MBFIWrapper::getEntryFreq() const { - return MBFI.getEntryFreq(); -} - /// CountTerminators - Count the number of terminators in the given /// block and set I to the position of the first non-terminator, if there /// is one, or MBB->end() otherwise. @@ -591,7 +548,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, MachineBasicBlock *PredBB, DenseMap<const MachineBasicBlock *, int> &EHScopeMembership, bool AfterPlacement, - BranchFolder::MBFIWrapper &MBBFreqInfo, + MBFIWrapper &MBBFreqInfo, ProfileSummaryInfo *PSI) { // It is never profitable to tail-merge blocks from two different EH scopes. 
if (!EHScopeMembership.empty()) { @@ -691,8 +648,8 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, MachineFunction *MF = MBB1->getParent(); bool OptForSize = MF->getFunction().hasOptSize() || - (llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo.getMBFI()) && - llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo.getMBFI())); + (llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo) && + llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo)); return EffectiveTailLen >= 2 && OptForSize && (FullBlockTail1 || FullBlockTail2); } @@ -900,7 +857,7 @@ void BranchFolder::mergeCommonTails(unsigned commonTailIndex) { LiveRegs.clear(); LiveRegs.addLiveOuts(*Pred); MachineBasicBlock::iterator InsertBefore = Pred->getFirstTerminator(); - for (unsigned Reg : NewLiveIns) { + for (Register Reg : NewLiveIns) { if (!LiveRegs.available(*MRI, Reg)) continue; DebugLoc DL; @@ -1126,8 +1083,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (!UniquePreds.insert(PBB).second) continue; - // Skip blocks which may jump to a landing pad. Can't tail merge these. - if (PBB->hasEHPadSuccessor()) + // Skip blocks which may jump to a landing pad or jump from an asm blob. + // Can't tail merge these. + if (PBB->hasEHPadSuccessor() || PBB->mayHaveInlineAsmBr()) continue; // After block placement, only consider predecessors that belong to the @@ -1373,6 +1331,13 @@ ReoptimizeBlock: SameEHScope = MBBEHScope->second == FallThroughEHScope->second; } + // Analyze the branch in the current block. As a side-effect, this may cause + // the block to become empty. + MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr; + SmallVector<MachineOperand, 4> CurCond; + bool CurUnAnalyzable = + TII->analyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true); + // If this block is empty, make everyone use its fall-through, not the block // explicitly. Landing pads should not do this since the landing-pad table // points to this block. Blocks with their addresses taken shouldn't be @@ -1415,10 +1380,6 @@ ReoptimizeBlock: bool PriorUnAnalyzable = TII->analyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true); if (!PriorUnAnalyzable) { - // If the CFG for the prior block has extra edges, remove them. - MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB, - !PriorCond.empty()); - // If the previous branch is conditional and both conditions go to the same // destination, remove the branch, replacing it with an unconditional one or // a fall-through. @@ -1439,7 +1400,7 @@ ReoptimizeBlock: // has been used, but it can happen if tail merging splits a fall-through // predecessor of a block. // This has to check PrevBB->succ_size() because EH edges are ignored by - // AnalyzeBranch. + // analyzeBranch. if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 && PrevBB.succ_size() == 1 && !MBB->hasAddressTaken() && !MBB->isEHPad()) { @@ -1549,7 +1510,7 @@ ReoptimizeBlock: bool OptForSize = MF.getFunction().hasOptSize() || - llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo.getMBFI()); + llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo); if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && OptForSize) { // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch // direction, thereby defeating careful block placement and regressing @@ -1586,15 +1547,7 @@ ReoptimizeBlock: } } - // Analyze the branch in the current block. 
- MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr; - SmallVector<MachineOperand, 4> CurCond; - bool CurUnAnalyzable = - TII->analyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true); if (!CurUnAnalyzable) { - // If the CFG for the prior block has extra edges, remove them. - MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty()); - // If this is a two-way branch, and the FBB branches to this block, reverse // the condition so the single-basic-block loop is faster. Instead of: // Loop: xxx; jcc Out; jmp Loop @@ -1671,7 +1624,7 @@ ReoptimizeBlock: PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB); // If this change resulted in PMBB ending in a conditional // branch where both conditions go to the same destination, - // change this to an unconditional branch (and fix the CFG). + // change this to an unconditional branch. MachineBasicBlock *NewCurTBB = nullptr, *NewCurFBB = nullptr; SmallVector<MachineOperand, 4> NewCurCond; bool NewCurUnAnalyzable = TII->analyzeBranch( @@ -1683,7 +1636,6 @@ ReoptimizeBlock: TII->insertBranch(*PMBB, NewCurTBB, nullptr, NewCurCond, pdl); MadeChange = true; ++NumBranchOpts; - PMBB->CorrectExtraCFGEdges(NewCurTBB, nullptr, false); } } } @@ -1714,13 +1666,15 @@ ReoptimizeBlock: if (!MBB->isEHPad()) { // Check all the predecessors of this block. If one of them has no fall - // throughs, move this block right after it. + // throughs, and analyzeBranch thinks it _could_ fallthrough to this + // block, move this block right after it. for (MachineBasicBlock *PredBB : MBB->predecessors()) { // Analyze the branch at the end of the pred. MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector<MachineOperand, 4> PredCond; if (PredBB != MBB && !PredBB->canFallThrough() && !TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) && + (PredTBB == MBB || PredFBB == MBB) && (!CurFallsThru || !CurTBB || !CurFBB) && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) { // If the current block doesn't fall through, just move it. @@ -1746,21 +1700,24 @@ ReoptimizeBlock: } if (!CurFallsThru) { - // Check all successors to see if we can move this block before it. - for (MachineBasicBlock *SuccBB : MBB->successors()) { - // Analyze the branch at the end of the block before the succ. - MachineFunction::iterator SuccPrev = --SuccBB->getIterator(); - - // If this block doesn't already fall-through to that successor, and if - // the succ doesn't already have a block that can fall through into it, - // and if the successor isn't an EH destination, we can arrange for the - // fallthrough to happen. - if (SuccBB != MBB && &*SuccPrev != MBB && - !SuccPrev->canFallThrough() && !CurUnAnalyzable && - !SuccBB->isEHPad()) { - MBB->moveBefore(SuccBB); - MadeChange = true; - goto ReoptimizeBlock; + // Check analyzable branch-successors to see if we can move this block + // before one. + if (!CurUnAnalyzable) { + for (MachineBasicBlock *SuccBB : {CurFBB, CurTBB}) { + if (!SuccBB) + continue; + // Analyze the branch at the end of the block before the succ. + MachineFunction::iterator SuccPrev = --SuccBB->getIterator(); + + // If this block doesn't already fall-through to that successor, and + // if the succ doesn't already have a block that can fall through into + // it, we can arrange for the fallthrough to happen. 
+ if (SuccBB != MBB && &*SuccPrev != MBB && + !SuccPrev->canFallThrough()) { + MBB->moveBefore(SuccBB); + MadeChange = true; + goto ReoptimizeBlock; + } } } @@ -1819,9 +1776,9 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, } template <class Container> -static void addRegAndItsAliases(unsigned Reg, const TargetRegisterInfo *TRI, +static void addRegAndItsAliases(Register Reg, const TargetRegisterInfo *TRI, Container &Set) { - if (Register::isPhysicalRegister(Reg)) { + if (Reg.isPhysical()) { for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) Set.insert(*AI); } else { @@ -1840,8 +1797,8 @@ static MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, - SmallSet<unsigned,4> &Uses, - SmallSet<unsigned,4> &Defs) { + SmallSet<Register, 4> &Uses, + SmallSet<Register, 4> &Defs) { MachineBasicBlock::iterator Loc = MBB->getFirstTerminator(); if (!TII->isUnpredicatedTerminator(*Loc)) return MBB->end(); @@ -1877,8 +1834,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, // The terminator is probably a conditional branch, try not to separate the // branch from condition setting instruction. - MachineBasicBlock::iterator PI = - skipDebugInstructionsBackward(std::prev(Loc), MBB->begin()); + MachineBasicBlock::iterator PI = prev_nodbg(Loc, MBB->begin()); bool IsDef = false; for (const MachineOperand &MO : PI->operands()) { @@ -1953,14 +1909,14 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { // Find a suitable position to hoist the common instructions to. Also figure // out which registers are used or defined by instructions from the insertion // point to the end of the block. - SmallSet<unsigned, 4> Uses, Defs; + SmallSet<Register, 4> Uses, Defs; MachineBasicBlock::iterator Loc = findHoistingInsertPosAndDeps(MBB, TII, TRI, Uses, Defs); if (Loc == MBB->end()) return false; bool HasDups = false; - SmallSet<unsigned, 4> ActiveDefsSet, AllDefsSet; + SmallSet<Register, 4> ActiveDefsSet, AllDefsSet; MachineBasicBlock::iterator TIB = TBB->begin(); MachineBasicBlock::iterator FIB = FBB->begin(); MachineBasicBlock::iterator TIE = TBB->end(); @@ -2044,7 +2000,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (!AllDefsSet.count(Reg)) { continue; } - if (Register::isPhysicalRegister(Reg)) { + if (Reg.isPhysical()) { for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) ActiveDefsSet.erase(*AI); } else { @@ -2057,7 +2013,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (!MO.isReg() || !MO.isDef() || MO.isDead()) continue; Register Reg = MO.getReg(); - if (!Reg || Register::isVirtualRegister(Reg)) + if (!Reg || Reg.isVirtual()) continue; addRegAndItsAliases(Reg, TRI, ActiveDefsSet); addRegAndItsAliases(Reg, TRI, AllDefsSet); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h index 7a4c68ea09f5..49c6bcae2db4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h @@ -13,7 +13,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/Support/BlockFrequency.h" #include "llvm/Support/Compiler.h" #include <cstdint> #include <vector> @@ -21,21 +20,18 @@ namespace llvm { class BasicBlock; -class MachineBlockFrequencyInfo; class MachineBranchProbabilityInfo; class 
MachineFunction; class MachineLoopInfo; class MachineModuleInfo; class MachineRegisterInfo; +class MBFIWrapper; class ProfileSummaryInfo; -class raw_ostream; class TargetInstrInfo; class TargetRegisterInfo; class LLVM_LIBRARY_VISIBILITY BranchFolder { public: - class MBFIWrapper; - explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, MBFIWrapper &FreqInfo, @@ -49,7 +45,7 @@ class TargetRegisterInfo; /// given function. Block placement changes the layout and may create new /// tail merging opportunities. bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii, - const TargetRegisterInfo *tri, MachineModuleInfo *mmi, + const TargetRegisterInfo *tri, MachineLoopInfo *mli = nullptr, bool AfterPlacement = false); @@ -128,32 +124,9 @@ class TargetRegisterInfo; const TargetInstrInfo *TII; const MachineRegisterInfo *MRI; const TargetRegisterInfo *TRI; - MachineModuleInfo *MMI; MachineLoopInfo *MLI; LivePhysRegs LiveRegs; - public: - /// This class keeps track of branch frequencies of newly created - /// blocks and tail-merged blocks. - class MBFIWrapper { - public: - MBFIWrapper(const MachineBlockFrequencyInfo &I) : MBFI(I) {} - - BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const; - void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F); - raw_ostream &printBlockFreq(raw_ostream &OS, - const MachineBasicBlock *MBB) const; - raw_ostream &printBlockFreq(raw_ostream &OS, - const BlockFrequency Freq) const; - void view(const Twine &Name, bool isSimple = true); - uint64_t getEntryFreq() const; - const MachineBlockFrequencyInfo &getMBFI() { return MBFI; } - - private: - const MachineBlockFrequencyInfo &MBFI; - DenseMap<const MachineBasicBlock *, BlockFrequency> MergedBBFreq; - }; - private: MBFIWrapper &MBBFreqInfo; const MachineBranchProbabilityInfo &MBPI; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp index f05517d178ae..5a3ec1a36f96 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -67,16 +67,13 @@ class BranchRelaxation : public MachineFunctionPass { unsigned postOffset(const MachineBasicBlock &MBB) const { const unsigned PO = Offset + Size; const Align Alignment = MBB.getAlignment(); - if (Alignment == 1) - return PO; - const Align ParentAlign = MBB.getParent()->getAlignment(); if (Alignment <= ParentAlign) - return PO + offsetToAlignment(PO, Alignment); + return alignTo(PO, Alignment); // The alignment of this MBB is larger than the function's alignment, so we // can't tell whether or not it will insert nops. Assume that it will. 
- return PO + Alignment.value() + offsetToAlignment(PO, Alignment); + return alignTo(PO, Alignment) + Alignment.value() - ParentAlign.value(); } }; @@ -129,7 +126,6 @@ void BranchRelaxation::verify() { unsigned PrevNum = MF->begin()->getNumber(); for (MachineBasicBlock &MBB : *MF) { const unsigned Num = MBB.getNumber(); - assert(isAligned(MBB.getAlignment(), BlockInfo[Num].Offset)); assert(!Num || BlockInfo[PrevNum].postOffset(MBB) <= BlockInfo[Num].Offset); assert(BlockInfo[Num].Size == computeBlockSize(MBB)); PrevNum = Num; @@ -195,10 +191,9 @@ unsigned BranchRelaxation::getInstrOffset(const MachineInstr &MI) const { void BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) { unsigned PrevNum = Start.getNumber(); - for (auto &MBB : make_range(MachineFunction::iterator(Start), MF->end())) { + for (auto &MBB : + make_range(std::next(MachineFunction::iterator(Start)), MF->end())) { unsigned Num = MBB.getNumber(); - if (!Num) // block zero is never changed from offset zero. - continue; // Get the offset and known bits at the end of the layout predecessor. // Include the alignment of the current block. BlockInfo[Num].Offset = BlockInfo[PrevNum].postOffset(MBB); @@ -250,8 +245,7 @@ MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI, // Cleanup potential unconditional branch to successor block. // Note that updateTerminator may change the size of the blocks. - NewBB->updateTerminator(); - OrigBB->updateTerminator(); + OrigBB->updateTerminator(NewBB); // Figure out how large the OrigBB is. As the first half of the original // block, it cannot contain a tablejump. The size includes diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp index 9bae9d36add1..b01a264dd97d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp @@ -106,9 +106,18 @@ FunctionPass *llvm::createBreakFalseDeps() { return new BreakFalseDeps(); } bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, unsigned Pref) { + + // We can't change tied operands. + if (MI->isRegTiedToDefOperand(OpIdx)) + return false; + MachineOperand &MO = MI->getOperand(OpIdx); assert(MO.isUndef() && "Expected undef machine operand"); + // We can't change registers that aren't renamable. + if (!MO.isRenamable()) + return false; + Register OriginalReg = MO.getReg(); // Update only undef operands that have reg units that are mapped to one root. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp index ef548c84d3c0..23c7fea01f28 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp @@ -18,6 +18,8 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -76,15 +78,32 @@ class CFIInstrInserter : public MachineFunctionPass { unsigned IncomingCFARegister = 0; /// Value of cfa register valid at basic block exit. unsigned OutgoingCFARegister = 0; + /// Set of callee saved registers saved at basic block entry. + BitVector IncomingCSRSaved; + /// Set of callee saved registers saved at basic block exit. 
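The postOffset rework above (BranchRelaxation.cpp) changes the worst-case padding estimate: when the block's alignment is no larger than the function's, the offset within the function is exact and a plain align-up suffices; when it is larger, the function's start address is only known modulo its own alignment, so up to BlockAlign - FuncAlign extra nop bytes may be inserted beyond the aligned offset. A standalone sketch of the arithmetic, with plain integers standing in for llvm::Align:

#include <cassert>
#include <cstdint>
#include <iostream>

static uint64_t alignTo(uint64_t V, uint64_t A) { // round V up to a multiple of A
  return (V + A - 1) / A * A;
}

// Worst-case offset of the next block, given the raw offset PO past the
// previous block; mirrors the reworked BranchRelaxation::postOffset.
static uint64_t postOffset(uint64_t PO, uint64_t BlockAlign,
                           uint64_t FuncAlign) {
  if (BlockAlign <= FuncAlign)
    return alignTo(PO, BlockAlign); // offset within the function is exact
  // Function start known only modulo FuncAlign: after aligning the known
  // offset, up to BlockAlign - FuncAlign padding bytes may still appear.
  return alignTo(PO, BlockAlign) + BlockAlign - FuncAlign;
}

int main() {
  assert(postOffset(10, 4, 16) == 12); // 10 rounded up to 4
  assert(postOffset(10, 16, 4) == 28); // 16 + (16 - 4) worst case
  std::cout << "ok\n";
}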
+ BitVector OutgoingCSRSaved; /// If in/out cfa offset and register values for this block have already /// been set or not. bool Processed = false; }; +#define INVALID_REG UINT_MAX +#define INVALID_OFFSET INT_MAX + /// contains the location where CSR register is saved. + struct CSRSavedLocation { + CSRSavedLocation(Optional<unsigned> R, Optional<int> O) + : Reg(R), Offset(O) {} + Optional<unsigned> Reg; + Optional<int> Offset; + }; + /// Contains cfa offset and register values valid at entry and exit of basic /// blocks. std::vector<MBBCFAInfo> MBBVector; + /// Map the callee save registers to the locations where they are saved. + SmallDenseMap<unsigned, CSRSavedLocation, 16> CSRLocMap; + /// Calculate cfa offset and register values valid at entry and exit for all /// basic blocks in a function. void calculateCFAInfo(MachineFunction &MF); @@ -105,10 +124,11 @@ class CFIInstrInserter : public MachineFunctionPass { /// if needed. The negated value is needed when creating CFI instructions that /// set absolute offset. int getCorrectCFAOffset(MachineBasicBlock *MBB) { - return -MBBVector[MBB->getNumber()].IncomingCFAOffset; + return MBBVector[MBB->getNumber()].IncomingCFAOffset; } - void report(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ); + void reportCFAError(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ); + void reportCSRError(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ); /// Go through each MBB in a function and check that outgoing offset and /// register of its predecessors match incoming offset and register of that /// MBB, as well as that incoming offset and register of its successors match @@ -132,6 +152,8 @@ void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) { // function. unsigned InitialRegister = MF.getSubtarget().getFrameLowering()->getInitialCFARegister(MF); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + unsigned NumRegs = TRI.getNumRegs(); // Initialize MBBMap. for (MachineBasicBlock &MBB : MF) { @@ -141,17 +163,17 @@ void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) { MBBInfo.OutgoingCFAOffset = InitialOffset; MBBInfo.IncomingCFARegister = InitialRegister; MBBInfo.OutgoingCFARegister = InitialRegister; + MBBInfo.IncomingCSRSaved.resize(NumRegs); + MBBInfo.OutgoingCSRSaved.resize(NumRegs); MBBVector[MBB.getNumber()] = MBBInfo; } + CSRLocMap.clear(); // Set in/out cfa info for all blocks in the function. This traversal is based // on the assumption that the first block in the function is the entry block // i.e. that it has initial cfa offset and register values as incoming CFA // information. - for (MachineBasicBlock &MBB : MF) { - if (MBBVector[MBB.getNumber()].Processed) continue; - updateSuccCFAInfo(MBBVector[MBB.getNumber()]); - } + updateSuccCFAInfo(MBBVector[MF.front().getNumber()]); } void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { @@ -159,12 +181,17 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { int SetOffset = MBBInfo.IncomingCFAOffset; // Outgoing cfa register set by the block. unsigned SetRegister = MBBInfo.IncomingCFARegister; - const std::vector<MCCFIInstruction> &Instrs = - MBBInfo.MBB->getParent()->getFrameInstructions(); + MachineFunction *MF = MBBInfo.MBB->getParent(); + const std::vector<MCCFIInstruction> &Instrs = MF->getFrameInstructions(); + const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); + unsigned NumRegs = TRI.getNumRegs(); + BitVector CSRSaved(NumRegs), CSRRestored(NumRegs); // Determine cfa offset and register set by the block. 
for (MachineInstr &MI : *MBBInfo.MBB) { if (MI.isCFIInstruction()) { + Optional<unsigned> CSRReg; + Optional<int> CSROffset; unsigned CFIIndex = MI.getOperand(0).getCFIIndex(); const MCCFIInstruction &CFI = Instrs[CFIIndex]; switch (CFI.getOperation()) { @@ -181,6 +208,18 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { SetRegister = CFI.getRegister(); SetOffset = CFI.getOffset(); break; + case MCCFIInstruction::OpOffset: + CSROffset = CFI.getOffset(); + break; + case MCCFIInstruction::OpRegister: + CSRReg = CFI.getRegister2(); + break; + case MCCFIInstruction::OpRelOffset: + CSROffset = CFI.getOffset() - SetOffset; + break; + case MCCFIInstruction::OpRestore: + CSRRestored.set(CFI.getRegister()); + break; case MCCFIInstruction::OpRememberState: // TODO: Add support for handling cfi_remember_state. #ifndef NDEBUG @@ -198,18 +237,24 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { #endif break; // Other CFI directives do not affect CFA value. + case MCCFIInstruction::OpUndefined: case MCCFIInstruction::OpSameValue: - case MCCFIInstruction::OpOffset: - case MCCFIInstruction::OpRelOffset: case MCCFIInstruction::OpEscape: - case MCCFIInstruction::OpRestore: - case MCCFIInstruction::OpUndefined: - case MCCFIInstruction::OpRegister: case MCCFIInstruction::OpWindowSave: case MCCFIInstruction::OpNegateRAState: case MCCFIInstruction::OpGnuArgsSize: break; } + if (CSRReg || CSROffset) { + auto It = CSRLocMap.find(CFI.getRegister()); + if (It == CSRLocMap.end()) { + CSRLocMap.insert( + {CFI.getRegister(), CSRSavedLocation(CSRReg, CSROffset)}); + } else if (It->second.Reg != CSRReg || It->second.Offset != CSROffset) { + llvm_unreachable("Different saved locations for the same CSR"); + } + CSRSaved.set(CFI.getRegister()); + } } } @@ -218,6 +263,11 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { // Update outgoing CFA info. MBBInfo.OutgoingCFAOffset = SetOffset; MBBInfo.OutgoingCFARegister = SetRegister; + + // Update outgoing CSR info. + MBBInfo.OutgoingCSRSaved = MBBInfo.IncomingCSRSaved; + MBBInfo.OutgoingCSRSaved |= CSRSaved; + MBBInfo.OutgoingCSRSaved.reset(CSRRestored); } void CFIInstrInserter::updateSuccCFAInfo(MBBCFAInfo &MBBInfo) { @@ -227,15 +277,13 @@ void CFIInstrInserter::updateSuccCFAInfo(MBBCFAInfo &MBBInfo) { do { MachineBasicBlock *Current = Stack.pop_back_val(); MBBCFAInfo &CurrentInfo = MBBVector[Current->getNumber()]; - if (CurrentInfo.Processed) - continue; - calculateOutgoingCFAInfo(CurrentInfo); for (auto *Succ : CurrentInfo.MBB->successors()) { MBBCFAInfo &SuccInfo = MBBVector[Succ->getNumber()]; if (!SuccInfo.Processed) { SuccInfo.IncomingCFAOffset = CurrentInfo.OutgoingCFAOffset; SuccInfo.IncomingCFARegister = CurrentInfo.OutgoingCFARegister; + SuccInfo.IncomingCSRSaved = CurrentInfo.OutgoingCSRSaved; Stack.push_back(Succ); } } @@ -255,29 +303,31 @@ bool CFIInstrInserter::insertCFIInstrs(MachineFunction &MF) { auto MBBI = MBBInfo.MBB->begin(); DebugLoc DL = MBBInfo.MBB->findDebugLoc(MBBI); - if (PrevMBBInfo->OutgoingCFAOffset != MBBInfo.IncomingCFAOffset) { + // If the current MBB will be placed in a unique section, a full DefCfa + // must be emitted. 
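The CSR tracking added to CFIInstrInserter above is a straightforward forward dataflow: each block's outgoing saved-register set is its incoming set plus the registers its CFI directives save, minus those they restore, and the result is propagated to successors. A minimal sketch of the per-block transfer function with std::bitset in place of llvm::BitVector:

#include <bitset>
#include <iostream>

constexpr std::size_t NumRegs = 8;
using RegSet = std::bitset<NumRegs>;

// Transfer function for the callee-saved-register tracking: registers saved
// in the block join the incoming set, registers restored in it leave.
static RegSet outgoingCSR(RegSet Incoming, RegSet Saved, RegSet Restored) {
  return (Incoming | Saved) & ~Restored;
}

int main() {
  RegSet In("00000011");    // regs 0 and 1 already saved at entry
  RegSet Saved("00000100"); // block saves reg 2
  RegSet Rest("00000001");  // block restores reg 0
  std::cout << outgoingCSR(In, Saved, Rest) << '\n'; // 00000110 (regs 1 and 2)
}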
+ const bool ForceFullCFA = MBB.isBeginSection(); + + if ((PrevMBBInfo->OutgoingCFAOffset != MBBInfo.IncomingCFAOffset && + PrevMBBInfo->OutgoingCFARegister != MBBInfo.IncomingCFARegister) || + ForceFullCFA) { // If both outgoing offset and register of a previous block don't match - // incoming offset and register of this block, add a def_cfa instruction - // with the correct offset and register for this block. - if (PrevMBBInfo->OutgoingCFARegister != MBBInfo.IncomingCFARegister) { - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa( - nullptr, MBBInfo.IncomingCFARegister, getCorrectCFAOffset(&MBB))); - BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - // If outgoing offset of a previous block doesn't match incoming offset - // of this block, add a def_cfa_offset instruction with the correct - // offset for this block. - } else { - unsigned CFIIndex = - MF.addFrameInst(MCCFIInstruction::createDefCfaOffset( - nullptr, getCorrectCFAOffset(&MBB))); - BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } + // incoming offset and register of this block, or if this block begins a + // section, add a def_cfa instruction with the correct offset and + // register for this block. + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( + nullptr, MBBInfo.IncomingCFARegister, getCorrectCFAOffset(&MBB))); + BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + InsertedCFIInstr = true; + } else if (PrevMBBInfo->OutgoingCFAOffset != MBBInfo.IncomingCFAOffset) { + // If outgoing offset of a previous block doesn't match incoming offset + // of this block, add a def_cfa_offset instruction with the correct + // offset for this block. + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset( + nullptr, getCorrectCFAOffset(&MBB))); + BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); InsertedCFIInstr = true; - // If outgoing register of a previous block doesn't match incoming - // register of this block, add a def_cfa_register instruction with the - // correct register for this block. 
} else if (PrevMBBInfo->OutgoingCFARegister != MBBInfo.IncomingCFARegister) { unsigned CFIIndex = @@ -287,12 +337,53 @@ bool CFIInstrInserter::insertCFIInstrs(MachineFunction &MF) { .addCFIIndex(CFIIndex); InsertedCFIInstr = true; } + + if (ForceFullCFA) { + MF.getSubtarget().getFrameLowering()->emitCalleeSavedFrameMoves( + *MBBInfo.MBB, MBBI); + InsertedCFIInstr = true; + PrevMBBInfo = &MBBInfo; + continue; + } + + BitVector SetDifference = PrevMBBInfo->OutgoingCSRSaved; + SetDifference.reset(MBBInfo.IncomingCSRSaved); + for (int Reg : SetDifference.set_bits()) { + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, Reg)); + BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + InsertedCFIInstr = true; + } + + SetDifference = MBBInfo.IncomingCSRSaved; + SetDifference.reset(PrevMBBInfo->OutgoingCSRSaved); + for (int Reg : SetDifference.set_bits()) { + auto it = CSRLocMap.find(Reg); + assert(it != CSRLocMap.end() && "Reg should have an entry in CSRLocMap"); + unsigned CFIIndex; + CSRSavedLocation RO = it->second; + if (!RO.Reg && RO.Offset) { + CFIIndex = MF.addFrameInst( + MCCFIInstruction::createOffset(nullptr, Reg, *RO.Offset)); + } else if (RO.Reg && !RO.Offset) { + CFIIndex = MF.addFrameInst( + MCCFIInstruction::createRegister(nullptr, Reg, *RO.Reg)); + } else { + llvm_unreachable("RO.Reg and RO.Offset cannot both be valid/invalid"); + } + BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + InsertedCFIInstr = true; + } + PrevMBBInfo = &MBBInfo; } return InsertedCFIInstr; } -void CFIInstrInserter::report(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ) { +void CFIInstrInserter::reportCFAError(const MBBCFAInfo &Pred, + const MBBCFAInfo &Succ) { errs() << "*** Inconsistent CFA register and/or offset between pred and succ " "***\n"; errs() << "Pred: " << Pred.MBB->getName() << " #" << Pred.MBB->getNumber() @@ -307,6 +398,22 @@ void CFIInstrInserter::report(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ) { << " incoming CFA Offset:" << Succ.IncomingCFAOffset << "\n"; } +void CFIInstrInserter::reportCSRError(const MBBCFAInfo &Pred, + const MBBCFAInfo &Succ) { + errs() << "*** Inconsistent CSR Saved between pred and succ in function " + << Pred.MBB->getParent()->getName() << " ***\n"; + errs() << "Pred: " << Pred.MBB->getName() << " #" << Pred.MBB->getNumber() + << " outgoing CSR Saved: "; + for (int Reg : Pred.OutgoingCSRSaved.set_bits()) + errs() << Reg << " "; + errs() << "\n"; + errs() << "Succ: " << Succ.MBB->getName() << " #" << Succ.MBB->getNumber() + << " incoming CSR Saved: "; + for (int Reg : Succ.IncomingCSRSaved.set_bits()) + errs() << Reg << " "; + errs() << "\n"; +} + unsigned CFIInstrInserter::verify(MachineFunction &MF) { unsigned ErrorNum = 0; for (auto *CurrMBB : depth_first(&MF)) { @@ -321,7 +428,13 @@ unsigned CFIInstrInserter::verify(MachineFunction &MF) { // we don't generate epilogues inside such blocks. 
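The two SetDifference loops above drive the new CSR directives at a block boundary: registers recorded as saved before the boundary but not after need a .cfi_restore, and registers expected saved after but not before need their save location re-stated via .cfi_offset or .cfi_register. A standalone sketch of the set arithmetic, printing directive names rather than building MachineInstrs:

#include <bitset>
#include <iostream>

constexpr std::size_t N = 8;
using RegSet = std::bitset<N>;

static void emitBoundary(RegSet PrevOut, RegSet CurIn) {
  RegSet NeedRestore = PrevOut & ~CurIn; // saved before, not after
  RegSet NeedSave = CurIn & ~PrevOut;    // expected after, not stated before
  for (std::size_t R = 0; R < N; ++R) {
    if (NeedRestore[R])
      std::cout << ".cfi_restore r" << R << '\n';
    if (NeedSave[R]) // LLVM picks offset vs. register from CSRLocMap
      std::cout << ".cfi_offset r" << R << ", <saved location>\n";
  }
}

int main() {
  emitBoundary(RegSet("00000011"), RegSet("00000101"));
  // prints: .cfi_restore r1, then .cfi_offset r2, <saved location>
}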
if (SuccMBBInfo.MBB->succ_empty() && !SuccMBBInfo.MBB->isReturnBlock()) continue; - report(CurrMBBInfo, SuccMBBInfo); + reportCFAError(CurrMBBInfo, SuccMBBInfo); + ErrorNum++; + } + // Check that IncomingCSRSaved of every successor matches the + // OutgoingCSRSaved of CurrMBB + if (SuccMBBInfo.IncomingCSRSaved != CurrMBBInfo.OutgoingCSRSaved) { + reportCSRError(CurrMBBInfo, SuccMBBInfo); ErrorNum++; } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp index bf97aaee3665..5d6ee09c8438 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -203,9 +203,10 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, }; std::set<CopyHint> CopyHints; - for (MachineRegisterInfo::reg_instr_iterator - I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end(); - I != E; ) { + for (MachineRegisterInfo::reg_instr_nodbg_iterator + I = mri.reg_instr_nodbg_begin(li.reg), + E = mri.reg_instr_nodbg_end(); + I != E;) { MachineInstr *mi = &*(I++); // For local split artifacts, we are interested only in instructions between @@ -215,7 +216,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, continue; numInstr++; - if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugInstr()) + if (mi->isIdentityCopy() || mi->isImplicitDef()) continue; if (!visited.insert(mi).second) continue; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp index a397039180a4..3d8c2c8b00aa 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp @@ -42,29 +42,27 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, /// its parameter attribute. void CCState::HandleByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, int MinSize, - int MinAlignment, ISD::ArgFlagsTy ArgFlags) { - Align MinAlign(MinAlignment); - Align Alignment(ArgFlags.getByValAlign()); + Align MinAlign, ISD::ArgFlagsTy ArgFlags) { + Align Alignment = ArgFlags.getNonZeroByValAlign(); unsigned Size = ArgFlags.getByValSize(); if (MinSize > (int)Size) Size = MinSize; if (MinAlign > Alignment) Alignment = MinAlign; ensureMaxAlignment(Alignment); - MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, - Alignment.value()); + MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, Alignment); Size = unsigned(alignTo(Size, MinAlign)); - unsigned Offset = AllocateStack(Size, Alignment.value()); + unsigned Offset = AllocateStack(Size, Alignment); addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); } /// Mark a register and all of its aliases as allocated. 
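The HandleByVal cleanup above (CallingConvLower.cpp) switches the interface to llvm::Align, but the underlying arithmetic is unchanged: clamp the argument's size and alignment up to the target's minimums, round the size to the minimum alignment, and carve an aligned stack slot. A standalone sketch of that sequence, with a plain integer standing in for CCState's running stack size and the ensureMaxAlignment/target hook steps omitted:

#include <cstdint>
#include <iostream>

static uint64_t alignTo(uint64_t V, uint64_t A) {
  return (V + A - 1) / A * A;
}

static uint64_t handleByVal(uint64_t &StackOffset, uint64_t Size,
                            uint64_t ByValAlign, uint64_t MinSize,
                            uint64_t MinAlign) {
  if (Size < MinSize)
    Size = MinSize;           // enforce minimum size
  if (ByValAlign < MinAlign)
    ByValAlign = MinAlign;    // enforce minimum alignment
  Size = alignTo(Size, MinAlign);
  StackOffset = alignTo(StackOffset, ByValAlign); // AllocateStack
  uint64_t Offset = StackOffset;
  StackOffset += Size;
  return Offset;
}

int main() {
  uint64_t SP = 4;
  std::cout << handleByVal(SP, 6, 4, 8, 8) << ' ' << SP << '\n'; // 8 16
}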
-void CCState::MarkAllocated(unsigned Reg) { +void CCState::MarkAllocated(MCPhysReg Reg) { for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI) - UsedRegs[*AI/32] |= 1 << (*AI&31); + UsedRegs[*AI / 32] |= 1 << (*AI & 31); } -bool CCState::IsShadowAllocatedReg(unsigned Reg) const { +bool CCState::IsShadowAllocatedReg(MCRegister Reg) const { if (!isAllocated(Reg)) return false; @@ -276,18 +274,14 @@ bool CCState::resultsCompatible(CallingConv::ID CalleeCC, for (unsigned I = 0, E = RVLocs1.size(); I != E; ++I) { const CCValAssign &Loc1 = RVLocs1[I]; const CCValAssign &Loc2 = RVLocs2[I]; - if (Loc1.getLocInfo() != Loc2.getLocInfo()) - return false; - bool RegLoc1 = Loc1.isRegLoc(); - if (RegLoc1 != Loc2.isRegLoc()) + + if ( // Must both be in registers, or both in memory + Loc1.isRegLoc() != Loc2.isRegLoc() || + // Must fill the same part of their locations + Loc1.getLocInfo() != Loc2.getLocInfo() || + // Memory offset/register number must be the same + Loc1.getExtraInfo() != Loc2.getExtraInfo()) return false; - if (RegLoc1) { - if (Loc1.getLocReg() != Loc2.getLocReg()) - return false; - } else { - if (Loc1.getLocMemOffset() != Loc2.getLocMemOffset()) - return false; - } } return true; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp index 20fc67cc66ae..7a8c022c82da 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp @@ -20,12 +20,14 @@ using namespace llvm; /// initializeCodeGen - Initialize all passes linked into the CodeGen library. void llvm::initializeCodeGen(PassRegistry &Registry) { initializeAtomicExpandPass(Registry); + initializeBBSectionsPreparePass(Registry); initializeBranchFolderPassPass(Registry); initializeBranchRelaxationPass(Registry); initializeCFGuardLongjmpPass(Registry); initializeCFIInstrInserterPass(Registry); initializeCodeGenPreparePass(Registry); initializeDeadMachineInstructionElimPass(Registry); + initializeDebugifyMachineModulePass(Registry); initializeDetectDeadLanesPass(Registry); initializeDwarfEHPreparePass(Registry); initializeEarlyIfConverterPass(Registry); @@ -37,6 +39,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeFEntryInserterPass(Registry); initializeFinalizeISelPass(Registry); initializeFinalizeMachineBundlesPass(Registry); + initializeFixupStatepointCallerSavedPass(Registry); initializeFuncletLayoutPass(Registry); initializeGCMachineCodeAnalysisPass(Registry); initializeGCModuleInfoPass(Registry); @@ -97,11 +100,13 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeSafeStackLegacyPassPass(Registry); initializeScalarizeMaskedMemIntrinPass(Registry); initializeShrinkWrapPass(Registry); + initializeSjLjEHPreparePass(Registry); initializeSlotIndexesPass(Registry); initializeStackColoringPass(Registry); initializeStackMapLivenessPass(Registry); initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); + initializeStripDebugMachineModulePass(Registry); initializeTailDuplicatePass(Registry); initializeTargetPassConfigPass(Registry); initializeTwoAddressInstructionPassPass(Registry); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp index 7d77664fbf69..e8b8e6c93cf0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -43,7 +43,6 @@ #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" 
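The MarkAllocated hunk above only changes the parameter type to MCPhysReg, but the word/bit arithmetic it reformats is worth spelling out: UsedRegs packs one bit per register into 32-bit words, indexed by Reg / 32 and Reg & 31. A self-contained sketch of that packed bitset (LLVM additionally sets the bit for every alias of Reg, which is omitted here):

#include <cstdint>
#include <iostream>
#include <vector>

struct RegBitSet {
  std::vector<uint32_t> Words;
  explicit RegBitSet(unsigned NumRegs) : Words((NumRegs + 31) / 32, 0) {}
  // Matches UsedRegs[Reg / 32] |= 1 << (Reg & 31) in CCState::MarkAllocated.
  void mark(unsigned Reg) { Words[Reg / 32] |= 1u << (Reg & 31); }
  bool isAllocated(unsigned Reg) const {
    return Words[Reg / 32] & (1u << (Reg & 31));
  }
};

int main() {
  RegBitSet Used(64);
  Used.mark(37); // word 1, bit 5
  std::cout << Used.isAllocated(37) << Used.isAllocated(5) << '\n'; // 10
}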
#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -61,7 +60,6 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsAArch64.h" -#include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" @@ -178,6 +176,17 @@ static cl::opt<bool> ProfileGuidedSectionPrefix( "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Use profile info to add section prefix for hot/cold functions")); +static cl::opt<bool> ProfileUnknownInSpecialSection( + "profile-unknown-in-special-section", cl::Hidden, cl::init(false), + cl::ZeroOrMore, + cl::desc("In profiling mode like sampleFDO, if a function doesn't have " + "profile, we cannot tell the function is cold for sure because " + "it may be a function newly added without ever being sampled. " + "With the flag enabled, compiler can put such profile unknown " + "functions into a special section, so runtime system can choose " + "to handle it in a different way than .text section, to save " + "RAM for example. ")); + static cl::opt<unsigned> FreqRatioToSkipMerge( "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " @@ -230,6 +239,15 @@ static cl::opt<bool> EnableICMP_EQToICMP_ST( "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion.")); +static cl::opt<bool> + VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false), + cl::desc("Enable BFI update verification for " + "CodeGenPrepare.")); + +static cl::opt<bool> OptimizePhiTypes( + "cgp-optimize-phi-types", cl::Hidden, cl::init(false), + cl::desc("Enable converting phi types in CodeGenPrepare")); + namespace { enum ExtType { @@ -327,6 +345,7 @@ class TypePromotionTransaction; // FIXME: When we can selectively preserve passes, preserve the domtree. 
AU.addRequired<ProfileSummaryInfoWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetPassConfig>(); AU.addRequired<TargetTransformInfoWrapperPass>(); AU.addRequired<LoopInfoWrapperPass>(); } @@ -368,12 +387,14 @@ class TypePromotionTransaction; bool optimizeInst(Instruction *I, bool &ModifiedDT); bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy, unsigned AddrSpace); + bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr); bool optimizeInlineAsmInst(CallInst *CS); bool optimizeCallInst(CallInst *CI, bool &ModifiedDT); bool optimizeExt(Instruction *&I); bool optimizeExtUses(Instruction *I); bool optimizeLoadExt(LoadInst *Load); bool optimizeShiftInst(BinaryOperator *BO); + bool optimizeFunnelShift(IntrinsicInst *Fsh); bool optimizeSelectInst(SelectInst *SI); bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI); bool optimizeSwitchInst(SwitchInst *SI); @@ -389,20 +410,25 @@ class TypePromotionTransaction; unsigned CreatedInstsCost = 0); bool mergeSExts(Function &F); bool splitLargeGEPOffsets(); + bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited, + SmallPtrSetImpl<Instruction *> &DeletedInstrs); + bool optimizePhiTypes(Function &F); bool performAddressTypePromotion( Instruction *&Inst, bool AllowPromotionWithoutCommonHeader, bool HasPromoted, TypePromotionTransaction &TPT, SmallVectorImpl<Instruction *> &SpeculativelyMovedExts); bool splitBranchCondition(Function &F, bool &ModifiedDT); - bool simplifyOffsetableRelocate(Instruction &I); + bool simplifyOffsetableRelocate(GCStatepointInst &I); bool tryToSinkFreeOperands(Instruction *I); - bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp, + bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, + Value *Arg1, CmpInst *Cmp, Intrinsic::ID IID); bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT); bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT); bool combineToUAddWithOverflow(CmpInst *Cmp, bool &ModifiedDT); + void verifyBFIUpdates(Function &F); }; } // end anonymous namespace @@ -428,12 +454,10 @@ bool CodeGenPrepare::runOnFunction(Function &F) { InsertedInsts.clear(); PromotedInsts.clear(); - if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) { - TM = &TPC->getTM<TargetMachine>(); - SubtargetInfo = TM->getSubtargetImpl(F); - TLI = SubtargetInfo->getTargetLowering(); - TRI = SubtargetInfo->getRegisterInfo(); - } + TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); + SubtargetInfo = TM->getSubtargetImpl(F); + TLI = SubtargetInfo->getTargetLowering(); + TRI = SubtargetInfo->getRegisterInfo(); TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); @@ -446,14 +470,16 @@ bool CodeGenPrepare::runOnFunction(Function &F) { F.setSectionPrefix(".hot"); else if (PSI->isFunctionColdInCallGraph(&F, *BFI)) F.setSectionPrefix(".unlikely"); + else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() && + PSI->isFunctionHotnessUnknown(F)) + F.setSectionPrefix(".unknown"); } /// This optimization identifies DIV instructions that can be /// profitably bypassed and carried out with a shorter, faster divide. 
- if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI && - TLI->isSlowDivBypassed()) { + if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) { const DenseMap<unsigned int, unsigned int> &BypassWidths = - TLI->getBypassSlowDivWidths(); + TLI->getBypassSlowDivWidths(); BasicBlock* BB = &*F.begin(); while (BB != nullptr) { // bypassSlowDivision may create new BBs, but we don't want to reapply the @@ -495,6 +521,10 @@ bool CodeGenPrepare::runOnFunction(Function &F) { MadeChange |= mergeSExts(F); if (!LargeOffsetGEPMap.empty()) MadeChange |= splitLargeGEPOffsets(); + MadeChange |= optimizePhiTypes(F); + + if (MadeChange) + eliminateFallThrough(F); // Really free removed instructions during promotion. for (Instruction *I : RemovedInsts) @@ -550,11 +580,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) { } if (!DisableGCOpts) { - SmallVector<Instruction *, 2> Statepoints; + SmallVector<GCStatepointInst *, 2> Statepoints; for (BasicBlock &BB : F) for (Instruction &I : BB) - if (isStatepoint(I)) - Statepoints.push_back(&I); + if (auto *SP = dyn_cast<GCStatepointInst>(&I)) + Statepoints.push_back(SP); for (auto &I : Statepoints) EverMadeChange |= simplifyOffsetableRelocate(*I); } @@ -563,9 +593,23 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // preparatory transforms. EverMadeChange |= placeDbgValues(F); +#ifndef NDEBUG + if (VerifyBFIUpdates) + verifyBFIUpdates(F); +#endif + return EverMadeChange; } +// Verify BFI has been updated correctly by recomputing BFI and comparing them. +void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) { + DominatorTree NewDT(F); + LoopInfo NewLI(NewDT); + BranchProbabilityInfo NewBPI(F, NewLI, TLInfo); + BlockFrequencyInfo NewBFI(F, NewBPI, NewLI); + NewBFI.verifyMatch(*BFI); +} + /// Merge basic blocks which are connected by a single edge, where one of the /// basic blocks has a single successor pointing to the other basic block, /// which has a single predecessor. @@ -749,7 +793,7 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, BlockFrequency PredFreq = BFI->getBlockFreq(Pred); BlockFrequency BBFreq = BFI->getBlockFreq(BB); - for (auto SameValueBB : SameIncomingValueBBs) + for (auto *SameValueBB : SameIncomingValueBBs) if (SameValueBB->getUniquePredecessor() == Pred && DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB)) BBFreq += BFI->getBlockFreq(SameValueBB); @@ -925,7 +969,7 @@ static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, SmallVectorImpl<Value *> &OffsetV) { for (unsigned i = 1; i < GEP->getNumOperands(); i++) { // Only accept small constant integer operands - auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i)); + auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i)); if (!Op || Op->getZExtValue() > 20) return false; } @@ -949,7 +993,7 @@ simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, // be skipped by optimization and we do not care about them. 
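The new verifyBFIUpdates above guards CodeGenPrepare's incremental BFI maintenance with a recompute-and-compare check: build the dominator tree, loop info, branch probabilities, and block frequencies from scratch, then assert the fresh result matches the incrementally updated one. The same verification pattern on a toy "analysis" (a running sum), purely to illustrate the technique rather than LLVM's analyses:

#include <cassert>
#include <numeric>
#include <vector>

struct SumAnalysis {
  std::vector<int> Data;
  long Cached = 0; // incrementally maintained result
  void push(int V) { Data.push_back(V); Cached += V; }
  void verify() const { // recompute from first principles and compare
    long Fresh = std::accumulate(Data.begin(), Data.end(), 0L);
    assert(Fresh == Cached && "incremental update diverged");
  }
};

int main() {
  SumAnalysis A;
  A.push(3); A.push(4);
  A.verify(); // passes; a buggy incremental update would trip the assert
}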
for (auto R = RelocatedBase->getParent()->getFirstInsertionPt(); &*R != RelocatedBase; ++R) - if (auto RI = dyn_cast<GCRelocateInst>(R)) + if (auto *RI = dyn_cast<GCRelocateInst>(R)) if (RI->getStatepoint() == RelocatedBase->getStatepoint()) if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) { RelocatedBase->moveBefore(RI); @@ -973,7 +1017,7 @@ simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, } Value *Base = ToReplace->getBasePtr(); - auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr()); + auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr()); if (!Derived || Derived->getPointerOperand() != Base) continue; @@ -1050,10 +1094,9 @@ simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, // %base' = gc.relocate(%tok, i32 4, i32 4) // %ptr' = gep %base' + 15 // %val = load %ptr' -bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) { +bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) { bool MadeChange = false; SmallVector<GCRelocateInst *, 2> AllRelocateCalls; - for (auto *U : I.users()) if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U)) // Collect all the relocate calls associated with a statepoint @@ -1187,6 +1230,7 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, } bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO, + Value *Arg0, Value *Arg1, CmpInst *Cmp, Intrinsic::ID IID) { if (BO->getParent() != Cmp->getParent()) { @@ -1204,8 +1248,6 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO, } // We allow matching the canonical IR (add X, C) back to (usubo X, -C). - Value *Arg0 = BO->getOperand(0); - Value *Arg1 = BO->getOperand(1); if (BO->getOpcode() == Instruction::Add && IID == Intrinsic::usub_with_overflow) { assert(isa<Constant>(Arg1) && "Unexpected input for usubo"); @@ -1215,7 +1257,9 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO, // Insert at the first instruction of the pair. Instruction *InsertPt = nullptr; for (Instruction &Iter : *Cmp->getParent()) { - if (&Iter == BO || &Iter == Cmp) { + // If BO is an XOR, it is not guaranteed that it comes after both inputs to + // the overflow intrinsic are defined. + if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) { InsertPt = &Iter; break; } @@ -1224,12 +1268,16 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO, IRBuilder<> Builder(InsertPt); Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1); - Value *Math = Builder.CreateExtractValue(MathOV, 0, "math"); + if (BO->getOpcode() != Instruction::Xor) { + Value *Math = Builder.CreateExtractValue(MathOV, 0, "math"); + BO->replaceAllUsesWith(Math); + } else + assert(BO->hasOneUse() && + "Patterns with XOr should use the BO only in the compare"); Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov"); - BO->replaceAllUsesWith(Math); Cmp->replaceAllUsesWith(OV); - BO->eraseFromParent(); Cmp->eraseFromParent(); + BO->eraseFromParent(); return true; } @@ -1269,12 +1317,17 @@ bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp, bool &ModifiedDT) { Value *A, *B; BinaryOperator *Add; - if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) + if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) { if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add)) return false; + // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases. 
+ A = Add->getOperand(0); + B = Add->getOperand(1); + } if (!TLI->shouldFormOverflowOp(ISD::UADDO, - TLI->getValueType(*DL, Add->getType()))) + TLI->getValueType(*DL, Add->getType()), + Add->hasNUsesOrMore(2))) return false; // We don't want to move around uses of condition values this late, so we @@ -1283,7 +1336,8 @@ bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp, if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse()) return false; - if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow)) + if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp, + Intrinsic::uadd_with_overflow)) return false; // Reset callers - do not crash by iterating over a dead instruction. @@ -1341,10 +1395,12 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp, return false; if (!TLI->shouldFormOverflowOp(ISD::USUBO, - TLI->getValueType(*DL, Sub->getType()))) + TLI->getValueType(*DL, Sub->getType()), + Sub->hasNUsesOrMore(2))) return false; - if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow)) + if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1), + Cmp, Intrinsic::usub_with_overflow)) return false; // Reset callers - do not crash by iterating over a dead instruction. @@ -1813,9 +1869,6 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros, const TargetLowering *TLI, const DataLayout *DL, bool &ModifiedDT) { - if (!TLI || !DL) - return false; - // If a zero input is undefined, it doesn't make sense to despeculate that. if (match(CountZeros->getOperand(1), m_One())) return false; @@ -1877,7 +1930,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { // Lower inline assembly if we can. // If we found an inline asm expession, and if the target knows how to // lower it to normal LLVM code, do so now. - if (TLI && isa<InlineAsm>(CI->getCalledValue())) { + if (CI->isInlineAsm()) { if (TLI->ExpandInlineAsm(CI)) { // Avoid invalidating the iterator. CurInstIterator = BB->begin(); @@ -1894,7 +1947,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { // Align the pointer arguments to this call if the target thinks it's a good // idea unsigned MinSize, PrefAlign; - if (TLI && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) { + if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) { for (auto &Arg : CI->arg_operands()) { // We want to align both objects whose address is used directly and // objects whose address is used in casts and GEPs, though it only makes @@ -1912,7 +1965,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { AllocaInst *AI; if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign && DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) - AI->setAlignment(MaybeAlign(PrefAlign)); + AI->setAlignment(Align(PrefAlign)); // Global variables can only be aligned if they are defined in this // object (i.e. 
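The combineToUAddWithOverflow changes above still rest on the same core identity: for unsigned values, "sum = a + b; ov = sum < a" is exactly the math/overflow pair produced by llvm.uadd.with.overflow. A standalone check of that equivalence, using the GCC/Clang __builtin_add_overflow builtin as a stand-in for the intrinsic:

#include <cassert>
#include <cstdint>
#include <iostream>

int main() {
  uint32_t A = 0xFFFFFFF0u, B = 0x20u;
  uint32_t Sum = A + B;   // wraps modulo 2^32
  bool CmpForm = Sum < A; // the icmp pattern CGP matches
  uint32_t Math;
  bool IntrinsicForm = __builtin_add_overflow(A, B, &Math); // uadd.with.overflow
  assert(CmpForm == IntrinsicForm && Sum == Math);
  std::cout << "overflow=" << IntrinsicForm << " sum=0x" << std::hex << Math
            << '\n'; // overflow=1 sum=0x10
}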
they are uniquely initialized in this object), and // over-aligning global variables that have an explicit section is @@ -1927,12 +1980,14 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { // If this is a memcpy (or similar) then we may be able to improve the // alignment if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) { - unsigned DestAlign = getKnownAlignment(MI->getDest(), *DL); - if (DestAlign > MI->getDestAlignment()) + Align DestAlign = getKnownAlignment(MI->getDest(), *DL); + MaybeAlign MIDestAlign = MI->getDestAlign(); + if (!MIDestAlign || DestAlign > *MIDestAlign) MI->setDestAlignment(DestAlign); if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { - unsigned SrcAlign = getKnownAlignment(MTI->getSource(), *DL); - if (SrcAlign > MTI->getSourceAlignment()) + MaybeAlign MTISrcAlign = MTI->getSourceAlign(); + Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL); + if (!MTISrcAlign || SrcAlign > *MTISrcAlign) MTI->setSourceAlignment(SrcAlign); } } @@ -1942,8 +1997,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { // cold block. This interacts with our handling for loads and stores to // ensure that we can fold all uses of a potential addressing computation // into their uses. TODO: generalize this to work over profiling data - bool OptForSize = OptSize || llvm::shouldOptimizeForSize(BB, PSI, BFI.get()); - if (!OptForSize && CI->hasFnAttr(Attribute::Cold)) + if (CI->hasFnAttr(Attribute::Cold) && + !OptSize && !llvm::shouldOptimizeForSize(BB, PSI, BFI.get())) for (auto &Arg : CI->arg_operands()) { if (!Arg->getType()->isPointerTy()) continue; @@ -1955,10 +2010,15 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { if (II) { switch (II->getIntrinsicID()) { default: break; + case Intrinsic::assume: { + II->eraseFromParent(); + return true; + } + case Intrinsic::experimental_widenable_condition: { // Give up on future widening oppurtunties so that we can fold away dead // paths and merge blocks before going into block-local instruction - // selection. + // selection. if (II->use_empty()) { II->eraseFromParent(); return true; @@ -2008,21 +2068,43 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { case Intrinsic::ctlz: // If counting zeros is expensive, try to avoid it. return despeculateCountZeros(II, TLI, DL, ModifiedDT); + case Intrinsic::fshl: + case Intrinsic::fshr: + return optimizeFunnelShift(II); case Intrinsic::dbg_value: return fixupDbgValue(II); + case Intrinsic::vscale: { + // If datalayout has no special restrictions on vector data layout, + // replace `llvm.vscale` by an equivalent constant expression + // to benefit from cheap constant propagation. 
+ Type *ScalableVectorTy = + VectorType::get(Type::getInt8Ty(II->getContext()), 1, true); + if (DL->getTypeAllocSize(ScalableVectorTy).getKnownMinSize() == 8) { + auto *Null = Constant::getNullValue(ScalableVectorTy->getPointerTo()); + auto *One = ConstantInt::getSigned(II->getType(), 1); + auto *CGep = + ConstantExpr::getGetElementPtr(ScalableVectorTy, Null, One); + II->replaceAllUsesWith(ConstantExpr::getPtrToInt(CGep, II->getType())); + II->eraseFromParent(); + return true; + } + break; } - - if (TLI) { - SmallVector<Value*, 2> PtrOps; - Type *AccessTy; - if (TLI->getAddrModeArguments(II, PtrOps, AccessTy)) - while (!PtrOps.empty()) { - Value *PtrVal = PtrOps.pop_back_val(); - unsigned AS = PtrVal->getType()->getPointerAddressSpace(); - if (optimizeMemoryInst(II, PtrVal, AccessTy, AS)) - return true; - } + case Intrinsic::masked_gather: + return optimizeGatherScatterInst(II, II->getArgOperand(0)); + case Intrinsic::masked_scatter: + return optimizeGatherScatterInst(II, II->getArgOperand(1)); } + + SmallVector<Value *, 2> PtrOps; + Type *AccessTy; + if (TLI->getAddrModeArguments(II, PtrOps, AccessTy)) + while (!PtrOps.empty()) { + Value *PtrVal = PtrOps.pop_back_val(); + unsigned AS = PtrVal->getType()->getPointerAddressSpace(); + if (optimizeMemoryInst(II, PtrVal, AccessTy, AS)) + return true; + } } // From here on out we're working with named functions. @@ -2033,7 +2115,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { // to fortified library functions (e.g. __memcpy_chk) that have the default // "don't know" as the objectsize. Anything else should be left alone. FortifiedLibCallSimplifier Simplifier(TLInfo, true); - if (Value *V = Simplifier.optimizeCall(CI)) { + IRBuilder<> Builder(CI); + if (Value *V = Simplifier.optimizeCall(CI, Builder)) { CI->replaceAllUsesWith(V); CI->eraseFromParent(); return true; @@ -2073,14 +2156,12 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { /// ret i32 %tmp2 /// @endcode bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT) { - if (!TLI) - return false; - ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator()); if (!RetI) return false; PHINode *PN = nullptr; + ExtractValueInst *EVI = nullptr; BitCastInst *BCI = nullptr; Value *V = RetI->getReturnValue(); if (V) { @@ -2088,6 +2169,14 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT if (BCI) V = BCI->getOperand(0); + EVI = dyn_cast<ExtractValueInst>(V); + if (EVI) { + V = EVI->getOperand(0); + if (!std::all_of(EVI->idx_begin(), EVI->idx_end(), + [](unsigned idx) { return idx == 0; })) + return false; + } + PN = dyn_cast<PHINode>(V); if (!PN) return false; @@ -2101,7 +2190,9 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT if (PN) { BasicBlock::iterator BI = BB->begin(); // Skip over debug and the bitcast. - do { ++BI; } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI); + do { + ++BI; + } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI || &*BI == EVI); if (&*BI != RetI) return false; } else { @@ -2157,6 +2248,11 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT // Duplicate the return into TailCallBB. 
(void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB); + assert(!VerifyBFIUpdates || + BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB)); + BFI->setBlockFreq( + BB, + (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)).getFrequency()); ModifiedDT = Changed = true; ++NumRetsDup; } @@ -2354,6 +2450,9 @@ namespace { /// This class provides transaction based operation on the IR. /// Every change made through this class is recorded in the internal state and /// can be undone (rollback) until commit is called. +/// CGP does not check if instructions could be speculatively executed when +/// moved. Preserving the original location would pessimize the debugging +/// experience, as well as negatively impact the quality of sample PGO. class TypePromotionTransaction { /// This represents the common interface of the individual transaction. /// Each class implements the logic for doing one specific modification on @@ -2516,6 +2615,7 @@ class TypePromotionTransaction { /// trunc Opnd to Ty. TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) { IRBuilder<> Builder(Opnd); + Builder.SetCurrentDebugLocation(DebugLoc()); Val = Builder.CreateTrunc(Opnd, Ty, "promoted"); LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n"); } @@ -2568,6 +2668,7 @@ class TypePromotionTransaction { ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) : TypePromotionAction(InsertPt) { IRBuilder<> Builder(InsertPt); + Builder.SetCurrentDebugLocation(DebugLoc()); Val = Builder.CreateZExt(Opnd, Ty, "promoted"); LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n"); } @@ -2721,8 +2822,9 @@ public: TypePromotionTransaction(SetOfInstrs &RemovedInsts) : RemovedInsts(RemovedInsts) {} - /// Advocate every changes made in that transaction. - void commit(); + /// Advocate every changes made in that transaction. Return true if any change + /// happen. + bool commit(); /// Undo all the changes made after the given point. void rollback(ConstRestorationPt Point); @@ -2828,11 +2930,13 @@ TypePromotionTransaction::getRestorationPoint() const { return !Actions.empty() ? Actions.back().get() : nullptr; } -void TypePromotionTransaction::commit() { +bool TypePromotionTransaction::commit() { for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt; ++It) (*It)->commit(); + bool Modified = !Actions.empty(); Actions.clear(); + return Modified; } void TypePromotionTransaction::rollback( @@ -3115,7 +3219,7 @@ public: SmallPtrSet<Value *, 32> Visited; WorkList.push_back(Val); while (!WorkList.empty()) { - auto P = WorkList.pop_back_val(); + auto *P = WorkList.pop_back_val(); if (!Visited.insert(P).second) continue; if (auto *PI = dyn_cast<Instruction>(P)) @@ -3164,13 +3268,13 @@ public: void destroyNewNodes(Type *CommonType) { // For safe erasing, replace the uses with dummy value first. - auto Dummy = UndefValue::get(CommonType); - for (auto I : AllPhiNodes) { + auto *Dummy = UndefValue::get(CommonType); + for (auto *I : AllPhiNodes) { I->replaceAllUsesWith(Dummy); I->eraseFromParent(); } AllPhiNodes.clear(); - for (auto I : AllSelectNodes) { + for (auto *I : AllSelectNodes) { I->replaceAllUsesWith(Dummy); I->eraseFromParent(); } @@ -3511,7 +3615,7 @@ private: // Must be a Phi node then. auto *PHI = cast<PHINode>(V); // Fill the Phi node with values from predecessors. 
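TypePromotionTransaction::commit now reports whether anything was actually committed, which later hunks use so optimizeMemoryInst can return "modified" even when it declines the main rewrite. The transaction itself is a log of undoable actions: commit makes them permanent, rollback replays the undo steps newest-first. A stripped-down sketch of that pattern (a toy log, not the real action classes):

#include <functional>
#include <iostream>
#include <vector>

class Transaction {
  std::vector<std::function<void()>> Undo; // rollback actions, newest last
public:
  void record(std::function<void()> U) { Undo.push_back(std::move(U)); }
  bool commit() {             // returns true if any change had been made
    bool Modified = !Undo.empty();
    Undo.clear();             // changes become permanent
    return Modified;
  }
  void rollback() {
    while (!Undo.empty()) { Undo.back()(); Undo.pop_back(); }
  }
};

int main() {
  int X = 1;
  Transaction T;
  X = 2;                      // make a change...
  T.record([&X] { X = 1; });  // ...and record how to undo it
  std::cout << T.commit() << ' ' << X << '\n'; // 1 2
}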
- for (auto B : predecessors(PHI->getParent())) { + for (auto *B : predecessors(PHI->getParent())) { Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B); assert(Map.find(PV) != Map.end() && "No predecessor Value!"); PHI->addIncoming(ST.Get(Map[PV]), B); @@ -3625,10 +3729,11 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale, // X*Scale + C*Scale to addr mode. ConstantInt *CI = nullptr; Value *AddLHS = nullptr; if (isa<Instruction>(ScaleReg) && // not a constant expr. - match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) { + match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) && + CI->getValue().isSignedIntN(64)) { TestAddrMode.InBounds = false; TestAddrMode.ScaledReg = AddLHS; - TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale; + TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale; // If this addressing mode is legal, commit it and remember that we folded // this instruction. @@ -3849,7 +3954,7 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst, // We can get through binary operator, if it is legal. In other words, the // binary operator must have a nuw or nsw flag. const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst); - if (BinOp && isa<OverflowingBinaryOperator>(BinOp) && + if (isa_and_nonnull<OverflowingBinaryOperator>(BinOp) && ((!IsSExt && BinOp->hasNoUnsignedWrap()) || (IsSExt && BinOp->hasNoSignedWrap()))) return true; @@ -4251,15 +4356,20 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); ConstantOffset += SL->getElementOffset(Idx); } else { - uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType()); - if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) { - const APInt &CVal = CI->getValue(); - if (CVal.getMinSignedBits() <= 64) { - ConstantOffset += CVal.getSExtValue() * TypeSize; - continue; + TypeSize TS = DL.getTypeAllocSize(GTI.getIndexedType()); + if (TS.isNonZero()) { + // The optimisations below currently only work for fixed offsets. + if (TS.isScalable()) + return false; + int64_t TypeSize = TS.getFixedSize(); + if (ConstantInt *CI = + dyn_cast<ConstantInt>(AddrInst->getOperand(i))) { + const APInt &CVal = CI->getValue(); + if (CVal.getMinSignedBits() <= 64) { + ConstantOffset += CVal.getSExtValue() * TypeSize; + continue; + } } - } - if (TypeSize) { // Scales of zero don't do anything. // We only allow one variable index at the moment. if (VariableOperand != -1) return false; @@ -4422,11 +4532,13 @@ bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) { TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) { - // Fold in immediates if legal for the target. - AddrMode.BaseOffs += CI->getSExtValue(); - if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) - return true; - AddrMode.BaseOffs -= CI->getSExtValue(); + if (CI->getValue().isSignedIntN(64)) { + // Fold in immediates if legal for the target. + AddrMode.BaseOffs += CI->getSExtValue(); + if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) + return true; + AddrMode.BaseOffs -= CI->getSExtValue(); + } } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) { // If this is a global variable, try to fold it into the addressing mode. 
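The isSignedIntN(64) guards added above in matchScaledValue and matchAddr exist because ExtAddrMode::BaseOffs is a signed 64-bit field, while an IR constant can be arbitrarily wide; folding a wider immediate (or a scaled product that overflows) would silently corrupt the addressing mode. A conceptual sketch of the guarded fold, using the GCC/Clang overflow builtins as a stand-in for the APInt width checks:

#include <cstdint>
#include <iostream>
#include <optional>

static std::optional<int64_t> foldOffset(int64_t BaseOffs, int64_t Imm,
                                         int64_t Scale) {
  int64_t Scaled, NewOffs;
  if (__builtin_mul_overflow(Imm, Scale, &Scaled))
    return std::nullopt; // product doesn't fit in 64 bits: don't fold
  if (__builtin_add_overflow(BaseOffs, Scaled, &NewOffs))
    return std::nullopt; // sum doesn't fit either
  return NewOffs;
}

int main() {
  std::cout << foldOffset(16, 4, 8).value_or(-1) << '\n';       // 48
  std::cout << foldOffset(1, INT64_MAX, 2).has_value() << '\n'; // 0 (rejected)
}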
if (!AddrMode.BaseGV) { @@ -4502,8 +4614,7 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetRegisterInfo &TRI) { const Function *F = CI->getFunction(); TargetLowering::AsmOperandInfoVector TargetConstraints = - TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI, - ImmutableCallSite(CI)); + TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI, *CI); for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; @@ -4581,14 +4692,16 @@ static bool FindAllMemoryUses( } if (CallInst *CI = dyn_cast<CallInst>(UserI)) { - // If this is a cold call, we can sink the addressing calculation into - // the cold path. See optimizeCallInst - bool OptForSize = OptSize || + if (CI->hasFnAttr(Attribute::Cold)) { + // If this is a cold call, we can sink the addressing calculation into + // the cold path. See optimizeCallInst + bool OptForSize = OptSize || llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); - if (!OptForSize && CI->hasFnAttr(Attribute::Cold)) - continue; + if (!OptForSize) + continue; + } - InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); + InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand()); if (!IA) return true; // If this is a memory operand, we're cool, otherwise bail out. @@ -4854,7 +4967,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, TPT.rollback(LastKnownGood); return false; } - TPT.commit(); + bool Modified = TPT.commit(); // Get the combined AddrMode (or the only AddrMode, if we only had one). ExtAddrMode AddrMode = AddrModes.getAddrMode(); @@ -4868,7 +4981,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, })) { LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"); - return false; + return Modified; } // Insert this computation right after this user. Since our caller is @@ -4891,7 +5004,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, if (SunkAddr->getType() != Addr->getType()) SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && - TM && SubtargetInfo->addrSinkUsingGEPs())) { + SubtargetInfo->addrSinkUsingGEPs())) { // By default, we use the GEP-based method when AA is used later. This // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode @@ -4909,7 +5022,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // We can't add more than one pointer together, nor can we scale a // pointer (both of which seem meaningless). 
if (ResultPtr || AddrMode.Scale != 1) - return false; + return Modified; ResultPtr = AddrMode.ScaledReg; AddrMode.Scale = 0; @@ -4926,12 +5039,12 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *ScaledRegTy = AddrMode.ScaledReg->getType(); if (cast<IntegerType>(IntPtrTy)->getBitWidth() > cast<IntegerType>(ScaledRegTy)->getBitWidth()) - return false; + return Modified; } if (AddrMode.BaseGV) { if (ResultPtr) - return false; + return Modified; ResultPtr = AddrMode.BaseGV; } @@ -4955,7 +5068,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) { SunkAddr = Constant::getNullValue(Addr->getType()); } else if (!ResultPtr) { - return false; + return Modified; } else { Type *I8PtrTy = Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); @@ -5040,7 +5153,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) || (AddrMode.BaseGV && DL->isNonIntegralPointerType(AddrMode.BaseGV->getType()))) - return false; + return Modified; LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"); @@ -5080,7 +5193,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Instruction *I = dyn_cast_or_null<Instruction>(Result); if (I && (Result != AddrMode.BaseReg)) I->eraseFromParent(); - return false; + return Modified; } if (AddrMode.Scale != 1) V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), @@ -5142,6 +5255,119 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, return true; } +/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find +/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can +/// only handle a 2 operand GEP in the same basic block or a splat constant +/// vector. The 2 operands to the GEP must have a scalar pointer and a vector +/// index. +/// +/// If the existing GEP has a vector base pointer that is splat, we can look +/// through the splat to find the scalar pointer. If we can't find a scalar +/// pointer there's nothing we can do. +/// +/// If we have a GEP with more than 2 indices where the middle indices are all +/// zeroes, we can replace it with 2 GEPs where the second has 2 operands. +/// +/// If the final index isn't a vector or is a splat, we can emit a scalar GEP +/// followed by a GEP with an all zeroes vector index. This will enable +/// SelectionDAGBuilder to use a the scalar GEP as the uniform base and have a +/// zero index. +bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst, + Value *Ptr) { + const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); + if (!GEP || !GEP->hasIndices()) + return false; + + // If the GEP and the gather/scatter aren't in the same BB, don't optimize. + // FIXME: We should support this by sinking the GEP. + if (MemoryInst->getParent() != GEP->getParent()) + return false; + + SmallVector<Value *, 2> Ops(GEP->op_begin(), GEP->op_end()); + + bool RewriteGEP = false; + + if (Ops[0]->getType()->isVectorTy()) { + Ops[0] = const_cast<Value *>(getSplatValue(Ops[0])); + if (!Ops[0]) + return false; + RewriteGEP = true; + } + + unsigned FinalIndex = Ops.size() - 1; + + // Ensure all but the last index is 0. + // FIXME: This isn't strictly required. All that's required is that they are + // all scalars or splats. 
+ for (unsigned i = 1; i < FinalIndex; ++i) { + auto *C = dyn_cast<Constant>(Ops[i]); + if (!C) + return false; + if (isa<VectorType>(C->getType())) + C = C->getSplatValue(); + auto *CI = dyn_cast_or_null<ConstantInt>(C); + if (!CI || !CI->isZero()) + return false; + // Scalarize the index if needed. + Ops[i] = CI; + } + + // Try to scalarize the final index. + if (Ops[FinalIndex]->getType()->isVectorTy()) { + if (Value *V = const_cast<Value *>(getSplatValue(Ops[FinalIndex]))) { + auto *C = dyn_cast<ConstantInt>(V); + // Don't scalarize all zeros vector. + if (!C || !C->isZero()) { + Ops[FinalIndex] = V; + RewriteGEP = true; + } + } + } + + // If we made any changes or the we have extra operands, we need to generate + // new instructions. + if (!RewriteGEP && Ops.size() == 2) + return false; + + unsigned NumElts = cast<FixedVectorType>(Ptr->getType())->getNumElements(); + + IRBuilder<> Builder(MemoryInst); + + Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType()); + + Value *NewAddr; + + // If the final index isn't a vector, emit a scalar GEP containing all ops + // and a vector GEP with all zeroes final index. + if (!Ops[FinalIndex]->getType()->isVectorTy()) { + NewAddr = Builder.CreateGEP(Ops[0], makeArrayRef(Ops).drop_front()); + auto *IndexTy = FixedVectorType::get(ScalarIndexTy, NumElts); + NewAddr = Builder.CreateGEP(NewAddr, Constant::getNullValue(IndexTy)); + } else { + Value *Base = Ops[0]; + Value *Index = Ops[FinalIndex]; + + // Create a scalar GEP if there are more than 2 operands. + if (Ops.size() != 2) { + // Replace the last index with 0. + Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy); + Base = Builder.CreateGEP(Base, makeArrayRef(Ops).drop_front()); + } + + // Now create the GEP with scalar pointer and vector index. + NewAddr = Builder.CreateGEP(Base, Index); + } + + MemoryInst->replaceUsesOfWith(Ptr, NewAddr); + + // If we have no uses, recursively delete the value and all dead instructions + // using it. + if (Ptr->use_empty()) + RecursivelyDeleteTriviallyDeadInstructions(Ptr, TLInfo); + + return true; +} + /// If there are any memory operands, use OptimizeMemoryInst to sink their /// address computing into the block when possible / profitable. bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { @@ -5150,7 +5376,7 @@ bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { const TargetRegisterInfo *TRI = TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo(); TargetLowering::AsmOperandInfoVector TargetConstraints = - TLI->ParseConstraints(*DL, TRI, CS); + TLI->ParseConstraints(*DL, TRI, *CS); unsigned ArgNo = 0; for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; @@ -5231,7 +5457,7 @@ bool CodeGenPrepare::tryToPromoteExts( bool Promoted = false; // Iterate over all the extensions to try to promote them. - for (auto I : Exts) { + for (auto *I : Exts) { // Early check if we directly have ext(load). if (isa<LoadInst>(I->getOperand(0))) { ProfitablyMovedExts.push_back(I); @@ -5242,7 +5468,7 @@ bool CodeGenPrepare::tryToPromoteExts( // this check inside the for loop is to catch the case where an extension // is directly fed by a load because in such case the extension can be moved // up without any promotion on its operands. - if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion) + if (!TLI->enableExtLdPromotion() || DisableExtLdPromotion) return false; // Get the action to perform the promotion. 
@@ -5292,7 +5518,7 @@ bool CodeGenPrepare::tryToPromoteExts( SmallVector<Instruction *, 2> NewlyMovedExts; (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost); bool NewPromoted = false; - for (auto ExtInst : NewlyMovedExts) { + for (auto *ExtInst : NewlyMovedExts) { Instruction *MovedExt = cast<Instruction>(ExtInst); Value *ExtOperand = MovedExt->getOperand(0); // If we have reached to a load, we need this extra profitability check @@ -5358,9 +5584,9 @@ bool CodeGenPrepare::mergeSExts(Function &F) { return Changed; } -// Spliting large data structures so that the GEPs accessing them can have +// Splitting large data structures so that the GEPs accessing them can have // smaller offsets so that they can be sunk to the same blocks as their users. -// For example, a large struct starting from %base is splitted into two parts +// For example, a large struct starting from %base is split into two parts // where the second part starts from %new_base. // // Before: @@ -5421,7 +5647,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() { int64_t BaseOffset = LargeOffsetGEPs.begin()->second; Value *NewBaseGEP = nullptr; - auto LargeOffsetGEP = LargeOffsetGEPs.begin(); + auto *LargeOffsetGEP = LargeOffsetGEPs.begin(); while (LargeOffsetGEP != LargeOffsetGEPs.end()) { GetElementPtrInst *GEP = LargeOffsetGEP->first; int64_t Offset = LargeOffsetGEP->second; @@ -5435,7 +5661,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() { GEP->getAddressSpace())) { // We need to create a new base if the offset to the current base is // too large to fit into the addressing mode. So, a very large struct - // may be splitted into several parts. + // may be split into several parts. BaseGEP = GEP; BaseOffset = Offset; NewBaseGEP = nullptr; @@ -5506,6 +5732,155 @@ bool CodeGenPrepare::splitLargeGEPOffsets() { return Changed; } +bool CodeGenPrepare::optimizePhiType( + PHINode *I, SmallPtrSetImpl<PHINode *> &Visited, + SmallPtrSetImpl<Instruction *> &DeletedInstrs) { + // We are looking for a collection on interconnected phi nodes that together + // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts + // are of the same type. Convert the whole set of nodes to the type of the + // bitcast. 
+ Type *PhiTy = I->getType(); + Type *ConvertTy = nullptr; + if (Visited.count(I) || + (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy())) + return false; + + SmallVector<Instruction *, 4> Worklist; + Worklist.push_back(cast<Instruction>(I)); + SmallPtrSet<PHINode *, 4> PhiNodes; + PhiNodes.insert(I); + Visited.insert(I); + SmallPtrSet<Instruction *, 4> Defs; + SmallPtrSet<Instruction *, 4> Uses; + + while (!Worklist.empty()) { + Instruction *II = Worklist.pop_back_val(); + + if (auto *Phi = dyn_cast<PHINode>(II)) { + // Handle Defs, which might also be PHI's + for (Value *V : Phi->incoming_values()) { + if (auto *OpPhi = dyn_cast<PHINode>(V)) { + if (!PhiNodes.count(OpPhi)) { + if (Visited.count(OpPhi)) + return false; + PhiNodes.insert(OpPhi); + Visited.insert(OpPhi); + Worklist.push_back(OpPhi); + } + } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) { + if (!Defs.count(OpLoad)) { + Defs.insert(OpLoad); + Worklist.push_back(OpLoad); + } + } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) { + if (!Defs.count(OpEx)) { + Defs.insert(OpEx); + Worklist.push_back(OpEx); + } + } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) { + if (!ConvertTy) + ConvertTy = OpBC->getOperand(0)->getType(); + if (OpBC->getOperand(0)->getType() != ConvertTy) + return false; + if (!Defs.count(OpBC)) { + Defs.insert(OpBC); + Worklist.push_back(OpBC); + } + } else if (!isa<UndefValue>(V)) + return false; + } + } + + // Handle uses which might also be phi's + for (User *V : II->users()) { + if (auto *OpPhi = dyn_cast<PHINode>(V)) { + if (!PhiNodes.count(OpPhi)) { + if (Visited.count(OpPhi)) + return false; + PhiNodes.insert(OpPhi); + Visited.insert(OpPhi); + Worklist.push_back(OpPhi); + } + } else if (auto *OpStore = dyn_cast<StoreInst>(V)) { + if (OpStore->getOperand(0) != II) + return false; + Uses.insert(OpStore); + } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) { + if (!ConvertTy) + ConvertTy = OpBC->getType(); + if (OpBC->getType() != ConvertTy) + return false; + Uses.insert(OpBC); + } else + return false; + } + } + + if (!ConvertTy || !TLI->shouldConvertPhiType(PhiTy, ConvertTy)) + return false; + + LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to " + << *ConvertTy << "\n"); + + // Create all the new phi nodes of the new type, and bitcast any loads to the + // correct type. + ValueToValueMap ValMap; + ValMap[UndefValue::get(PhiTy)] = UndefValue::get(ConvertTy); + for (Instruction *D : Defs) { + if (isa<BitCastInst>(D)) + ValMap[D] = D->getOperand(0); + else + ValMap[D] = + new BitCastInst(D, ConvertTy, D->getName() + ".bc", D->getNextNode()); + } + for (PHINode *Phi : PhiNodes) + ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(), + Phi->getName() + ".tc", Phi); + // Pipe together all the PhiNodes. + for (PHINode *Phi : PhiNodes) { + PHINode *NewPhi = cast<PHINode>(ValMap[Phi]); + for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++) + NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)], + Phi->getIncomingBlock(i)); + } + // And finally pipe up the stores and bitcasts + for (Instruction *U : Uses) { + if (isa<BitCastInst>(U)) { + DeletedInstrs.insert(U); + U->replaceAllUsesWith(ValMap[U->getOperand(0)]); + } else + U->setOperand(0, + new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc", U)); + } + + // Save the removed phis to be deleted later. 
+ for (PHINode *Phi : PhiNodes) + DeletedInstrs.insert(Phi); + return true; +} + +bool CodeGenPrepare::optimizePhiTypes(Function &F) { + if (!OptimizePhiTypes) + return false; + + bool Changed = false; + SmallPtrSet<PHINode *, 4> Visited; + SmallPtrSet<Instruction *, 4> DeletedInstrs; + + // Attempt to optimize all the phis in the functions to the correct type. + for (auto &BB : F) + for (auto &Phi : BB.phis()) + Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs); + + // Remove any old phi's that have been converted. + for (auto *I : DeletedInstrs) { + I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->eraseFromParent(); + } + + return Changed; +} + /// Return true, if an ext(load) can be formed from an extension in /// \p MovedExts. bool CodeGenPrepare::canFormExtLd( @@ -5567,11 +5942,6 @@ bool CodeGenPrepare::canFormExtLd( /// \p Inst[in/out] the extension may be modified during the process if some /// promotions apply. bool CodeGenPrepare::optimizeExt(Instruction *&Inst) { - // ExtLoad formation and address type promotion infrastructure requires TLI to - // be effective. - if (!TLI) - return false; - bool AllowPromotionWithoutCommonHeader = false; /// See if it is an interesting sext operations for the address type /// promotion before trying to promote it, e.g., the ones with the right @@ -5596,16 +5966,8 @@ bool CodeGenPrepare::optimizeExt(Instruction *&Inst) { if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) { assert(LI && ExtFedByLoad && "Expect a valid load and extension"); TPT.commit(); - // Move the extend into the same block as the load + // Move the extend into the same block as the load. ExtFedByLoad->moveAfter(LI); - // CGP does not check if the zext would be speculatively executed when moved - // to the same basic block as the load. Preserving its original location - // would pessimize the debugging experience, as well as negatively impact - // the quality of sample pgo. We don't want to use "line 0" as that has a - // size cost in the line-table section and logically the zext can be seen as - // part of the load. Therefore we conservatively reuse the same debug - // location for the load and the zext. - ExtFedByLoad->setDebugLoc(LI->getDebugLoc()); ++NumExtsMoved; Inst = ExtFedByLoad; return true; @@ -5633,7 +5995,7 @@ bool CodeGenPrepare::performAddressTypePromotion( bool Promoted = false; SmallPtrSet<Instruction *, 1> UnhandledExts; bool AllSeenFirst = true; - for (auto I : SpeculativelyMovedExts) { + for (auto *I : SpeculativelyMovedExts) { Value *HeadOfChain = I->getOperand(0); DenseMap<Value *, Instruction *>::iterator AlreadySeen = SeenChainsForSExt.find(HeadOfChain); @@ -5651,7 +6013,7 @@ bool CodeGenPrepare::performAddressTypePromotion( TPT.commit(); if (HasPromoted) Promoted = true; - for (auto I : SpeculativelyMovedExts) { + for (auto *I : SpeculativelyMovedExts) { Value *HeadOfChain = I->getOperand(0); SeenChainsForSExt[HeadOfChain] = nullptr; ValToSExtendedUses[HeadOfChain].push_back(I); @@ -5662,7 +6024,7 @@ bool CodeGenPrepare::performAddressTypePromotion( // This is the first chain visited from the header, keep the current chain // as unhandled. Defer to promote this until we encounter another SExt // chain derived from the same header. 
- for (auto I : SpeculativelyMovedExts) { + for (auto *I : SpeculativelyMovedExts) { Value *HeadOfChain = I->getOperand(0); SeenChainsForSExt[HeadOfChain] = Inst; } @@ -5670,7 +6032,7 @@ bool CodeGenPrepare::performAddressTypePromotion( } if (!AllSeenFirst && !UnhandledExts.empty()) - for (auto VisitedSExt : UnhandledExts) { + for (auto *VisitedSExt : UnhandledExts) { if (RemovedInsts.count(VisitedSExt)) continue; TypePromotionTransaction TPT(RemovedInsts); @@ -5681,7 +6043,7 @@ bool CodeGenPrepare::performAddressTypePromotion( TPT.commit(); if (HasPromoted) Promoted = true; - for (auto I : Chains) { + for (auto *I : Chains) { Value *HeadOfChain = I->getOperand(0); // Mark this as handled. SeenChainsForSExt[HeadOfChain] = nullptr; @@ -5701,7 +6063,7 @@ bool CodeGenPrepare::optimizeExtUses(Instruction *I) { return false; // Only do this xform if truncating is free. - if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType())) + if (!TLI->isTruncateFree(I->getType(), Src->getType())) return false; // Only safe to perform the optimization if the source is also defined in @@ -5947,7 +6309,8 @@ static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) { // If it's safe to speculatively execute, then it should not have side // effects; therefore, it's safe to sink and possibly *not* execute. return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) && - TTI->getUserCost(I) >= TargetTransformInfo::TCC_Expensive; + TTI->getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency) >= + TargetTransformInfo::TCC_Expensive; } /// Returns true if a SelectInst should be turned into an explicit branch. @@ -6044,13 +6407,47 @@ bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) { return true; } +bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) { + Intrinsic::ID Opcode = Fsh->getIntrinsicID(); + assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) && + "Expected a funnel shift"); + + // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper + // than general vector shifts, and (3) the shift amount is select-of-splatted + // values, hoist the funnel shifts before the select: + // fsh Op0, Op1, (select Cond, TVal, FVal) --> + // select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal) + // + // This is inverting a generic IR transform when we know that the cost of a + // general vector shift is more than the cost of 2 shift-by-scalars. + // We can't do this effectively in SDAG because we may not be able to + // determine if the select operands are splats from within a basic block. + Type *Ty = Fsh->getType(); + if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty)) + return false; + Value *Cond, *TVal, *FVal; + if (!match(Fsh->getOperand(2), + m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal))))) + return false; + if (!isSplatValue(TVal) || !isSplatValue(FVal)) + return false; + + IRBuilder<> Builder(Fsh); + Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1); + Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, { X, Y, TVal }); + Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, { X, Y, FVal }); + Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal); + Fsh->replaceAllUsesWith(NewSel); + Fsh->eraseFromParent(); + return true; +} + /// If we have a SelectInst that will likely profit from branch prediction, /// turn it into a branch. bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { // If branch conversion isn't desirable, exit early. 
- if (DisableSelectToBranch || - OptSize || llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get()) || - !TLI) + if (DisableSelectToBranch || OptSize || + llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())) return false; // Find all consecutive select instructions that share the same condition. @@ -6103,7 +6500,8 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { // Into: // start: // %cmp = cmp uge i32 %a, %b - // br i1 %cmp, label %select.true, label %select.false + // %cmp.frozen = freeze %cmp + // br i1 %cmp.frozen, label %select.true, label %select.false // select.true: // br label %select.end // select.false: @@ -6111,6 +6509,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { // select.end: // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ] // + // %cmp should be frozen, otherwise it may introduce undefined behavior. // In addition, we may sink instructions that produce %c or %d from // the entry block into the destination(s) of the new branch. // If the true or false blocks do not contain a sunken instruction, that @@ -6189,7 +6588,9 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { TT = TrueBlock; FT = FalseBlock; } - IRBuilder<>(SI).CreateCondBr(SI->getCondition(), TT, FT, SI); + IRBuilder<> IB(SI); + auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen"); + IB.CreateCondBr(CondFr, TT, FT, SI); SmallPtrSet<const Instruction *, 2> INS; INS.insert(ASI.begin(), ASI.end()); @@ -6216,79 +6617,54 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { return true; } -static bool isBroadcastShuffle(ShuffleVectorInst *SVI) { - SmallVector<int, 16> Mask(SVI->getShuffleMask()); - int SplatElem = -1; - for (unsigned i = 0; i < Mask.size(); ++i) { - if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem) - return false; - SplatElem = Mask[i]; - } - - return true; -} - -/// Some targets have expensive vector shifts if the lanes aren't all the same -/// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases -/// it's often worth sinking a shufflevector splat down to its use so that -/// codegen can spot all lanes are identical. +/// Some targets only accept certain types for splat inputs. For example a VDUP +/// in MVE takes a GPR (integer) register, and the instruction that incorporate +/// a VDUP (such as a VADD qd, qm, rm) also require a gpr register. bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { - BasicBlock *DefBB = SVI->getParent(); - - // Only do this xform if variable vector shifts are particularly expensive. - if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType())) + if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()), + m_Undef(), m_ZeroMask()))) return false; - - // We only expect better codegen by sinking a shuffle if we can recognise a - // constant splat. - if (!isBroadcastShuffle(SVI)) + Type *NewType = TLI->shouldConvertSplatType(SVI); + if (!NewType) return false; - // InsertedShuffles - Only insert a shuffle in each block once. - DenseMap<BasicBlock*, Instruction*> InsertedShuffles; - - bool MadeChange = false; - for (User *U : SVI->users()) { - Instruction *UI = cast<Instruction>(U); - - // Figure out which BB this ext is used in. - BasicBlock *UserBB = UI->getParent(); - if (UserBB == DefBB) continue; - - // For now only apply this when the splat is used by a shift instruction. - if (!UI->isShift()) continue; - - // Everything checks out, sink the shuffle if the user's block doesn't - // already have a copy. 
- Instruction *&InsertedShuffle = InsertedShuffles[UserBB]; - - if (!InsertedShuffle) { - BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); - assert(InsertPt != UserBB->end()); - InsertedShuffle = - new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1), - SVI->getOperand(2), "", &*InsertPt); - InsertedShuffle->setDebugLoc(SVI->getDebugLoc()); - } - - UI->replaceUsesOfWith(SVI, InsertedShuffle); - MadeChange = true; - } - - // If we removed all uses, nuke the shuffle. - if (SVI->use_empty()) { - SVI->eraseFromParent(); - MadeChange = true; - } + auto *SVIVecType = cast<FixedVectorType>(SVI->getType()); + assert(!NewType->isVectorTy() && "Expected a scalar type!"); + assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() && + "Expected a type of the same size!"); + auto *NewVecType = + FixedVectorType::get(NewType, SVIVecType->getNumElements()); + + // Create a bitcast (shuffle (insert (bitcast(..)))) + IRBuilder<> Builder(SVI->getContext()); + Builder.SetInsertPoint(SVI); + Value *BC1 = Builder.CreateBitCast( + cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType); + Value *Insert = Builder.CreateInsertElement(UndefValue::get(NewVecType), BC1, + (uint64_t)0); + Value *Shuffle = Builder.CreateShuffleVector( + Insert, UndefValue::get(NewVecType), SVI->getShuffleMask()); + Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType); + + SVI->replaceAllUsesWith(BC2); + RecursivelyDeleteTriviallyDeadInstructions(SVI); + + // Also hoist the bitcast up to its operand if it they are not in the same + // block. + if (auto *BCI = dyn_cast<Instruction>(BC1)) + if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0))) + if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) && + !Op->isTerminator() && !Op->isEHPad()) + BCI->moveAfter(Op); - return MadeChange; + return true; } bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) { // If the operands of I can be folded into a target instruction together with // I, duplicate and sink them. SmallVector<Use *, 4> OpsToSink; - if (!TLI || !TLI->shouldSinkOperands(I, OpsToSink)) + if (!TLI->shouldSinkOperands(I, OpsToSink)) return false; // OpsToSink can contain multiple uses in a use chain (e.g. @@ -6341,9 +6717,6 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) { } bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { - if (!TLI || !DL) - return false; - Value *Cond = SI->getCondition(); Type *OldType = Cond->getType(); LLVMContext &Context = Cond->getContext(); @@ -6495,6 +6868,8 @@ class VectorPromoteHelper { uint64_t ScalarCost = TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index); uint64_t VectorCost = StoreExtractCombineCost; + enum TargetTransformInfo::TargetCostKind CostKind = + TargetTransformInfo::TCK_RecipThroughput; for (const auto &Inst : InstsToBePromoted) { // Compute the cost. // By construction, all instructions being promoted are arithmetic ones. @@ -6510,8 +6885,9 @@ class VectorPromoteHelper { !IsArg0Constant ? 
TargetTransformInfo::OK_UniformConstantValue : TargetTransformInfo::OK_AnyValue; ScalarCost += TTI.getArithmeticInstrCost( - Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK); + Inst->getOpcode(), Inst->getType(), CostKind, Arg0OVK, Arg1OVK); VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType, + CostKind, Arg0OVK, Arg1OVK); } LLVM_DEBUG( @@ -6540,19 +6916,23 @@ class VectorPromoteHelper { UseSplat = true; } - unsigned End = getTransitionType()->getVectorNumElements(); + ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount(); if (UseSplat) - return ConstantVector::getSplat(End, Val); - - SmallVector<Constant *, 4> ConstVec; - UndefValue *UndefVal = UndefValue::get(Val->getType()); - for (unsigned Idx = 0; Idx != End; ++Idx) { - if (Idx == ExtractIdx) - ConstVec.push_back(Val); - else - ConstVec.push_back(UndefVal); - } - return ConstantVector::get(ConstVec); + return ConstantVector::getSplat(EC, Val); + + if (!EC.Scalable) { + SmallVector<Constant *, 4> ConstVec; + UndefValue *UndefVal = UndefValue::get(Val->getType()); + for (unsigned Idx = 0; Idx != EC.Min; ++Idx) { + if (Idx == ExtractIdx) + ConstVec.push_back(Val); + else + ConstVec.push_back(UndefVal); + } + return ConstantVector::get(ConstVec); + } else + llvm_unreachable( + "Generate scalable vector for non-splat is unimplemented"); } /// Check if promoting to a vector type an operand at \p OperandIdx @@ -6707,7 +7087,7 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { /// has this feature and this is profitable. bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) { unsigned CombineCost = std::numeric_limits<unsigned>::max(); - if (DisableStoreExtract || !TLI || + if (DisableStoreExtract || (!StressStoreExtract && !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(), Inst->getOperand(1), CombineCost))) @@ -6794,6 +7174,14 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI) { // Handle simple but common cases only. Type *StoreType = SI.getValueOperand()->getType(); + + // The code below assumes shifting a value by <number of bits>, + // whereas scalable vectors would have to be shifted by + // <2log(vscale) + number of bits> in order to store the + // low/high parts. Bailing out for now. + if (isa<ScalableVectorType>(StoreType)) + return false; + if (!DL.typeSizeEqualsStoreSize(StoreType) || DL.getTypeSizeInBits(StoreType) == 0) return false; @@ -6857,20 +7245,19 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, Value *Addr = Builder.CreateBitCast( SI.getOperand(1), SplitStoreType->getPointerTo(SI.getPointerAddressSpace())); + Align Alignment = SI.getAlign(); const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper); - if (IsOffsetStore) + if (IsOffsetStore) { Addr = Builder.CreateGEP( SplitStoreType, Addr, ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1)); - MaybeAlign Alignment(SI.getAlignment()); - if (IsOffsetStore && Alignment) { + // When splitting the store in half, naturally one half will retain the // alignment of the original wider store, regardless of whether it was // over-aligned or not, while the other will require adjustment. Alignment = commonAlignment(Alignment, HalfValBitSize / 8); } - Builder.CreateAlignedStore( - V, Addr, Alignment.hasValue() ? 
Alignment.getValue().value() : 0); + Builder.CreateAlignedStore(V, Addr, Alignment); }; CreateSplitStore(LValue, false); @@ -6959,7 +7346,8 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, return false; ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1)); // Check that GEPI is a cheap one. - if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType()) + if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(), + TargetTransformInfo::TCK_SizeAndLatency) > TargetTransformInfo::TCC_Basic) return false; Value *GEPIOp = GEPI->getOperand(0); @@ -7008,7 +7396,8 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, cast<ConstantInt>(UGEPI->getOperand(1))->getType()) return false; ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1)); - if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType()) + if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(), + TargetTransformInfo::TCK_SizeAndLatency) > TargetTransformInfo::TCC_Basic) return false; UGEPIs.push_back(UGEPI); @@ -7019,7 +7408,9 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, for (GetElementPtrInst *UGEPI : UGEPIs) { ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1)); APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue(); - unsigned ImmCost = TTI->getIntImmCost(NewIdx, GEPIIdx->getType()); + unsigned ImmCost = + TTI->getIntImmCost(NewIdx, GEPIIdx->getType(), + TargetTransformInfo::TCK_SizeAndLatency); if (ImmCost > TargetTransformInfo::TCC_Basic) return false; } @@ -7076,16 +7467,15 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { if (isa<Constant>(CI->getOperand(0))) return false; - if (TLI && OptimizeNoopCopyExpression(CI, *TLI, *DL)) + if (OptimizeNoopCopyExpression(CI, *TLI, *DL)) return true; if (isa<ZExtInst>(I) || isa<SExtInst>(I)) { /// Sink a zext or sext into its user blocks if the target type doesn't /// fit in one register - if (TLI && - TLI->getTypeAction(CI->getContext(), + if (TLI->getTypeAction(CI->getContext(), TLI->getValueType(*DL, CI->getType())) == - TargetLowering::TypeExpandInteger) { + TargetLowering::TypeExpandInteger) { return SinkCast(CI); } else { bool MadeChange = optimizeExt(I); @@ -7096,30 +7486,24 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { } if (auto *Cmp = dyn_cast<CmpInst>(I)) - if (TLI && optimizeCmp(Cmp, ModifiedDT)) + if (optimizeCmp(Cmp, ModifiedDT)) return true; if (LoadInst *LI = dyn_cast<LoadInst>(I)) { LI->setMetadata(LLVMContext::MD_invariant_group, nullptr); - if (TLI) { - bool Modified = optimizeLoadExt(LI); - unsigned AS = LI->getPointerAddressSpace(); - Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS); - return Modified; - } - return false; + bool Modified = optimizeLoadExt(LI); + unsigned AS = LI->getPointerAddressSpace(); + Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS); + return Modified; } if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - if (TLI && splitMergedValStore(*SI, *DL, *TLI)) + if (splitMergedValStore(*SI, *DL, *TLI)) return true; SI->setMetadata(LLVMContext::MD_invariant_group, nullptr); - if (TLI) { - unsigned AS = SI->getPointerAddressSpace(); - return optimizeMemoryInst(I, SI->getOperand(1), - SI->getOperand(0)->getType(), AS); - } - return false; + unsigned AS = SI->getPointerAddressSpace(); + return optimizeMemoryInst(I, SI->getOperand(1), + SI->getOperand(0)->getType(), AS); } if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) { @@ -7136,15 +7520,14 
@@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I); - if (BinOp && (BinOp->getOpcode() == Instruction::And) && - EnableAndCmpSinking && TLI) + if (BinOp && (BinOp->getOpcode() == Instruction::And) && EnableAndCmpSinking) return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts); // TODO: Move this into the switch on opcode - it handles shifts already. if (BinOp && (BinOp->getOpcode() == Instruction::AShr || BinOp->getOpcode() == Instruction::LShr)) { ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1)); - if (TLI && CI && TLI->hasExtractBitsInsn()) + if (CI && TLI->hasExtractBitsInsn()) if (OptimizeExtractBits(BinOp, CI, *TLI, *DL)) return true; } @@ -7167,6 +7550,35 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { return false; } + if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) { + // freeze(icmp a, const)) -> icmp (freeze a), const + // This helps generate efficient conditional jumps. + Instruction *CmpI = nullptr; + if (ICmpInst *II = dyn_cast<ICmpInst>(FI->getOperand(0))) + CmpI = II; + else if (FCmpInst *F = dyn_cast<FCmpInst>(FI->getOperand(0))) + CmpI = F->getFastMathFlags().none() ? F : nullptr; + + if (CmpI && CmpI->hasOneUse()) { + auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1); + bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) || + isa<ConstantPointerNull>(Op0); + bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) || + isa<ConstantPointerNull>(Op1); + if (Const0 || Const1) { + if (!Const0 || !Const1) { + auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI); + F->takeName(FI); + CmpI->setOperand(Const0 ? 1 : 0, F); + } + FI->replaceAllUsesWith(CmpI); + FI->eraseFromParent(); + return true; + } + } + return false; + } + if (tryToSinkFreeOperands(I)) return true; @@ -7223,7 +7635,7 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) { } bool MadeBitReverse = true; - while (TLI && MadeBitReverse) { + while (MadeBitReverse) { MadeBitReverse = false; for (auto &I : reverse(BB)) { if (makeBitReverse(I, *DL, *TLI)) { @@ -7335,7 +7747,7 @@ static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { /// FIXME: Remove the (equivalent?) implementation in SelectionDAG. /// bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) { - if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive()) + if (!TM->Options.EnableFastISel || TLI->isJumpExpensive()) return false; bool MadeChange = false; @@ -7376,7 +7788,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) { LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump()); // Create a new BB. - auto TmpBB = + auto *TmpBB = BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split", BB.getParent(), BB.getNextNode()); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp new file mode 100644 index 000000000000..12dadf97e02c --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp @@ -0,0 +1,634 @@ +//===-- CommandFlags.cpp - Command Line Flags Interface ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains codegen-specific flags that are shared between different +// command line tools. The tools "llc" and "opt" both use this file to prevent +// flag duplication. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/CommandFlags.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Host.h" + +using namespace llvm; + +#define CGOPT(TY, NAME) \ + static cl::opt<TY> *NAME##View; \ + TY codegen::get##NAME() { \ + assert(NAME##View && "RegisterCodeGenFlags not created."); \ + return *NAME##View; \ + } + +#define CGLIST(TY, NAME) \ + static cl::list<TY> *NAME##View; \ + std::vector<TY> codegen::get##NAME() { \ + assert(NAME##View && "RegisterCodeGenFlags not created."); \ + return *NAME##View; \ + } + +#define CGOPT_EXP(TY, NAME) \ + CGOPT(TY, NAME) \ + Optional<TY> codegen::getExplicit##NAME() { \ + if (NAME##View->getNumOccurrences()) { \ + TY res = *NAME##View; \ + return res; \ + } \ + return None; \ + } + +CGOPT(std::string, MArch) +CGOPT(std::string, MCPU) +CGLIST(std::string, MAttrs) +CGOPT_EXP(Reloc::Model, RelocModel) +CGOPT(ThreadModel::Model, ThreadModel) +CGOPT_EXP(CodeModel::Model, CodeModel) +CGOPT(ExceptionHandling, ExceptionModel) +CGOPT_EXP(CodeGenFileType, FileType) +CGOPT(FramePointer::FP, FramePointerUsage) +CGOPT(bool, EnableUnsafeFPMath) +CGOPT(bool, EnableNoInfsFPMath) +CGOPT(bool, EnableNoNaNsFPMath) +CGOPT(bool, EnableNoSignedZerosFPMath) +CGOPT(bool, EnableNoTrappingFPMath) +CGOPT(DenormalMode::DenormalModeKind, DenormalFPMath) +CGOPT(DenormalMode::DenormalModeKind, DenormalFP32Math) +CGOPT(bool, EnableHonorSignDependentRoundingFPMath) +CGOPT(FloatABI::ABIType, FloatABIForCalls) +CGOPT(FPOpFusion::FPOpFusionMode, FuseFPOps) +CGOPT(bool, DontPlaceZerosInBSS) +CGOPT(bool, EnableGuaranteedTailCallOpt) +CGOPT(bool, DisableTailCalls) +CGOPT(bool, StackSymbolOrdering) +CGOPT(unsigned, OverrideStackAlignment) +CGOPT(bool, StackRealign) +CGOPT(std::string, TrapFuncName) +CGOPT(bool, UseCtors) +CGOPT(bool, RelaxELFRelocations) +CGOPT_EXP(bool, DataSections) +CGOPT_EXP(bool, FunctionSections) +CGOPT(std::string, BBSections) +CGOPT(unsigned, TLSSize) +CGOPT(bool, EmulatedTLS) +CGOPT(bool, UniqueSectionNames) +CGOPT(bool, UniqueBasicBlockSectionNames) +CGOPT(EABI, EABIVersion) +CGOPT(DebuggerKind, DebuggerTuningOpt) +CGOPT(bool, EnableStackSizeSection) +CGOPT(bool, EnableAddrsig) +CGOPT(bool, EmitCallSiteInfo) +CGOPT(bool, EnableDebugEntryValues) +CGOPT(bool, ForceDwarfFrameSection) +CGOPT(bool, XRayOmitFunctionIndex) + +codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { +#define CGBINDOPT(NAME) \ + do { \ + NAME##View = std::addressof(NAME); \ + } while (0) + + static cl::opt<std::string> MArch( + "march", cl::desc("Architecture to generate code for (see --version)")); + CGBINDOPT(MArch); + + static cl::opt<std::string> MCPU( + "mcpu", cl::desc("Target a specific cpu type (-mcpu=help for details)"), + cl::value_desc("cpu-name"), cl::init("")); + CGBINDOPT(MCPU); + + static cl::list<std::string> MAttrs( + "mattr", cl::CommaSeparated, + cl::desc("Target specific attributes (-mattr=help for details)"), + cl::value_desc("a1,+a2,-a3,...")); + CGBINDOPT(MAttrs); + + static cl::opt<Reloc::Model> RelocModel( + "relocation-model", cl::desc("Choose relocation model"), + cl::values( + 
clEnumValN(Reloc::Static, "static", "Non-relocatable code"), + clEnumValN(Reloc::PIC_, "pic", + "Fully relocatable, position independent code"), + clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic", + "Relocatable external references, non-relocatable code"), + clEnumValN( + Reloc::ROPI, "ropi", + "Code and read-only data relocatable, accessed PC-relative"), + clEnumValN( + Reloc::RWPI, "rwpi", + "Read-write data relocatable, accessed relative to static base"), + clEnumValN(Reloc::ROPI_RWPI, "ropi-rwpi", + "Combination of ropi and rwpi"))); + CGBINDOPT(RelocModel); + + static cl::opt<ThreadModel::Model> ThreadModel( + "thread-model", cl::desc("Choose threading model"), + cl::init(ThreadModel::POSIX), + cl::values( + clEnumValN(ThreadModel::POSIX, "posix", "POSIX thread model"), + clEnumValN(ThreadModel::Single, "single", "Single thread model"))); + CGBINDOPT(ThreadModel); + + static cl::opt<CodeModel::Model> CodeModel( + "code-model", cl::desc("Choose code model"), + cl::values(clEnumValN(CodeModel::Tiny, "tiny", "Tiny code model"), + clEnumValN(CodeModel::Small, "small", "Small code model"), + clEnumValN(CodeModel::Kernel, "kernel", "Kernel code model"), + clEnumValN(CodeModel::Medium, "medium", "Medium code model"), + clEnumValN(CodeModel::Large, "large", "Large code model"))); + CGBINDOPT(CodeModel); + + static cl::opt<ExceptionHandling> ExceptionModel( + "exception-model", cl::desc("exception model"), + cl::init(ExceptionHandling::None), + cl::values( + clEnumValN(ExceptionHandling::None, "default", + "default exception handling model"), + clEnumValN(ExceptionHandling::DwarfCFI, "dwarf", + "DWARF-like CFI based exception handling"), + clEnumValN(ExceptionHandling::SjLj, "sjlj", + "SjLj exception handling"), + clEnumValN(ExceptionHandling::ARM, "arm", "ARM EHABI exceptions"), + clEnumValN(ExceptionHandling::WinEH, "wineh", + "Windows exception model"), + clEnumValN(ExceptionHandling::Wasm, "wasm", + "WebAssembly exception handling"))); + CGBINDOPT(ExceptionModel); + + static cl::opt<CodeGenFileType> FileType( + "filetype", cl::init(CGFT_AssemblyFile), + cl::desc( + "Choose a file type (not all types are supported by all targets):"), + cl::values( + clEnumValN(CGFT_AssemblyFile, "asm", "Emit an assembly ('.s') file"), + clEnumValN(CGFT_ObjectFile, "obj", + "Emit a native object ('.o') file"), + clEnumValN(CGFT_Null, "null", + "Emit nothing, for performance testing"))); + CGBINDOPT(FileType); + + static cl::opt<FramePointer::FP> FramePointerUsage( + "frame-pointer", + cl::desc("Specify frame pointer elimination optimization"), + cl::init(FramePointer::None), + cl::values( + clEnumValN(FramePointer::All, "all", + "Disable frame pointer elimination"), + clEnumValN(FramePointer::NonLeaf, "non-leaf", + "Disable frame pointer elimination for non-leaf frame"), + clEnumValN(FramePointer::None, "none", + "Enable frame pointer elimination"))); + CGBINDOPT(FramePointerUsage); + + static cl::opt<bool> EnableUnsafeFPMath( + "enable-unsafe-fp-math", + cl::desc("Enable optimizations that may decrease FP precision"), + cl::init(false)); + CGBINDOPT(EnableUnsafeFPMath); + + static cl::opt<bool> EnableNoInfsFPMath( + "enable-no-infs-fp-math", + cl::desc("Enable FP math optimizations that assume no +-Infs"), + cl::init(false)); + CGBINDOPT(EnableNoInfsFPMath); + + static cl::opt<bool> EnableNoNaNsFPMath( + "enable-no-nans-fp-math", + cl::desc("Enable FP math optimizations that assume no NaNs"), + cl::init(false)); + CGBINDOPT(EnableNoNaNsFPMath); + + static cl::opt<bool> EnableNoSignedZerosFPMath( + 
"enable-no-signed-zeros-fp-math", + cl::desc("Enable FP math optimizations that assume " + "the sign of 0 is insignificant"), + cl::init(false)); + CGBINDOPT(EnableNoSignedZerosFPMath); + + static cl::opt<bool> EnableNoTrappingFPMath( + "enable-no-trapping-fp-math", + cl::desc("Enable setting the FP exceptions build " + "attribute not to use exceptions"), + cl::init(false)); + CGBINDOPT(EnableNoTrappingFPMath); + + static const auto DenormFlagEnumOptions = + cl::values(clEnumValN(DenormalMode::IEEE, "ieee", + "IEEE 754 denormal numbers"), + clEnumValN(DenormalMode::PreserveSign, "preserve-sign", + "the sign of a flushed-to-zero number is preserved " + "in the sign of 0"), + clEnumValN(DenormalMode::PositiveZero, "positive-zero", + "denormals are flushed to positive zero")); + + // FIXME: Doesn't have way to specify separate input and output modes. + static cl::opt<DenormalMode::DenormalModeKind> DenormalFPMath( + "denormal-fp-math", + cl::desc("Select which denormal numbers the code is permitted to require"), + cl::init(DenormalMode::IEEE), + DenormFlagEnumOptions); + CGBINDOPT(DenormalFPMath); + + static cl::opt<DenormalMode::DenormalModeKind> DenormalFP32Math( + "denormal-fp-math-f32", + cl::desc("Select which denormal numbers the code is permitted to require for float"), + cl::init(DenormalMode::Invalid), + DenormFlagEnumOptions); + CGBINDOPT(DenormalFP32Math); + + static cl::opt<bool> EnableHonorSignDependentRoundingFPMath( + "enable-sign-dependent-rounding-fp-math", cl::Hidden, + cl::desc("Force codegen to assume rounding mode can change dynamically"), + cl::init(false)); + CGBINDOPT(EnableHonorSignDependentRoundingFPMath); + + static cl::opt<FloatABI::ABIType> FloatABIForCalls( + "float-abi", cl::desc("Choose float ABI type"), + cl::init(FloatABI::Default), + cl::values(clEnumValN(FloatABI::Default, "default", + "Target default float ABI type"), + clEnumValN(FloatABI::Soft, "soft", + "Soft float ABI (implied by -soft-float)"), + clEnumValN(FloatABI::Hard, "hard", + "Hard float ABI (uses FP registers)"))); + CGBINDOPT(FloatABIForCalls); + + static cl::opt<FPOpFusion::FPOpFusionMode> FuseFPOps( + "fp-contract", cl::desc("Enable aggressive formation of fused FP ops"), + cl::init(FPOpFusion::Standard), + cl::values( + clEnumValN(FPOpFusion::Fast, "fast", + "Fuse FP ops whenever profitable"), + clEnumValN(FPOpFusion::Standard, "on", "Only fuse 'blessed' FP ops."), + clEnumValN(FPOpFusion::Strict, "off", + "Only fuse FP ops when the result won't be affected."))); + CGBINDOPT(FuseFPOps); + + static cl::opt<bool> DontPlaceZerosInBSS( + "nozero-initialized-in-bss", + cl::desc("Don't place zero-initialized symbols into bss section"), + cl::init(false)); + CGBINDOPT(DontPlaceZerosInBSS); + + static cl::opt<bool> EnableGuaranteedTailCallOpt( + "tailcallopt", + cl::desc( + "Turn fastcc calls into tail calls by (potentially) changing ABI."), + cl::init(false)); + CGBINDOPT(EnableGuaranteedTailCallOpt); + + static cl::opt<bool> DisableTailCalls( + "disable-tail-calls", cl::desc("Never emit tail calls"), cl::init(false)); + CGBINDOPT(DisableTailCalls); + + static cl::opt<bool> StackSymbolOrdering( + "stack-symbol-ordering", cl::desc("Order local stack symbols."), + cl::init(true)); + CGBINDOPT(StackSymbolOrdering); + + static cl::opt<unsigned> OverrideStackAlignment( + "stack-alignment", cl::desc("Override default stack alignment"), + cl::init(0)); + CGBINDOPT(OverrideStackAlignment); + + static cl::opt<bool> StackRealign( + "stackrealign", + cl::desc("Force align the stack to the minimum 
alignment"), + cl::init(false)); + CGBINDOPT(StackRealign); + + static cl::opt<std::string> TrapFuncName( + "trap-func", cl::Hidden, + cl::desc("Emit a call to trap function rather than a trap instruction"), + cl::init("")); + CGBINDOPT(TrapFuncName); + + static cl::opt<bool> UseCtors("use-ctors", + cl::desc("Use .ctors instead of .init_array."), + cl::init(false)); + CGBINDOPT(UseCtors); + + static cl::opt<bool> RelaxELFRelocations( + "relax-elf-relocations", + cl::desc( + "Emit GOTPCRELX/REX_GOTPCRELX instead of GOTPCREL on x86-64 ELF"), + cl::init(false)); + CGBINDOPT(RelaxELFRelocations); + + static cl::opt<bool> DataSections( + "data-sections", cl::desc("Emit data into separate sections"), + cl::init(false)); + CGBINDOPT(DataSections); + + static cl::opt<bool> FunctionSections( + "function-sections", cl::desc("Emit functions into separate sections"), + cl::init(false)); + CGBINDOPT(FunctionSections); + + static cl::opt<std::string> BBSections( + "basicblock-sections", + cl::desc("Emit basic blocks into separate sections"), + cl::value_desc("all | <function list (file)> | labels | none"), + cl::init("none")); + CGBINDOPT(BBSections); + + static cl::opt<unsigned> TLSSize( + "tls-size", cl::desc("Bit size of immediate TLS offsets"), cl::init(0)); + CGBINDOPT(TLSSize); + + static cl::opt<bool> EmulatedTLS( + "emulated-tls", cl::desc("Use emulated TLS model"), cl::init(false)); + CGBINDOPT(EmulatedTLS); + + static cl::opt<bool> UniqueSectionNames( + "unique-section-names", cl::desc("Give unique names to every section"), + cl::init(true)); + CGBINDOPT(UniqueSectionNames); + + static cl::opt<bool> UniqueBasicBlockSectionNames( + "unique-bb-section-names", + cl::desc("Give unique names to every basic block section"), + cl::init(false)); + CGBINDOPT(UniqueBasicBlockSectionNames); + + static cl::opt<EABI> EABIVersion( + "meabi", cl::desc("Set EABI type (default depends on triple):"), + cl::init(EABI::Default), + cl::values( + clEnumValN(EABI::Default, "default", "Triple default EABI version"), + clEnumValN(EABI::EABI4, "4", "EABI version 4"), + clEnumValN(EABI::EABI5, "5", "EABI version 5"), + clEnumValN(EABI::GNU, "gnu", "EABI GNU"))); + CGBINDOPT(EABIVersion); + + static cl::opt<DebuggerKind> DebuggerTuningOpt( + "debugger-tune", cl::desc("Tune debug info for a particular debugger"), + cl::init(DebuggerKind::Default), + cl::values( + clEnumValN(DebuggerKind::GDB, "gdb", "gdb"), + clEnumValN(DebuggerKind::LLDB, "lldb", "lldb"), + clEnumValN(DebuggerKind::SCE, "sce", "SCE targets (e.g. 
PS4)"))); + CGBINDOPT(DebuggerTuningOpt); + + static cl::opt<bool> EnableStackSizeSection( + "stack-size-section", + cl::desc("Emit a section containing stack size metadata"), + cl::init(false)); + CGBINDOPT(EnableStackSizeSection); + + static cl::opt<bool> EnableAddrsig( + "addrsig", cl::desc("Emit an address-significance table"), + cl::init(false)); + CGBINDOPT(EnableAddrsig); + + static cl::opt<bool> EmitCallSiteInfo( + "emit-call-site-info", + cl::desc( + "Emit call site debug information, if debug information is enabled."), + cl::init(false)); + CGBINDOPT(EmitCallSiteInfo); + + static cl::opt<bool> EnableDebugEntryValues( + "debug-entry-values", + cl::desc("Enable debug info for the debug entry values."), + cl::init(false)); + CGBINDOPT(EnableDebugEntryValues); + + static cl::opt<bool> ForceDwarfFrameSection( + "force-dwarf-frame-section", + cl::desc("Always emit a debug frame section."), cl::init(false)); + CGBINDOPT(ForceDwarfFrameSection); + + static cl::opt<bool> XRayOmitFunctionIndex( + "no-xray-index", cl::desc("Don't emit xray_fn_idx section"), + cl::init(false)); + CGBINDOPT(XRayOmitFunctionIndex); + +#undef CGBINDOPT + + mc::RegisterMCTargetOptionsFlags(); +} + +llvm::BasicBlockSection +codegen::getBBSectionsMode(llvm::TargetOptions &Options) { + if (getBBSections() == "all") + return BasicBlockSection::All; + else if (getBBSections() == "labels") + return BasicBlockSection::Labels; + else if (getBBSections() == "none") + return BasicBlockSection::None; + else { + ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = + MemoryBuffer::getFile(getBBSections()); + if (!MBOrErr) { + errs() << "Error loading basic block sections function list file: " + << MBOrErr.getError().message() << "\n"; + } else { + Options.BBSectionsFuncListBuf = std::move(*MBOrErr); + } + return BasicBlockSection::List; + } +} + +// Common utility function tightly tied to the options listed here. Initializes +// a TargetOptions object with CodeGen flags and returns it. 
+TargetOptions codegen::InitTargetOptionsFromCodeGenFlags() { + TargetOptions Options; + Options.AllowFPOpFusion = getFuseFPOps(); + Options.UnsafeFPMath = getEnableUnsafeFPMath(); + Options.NoInfsFPMath = getEnableNoInfsFPMath(); + Options.NoNaNsFPMath = getEnableNoNaNsFPMath(); + Options.NoSignedZerosFPMath = getEnableNoSignedZerosFPMath(); + Options.NoTrappingFPMath = getEnableNoTrappingFPMath(); + + DenormalMode::DenormalModeKind DenormKind = getDenormalFPMath(); + + // FIXME: Should have separate input and output flags + Options.setFPDenormalMode(DenormalMode(DenormKind, DenormKind)); + + Options.HonorSignDependentRoundingFPMathOption = + getEnableHonorSignDependentRoundingFPMath(); + if (getFloatABIForCalls() != FloatABI::Default) + Options.FloatABIType = getFloatABIForCalls(); + Options.NoZerosInBSS = getDontPlaceZerosInBSS(); + Options.GuaranteedTailCallOpt = getEnableGuaranteedTailCallOpt(); + Options.StackAlignmentOverride = getOverrideStackAlignment(); + Options.StackSymbolOrdering = getStackSymbolOrdering(); + Options.UseInitArray = !getUseCtors(); + Options.RelaxELFRelocations = getRelaxELFRelocations(); + Options.DataSections = getDataSections(); + Options.FunctionSections = getFunctionSections(); + Options.BBSections = getBBSectionsMode(Options); + Options.UniqueSectionNames = getUniqueSectionNames(); + Options.UniqueBasicBlockSectionNames = getUniqueBasicBlockSectionNames(); + Options.TLSSize = getTLSSize(); + Options.EmulatedTLS = getEmulatedTLS(); + Options.ExplicitEmulatedTLS = EmulatedTLSView->getNumOccurrences() > 0; + Options.ExceptionModel = getExceptionModel(); + Options.EmitStackSizeSection = getEnableStackSizeSection(); + Options.EmitAddrsig = getEnableAddrsig(); + Options.EmitCallSiteInfo = getEmitCallSiteInfo(); + Options.EnableDebugEntryValues = getEnableDebugEntryValues(); + Options.ForceDwarfFrameSection = getForceDwarfFrameSection(); + Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex(); + + Options.MCOptions = mc::InitMCTargetOptionsFromFlags(); + + Options.ThreadModel = getThreadModel(); + Options.EABIVersion = getEABIVersion(); + Options.DebuggerTuning = getDebuggerTuningOpt(); + + return Options; +} + +std::string codegen::getCPUStr() { + // If user asked for the 'native' CPU, autodetect here. If autodection fails, + // this will set the CPU to an empty string which tells the target to + // pick a basic default. + if (getMCPU() == "native") + return std::string(sys::getHostCPUName()); + + return getMCPU(); +} + +std::string codegen::getFeaturesStr() { + SubtargetFeatures Features; + + // If user asked for the 'native' CPU, we need to autodetect features. + // This is necessary for x86 where the CPU might not support all the + // features the autodetected CPU name lists in the target. For example, + // not all Sandybridge processors support AVX. + if (getMCPU() == "native") { + StringMap<bool> HostFeatures; + if (sys::getHostCPUFeatures(HostFeatures)) + for (auto &F : HostFeatures) + Features.AddFeature(F.first(), F.second); + } + + for (auto const &MAttr : getMAttrs()) + Features.AddFeature(MAttr); + + return Features.getString(); +} + +std::vector<std::string> codegen::getFeatureList() { + SubtargetFeatures Features; + + // If user asked for the 'native' CPU, we need to autodetect features. + // This is necessary for x86 where the CPU might not support all the + // features the autodetected CPU name lists in the target. For example, + // not all Sandybridge processors support AVX. 
+ if (getMCPU() == "native") { + StringMap<bool> HostFeatures; + if (sys::getHostCPUFeatures(HostFeatures)) + for (auto &F : HostFeatures) + Features.AddFeature(F.first(), F.second); + } + + for (auto const &MAttr : getMAttrs()) + Features.AddFeature(MAttr); + + return Features.getFeatures(); +} + +void codegen::renderBoolStringAttr(AttrBuilder &B, StringRef Name, bool Val) { + B.addAttribute(Name, Val ? "true" : "false"); +} + +#define HANDLE_BOOL_ATTR(CL, AttrName) \ + do { \ + if (CL->getNumOccurrences() > 0 && !F.hasFnAttribute(AttrName)) \ + renderBoolStringAttr(NewAttrs, AttrName, *CL); \ + } while (0) + +/// Set function attributes of function \p F based on CPU, Features, and command +/// line flags. +void codegen::setFunctionAttributes(StringRef CPU, StringRef Features, + Function &F) { + auto &Ctx = F.getContext(); + AttributeList Attrs = F.getAttributes(); + AttrBuilder NewAttrs; + + if (!CPU.empty() && !F.hasFnAttribute("target-cpu")) + NewAttrs.addAttribute("target-cpu", CPU); + if (!Features.empty()) { + // Append the command line features to any that are already on the function. + StringRef OldFeatures = + F.getFnAttribute("target-features").getValueAsString(); + if (OldFeatures.empty()) + NewAttrs.addAttribute("target-features", Features); + else { + SmallString<256> Appended(OldFeatures); + Appended.push_back(','); + Appended.append(Features); + NewAttrs.addAttribute("target-features", Appended); + } + } + if (FramePointerUsageView->getNumOccurrences() > 0 && + !F.hasFnAttribute("frame-pointer")) { + if (getFramePointerUsage() == FramePointer::All) + NewAttrs.addAttribute("frame-pointer", "all"); + else if (getFramePointerUsage() == FramePointer::NonLeaf) + NewAttrs.addAttribute("frame-pointer", "non-leaf"); + else if (getFramePointerUsage() == FramePointer::None) + NewAttrs.addAttribute("frame-pointer", "none"); + } + if (DisableTailCallsView->getNumOccurrences() > 0) + NewAttrs.addAttribute("disable-tail-calls", + toStringRef(getDisableTailCalls())); + if (getStackRealign()) + NewAttrs.addAttribute("stackrealign"); + + HANDLE_BOOL_ATTR(EnableUnsafeFPMathView, "unsafe-fp-math"); + HANDLE_BOOL_ATTR(EnableNoInfsFPMathView, "no-infs-fp-math"); + HANDLE_BOOL_ATTR(EnableNoNaNsFPMathView, "no-nans-fp-math"); + HANDLE_BOOL_ATTR(EnableNoSignedZerosFPMathView, "no-signed-zeros-fp-math"); + + if (DenormalFPMathView->getNumOccurrences() > 0 && + !F.hasFnAttribute("denormal-fp-math")) { + DenormalMode::DenormalModeKind DenormKind = getDenormalFPMath(); + + // FIXME: Command line flag should expose separate input/output modes. + NewAttrs.addAttribute("denormal-fp-math", + DenormalMode(DenormKind, DenormKind).str()); + } + + if (DenormalFP32MathView->getNumOccurrences() > 0 && + !F.hasFnAttribute("denormal-fp-math-f32")) { + // FIXME: Command line flag should expose separate input/output modes. + DenormalMode::DenormalModeKind DenormKind = getDenormalFP32Math(); + + NewAttrs.addAttribute( + "denormal-fp-math-f32", + DenormalMode(DenormKind, DenormKind).str()); + } + + if (TrapFuncNameView->getNumOccurrences() > 0) + for (auto &B : F) + for (auto &I : B) + if (auto *Call = dyn_cast<CallInst>(&I)) + if (const auto *F = Call->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::debugtrap || + F->getIntrinsicID() == Intrinsic::trap) + Call->addAttribute( + AttributeList::FunctionIndex, + Attribute::get(Ctx, "trap-func-name", getTrapFuncName())); + + // Let NewAttrs override Attrs. 
+ F.setAttributes( + Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs)); +} + +/// Set function attributes of functions in Module M based on CPU, +/// Features, and command line flags. +void codegen::setFunctionAttributes(StringRef CPU, StringRef Features, + Module &M) { + for (Function &F : M) + setFunctionAttributes(CPU, Features, F); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index 8d9d48402b31..7ae42b010261 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -14,7 +14,6 @@ #include "CriticalAntiDepBreaker.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -33,9 +32,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <cassert> -#include <map> #include <utility> -#include <vector> using namespace llvm; @@ -702,3 +699,9 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits, return Broken; } + +AntiDepBreaker * +llvm::createCriticalAntiDepBreaker(MachineFunction &MFi, + const RegisterClassInfo &RCI) { + return new CriticalAntiDepBreaker(MFi, RCI); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.h index 4e127ce525c8..640506b6e9ed 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.h @@ -15,8 +15,8 @@ #ifndef LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H #define LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H -#include "AntiDepBreaker.h" #include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/AntiDepBreaker.h" #include "llvm/Support/Compiler.h" #include <map> #include <vector> diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp index af347fd7e73d..c75c957bff8a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -48,6 +48,7 @@ namespace { // RewindFunction - _Unwind_Resume or the target equivalent. FunctionCallee RewindFunction = nullptr; + CodeGenOpt::Level OptLevel; DominatorTree *DT = nullptr; const TargetLowering *TLI = nullptr; @@ -61,7 +62,8 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. 
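A hedged companion to the setFunctionAttributes overloads added above (sketch only, not from this diff): a driver that has already parsed the flags can stamp a whole module in one call.

    #include "llvm/CodeGen/CommandFlags.h"
    #include "llvm/IR/Module.h"

    // Applies target-cpu/target-features and the other flag-driven string
    // attributes to every function, leaving pre-existing attributes alone.
    void applyCodeGenAttrs(llvm::Module &M) {
      llvm::codegen::setFunctionAttributes(llvm::codegen::getCPUStr(),
                                           llvm::codegen::getFeaturesStr(), M);
    }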
- DwarfEHPrepare() : FunctionPass(ID) {} + DwarfEHPrepare(CodeGenOpt::Level OptLevel = CodeGenOpt::Default) + : FunctionPass(ID), OptLevel(OptLevel) {} bool runOnFunction(Function &Fn) override; @@ -89,12 +91,15 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(DwarfEHPrepare, DEBUG_TYPE, "Prepare DWARF exceptions", false, false) -FunctionPass *llvm::createDwarfEHPass() { return new DwarfEHPrepare(); } +FunctionPass *llvm::createDwarfEHPass(CodeGenOpt::Level OptLevel) { + return new DwarfEHPrepare(OptLevel); +} void DwarfEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetPassConfig>(); AU.addRequired<TargetTransformInfoWrapperPass>(); - AU.addRequired<DominatorTreeWrapperPass>(); + if (OptLevel != CodeGenOpt::None) + AU.addRequired<DominatorTreeWrapperPass>(); } /// GetExceptionObject - Return the exception object from the value passed into @@ -202,7 +207,10 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { LLVMContext &Ctx = Fn.getContext(); - size_t ResumesLeft = pruneUnreachableResumes(Fn, Resumes, CleanupLPads); + size_t ResumesLeft = Resumes.size(); + if (OptLevel != CodeGenOpt::None) + ResumesLeft = pruneUnreachableResumes(Fn, Resumes, CleanupLPads); + if (ResumesLeft == 0) return true; // We pruned them all. @@ -259,7 +267,8 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { bool DwarfEHPrepare::runOnFunction(Function &Fn) { const TargetMachine &TM = getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); - DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + DT = OptLevel != CodeGenOpt::None + ? &getAnalysis<DominatorTreeWrapperPass>().getDomTree() : nullptr; TLI = TM.getSubtargetImpl(Fn)->getTargetLowering(); bool Changed = InsertUnwindResumeCalls(Fn); DT = nullptr; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp index d45e424184d7..96d4efb856c1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -91,10 +91,10 @@ public: /// The block containing phis after the if-then-else. MachineBasicBlock *Tail; - /// The 'true' conditional block as determined by AnalyzeBranch. + /// The 'true' conditional block as determined by analyzeBranch. MachineBasicBlock *TBB; - /// The 'false' conditional block as determined by AnalyzeBranch. + /// The 'false' conditional block as determined by analyzeBranch. MachineBasicBlock *FBB; /// isTriangle - When there is no 'else' block, either TBB or FBB will be @@ -121,7 +121,7 @@ public: SmallVector<PHIInfo, 8> PHIs; private: - /// The branch condition determined by AnalyzeBranch. + /// The branch condition determined by analyzeBranch. SmallVector<MachineOperand, 4> Cond; /// Instructions in Head that define values used by the conditional blocks. @@ -486,18 +486,18 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) { // This is weird, probably some sort of degenerate CFG. if (!TBB) { - LLVM_DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch.\n"); + LLVM_DEBUG(dbgs() << "analyzeBranch didn't find conditional branch.\n"); return false; } // Make sure the analyzed branch is conditional; one of the successors // could be a landing pad. (Empty landing pads can be generated on Windows.) 
if (Cond.empty()) { - LLVM_DEBUG(dbgs() << "AnalyzeBranch found an unconditional branch.\n"); + LLVM_DEBUG(dbgs() << "analyzeBranch found an unconditional branch.\n"); return false; } - // AnalyzeBranch doesn't set FBB on a fall-through branch. + // analyzeBranch doesn't set FBB on a fall-through branch. // Make sure it is always set. FBB = TBB == Succ0 ? Succ1 : Succ0; @@ -520,8 +520,9 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) { assert(Register::isVirtualRegister(PI.FReg) && "Bad PHI"); // Get target information. - if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg, - PI.CondCycles, PI.TCycles, PI.FCycles)) { + if (!TII->canInsertSelect(*Head, Cond, PI.PHI->getOperand(0).getReg(), + PI.TReg, PI.FReg, PI.CondCycles, PI.TCycles, + PI.FCycles)) { LLVM_DEBUG(dbgs() << "Can't convert: " << *PI.PHI); return false; } @@ -758,7 +759,7 @@ void updateDomTree(MachineDominatorTree *DomTree, const SSAIfConv &IfConv, assert(Node != HeadNode && "Cannot erase the head node"); while (Node->getNumChildren()) { assert(Node->getBlock() == IfConv.Tail && "Unexpected children"); - DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode); + DomTree->changeImmediateDominator(Node->back(), HeadNode); } DomTree->eraseNode(B); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EdgeBundles.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EdgeBundles.cpp index dfaf7f584652..0b2ffda50a39 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/EdgeBundles.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/EdgeBundles.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/EdgeBundles.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp index a1adf4ef9820..9f85db9de884 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -23,7 +23,9 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/InitializePasses.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SizeOpts.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -76,7 +78,7 @@ class MemCmpExpansion { IRBuilder<> Builder; // Represents the decomposition in blocks of the expansion. For example, // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and - // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {32, 1}. + // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}. 
struct LoadEntry { LoadEntry(unsigned LoadSize, uint64_t Offset) : LoadSize(LoadSize), Offset(Offset) { @@ -103,8 +105,12 @@ class MemCmpExpansion { Value *getMemCmpExpansionZeroCase(); Value *getMemCmpEqZeroOneBlock(); Value *getMemCmpOneBlock(); - Value *getPtrToElementAtOffset(Value *Source, Type *LoadSizeType, - uint64_t OffsetBytes); + struct LoadPair { + Value *Lhs = nullptr; + Value *Rhs = nullptr; + }; + LoadPair getLoadPair(Type *LoadSizeType, bool NeedsBSwap, Type *CmpSizeType, + unsigned OffsetBytes); static LoadEntryVector computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes, @@ -261,18 +267,56 @@ void MemCmpExpansion::createResultBlock() { EndBlock->getParent(), EndBlock); } -/// Return a pointer to an element of type `LoadSizeType` at offset -/// `OffsetBytes`. -Value *MemCmpExpansion::getPtrToElementAtOffset(Value *Source, - Type *LoadSizeType, - uint64_t OffsetBytes) { +MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType, + bool NeedsBSwap, + Type *CmpSizeType, + unsigned OffsetBytes) { + // Get the memory source at offset `OffsetBytes`. + Value *LhsSource = CI->getArgOperand(0); + Value *RhsSource = CI->getArgOperand(1); + Align LhsAlign = LhsSource->getPointerAlignment(DL); + Align RhsAlign = RhsSource->getPointerAlignment(DL); if (OffsetBytes > 0) { auto *ByteType = Type::getInt8Ty(CI->getContext()); - Source = Builder.CreateConstGEP1_64( - ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()), + LhsSource = Builder.CreateConstGEP1_64( + ByteType, Builder.CreateBitCast(LhsSource, ByteType->getPointerTo()), + OffsetBytes); + RhsSource = Builder.CreateConstGEP1_64( + ByteType, Builder.CreateBitCast(RhsSource, ByteType->getPointerTo()), OffsetBytes); + LhsAlign = commonAlignment(LhsAlign, OffsetBytes); + RhsAlign = commonAlignment(RhsAlign, OffsetBytes); + } + LhsSource = Builder.CreateBitCast(LhsSource, LoadSizeType->getPointerTo()); + RhsSource = Builder.CreateBitCast(RhsSource, LoadSizeType->getPointerTo()); + + // Create a constant or a load from the source. + Value *Lhs = nullptr; + if (auto *C = dyn_cast<Constant>(LhsSource)) + Lhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL); + if (!Lhs) + Lhs = Builder.CreateAlignedLoad(LoadSizeType, LhsSource, LhsAlign); + + Value *Rhs = nullptr; + if (auto *C = dyn_cast<Constant>(RhsSource)) + Rhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL); + if (!Rhs) + Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign); + + // Swap bytes if required. + if (NeedsBSwap) { + Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), + Intrinsic::bswap, LoadSizeType); + Lhs = Builder.CreateCall(Bswap, Lhs); + Rhs = Builder.CreateCall(Bswap, Rhs); + } + + // Zero extend if required. + if (CmpSizeType != nullptr && CmpSizeType != LoadSizeType) { + Lhs = Builder.CreateZExt(Lhs, CmpSizeType); + Rhs = Builder.CreateZExt(Rhs, CmpSizeType); } - return Builder.CreateBitCast(Source, LoadSizeType->getPointerTo()); + return {Lhs, Rhs}; } // This function creates the IR instructions for loading and comparing 1 byte. 
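The {LoadSize, Offset} decomposition described above is greedy over the target's allowed load sizes. A standalone sketch of the idea, simplified from the real computeGreedyLoadSequence, which also respects a max-load budget and overlapping tails:

    #include <cstdint>
    #include <vector>

    struct LoadEntry { unsigned LoadSize; uint64_t Offset; };

    // Walk allowed sizes from largest to smallest, consuming the byte count.
    std::vector<LoadEntry> greedySequence(uint64_t Size,
                                          const std::vector<unsigned> &Sizes) {
      std::vector<LoadEntry> Seq;
      uint64_t Offset = 0;
      for (unsigned S : Sizes) {   // e.g. {16, 8, 4, 2, 1} on x86+sse
        while (Size >= S) {
          Seq.push_back({S, Offset});
          Offset += S;
          Size -= S;
        }
      }
      return Seq; // Size=33 yields {16,0}, {16,16}, {1,32}, as in the comment
    }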
@@ -282,18 +326,10 @@ Value *MemCmpExpansion::getPtrToElementAtOffset(Value *Source, void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes) { Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); - Type *LoadSizeType = Type::getInt8Ty(CI->getContext()); - Value *Source1 = - getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, OffsetBytes); - Value *Source2 = - getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, OffsetBytes); - - Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); - - LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext())); - LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext())); - Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2); + const LoadPair Loads = + getLoadPair(Type::getInt8Ty(CI->getContext()), /*NeedsBSwap=*/false, + Type::getInt32Ty(CI->getContext()), OffsetBytes); + Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs); PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]); @@ -340,41 +376,19 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex, : IntegerType::get(CI->getContext(), MaxLoadSize * 8); for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) { const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex]; - - IntegerType *LoadSizeType = - IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); - - Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, - CurLoadEntry.Offset); - Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, - CurLoadEntry.Offset); - - // Get a constant or load a value for each source address. - Value *LoadSrc1 = nullptr; - if (auto *Source1C = dyn_cast<Constant>(Source1)) - LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL); - if (!LoadSrc1) - LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - - Value *LoadSrc2 = nullptr; - if (auto *Source2C = dyn_cast<Constant>(Source2)) - LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL); - if (!LoadSrc2) - LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + const LoadPair Loads = getLoadPair( + IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8), + /*NeedsBSwap=*/false, MaxLoadType, CurLoadEntry.Offset); if (NumLoads != 1) { - if (LoadSizeType != MaxLoadType) { - LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); - LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); - } // If we have multiple loads per block, we need to generate a composite // comparison using xor+or. - Diff = Builder.CreateXor(LoadSrc1, LoadSrc2); + Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs); Diff = Builder.CreateZExt(Diff, MaxLoadType); XorList.push_back(Diff); } else { // If there's only one load per block, we just compare the loaded values. - Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2); + Cmp = Builder.CreateICmpNE(Loads.Lhs, Loads.Rhs); } } @@ -451,35 +465,18 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) { Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); - Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, - CurLoadEntry.Offset); - Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, - CurLoadEntry.Offset); - - // Load LoadSizeType from the base address. 
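An aside on the multi-load blocks rewritten here: getCompareLoadPairs folds all pairs into one comparison via xor and or. A scalar model of that shape, illustrative only:

    #include <cstdint>

    // Each xor flags a differing pair; or-reducing them lets the whole block
    // end in a single compare against zero (CreateXor/CreateOr/CreateICmpNE).
    bool blockHasDiff(const uint64_t *Lhs, const uint64_t *Rhs,
                      unsigned NumLoads) {
      uint64_t Acc = 0;
      for (unsigned I = 0; I < NumLoads; ++I)
        Acc |= Lhs[I] ^ Rhs[I];
      return Acc != 0;
    }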
- Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); - - if (DL.isLittleEndian()) { - Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), - Intrinsic::bswap, LoadSizeType); - LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); - LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); - } - - if (LoadSizeType != MaxLoadType) { - LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); - LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); - } + const LoadPair Loads = + getLoadPair(LoadSizeType, /*NeedsBSwap=*/DL.isLittleEndian(), MaxLoadType, + CurLoadEntry.Offset); // Add the loaded values to the phi nodes for calculating memcmp result only // if result is not used in a zero equality. if (!IsUsedForZeroCmp) { - ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[BlockIndex]); - ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[BlockIndex]); + ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]); + ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]); } - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2); + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Loads.Lhs, Loads.Rhs); BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1)) ? EndBlock : LoadCmpBlocks[BlockIndex + 1]; @@ -568,42 +565,27 @@ Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() { /// the compare, branch, and phi IR that is required in the general case. Value *MemCmpExpansion::getMemCmpOneBlock() { Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8); - Value *Source1 = CI->getArgOperand(0); - Value *Source2 = CI->getArgOperand(1); - - // Cast source to LoadSizeType*. - if (Source1->getType() != LoadSizeType) - Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); - if (Source2->getType() != LoadSizeType) - Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - - // Load LoadSizeType from the base address. - Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); - - if (DL.isLittleEndian() && Size != 1) { - Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), - Intrinsic::bswap, LoadSizeType); - LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); - LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); - } + bool NeedsBSwap = DL.isLittleEndian() && Size != 1; + // The i8 and i16 cases don't need compares. We zext the loaded values and + // subtract them to get the suitable negative, zero, or positive i32 result. if (Size < 4) { - // The i8 and i16 cases don't need compares. We zext the loaded values and - // subtract them to get the suitable negative, zero, or positive i32 result. - LoadSrc1 = Builder.CreateZExt(LoadSrc1, Builder.getInt32Ty()); - LoadSrc2 = Builder.CreateZExt(LoadSrc2, Builder.getInt32Ty()); - return Builder.CreateSub(LoadSrc1, LoadSrc2); + const LoadPair Loads = + getLoadPair(LoadSizeType, NeedsBSwap, Builder.getInt32Ty(), + /*Offset*/ 0); + return Builder.CreateSub(Loads.Lhs, Loads.Rhs); } + const LoadPair Loads = getLoadPair(LoadSizeType, NeedsBSwap, LoadSizeType, + /*Offset*/ 0); // The result of memcmp is negative, zero, or positive, so produce that by // subtracting 2 extended compare bits: sub (ugt, ult). // If a target prefers to use selects to get -1/0/1, they should be able // to transform this later. 
The inverse transform (going from selects to math) // may not be possible in the DAG because the selects got converted into // branches before we got there. - Value *CmpUGT = Builder.CreateICmpUGT(LoadSrc1, LoadSrc2); - Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2); + Value *CmpUGT = Builder.CreateICmpUGT(Loads.Lhs, Loads.Rhs); + Value *CmpULT = Builder.CreateICmpULT(Loads.Lhs, Loads.Rhs); Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty()); Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty()); return Builder.CreateSub(ZextUGT, ZextULT); @@ -843,7 +825,7 @@ bool ExpandMemCmpPass::runOnBlock( continue; } LibFunc Func; - if (TLI->getLibFunc(ImmutableCallSite(CI), Func) && + if (TLI->getLibFunc(*CI, Func) && (Func == LibFunc_memcmp || Func == LibFunc_bcmp) && expandMemCmp(CI, TTI, TL, &DL, PSI, BFI)) { return true; @@ -869,6 +851,9 @@ PreservedAnalyses ExpandMemCmpPass::runImpl( ++BBIt; } } + if (MadeChanges) + for (BasicBlock &BB : F) + SimplifyInstructionsInBlock(&BB); return MadeChanges ? PreservedAnalyses::none() : PreservedAnalyses::all(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp index 4ccf1d2c8c50..45f21c1085dd 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp @@ -125,7 +125,8 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { if (!FMF.allowReassoc()) Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK); else { - if (!isPowerOf2_32(Vec->getType()->getVectorNumElements())) + if (!isPowerOf2_32( + cast<FixedVectorType>(Vec->getType())->getNumElements())) continue; Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); @@ -146,7 +147,8 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { case Intrinsic::experimental_vector_reduce_fmax: case Intrinsic::experimental_vector_reduce_fmin: { Value *Vec = II->getArgOperand(0); - if (!isPowerOf2_32(Vec->getType()->getVectorNumElements())) + if (!isPowerOf2_32( + cast<FixedVectorType>(Vec->getType())->getNumElements())) continue; Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp index 4c0f30bce820..c2194929e2e7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp @@ -35,8 +35,8 @@ struct FEntryInserter : public MachineFunctionPass { } bool FEntryInserter::runOnMachineFunction(MachineFunction &MF) { - const std::string FEntryName = - MF.getFunction().getFnAttribute("fentry-call").getValueAsString(); + const std::string FEntryName = std::string( + MF.getFunction().getFnAttribute("fentry-call").getValueAsString()); if (FEntryName != "true") return false; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp index de0b4fa87098..23560b4cd136 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp @@ -57,17 +57,17 @@ void FaultMaps::serializeToFaultMapSection() { OS.SwitchSection(FaultMapSection); // Emit a dummy symbol to force section inclusion. 
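One more aside on the getMemCmpOneBlock hunk above, before the FaultMaps changes below: once the optional byte swap puts both loads in memory (big-endian) order, the sign of the result comes from two unsigned compares, branch-free. A plain C++ rendering, not taken from the diff:

    #include <cstdint>

    // sub(zext(ugt), zext(ult)): negative, zero, or positive.
    int memcmpOneBlock(uint64_t Lhs, uint64_t Rhs) {
      return int(Lhs > Rhs) - int(Lhs < Rhs);
    }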
- OS.EmitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_FaultMaps"))); + OS.emitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_FaultMaps"))); LLVM_DEBUG(dbgs() << "********** Fault Map Output **********\n"); // Header - OS.EmitIntValue(FaultMapVersion, 1); // Version. - OS.EmitIntValue(0, 1); // Reserved. - OS.EmitIntValue(0, 2); // Reserved. + OS.emitIntValue(FaultMapVersion, 1); // Version. + OS.emitIntValue(0, 1); // Reserved. + OS.emitInt16(0); // Reserved. LLVM_DEBUG(dbgs() << WFMP << "#functions = " << FunctionInfos.size() << "\n"); - OS.EmitIntValue(FunctionInfos.size(), 4); + OS.emitInt32(FunctionInfos.size()); LLVM_DEBUG(dbgs() << WFMP << "functions:\n"); @@ -80,25 +80,25 @@ void FaultMaps::emitFunctionInfo(const MCSymbol *FnLabel, MCStreamer &OS = *AP.OutStreamer; LLVM_DEBUG(dbgs() << WFMP << " function addr: " << *FnLabel << "\n"); - OS.EmitSymbolValue(FnLabel, 8); + OS.emitSymbolValue(FnLabel, 8); LLVM_DEBUG(dbgs() << WFMP << " #faulting PCs: " << FFI.size() << "\n"); - OS.EmitIntValue(FFI.size(), 4); + OS.emitInt32(FFI.size()); - OS.EmitIntValue(0, 4); // Reserved + OS.emitInt32(0); // Reserved for (auto &Fault : FFI) { LLVM_DEBUG(dbgs() << WFMP << " fault type: " << faultTypeToString(Fault.Kind) << "\n"); - OS.EmitIntValue(Fault.Kind, 4); + OS.emitInt32(Fault.Kind); LLVM_DEBUG(dbgs() << WFMP << " faulting PC offset: " << *Fault.FaultingOffsetExpr << "\n"); - OS.EmitValue(Fault.FaultingOffsetExpr, 4); + OS.emitValue(Fault.FaultingOffsetExpr, 4); LLVM_DEBUG(dbgs() << WFMP << " fault handler PC offset: " << *Fault.HandlerOffsetExpr << "\n"); - OS.EmitValue(Fault.HandlerOffsetExpr, 4); + OS.emitValue(Fault.HandlerOffsetExpr, 4); } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp new file mode 100644 index 000000000000..27319804049d --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp @@ -0,0 +1,311 @@ +//===-- FixupStatepointCallerSaved.cpp - Fixup caller saved registers ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Statepoint instruction in deopt parameters contains values which are +/// meaningful to the runtime and should be able to be read at the moment the +/// call returns. So we can say that we need to encode the fact that these +/// values are "late read" by runtime. If we could express this notion for +/// register allocator it would produce the right form for us. +/// The need to fixup (i.e this pass) is specifically handling the fact that +/// we cannot describe such a late read for the register allocator. +/// Register allocator may put the value on a register clobbered by the call. +/// This pass forces the spill of such registers and replaces corresponding +/// statepoint operands to added spill slots. 
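To make the rationale above concrete: "caller saved" is decided by the call's register mask, which packs one preserved-bit per physical register. A sketch of the test (it mirrors the isCalleeSaved helper further down; illustrative, not the pass itself):

    #include <cstdint>

    // Bit Reg%32 of word Reg/32 is set iff Reg survives the call.
    bool isPreservedByCall(const uint32_t *Mask, unsigned Reg) {
      return (Mask[Reg / 32] >> (Reg % 32)) & 1;
    }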
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/IR/Statepoint.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "fixup-statepoint-caller-saved" +STATISTIC(NumSpilledRegisters, "Number of spilled register"); +STATISTIC(NumSpillSlotsAllocated, "Number of spill slots allocated"); +STATISTIC(NumSpillSlotsExtended, "Number of spill slots extended"); + +static cl::opt<bool> FixupSCSExtendSlotSize( + "fixup-scs-extend-slot-size", cl::Hidden, cl::init(false), + cl::desc("Allow spill in spill slot of greater size than register size"), + cl::Hidden); + +namespace { + +class FixupStatepointCallerSaved : public MachineFunctionPass { +public: + static char ID; + + FixupStatepointCallerSaved() : MachineFunctionPass(ID) { + initializeFixupStatepointCallerSavedPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { + return "Fixup Statepoint Caller Saved"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // End anonymous namespace. + +char FixupStatepointCallerSaved::ID = 0; +char &llvm::FixupStatepointCallerSavedID = FixupStatepointCallerSaved::ID; + +INITIALIZE_PASS_BEGIN(FixupStatepointCallerSaved, DEBUG_TYPE, + "Fixup Statepoint Caller Saved", false, false) +INITIALIZE_PASS_END(FixupStatepointCallerSaved, DEBUG_TYPE, + "Fixup Statepoint Caller Saved", false, false) + +// Utility function to get size of the register. +static unsigned getRegisterSize(const TargetRegisterInfo &TRI, Register Reg) { + const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg); + return TRI.getSpillSize(*RC); +} + +namespace { +// Cache used frame indexes during statepoint re-write to re-use them in +// processing next statepoint instruction. +// Two strategies. One is to preserve the size of spill slot while another one +// extends the size of spill slots to reduce the number of them, causing +// the less total frame size. But unspill will have "implicit" any extend. +class FrameIndexesCache { +private: + struct FrameIndexesPerSize { + // List of used frame indexes during processing previous statepoints. + SmallVector<int, 8> Slots; + // Current index of un-used yet frame index. + unsigned Index = 0; + }; + MachineFrameInfo &MFI; + const TargetRegisterInfo &TRI; + // Map size to list of frame indexes of this size. If the mode is + // FixupSCSExtendSlotSize then the key 0 is used to keep all frame indexes. + // If the size of required spill slot is greater than in a cache then the + // size will be increased. + DenseMap<unsigned, FrameIndexesPerSize> Cache; + +public: + FrameIndexesCache(MachineFrameInfo &MFI, const TargetRegisterInfo &TRI) + : MFI(MFI), TRI(TRI) {} + // Reset the current state of used frame indexes. After invocation of + // this function all frame indexes are available for allocation. + void reset() { + for (auto &It : Cache) + It.second.Index = 0; + } + // Get frame index to spill the register. 
+ int getFrameIndex(Register Reg) { + unsigned Size = getRegisterSize(TRI, Reg); + // In FixupSCSExtendSlotSize mode the bucket with 0 index is used + // for all sizes. + unsigned Bucket = FixupSCSExtendSlotSize ? 0 : Size; + FrameIndexesPerSize &Line = Cache[Bucket]; + if (Line.Index < Line.Slots.size()) { + int FI = Line.Slots[Line.Index++]; + // If all sizes are kept together we probably need to extend the + // spill slot size. + if (MFI.getObjectSize(FI) < Size) { + MFI.setObjectSize(FI, Size); + MFI.setObjectAlignment(FI, Align(Size)); + NumSpillSlotsExtended++; + } + return FI; + } + int FI = MFI.CreateSpillStackObject(Size, Align(Size)); + NumSpillSlotsAllocated++; + Line.Slots.push_back(FI); + ++Line.Index; + return FI; + } + // Sort all registers to spill in descendent order. In the + // FixupSCSExtendSlotSize mode it will minimize the total frame size. + // In non FixupSCSExtendSlotSize mode we can skip this step. + void sortRegisters(SmallVectorImpl<Register> &Regs) { + if (!FixupSCSExtendSlotSize) + return; + llvm::sort(Regs.begin(), Regs.end(), [&](Register &A, Register &B) { + return getRegisterSize(TRI, A) > getRegisterSize(TRI, B); + }); + } +}; + +// Describes the state of the current processing statepoint instruction. +class StatepointState { +private: + // statepoint instruction. + MachineInstr &MI; + MachineFunction &MF; + const TargetRegisterInfo &TRI; + const TargetInstrInfo &TII; + MachineFrameInfo &MFI; + // Mask with callee saved registers. + const uint32_t *Mask; + // Cache of frame indexes used on previous instruction processing. + FrameIndexesCache &CacheFI; + // Operands with physical registers requiring spilling. + SmallVector<unsigned, 8> OpsToSpill; + // Set of register to spill. + SmallVector<Register, 8> RegsToSpill; + // Map Register to Frame Slot index. + DenseMap<Register, int> RegToSlotIdx; + +public: + StatepointState(MachineInstr &MI, const uint32_t *Mask, + FrameIndexesCache &CacheFI) + : MI(MI), MF(*MI.getMF()), TRI(*MF.getSubtarget().getRegisterInfo()), + TII(*MF.getSubtarget().getInstrInfo()), MFI(MF.getFrameInfo()), + Mask(Mask), CacheFI(CacheFI) {} + // Return true if register is callee saved. + bool isCalleeSaved(Register Reg) { return (Mask[Reg / 32] >> Reg % 32) & 1; } + // Iterates over statepoint meta args to find caller saver registers. + // Also cache the size of found registers. + // Returns true if caller save registers found. + bool findRegistersToSpill() { + SmallSet<Register, 8> VisitedRegs; + for (unsigned Idx = StatepointOpers(&MI).getVarIdx(), + EndIdx = MI.getNumOperands(); + Idx < EndIdx; ++Idx) { + MachineOperand &MO = MI.getOperand(Idx); + if (!MO.isReg() || MO.isImplicit()) + continue; + Register Reg = MO.getReg(); + assert(Reg.isPhysical() && "Only physical regs are expected"); + if (isCalleeSaved(Reg)) + continue; + if (VisitedRegs.insert(Reg).second) + RegsToSpill.push_back(Reg); + OpsToSpill.push_back(Idx); + } + CacheFI.sortRegisters(RegsToSpill); + return !RegsToSpill.empty(); + } + // Spill all caller saved registers right before statepoint instruction. + // Remember frame index where register is spilled. 
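A compact model of the cache's two bucketing modes (editorial sketch, sizes in bytes): by default slots are reused only within a size class, while -fixup-scs-extend-slot-size funnels everything through bucket 0 and grows reused slots in place, trading slot count for the "implicit" extend noted above.

    // Mirrors the Cache[Bucket] keying in getFrameIndex().
    unsigned cacheBucket(bool ExtendSlotSize, unsigned RegSizeBytes) {
      return ExtendSlotSize ? 0 : RegSizeBytes;
    }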
+ void spillRegisters() { + for (Register Reg : RegsToSpill) { + int FI = CacheFI.getFrameIndex(Reg); + const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*MI.getParent(), MI, Reg, true /*is_Kill*/, FI, + RC, &TRI); + NumSpilledRegisters++; + RegToSlotIdx[Reg] = FI; + } + } + // Re-write statepoint machine instruction to replace caller saved operands + // with indirect memory location (frame index). + void rewriteStatepoint() { + MachineInstr *NewMI = + MF.CreateMachineInstr(TII.get(MI.getOpcode()), MI.getDebugLoc(), true); + MachineInstrBuilder MIB(MF, NewMI); + + // Add End marker. + OpsToSpill.push_back(MI.getNumOperands()); + unsigned CurOpIdx = 0; + + for (unsigned I = 0; I < MI.getNumOperands(); ++I) { + MachineOperand &MO = MI.getOperand(I); + if (I == OpsToSpill[CurOpIdx]) { + int FI = RegToSlotIdx[MO.getReg()]; + MIB.addImm(StackMaps::IndirectMemRefOp); + MIB.addImm(getRegisterSize(TRI, MO.getReg())); + assert(MO.isReg() && "Should be register"); + assert(MO.getReg().isPhysical() && "Should be physical register"); + MIB.addFrameIndex(FI); + MIB.addImm(0); + ++CurOpIdx; + } else + MIB.add(MO); + } + assert(CurOpIdx == (OpsToSpill.size() - 1) && "Not all operands processed"); + // Add mem operands. + NewMI->setMemRefs(MF, MI.memoperands()); + for (auto It : RegToSlotIdx) { + int FrameIndex = It.second; + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); + auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, + getRegisterSize(TRI, It.first), + MFI.getObjectAlign(FrameIndex)); + NewMI->addMemOperand(MF, MMO); + } + // Insert new statepoint and erase old one. + MI.getParent()->insert(MI, NewMI); + MI.eraseFromParent(); + } +}; + +class StatepointProcessor { +private: + MachineFunction &MF; + const TargetRegisterInfo &TRI; + FrameIndexesCache CacheFI; + +public: + StatepointProcessor(MachineFunction &MF) + : MF(MF), TRI(*MF.getSubtarget().getRegisterInfo()), + CacheFI(MF.getFrameInfo(), TRI) {} + + bool process(MachineInstr &MI) { + StatepointOpers SO(&MI); + uint64_t Flags = SO.getFlags(); + // Do nothing for LiveIn, it supports all registers. 
+ if (Flags & (uint64_t)StatepointFlags::DeoptLiveIn) + return false; + CallingConv::ID CC = SO.getCallingConv(); + const uint32_t *Mask = TRI.getCallPreservedMask(MF, CC); + CacheFI.reset(); + StatepointState SS(MI, Mask, CacheFI); + + if (!SS.findRegistersToSpill()) + return false; + + SS.spillRegisters(); + SS.rewriteStatepoint(); + return true; + } +}; +} // namespace + +bool FixupStatepointCallerSaved::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + const Function &F = MF.getFunction(); + if (!F.hasGC()) + return false; + + SmallVector<MachineInstr *, 16> Statepoints; + for (MachineBasicBlock &BB : MF) + for (MachineInstr &I : BB) + if (I.getOpcode() == TargetOpcode::STATEPOINT) + Statepoints.push_back(&I); + + if (Statepoints.empty()) + return false; + + bool Changed = false; + StatepointProcessor SPP(MF); + for (MachineInstr *I : Statepoints) + Changed |= SPP.process(*I); + return Changed; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp index 600d662e0f99..7c96d838d992 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp @@ -153,7 +153,7 @@ GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) { for (auto& Entry : GCRegistry::entries()) { if (Name == Entry.getName()) { std::unique_ptr<GCStrategy> S = Entry.instantiate(); - S->Name = Name; + S->Name = std::string(Name); GCStrategyMap[Name] = S.get(); GCStrategyList.push_back(std::move(S)); return GCStrategyList.back().get(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp index 90e5f32f53b3..c6730aa6b00d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp @@ -57,7 +57,6 @@ public: /// GCMetadata record for each function. class GCMachineCodeAnalysis : public MachineFunctionPass { GCFunctionInfo *FI; - MachineModuleInfo *MMI; const TargetInstrInfo *TII; void FindSafePoints(MachineFunction &MF); @@ -160,10 +159,9 @@ static bool InsertRootInitializers(Function &F, ArrayRef<AllocaInst *> Roots) { for (AllocaInst *Root : Roots) if (!InitedRoots.count(Root)) { - StoreInst *SI = new StoreInst( + new StoreInst( ConstantPointerNull::get(cast<PointerType>(Root->getAllocatedType())), - Root); - SI->insertAfter(Root); + Root, Root->getNextNode()); MadeChange = true; } @@ -189,12 +187,12 @@ bool LowerIntrinsics::runOnFunction(Function &F) { /// need to be able to ensure each root has been initialized by the point the /// first safepoint is reached. This really should have been done by the /// frontend, but the old API made this non-obvious, so we do a potentially -/// redundant store just in case. +/// redundant store just in case. 
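The null-initializing store that InsertRootInitializers now places via Root->getNextNode() can be written with IRBuilder as well; a hedged equivalent (an alloca is never a block terminator, so getNextNode() is a valid insert point):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Store null into the GC root immediately after its alloca.
    void initGCRoot(AllocaInst *Root) {
      IRBuilder<> B(Root->getNextNode());
      B.CreateStore(ConstantPointerNull::get(
                        cast<PointerType>(Root->getAllocatedType())),
                    Root);
    }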
bool LowerIntrinsics::DoLowering(Function &F, GCStrategy &S) { SmallVector<AllocaInst *, 32> Roots; bool MadeChange = false; - for (BasicBlock &BB : F) + for (BasicBlock &BB : F) for (BasicBlock::iterator II = BB.begin(), E = BB.end(); II != E;) { IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++); if (!CI) @@ -250,7 +248,6 @@ GCMachineCodeAnalysis::GCMachineCodeAnalysis() : MachineFunctionPass(ID) {} void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); AU.setPreservesAll(); - AU.addRequired<MachineModuleInfoWrapperPass>(); AU.addRequired<GCModuleInfo>(); } @@ -297,7 +294,7 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { if (MF.getFrameInfo().isDeadObjectIndex(RI->Num)) { RI = FI->removeStackRoot(RI); } else { - unsigned FrameReg; // FIXME: surely GCRoot ought to store the + Register FrameReg; // FIXME: surely GCRoot ought to store the // register that the offset is from? RI->StackOffset = TFI->getFrameIndexReference(MF, RI->Num, FrameReg); ++RI; @@ -311,7 +308,6 @@ bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { return false; FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(MF.getFunction()); - MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); TII = MF.getSubtarget().getInstrInfo(); // Find the size of the stack frame. There may be no correct static frame diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp index e6abfcdb92cb..c4d8777615d2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -52,6 +52,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) { case TargetOpcode::G_SREM: case TargetOpcode::G_CONSTANT: case TargetOpcode::G_FCONSTANT: + case TargetOpcode::G_IMPLICIT_DEF: case TargetOpcode::G_ZEXT: case TargetOpcode::G_SEXT: case TargetOpcode::G_ANYEXT: @@ -64,7 +65,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) { } bool CSEConfigConstantOnly::shouldCSEOpc(unsigned Opc) { - return Opc == TargetOpcode::G_CONSTANT; + return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_IMPLICIT_DEF; } std::unique_ptr<CSEConfigBase> @@ -216,9 +217,6 @@ void GISelCSEInfo::handleRecordedInsts() { } bool GISelCSEInfo::shouldCSE(unsigned Opc) const { - // Only GISel opcodes are CSEable - if (!isPreISelGenericOpcode(Opc)) - return false; assert(CSEOpt.get() && "CSEConfig not set"); return CSEOpt->shouldCSEOpc(Opc); } @@ -260,6 +258,39 @@ void GISelCSEInfo::releaseMemory() { #endif } +Error GISelCSEInfo::verify() { +#ifndef NDEBUG + handleRecordedInsts(); + // For each instruction in map from MI -> UMI, + // Profile(MI) and make sure UMI is found for that profile. + for (auto &It : InstrMapping) { + FoldingSetNodeID TmpID; + GISelInstProfileBuilder(TmpID, *MRI).addNodeID(It.first); + void *InsertPos; + UniqueMachineInstr *FoundNode = + CSEMap.FindNodeOrInsertPos(TmpID, InsertPos); + if (FoundNode != It.second) + return createStringError(std::errc::not_supported, + "CSEMap mismatch, InstrMapping has MIs without " + "corresponding Nodes in CSEMap"); + } + + // For every node in the CSEMap, make sure that the InstrMapping + // points to it. 
+ for (auto It = CSEMap.begin(), End = CSEMap.end(); It != End; ++It) { + const UniqueMachineInstr &UMI = *It; + if (!InstrMapping.count(UMI.MI)) + return createStringError(std::errc::not_supported, + "Node in CSE without InstrMapping", UMI.MI); + + if (InstrMapping[UMI.MI] != &UMI) + return createStringError(std::make_error_code(std::errc::not_supported), + "Mismatch in CSE mapping"); + } +#endif + return Error::success(); +} + void GISelCSEInfo::print() { LLVM_DEBUG(for (auto &It : OpcodeHitTable) { @@ -286,7 +317,7 @@ GISelInstProfileBuilder::addNodeIDOpcode(unsigned Opc) const { } const GISelInstProfileBuilder & -GISelInstProfileBuilder::addNodeIDRegType(const LLT &Ty) const { +GISelInstProfileBuilder::addNodeIDRegType(const LLT Ty) const { uint64_t Val = Ty.getUniqueRAWLLTData(); ID.AddInteger(Val); return *this; @@ -311,13 +342,13 @@ GISelInstProfileBuilder::addNodeIDImmediate(int64_t Imm) const { } const GISelInstProfileBuilder & -GISelInstProfileBuilder::addNodeIDRegNum(unsigned Reg) const { +GISelInstProfileBuilder::addNodeIDRegNum(Register Reg) const { ID.AddInteger(Reg); return *this; } const GISelInstProfileBuilder & -GISelInstProfileBuilder::addNodeIDRegType(const unsigned Reg) const { +GISelInstProfileBuilder::addNodeIDRegType(const Register Reg) const { addNodeIDMachineOperand(MachineOperand::CreateReg(Reg, false)); return *this; } @@ -344,12 +375,14 @@ const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand( LLT Ty = MRI.getType(Reg); if (Ty.isValid()) addNodeIDRegType(Ty); - auto *RB = MRI.getRegBankOrNull(Reg); - if (RB) - addNodeIDRegType(RB); - auto *RC = MRI.getRegClassOrNull(Reg); - if (RC) - addNodeIDRegType(RC); + + if (const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(Reg)) { + if (const auto *RB = RCOrRB.dyn_cast<const RegisterBank *>()) + addNodeIDRegType(RB); + else if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>()) + addNodeIDRegType(RC); + } + assert(!MO.isImplicit() && "Unhandled case"); } else if (MO.isImm()) ID.AddInteger(MO.getImm()); @@ -369,6 +402,7 @@ GISelCSEInfo & GISelCSEAnalysisWrapper::get(std::unique_ptr<CSEConfigBase> CSEOpt, bool Recompute) { if (!AlreadyComputed || Recompute) { + Info.releaseMemory(); Info.setCSEConfig(std::move(CSEOpt)); Info.analyze(*MF); AlreadyComputed = true; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index 51a74793f029..88173dc4d302 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -129,7 +129,7 @@ CSEMIRBuilder::generateCopiesIfRequired(ArrayRef<DstOp> DstOps, if (DstOps.size() == 1) { const DstOp &Op = DstOps[0]; if (Op.getDstOpKind() == DstOp::DstType::Ty_Reg) - return buildCopy(Op.getReg(), MIB->getOperand(0).getReg()); + return buildCopy(Op.getReg(), MIB.getReg(0)); } return MIB; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 4c2dbdd905f3..a7146515c4c9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/Target/TargetMachine.h" #define DEBUG_TYPE "call-lowering" @@ -29,48 +30,50 @@ using namespace llvm; void CallLowering::anchor() 
{} -bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS, +bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, ArrayRef<Register> ResRegs, ArrayRef<ArrayRef<Register>> ArgRegs, Register SwiftErrorVReg, std::function<unsigned()> GetCalleeReg) const { CallLoweringInfo Info; - auto &DL = CS.getParent()->getParent()->getParent()->getDataLayout(); + const DataLayout &DL = MIRBuilder.getDataLayout(); // First step is to marshall all the function's parameters into the correct // physregs and memory locations. Gather the sequence of argument types that // we'll pass to the assigner function. unsigned i = 0; - unsigned NumFixedArgs = CS.getFunctionType()->getNumParams(); - for (auto &Arg : CS.args()) { + unsigned NumFixedArgs = CB.getFunctionType()->getNumParams(); + for (auto &Arg : CB.args()) { ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{}, i < NumFixedArgs}; - setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CS); + setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB); Info.OrigArgs.push_back(OrigArg); ++i; } - if (const Function *F = CS.getCalledFunction()) + // Try looking through a bitcast from one function type to another. + // Commonly happens with calls to objc_msgSend(). + const Value *CalleeV = CB.getCalledOperand()->stripPointerCasts(); + if (const Function *F = dyn_cast<Function>(CalleeV)) Info.Callee = MachineOperand::CreateGA(F, 0); else Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false); - Info.OrigRet = ArgInfo{ResRegs, CS.getType(), ISD::ArgFlagsTy{}}; + Info.OrigRet = ArgInfo{ResRegs, CB.getType(), ISD::ArgFlagsTy{}}; if (!Info.OrigRet.Ty->isVoidTy()) - setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CS); + setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB); - Info.KnownCallees = - CS.getInstruction()->getMetadata(LLVMContext::MD_callees); - Info.CallConv = CS.getCallingConv(); + MachineFunction &MF = MIRBuilder.getMF(); + Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees); + Info.CallConv = CB.getCallingConv(); Info.SwiftErrorVReg = SwiftErrorVReg; - Info.IsMustTailCall = CS.isMustTailCall(); - Info.IsTailCall = CS.isTailCall() && - isInTailCallPosition(CS, MIRBuilder.getMF().getTarget()) && - (MIRBuilder.getMF() - .getFunction() - .getFnAttribute("disable-tail-calls") - .getValueAsString() != "true"); - Info.IsVarArg = CS.getFunctionType()->isVarArg(); + Info.IsMustTailCall = CB.isMustTailCall(); + Info.IsTailCall = + CB.isTailCall() && isInTailCallPosition(CB, MF.getTarget()) && + (MF.getFunction() + .getFnAttribute("disable-tail-calls") + .getValueAsString() != "true"); + Info.IsVarArg = CB.getFunctionType()->isVarArg(); return lowerCall(MIRBuilder, Info); } @@ -94,10 +97,12 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, Flags.setSwiftError(); if (Attrs.hasAttribute(OpIdx, Attribute::ByVal)) Flags.setByVal(); + if (Attrs.hasAttribute(OpIdx, Attribute::Preallocated)) + Flags.setPreallocated(); if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca)) Flags.setInAlloca(); - if (Flags.isByVal() || Flags.isInAlloca()) { + if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) { Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType(); auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType(); @@ -105,16 +110,16 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, // For ByVal, alignment should be passed from FE. 
BE will guess if // this info is not there but there are cases it cannot get right. - unsigned FrameAlign; - if (FuncInfo.getParamAlignment(OpIdx - 2)) - FrameAlign = FuncInfo.getParamAlignment(OpIdx - 2); + Align FrameAlign; + if (auto ParamAlign = FuncInfo.getParamAlign(OpIdx - 2)) + FrameAlign = *ParamAlign; else - FrameAlign = getTLI()->getByValTypeAlignment(ElementTy, DL); - Flags.setByValAlign(Align(FrameAlign)); + FrameAlign = Align(getTLI()->getByValTypeAlignment(ElementTy, DL)); + Flags.setByValAlign(FrameAlign); } if (Attrs.hasAttribute(OpIdx, Attribute::Nest)) Flags.setNest(); - Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty))); + Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty)); } template void @@ -123,9 +128,9 @@ CallLowering::setArgFlags<Function>(CallLowering::ArgInfo &Arg, unsigned OpIdx, const Function &FuncInfo) const; template void -CallLowering::setArgFlags<CallInst>(CallLowering::ArgInfo &Arg, unsigned OpIdx, +CallLowering::setArgFlags<CallBase>(CallLowering::ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, - const CallInst &FuncInfo) const; + const CallBase &FuncInfo) const; Register CallLowering::packRegs(ArrayRef<Register> SrcRegs, Type *PackedTy, MachineIRBuilder &MIRBuilder) const { @@ -157,7 +162,7 @@ void CallLowering::unpackRegs(ArrayRef<Register> DstRegs, Register SrcReg, MachineIRBuilder &MIRBuilder) const { assert(DstRegs.size() > 1 && "Nothing to unpack"); - const DataLayout &DL = MIRBuilder.getMF().getDataLayout(); + const DataLayout &DL = MIRBuilder.getDataLayout(); SmallVector<LLT, 8> LLTs; SmallVector<uint64_t, 8> Offsets; @@ -189,11 +194,11 @@ bool CallLowering::handleAssignments(CCState &CCInfo, unsigned NumArgs = Args.size(); for (unsigned i = 0; i != NumArgs; ++i) { - MVT CurVT = MVT::getVT(Args[i].Ty); - if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], - Args[i].Flags[0], CCInfo)) { - if (!CurVT.isValid()) - return false; + EVT CurVT = EVT::getEVT(Args[i].Ty); + if (!CurVT.isSimple() || + Handler.assignArg(i, CurVT.getSimpleVT(), CurVT.getSimpleVT(), + CCValAssign::Full, Args[i], Args[i].Flags[0], + CCInfo)) { MVT NewVT = TLI->getRegisterTypeForCallingConv( F.getContext(), F.getCallingConv(), EVT(CurVT)); @@ -239,7 +244,7 @@ bool CallLowering::handleAssignments(CCState &CCInfo, if (Part == 0) { Flags.setSplit(); } else { - Flags.setOrigAlign(Align::None()); + Flags.setOrigAlign(Align(1)); if (Part == NumParts - 1) Flags.setSplitEnd(); } @@ -272,7 +277,7 @@ bool CallLowering::handleAssignments(CCState &CCInfo, if (PartIdx == 0) { Flags.setSplit(); } else { - Flags.setOrigAlign(Align::None()); + Flags.setOrigAlign(Align(1)); if (PartIdx == NumParts - 1) Flags.setSplitEnd(); } @@ -293,15 +298,21 @@ bool CallLowering::handleAssignments(CCState &CCInfo, assert(VA.getValNo() == i && "Location doesn't correspond to current arg"); if (VA.needsCustom()) { - j += Handler.assignCustomValue(Args[i], makeArrayRef(ArgLocs).slice(j)); + unsigned NumArgRegs = + Handler.assignCustomValue(Args[i], makeArrayRef(ArgLocs).slice(j)); + if (!NumArgRegs) + return false; + j += NumArgRegs; continue; } // FIXME: Pack registers if we have more than one. 
Register ArgReg = Args[i].Regs[0]; - MVT OrigVT = MVT::getVT(Args[i].Ty); - MVT VAVT = VA.getValVT(); + EVT OrigVT = EVT::getEVT(Args[i].Ty); + EVT VAVT = VA.getValVT(); + const LLT OrigTy = getLLTForType(*Args[i].Ty, DL); + if (VA.isRegLoc()) { if (Handler.isIncomingArgumentHandler() && VAVT != OrigVT) { if (VAVT.getSizeInBits() < OrigVT.getSizeInBits()) { @@ -323,7 +334,7 @@ bool CallLowering::handleAssignments(CCState &CCInfo, MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs); continue; } - const LLT VATy(VAVT); + const LLT VATy(VAVT.getSimpleVT()); Register NewReg = MIRBuilder.getMRI()->createGenericVirtualRegister(VATy); Handler.assignValueToReg(NewReg, VA.getLocReg(), VA); @@ -331,7 +342,6 @@ bool CallLowering::handleAssignments(CCState &CCInfo, // or do an unmerge to get the lower block of elements. if (VATy.isVector() && VATy.getNumElements() > OrigVT.getVectorNumElements()) { - const LLT OrigTy(OrigVT); // Just handle the case where the VA type is 2 * original type. if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) { LLVM_DEBUG(dbgs() @@ -371,7 +381,7 @@ bool CallLowering::handleAssignments(CCState &CCInfo, unsigned Offset = VA.getLocMemOffset(); MachinePointerInfo MPO; Register StackAddr = Handler.getStackAddress(Size, Offset, MPO); - Handler.assignValueToAddress(ArgReg, StackAddr, Size, MPO, VA); + Handler.assignValueToAddress(Args[i], StackAddr, Size, MPO, VA); } else { // FIXME: Support byvals and other weirdness return false; @@ -456,10 +466,19 @@ bool CallLowering::resultsCompatible(CallLoweringInfo &Info, } Register CallLowering::ValueHandler::extendRegister(Register ValReg, - CCValAssign &VA) { + CCValAssign &VA, + unsigned MaxSizeBits) { LLT LocTy{VA.getLocVT()}; - if (LocTy.getSizeInBits() == MRI.getType(ValReg).getSizeInBits()) + LLT ValTy = MRI.getType(ValReg); + if (LocTy.getSizeInBits() == ValTy.getSizeInBits()) return ValReg; + + if (LocTy.isScalar() && MaxSizeBits && MaxSizeBits < LocTy.getSizeInBits()) { + if (MaxSizeBits <= ValTy.getSizeInBits()) + return ValReg; + LocTy = LLT::scalar(MaxSizeBits); + } + switch (VA.getLocInfo()) { default: break; case CCValAssign::Full: @@ -469,7 +488,7 @@ Register CallLowering::ValueHandler::extendRegister(Register ValReg, return ValReg; case CCValAssign::AExt: { auto MIB = MIRBuilder.buildAnyExt(LocTy, ValReg); - return MIB->getOperand(0).getReg(); + return MIB.getReg(0); } case CCValAssign::SExt: { Register NewReg = MRI.createGenericVirtualRegister(LocTy); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index a103e8e4e6e0..194961ae3b21 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -9,6 +9,8 @@ #include "llvm/CodeGen/GlobalISel/Combiner.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineDominators.h" @@ -17,11 +19,13 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" #define DEBUG_TYPE "gi-combiner" using namespace llvm; +using namespace MIPatternMatch; 
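Looking back at the CSEInfo.cpp hunks above: G_IMPLICIT_DEF joins the CSE-able opcodes, and configs are plain virtual predicates, so a target or test can supply its own. A sketch in that spirit, assuming the CSEConfigBase interface from llvm/CodeGen/GlobalISel/CSEInfo.h:

    #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
    #include "llvm/CodeGen/TargetOpcodes.h"

    // CSE only constants and implicit_defs, like CSEConfigConstantOnly.
    struct ConstantAndImpDefCSEConfig : public llvm::CSEConfigBase {
      bool shouldCSEOpc(unsigned Opc) override {
        return Opc == llvm::TargetOpcode::G_CONSTANT ||
               Opc == llvm::TargetOpcode::G_IMPLICIT_DEF;
      }
    };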
// Option to allow testing of the combiner while no targets know about indexed // addressing. @@ -33,9 +37,10 @@ static cl::opt<bool> CombinerHelper::CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, GISelKnownBits *KB, - MachineDominatorTree *MDT) + MachineDominatorTree *MDT, + const LegalizerInfo *LI) : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), - KB(KB), MDT(MDT) { + KB(KB), MDT(MDT), LI(LI) { (void)this->KB; } @@ -74,36 +79,7 @@ bool CombinerHelper::matchCombineCopy(MachineInstr &MI) { return false; Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); - - // Give up if either DstReg or SrcReg is a physical register. - if (Register::isPhysicalRegister(DstReg) || - Register::isPhysicalRegister(SrcReg)) - return false; - - // Give up the types don't match. - LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(SrcReg); - // Give up if one has a valid LLT, but the other doesn't. - if (DstTy.isValid() != SrcTy.isValid()) - return false; - // Give up if the types don't match. - if (DstTy.isValid() && SrcTy.isValid() && DstTy != SrcTy) - return false; - - // Get the register banks and classes. - const RegisterBank *DstBank = MRI.getRegBankOrNull(DstReg); - const RegisterBank *SrcBank = MRI.getRegBankOrNull(SrcReg); - const TargetRegisterClass *DstRC = MRI.getRegClassOrNull(DstReg); - const TargetRegisterClass *SrcRC = MRI.getRegClassOrNull(SrcReg); - - // Replace if the register constraints match. - if ((SrcRC == DstRC) && (SrcBank == DstBank)) - return true; - // Replace if DstReg has no constraints. - if (!DstBank && !DstRC) - return true; - - return false; + return canReplaceReg(DstReg, SrcReg, MRI); } void CombinerHelper::applyCombineCopy(MachineInstr &MI) { Register DstReg = MI.getOperand(0).getReg(); @@ -294,7 +270,7 @@ namespace { /// Select a preference between two uses. CurrentUse is the current preference /// while *ForCandidate is attributes of the candidate under consideration. PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse, - const LLT &TyForCandidate, + const LLT TyForCandidate, unsigned OpcodeForCandidate, MachineInstr *MIForCandidate) { if (!CurrentUse.Ty.isValid()) { @@ -428,10 +404,23 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI, ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; Preferred = {LLT(), PreferredOpcode, nullptr}; - for (auto &UseMI : MRI.use_instructions(LoadValue.getReg())) { + for (auto &UseMI : MRI.use_nodbg_instructions(LoadValue.getReg())) { if (UseMI.getOpcode() == TargetOpcode::G_SEXT || UseMI.getOpcode() == TargetOpcode::G_ZEXT || - UseMI.getOpcode() == TargetOpcode::G_ANYEXT) { + (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) { + // Check for legality. 
+ if (LI) { + LegalityQuery::MemDesc MMDesc; + const auto &MMO = **MI.memoperands_begin(); + MMDesc.SizeInBits = MMO.getSizeInBits(); + MMDesc.AlignInBits = MMO.getAlign().value() * 8; + MMDesc.Ordering = MMO.getOrdering(); + LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg()); + LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + if (LI->getAction({MI.getOpcode(), {UseTy, SrcTy}, {MMDesc}}).Action != + LegalizeActions::Legal) + continue; + } Preferred = ChoosePreferredUse(Preferred, MRI.getType(UseMI.getOperand(0).getReg()), UseMI.getOpcode(), &UseMI); @@ -498,7 +487,7 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI, UseMI->getOpcode() == TargetOpcode::G_ANYEXT) { Register UseDstReg = UseMI->getOperand(0).getReg(); MachineOperand &UseSrcMO = UseMI->getOperand(1); - const LLT &UseDstTy = MRI.getType(UseDstReg); + const LLT UseDstTy = MRI.getType(UseDstReg); if (UseDstReg != ChosenDstReg) { if (Preferred.Ty == UseDstTy) { // If the use has the same type as the preferred use, then merge @@ -559,7 +548,10 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI, Observer.changedInstr(MI); } -bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) { +bool CombinerHelper::isPredecessor(const MachineInstr &DefMI, + const MachineInstr &UseMI) { + assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() && + "shouldn't consider debug uses"); assert(DefMI.getParent() == UseMI.getParent()); if (&DefMI == &UseMI) return false; @@ -572,7 +564,10 @@ bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) { llvm_unreachable("Block must contain instructions"); } -bool CombinerHelper::dominates(MachineInstr &DefMI, MachineInstr &UseMI) { +bool CombinerHelper::dominates(const MachineInstr &DefMI, + const MachineInstr &UseMI) { + assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() && + "shouldn't consider debug uses"); if (MDT) return MDT->dominates(&DefMI, &UseMI); else if (DefMI.getParent() != UseMI.getParent()) @@ -581,6 +576,24 @@ bool CombinerHelper::dominates(MachineInstr &DefMI, MachineInstr &UseMI) { return isPredecessor(DefMI, UseMI); } +bool CombinerHelper::matchSextAlreadyExtended(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); + Register SrcReg = MI.getOperand(1).getReg(); + unsigned SrcSignBits = KB->computeNumSignBits(SrcReg); + unsigned NumSextBits = + MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() - + MI.getOperand(2).getImm(); + return SrcSignBits >= NumSextBits; +} + +bool CombinerHelper::applySextAlreadyExtended(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); + MachineIRBuilder MIB(MI); + MIB.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base, Register &Offset) { auto &MF = *MI.getParent()->getParent(); @@ -599,7 +612,7 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI); - for (auto &Use : MRI.use_instructions(Base)) { + for (auto &Use : MRI.use_nodbg_instructions(Base)) { if (Use.getOpcode() != TargetOpcode::G_PTR_ADD) continue; @@ -626,7 +639,8 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, // forming an indexed one. 
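// ---- Editor's note ------------------------------------------------------
// A worked instance of the matchSextAlreadyExtended combine above:
// G_SEXT_INREG %src, Imm is a no-op once %src already carries at least
// (DstScalarBits - Imm) sign bits, and the apply step rewrites it to a COPY.
#include <cassert>
static bool sextInRegIsRedundant(unsigned DstScalarBits, unsigned Imm,
                                 unsigned KnownSignBits) {
  unsigned NumSextBits = DstScalarBits - Imm; // bits the op would re-derive
  return KnownSignBits >= NumSextBits;
}
int main() {
  // An s32 value built by G_SEXT from s8 has 25 sign bits, so
  // G_SEXT_INREG ..., 8 (which needs 32 - 8 = 24) can be dropped.
  assert(sextInRegIsRedundant(32, 8, 25));
  // With only 16 known sign bits the instruction must stay.
  assert(!sextInRegIsRedundant(32, 8, 16));
}
// -------------------------------------------------------------------------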
bool MemOpDominatesAddrUses = true; - for (auto &PtrAddUse : MRI.use_instructions(Use.getOperand(0).getReg())) { + for (auto &PtrAddUse : + MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) { if (!dominates(MI, PtrAddUse)) { MemOpDominatesAddrUses = false; break; @@ -661,7 +675,7 @@ bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr, Addr = MI.getOperand(1).getReg(); MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI); - if (!AddrDef || MRI.hasOneUse(Addr)) + if (!AddrDef || MRI.hasOneNonDBGUse(Addr)) return false; Base = AddrDef->getOperand(1).getReg(); @@ -699,7 +713,7 @@ bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr, // FIXME: check whether all uses of the base pointer are constant PtrAdds. // That might allow us to end base's liveness here by adjusting the constant. - for (auto &UseMI : MRI.use_instructions(Addr)) { + for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) { if (!dominates(MI, UseMI)) { LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses."); return false; @@ -811,7 +825,7 @@ bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) { MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg()); if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP || - !MRI.hasOneUse(CmpMI->getOperand(0).getReg())) + !MRI.hasOneNonDBGUse(CmpMI->getOperand(0).getReg())) return false; return true; } @@ -854,38 +868,32 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { // Returns a list of types to use for memory op lowering in MemOps. A partial // port of findOptimalMemOpLowering in TargetLowering. -static bool findGISelOptimalMemOpLowering( - std::vector<LLT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign, - unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, - bool AllowOverlap, unsigned DstAS, unsigned SrcAS, - const AttributeList &FuncAttributes, const TargetLowering &TLI) { - // If 'SrcAlign' is zero, that means the memory operation does not need to - // load the value, i.e. memset or memcpy from constant string. Otherwise, - // it's the inferred alignment of the source. 'DstAlign', on the other hand, - // is the specified alignment of the memory operation. If it is zero, that - // means it's possible to change the alignment of the destination. - // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does - // not need to be loaded. - if (SrcAlign != 0 && SrcAlign < DstAlign) +static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps, + unsigned Limit, const MemOp &Op, + unsigned DstAS, unsigned SrcAS, + const AttributeList &FuncAttributes, + const TargetLowering &TLI) { + if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign()) return false; - LLT Ty = TLI.getOptimalMemOpLLT(Size, DstAlign, SrcAlign, IsMemset, - ZeroMemset, MemcpyStrSrc, FuncAttributes); + LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes); if (Ty == LLT()) { // Use the largest scalar type whose alignment constraints are satisfied. // We only need to check DstAlign here as SrcAlign is always greater or // equal to DstAlign (or zero). 
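// ---- Editor's note ------------------------------------------------------
// Illustration only: the findGISelOptimalMemOpLowering rewrite above folds
// the old positional flag list into one MemOp descriptor built with
// MemOp::Set / MemOp::Copy. A simplified stand-in for that shape (a sketch,
// not llvm::MemOp itself):
#include <cassert>
#include <cstdint>
struct MemOpSketch {
  uint64_t Sz;
  bool DstAlignCanChange;        // dest is a local slot we may re-align
  unsigned DstAlign, SrcAlign;   // LLVM uses Align; plain ints suffice here
  bool IsMemset, ZeroMemset, IsVolatile;
  static MemOpSketch Set(uint64_t S, bool CanChange, unsigned DA, bool Zero,
                         bool Vol) {
    return {S, CanChange, DA, 0, true, Zero, Vol};
  }
  static MemOpSketch Copy(uint64_t S, bool CanChange, unsigned DA,
                          unsigned SA, bool Vol) {
    return {S, CanChange, DA, SA, false, false, Vol};
  }
  bool allowOverlap() const { return !IsVolatile; } // as queried in the loop
};
int main() {
  MemOpSketch Op = MemOpSketch::Copy(64, true, 8, 4, /*Vol=*/false);
  assert(Op.allowOverlap() && Op.SrcAlign == 4);
}
// -------------------------------------------------------------------------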
Ty = LLT::scalar(64); - while (DstAlign && DstAlign < Ty.getSizeInBytes() && - !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, DstAlign)) - Ty = LLT::scalar(Ty.getSizeInBytes()); + if (Op.isFixedDstAlign()) + while (Op.getDstAlign() < Ty.getSizeInBytes() && + !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign())) + Ty = LLT::scalar(Ty.getSizeInBytes()); assert(Ty.getSizeInBits() > 0 && "Could not find valid type"); // FIXME: check for the largest legal type we can load/store to. } unsigned NumMemOps = 0; - while (Size != 0) { + uint64_t Size = Op.size(); + while (Size) { unsigned TySize = Ty.getSizeInBytes(); while (TySize > Size) { // For now, only use non-vector load / store's for the left-over pieces. @@ -903,9 +911,10 @@ static bool findGISelOptimalMemOpLowering( bool Fast; // Need to get a VT equivalent for allowMisalignedMemoryAccesses(). MVT VT = getMVTForLLT(Ty); - if (NumMemOps && AllowOverlap && NewTySize < Size && + if (NumMemOps && Op.allowOverlap() && NewTySize < Size && TLI.allowsMisalignedMemoryAccesses( - VT, DstAS, DstAlign, MachineMemOperand::MONone, &Fast) && + VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0, + MachineMemOperand::MONone, &Fast) && Fast) TySize = Size; else { @@ -926,8 +935,8 @@ static bool findGISelOptimalMemOpLowering( static Type *getTypeForLLT(LLT Ty, LLVMContext &C) { if (Ty.isVector()) - return VectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()), - Ty.getNumElements()); + return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()), + Ty.getNumElements()); return IntegerType::get(C, Ty.getSizeInBits()); } @@ -942,12 +951,14 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { APInt SplatVal = APInt::getSplat(NumBits, Scalar); return MIB.buildConstant(Ty, SplatVal).getReg(0); } - // FIXME: for vector types create a G_BUILD_VECTOR. - if (Ty.isVector()) - return Register(); // Extend the byte value to the larger type, and then multiply by a magic // value 0x010101... in order to replicate it across every byte. + // Unless it's zero, in which case just emit a larger G_CONSTANT 0. + if (ValVRegAndVal && ValVRegAndVal->Value == 0) { + return MIB.buildConstant(Ty, 0).getReg(0); + } + LLT ExtType = Ty.getScalarType(); auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val); if (NumBits > 8) { @@ -956,13 +967,16 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0); } - assert(ExtType == Ty && "Vector memset value type not supported yet"); + // For vector types create a G_BUILD_VECTOR. + if (Ty.isVector()) + Val = MIB.buildSplatVector(Ty, Val).getReg(0); + return Val; } -bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val, - unsigned KnownLen, unsigned Align, - bool IsVolatile) { +bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, + Register Val, unsigned KnownLen, + Align Alignment, bool IsVolatile) { auto &MF = *MI.getParent()->getParent(); const auto &TLI = *MF.getSubtarget().getTargetLowering(); auto &DL = MF.getDataLayout(); @@ -987,24 +1001,25 @@ bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0; - if (!findGISelOptimalMemOpLowering( - MemOps, Limit, KnownLen, (DstAlignCanChange ? 
0 : Align), 0, - /*IsMemset=*/true, - /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false, - /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), ~0u, - MF.getFunction().getAttributes(), TLI)) + if (!findGISelOptimalMemOpLowering(MemOps, Limit, + MemOp::Set(KnownLen, DstAlignCanChange, + Alignment, + /*IsZeroMemset=*/IsZeroVal, + /*IsVolatile=*/IsVolatile), + DstPtrInfo.getAddrSpace(), ~0u, + MF.getFunction().getAttributes(), TLI)) return false; if (DstAlignCanChange) { // Get an estimate of the type from the LLT. Type *IRTy = getTypeForLLT(MemOps[0], C); - unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy); - if (NewAlign > Align) { - Align = NewAlign; + Align NewAlign = DL.getABITypeAlign(IRTy); + if (NewAlign > Alignment) { + Alignment = NewAlign; unsigned FI = FIDef->getOperand(1).getIndex(); // Give the stack frame object a larger alignment if needed. - if (MFI.getObjectAlignment(FI) < Align) - MFI.setObjectAlignment(FI, Align); + if (MFI.getObjectAlign(FI) < Alignment) + MFI.setObjectAlignment(FI, Alignment); } } @@ -1072,10 +1087,9 @@ bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val return true; } - bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, Register Src, unsigned KnownLen, - unsigned DstAlign, unsigned SrcAlign, + Align DstAlign, Align SrcAlign, bool IsVolatile) { auto &MF = *MI.getParent()->getParent(); const auto &TLI = *MF.getSubtarget().getTargetLowering(); @@ -1087,7 +1101,7 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, bool DstAlignCanChange = false; MachineFrameInfo &MFI = MF.getFrameInfo(); bool OptSize = shouldLowerMemFuncForSize(MF); - unsigned Alignment = MinAlign(DstAlign, SrcAlign); + Align Alignment = commonAlignment(DstAlign, SrcAlign); MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) @@ -1106,32 +1120,30 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo(); if (!findGISelOptimalMemOpLowering( - MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment), - SrcAlign, - /*IsMemset=*/false, - /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, - /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), - SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI)) + MemOps, Limit, + MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign, + IsVolatile), + DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), + MF.getFunction().getAttributes(), TLI)) return false; if (DstAlignCanChange) { // Get an estimate of the type from the LLT. Type *IRTy = getTypeForLLT(MemOps[0], C); - unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy); + Align NewAlign = DL.getABITypeAlign(IRTy); // Don't promote to an alignment that would require dynamic stack // realignment. const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) - while (NewAlign > Alignment && - DL.exceedsNaturalStackAlignment(Align(NewAlign))) - NewAlign /= 2; + while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) + NewAlign = NewAlign / 2; if (NewAlign > Alignment) { Alignment = NewAlign; unsigned FI = FIDef->getOperand(1).getIndex(); // Give the stack frame object a larger alignment if needed. 
- if (MFI.getObjectAlignment(FI) < Alignment) + if (MFI.getObjectAlign(FI) < Alignment) MFI.setObjectAlignment(FI, Alignment); } } @@ -1156,7 +1168,7 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, // Construct MMOs for the accesses. auto *LoadMMO = MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes()); - auto *StoreMMO = + auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes()); // Create the load. @@ -1182,9 +1194,9 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, } bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, - Register Src, unsigned KnownLen, - unsigned DstAlign, unsigned SrcAlign, - bool IsVolatile) { + Register Src, unsigned KnownLen, + Align DstAlign, Align SrcAlign, + bool IsVolatile) { auto &MF = *MI.getParent()->getParent(); const auto &TLI = *MF.getSubtarget().getTargetLowering(); auto &DL = MF.getDataLayout(); @@ -1195,7 +1207,7 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, bool DstAlignCanChange = false; MachineFrameInfo &MFI = MF.getFrameInfo(); bool OptSize = shouldLowerMemFuncForSize(MF); - unsigned Alignment = MinAlign(DstAlign, SrcAlign); + Align Alignment = commonAlignment(DstAlign, SrcAlign); MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) @@ -1213,32 +1225,30 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, // to a bug in it's findOptimalMemOpLowering implementation. For now do the // same thing here. if (!findGISelOptimalMemOpLowering( - MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment), - SrcAlign, - /*IsMemset=*/false, - /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, - /*AllowOverlap=*/false, DstPtrInfo.getAddrSpace(), - SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI)) + MemOps, Limit, + MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign, + /*IsVolatile*/ true), + DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), + MF.getFunction().getAttributes(), TLI)) return false; if (DstAlignCanChange) { // Get an estimate of the type from the LLT. Type *IRTy = getTypeForLLT(MemOps[0], C); - unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy); + Align NewAlign = DL.getABITypeAlign(IRTy); // Don't promote to an alignment that would require dynamic stack // realignment. const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) - while (NewAlign > Alignment && - DL.exceedsNaturalStackAlignment(Align(NewAlign))) - NewAlign /= 2; + while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) + NewAlign = NewAlign / 2; if (NewAlign > Alignment) { Alignment = NewAlign; unsigned FI = FIDef->getOperand(1).getIndex(); // Give the stack frame object a larger alignment if needed. 
- if (MFI.getObjectAlignment(FI) < Alignment) + if (MFI.getObjectAlign(FI) < Alignment) MFI.setObjectAlignment(FI, Alignment); } } @@ -1304,8 +1314,8 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { if (IsVolatile) return false; - unsigned DstAlign = MemOp->getBaseAlignment(); - unsigned SrcAlign = 0; + Align DstAlign = MemOp->getBaseAlign(); + Align SrcAlign; Register Dst = MI.getOperand(1).getReg(); Register Src = MI.getOperand(2).getReg(); Register Len = MI.getOperand(3).getReg(); @@ -1313,7 +1323,7 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { if (ID != Intrinsic::memset) { assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI"); MemOp = *(++MMOIt); - SrcAlign = MemOp->getBaseAlignment(); + SrcAlign = MemOp->getBaseAlign(); } // See if this is a constant length copy @@ -1385,6 +1395,338 @@ bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI, return true; } +bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI, + unsigned &ShiftVal) { + assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); + auto MaybeImmVal = + getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); + if (!MaybeImmVal || !isPowerOf2_64(MaybeImmVal->Value)) + return false; + ShiftVal = Log2_64(MaybeImmVal->Value); + return true; +} + +bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI, + unsigned &ShiftVal) { + assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); + MachineIRBuilder MIB(MI); + LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg()); + auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal); + Observer.changingInstr(MI); + MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL)); + MI.getOperand(2).setReg(ShiftCst.getReg(0)); + Observer.changedInstr(MI); + return true; +} + +bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, + unsigned TargetShiftSize, + unsigned &ShiftVal) { + assert((MI.getOpcode() == TargetOpcode::G_SHL || + MI.getOpcode() == TargetOpcode::G_LSHR || + MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift"); + + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + if (Ty.isVector()) // TODO: + return false; + + // Don't narrow further than the requested size. 
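// ---- Editor's note ------------------------------------------------------
// A worked example of the matchCombineMulToShl/applyCombineMulToShl pair
// above: G_MUL by a power-of-two constant becomes G_SHL by its log2.
#include <cassert>
#include <cstdint>
static bool matchMulToShl(uint64_t Imm, unsigned &ShiftVal) {
  if (Imm == 0 || (Imm & (Imm - 1)) != 0)   // isPowerOf2_64 equivalent
    return false;
  ShiftVal = __builtin_ctzll(Imm);          // Log2_64 of a power of two
  return true;
}
int main() {
  unsigned Sh;
  assert(matchMulToShl(8, Sh) && Sh == 3);  // x * 8   ==>  x << 3
  assert(!matchMulToShl(12, Sh));           // 12 is not a power of two
  assert((5u * 8u) == (5u << 3));           // the rewrite preserves values
}
// -------------------------------------------------------------------------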
+ unsigned Size = Ty.getSizeInBits(); + if (Size <= TargetShiftSize) + return false; + + auto MaybeImmVal = + getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); + if (!MaybeImmVal) + return false; + + ShiftVal = MaybeImmVal->Value; + return ShiftVal >= Size / 2 && ShiftVal < Size; +} + +bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI, + const unsigned &ShiftVal) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(SrcReg); + unsigned Size = Ty.getSizeInBits(); + unsigned HalfSize = Size / 2; + assert(ShiftVal >= HalfSize); + + LLT HalfTy = LLT::scalar(HalfSize); + + Builder.setInstr(MI); + auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg); + unsigned NarrowShiftAmt = ShiftVal - HalfSize; + + if (MI.getOpcode() == TargetOpcode::G_LSHR) { + Register Narrowed = Unmerge.getReg(1); + + // dst = G_LSHR s64:x, C for C >= 32 + // => + // lo, hi = G_UNMERGE_VALUES x + // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0 + + if (NarrowShiftAmt != 0) { + Narrowed = Builder.buildLShr(HalfTy, Narrowed, + Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0); + } + + auto Zero = Builder.buildConstant(HalfTy, 0); + Builder.buildMerge(DstReg, { Narrowed, Zero }); + } else if (MI.getOpcode() == TargetOpcode::G_SHL) { + Register Narrowed = Unmerge.getReg(0); + // dst = G_SHL s64:x, C for C >= 32 + // => + // lo, hi = G_UNMERGE_VALUES x + // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32) + if (NarrowShiftAmt != 0) { + Narrowed = Builder.buildShl(HalfTy, Narrowed, + Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0); + } + + auto Zero = Builder.buildConstant(HalfTy, 0); + Builder.buildMerge(DstReg, { Zero, Narrowed }); + } else { + assert(MI.getOpcode() == TargetOpcode::G_ASHR); + auto Hi = Builder.buildAShr( + HalfTy, Unmerge.getReg(1), + Builder.buildConstant(HalfTy, HalfSize - 1)); + + if (ShiftVal == HalfSize) { + // (G_ASHR i64:x, 32) -> + // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31) + Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi }); + } else if (ShiftVal == Size - 1) { + // Don't need a second shift. 
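// ---- Editor's note ------------------------------------------------------
// The G_LSHR branch of applyCombineShiftToUnmerge above, checked on plain
// integers: for a shift amount C >= 32 on s64 only the high half matters,
// and the result is merge(lshr(hi, C - 32), 0) with 0 as the new high half.
#include <cassert>
#include <cstdint>
int main() {
  uint64_t X = 0x1234567890ABCDEFull;
  unsigned C = 40;                          // ShiftVal >= HalfSize (32)
  uint32_t Hi = uint32_t(X >> 32);          // lo, hi = G_UNMERGE_VALUES x
  uint32_t Narrowed = Hi >> (C - 32);       // G_LSHR hi, C - 32
  uint64_t Merged = uint64_t(Narrowed);     // G_MERGE_VALUES Narrowed, 0
  assert(Merged == (X >> C));
}
// -------------------------------------------------------------------------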
+ // (G_ASHR i64:x, 63) -> + // %narrowed = (G_ASHR hi_32(x), 31) + // G_MERGE_VALUES %narrowed, %narrowed + Builder.buildMerge(DstReg, { Hi, Hi }); + } else { + auto Lo = Builder.buildAShr( + HalfTy, Unmerge.getReg(1), + Builder.buildConstant(HalfTy, ShiftVal - HalfSize)); + + // (G_ASHR i64:x, C) ->, for C >= 32 + // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31) + Builder.buildMerge(DstReg, { Lo, Hi }); + } + } + + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI, + unsigned TargetShiftAmount) { + unsigned ShiftAmt; + if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) { + applyCombineShiftToUnmerge(MI, ShiftAmt); + return true; + } + + return false; +} + +bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) { + return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) { + return MO.isReg() && + getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI); + }); +} + +bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) { + return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) { + return !MO.isReg() || + getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI); + }); +} + +bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); + return all_of(Mask, [](int Elt) { return Elt < 0; }); +} + +bool CombinerHelper::matchUndefStore(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_STORE); + return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(), + MRI); +} + +bool CombinerHelper::eraseInst(MachineInstr &MI) { + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1, + const MachineOperand &MOP2) { + if (!MOP1.isReg() || !MOP2.isReg()) + return false; + MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI); + if (!I1) + return false; + MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI); + if (!I2) + return false; + + // Handle a case like this: + // + // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>) + // + // Even though %0 and %1 are produced by the same instruction they are not + // the same values. + if (I1 == I2) + return MOP1.getReg() == MOP2.getReg(); + + // If we have an instruction which loads or stores, we can't guarantee that + // it is identical. + // + // For example, we may have + // + // %x1 = G_LOAD %addr (load N from @somewhere) + // ... + // call @foo + // ... + // %x2 = G_LOAD %addr (load N from @somewhere) + // ... + // %or = G_OR %x1, %x2 + // + // It's possible that @foo will modify whatever lives at the address we're + // loading from. To be safe, let's just assume that all loads and stores + // are different (unless we have something which is guaranteed to not + // change.) + if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr)) + return false; + + // Check for physical registers on the instructions first to avoid cases + // like this: + // + // %a = COPY $physreg + // ... + // SOMETHING implicit-def $physreg + // ... + // %b = COPY $physreg + // + // These copies are not equivalent. + if (any_of(I1->uses(), [](const MachineOperand &MO) { + return MO.isReg() && MO.getReg().isPhysical(); + })) { + // Check if we have a case like this: + // + // %a = COPY $physreg + // %b = COPY %a + // + // In this case, I1 and I2 will both be equal to %a = COPY $physreg. 
+ // From that, we know that they must have the same value, since they must + // have come from the same COPY. + return I1->isIdenticalTo(*I2); + } + + // We don't have any physical registers, so we don't necessarily need the + // same vreg defs. + // + // On the off-chance that there's some target instruction feeding into the + // instruction, let's use produceSameValue instead of isIdenticalTo. + return Builder.getTII().produceSameValue(*I1, *I2, &MRI); +} + +bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) { + if (!MOP.isReg()) + return false; + // MIPatternMatch doesn't let us look through G_ZEXT etc. + auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI); + return ValAndVReg && ValAndVReg->Value == C; +} + +bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI, + unsigned OpIdx) { + assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?"); + Register OldReg = MI.getOperand(0).getReg(); + Register Replacement = MI.getOperand(OpIdx).getReg(); + assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?"); + MI.eraseFromParent(); + replaceRegWith(MRI, OldReg, Replacement); + return true; +} + +bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_SELECT); + // Match (cond ? x : x) + return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) && + canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(), + MRI); +} + +bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) { + return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) && + canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), + MRI); +} + +bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) { + return matchConstantOp(MI.getOperand(OpIdx), 0) && + canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(), + MRI); +} + +bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) { + assert(MI.getNumDefs() == 1 && "Expected only one def?"); + Builder.setInstr(MI); + Builder.buildFConstant(MI.getOperand(0), C); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) { + assert(MI.getNumDefs() == 1 && "Expected only one def?"); + Builder.setInstr(MI); + Builder.buildConstant(MI.getOperand(0), C); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) { + assert(MI.getNumDefs() == 1 && "Expected only one def?"); + Builder.setInstr(MI); + Builder.buildUndef(MI.getOperand(0)); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchSimplifyAddToSub( + MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) { + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + Register &NewLHS = std::get<0>(MatchInfo); + Register &NewRHS = std::get<1>(MatchInfo); + + // Helper lambda to check for opportunities for + // ((0-A) + B) -> B - A + // (A + (0-B)) -> A - B + auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) { + int64_t Cst; + if (!mi_match(MaybeSub, MRI, m_GSub(m_ICst(Cst), m_Reg(NewRHS))) || + Cst != 0) + return false; + NewLHS = MaybeNewLHS; + return true; + }; + + return CheckFold(LHS, RHS) || CheckFold(RHS, LHS); +} + +bool CombinerHelper::applySimplifyAddToSub( + MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) { + Builder.setInstr(MI); + Register SubLHS, SubRHS; + std::tie(SubLHS, SubRHS) = MatchInfo; + 
Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::tryCombine(MachineInstr &MI) { if (tryCombineCopy(MI)) return true; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp index 62b903c30b89..bdaa6378e901 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp @@ -38,3 +38,11 @@ RAIIDelegateInstaller::RAIIDelegateInstaller(MachineFunction &MF, } RAIIDelegateInstaller::~RAIIDelegateInstaller() { MF.resetDelegate(Delegate); } + +RAIIMFObserverInstaller::RAIIMFObserverInstaller(MachineFunction &MF, + GISelChangeObserver &Observer) + : MF(MF) { + MF.setObserver(&Observer); +} + +RAIIMFObserverInstaller::~RAIIMFObserverInstaller() { MF.setObserver(nullptr); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index 64023ecfad82..0e9c6e4fab9f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -11,6 +11,7 @@ // //===------------------ #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -24,54 +25,50 @@ using namespace llvm; char llvm::GISelKnownBitsAnalysis::ID = 0; -INITIALIZE_PASS_BEGIN(GISelKnownBitsAnalysis, DEBUG_TYPE, - "Analysis for ComputingKnownBits", false, true) -INITIALIZE_PASS_END(GISelKnownBitsAnalysis, DEBUG_TYPE, - "Analysis for ComputingKnownBits", false, true) +INITIALIZE_PASS(GISelKnownBitsAnalysis, DEBUG_TYPE, + "Analysis for ComputingKnownBits", false, true) -GISelKnownBits::GISelKnownBits(MachineFunction &MF) +GISelKnownBits::GISelKnownBits(MachineFunction &MF, unsigned MaxDepth) : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()), - DL(MF.getFunction().getParent()->getDataLayout()) {} + DL(MF.getFunction().getParent()->getDataLayout()), MaxDepth(MaxDepth) {} -Align GISelKnownBits::inferAlignmentForFrameIdx(int FrameIdx, int Offset, - const MachineFunction &MF) { - const MachineFrameInfo &MFI = MF.getFrameInfo(); - return commonAlignment(Align(MFI.getObjectAlignment(FrameIdx)), Offset); - // TODO: How to handle cases with Base + Offset? 
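// ---- Editor's note ------------------------------------------------------
// A value-level check of the matchSimplifyAddToSub fold completed above:
// ((0 - A) + B) ==> B - A and (A + (0 - B)) ==> A - B in wrap-around
// (two's complement) arithmetic, which is what G_ADD/G_SUB model.
#include <cassert>
#include <cstdint>
int main() {
  uint32_t A = 7, B = 19;
  assert((0u - A) + B == B - A);  // G_ADD (G_SUB 0, A), B  ->  G_SUB B, A
  assert(A + (0u - B) == A - B);  // G_ADD A, (G_SUB 0, B)  ->  G_SUB A, B
}
// -------------------------------------------------------------------------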
-} - -MaybeAlign GISelKnownBits::inferPtrAlignment(const MachineInstr &MI) { - if (MI.getOpcode() == TargetOpcode::G_FRAME_INDEX) { - int FrameIdx = MI.getOperand(1).getIndex(); - return inferAlignmentForFrameIdx(FrameIdx, 0, *MI.getMF()); +Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) { + const MachineInstr *MI = MRI.getVRegDef(R); + switch (MI->getOpcode()) { + case TargetOpcode::COPY: + return computeKnownAlignment(MI->getOperand(1).getReg(), Depth); + case TargetOpcode::G_FRAME_INDEX: { + int FrameIdx = MI->getOperand(1).getIndex(); + return MF.getFrameInfo().getObjectAlign(FrameIdx); + } + case TargetOpcode::G_INTRINSIC: + case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: + default: + return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1); } - return None; -} - -void GISelKnownBits::computeKnownBitsForFrameIndex(Register R, KnownBits &Known, - const APInt &DemandedElts, - unsigned Depth) { - const MachineInstr &MI = *MRI.getVRegDef(R); - computeKnownBitsForAlignment(Known, inferPtrAlignment(MI)); -} - -void GISelKnownBits::computeKnownBitsForAlignment(KnownBits &Known, - MaybeAlign Alignment) { - if (Alignment) - // The low bits are known zero if the pointer is aligned. - Known.Zero.setLowBits(Log2(Alignment)); } KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) { + assert(MI.getNumExplicitDefs() == 1 && + "expected single return generic instruction"); return getKnownBits(MI.getOperand(0).getReg()); } KnownBits GISelKnownBits::getKnownBits(Register R) { - KnownBits Known; - LLT Ty = MRI.getType(R); + const LLT Ty = MRI.getType(R); APInt DemandedElts = Ty.isVector() ? APInt::getAllOnesValue(Ty.getNumElements()) : APInt(1, 1); + return getKnownBits(R, DemandedElts); +} + +KnownBits GISelKnownBits::getKnownBits(Register R, const APInt &DemandedElts, + unsigned Depth) { + // For now, we only maintain the cache during one request. + assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared"); + + KnownBits Known; computeKnownBitsImpl(R, Known, DemandedElts); + ComputeKnownBitsCache.clear(); return Known; } @@ -87,6 +84,17 @@ APInt GISelKnownBits::getKnownZeroes(Register R) { APInt GISelKnownBits::getKnownOnes(Register R) { return getKnownBits(R).One; } +LLVM_ATTRIBUTE_UNUSED static void +dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) { + dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth + << "] Computed for: " << MI << "[" << Depth << "] Known: 0x" + << (Known.Zero | Known.One).toString(16, false) << "\n" + << "[" << Depth << "] Zero: 0x" << Known.Zero.toString(16, false) + << "\n" + << "[" << Depth << "] One: 0x" << Known.One.toString(16, false) + << "\n"; +} + void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, const APInt &DemandedElts, unsigned Depth) { @@ -104,12 +112,28 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, } unsigned BitWidth = DstTy.getSizeInBits(); + auto CacheEntry = ComputeKnownBitsCache.find(R); + if (CacheEntry != ComputeKnownBitsCache.end()) { + Known = CacheEntry->second; + LLVM_DEBUG(dbgs() << "Cache hit at "); + LLVM_DEBUG(dumpResult(MI, Known, Depth)); + assert(Known.getBitWidth() == BitWidth && "Cache entry size doesn't match"); + return; + } Known = KnownBits(BitWidth); // Don't know anything if (DstTy.isVector()) return; // TODO: Handle vectors. - if (Depth == getMaxDepth()) + // Depth may get bigger than max depth if it gets passed to a different + // GISelKnownBits object. 
+ // This may happen when say a generic part uses a GISelKnownBits object + // with some max depth, but then we hit TL.computeKnownBitsForTargetInstr + // which creates a new GISelKnownBits object with a different and smaller + // depth. If we just check for equality, we would never exit if the depth + // that is passed down to the target specific GISelKnownBits object is + // already bigger than its max depth. + if (Depth >= getMaxDepth()) return; if (!DemandedElts) @@ -122,20 +146,53 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI, Depth); break; - case TargetOpcode::COPY: { - MachineOperand Dst = MI.getOperand(0); - MachineOperand Src = MI.getOperand(1); - // Look through trivial copies but don't look through trivial copies of the - // form `%1:(s32) = OP %0:gpr32` known-bits analysis is currently unable to - // determine the bit width of a register class. - // - // We can't use NoSubRegister by name as it's defined by each target but - // it's always defined to be 0 by tablegen. - if (Dst.getSubReg() == 0 /*NoSubRegister*/ && Src.getReg().isVirtual() && - Src.getSubReg() == 0 /*NoSubRegister*/ && - MRI.getType(Src.getReg()).isValid()) { - // Don't increment Depth for this one since we didn't do any work. - computeKnownBitsImpl(Src.getReg(), Known, DemandedElts, Depth); + case TargetOpcode::COPY: + case TargetOpcode::G_PHI: + case TargetOpcode::PHI: { + Known.One = APInt::getAllOnesValue(BitWidth); + Known.Zero = APInt::getAllOnesValue(BitWidth); + // Destination registers should not have subregisters at this + // point of the pipeline, otherwise the main live-range will be + // defined more than once, which is against SSA. + assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?"); + // Record in the cache that we know nothing for MI. + // This will get updated later and in the meantime, if we reach that + // phi again, because of a loop, we will cut the search thanks to this + // cache entry. + // We could actually build up more information on the phi by not cutting + // the search, but that additional information is more a side effect + // than an intended choice. + // Therefore, for now, save on compile time until we derive a proper way + // to derive known bits for PHIs within loops. + ComputeKnownBitsCache[R] = KnownBits(BitWidth); + // PHI's operand are a mix of registers and basic blocks interleaved. + // We only care about the register ones. + for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) { + const MachineOperand &Src = MI.getOperand(Idx); + Register SrcReg = Src.getReg(); + // Look through trivial copies and phis but don't look through trivial + // copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits + // analysis is currently unable to determine the bit width of a + // register class. + // + // We can't use NoSubRegister by name as it's defined by each target but + // it's always defined to be 0 by tablegen. + if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ && + MRI.getType(SrcReg).isValid()) { + // For COPYs we don't do anything, don't increase the depth. + computeKnownBitsImpl(SrcReg, Known2, DemandedElts, + Depth + (Opcode != TargetOpcode::COPY)); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + // If we reach a point where we don't know anything + // just stop looking through the operands. + if (Known.One == 0 && Known.Zero == 0) + break; + } else { + // We know nothing. 
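// ---- Editor's note ------------------------------------------------------
// A small model of the new G_PHI handling above: start from "all bits
// known", intersect the known bits of every incoming register, and stop
// early once nothing is known. The cache entry seeded before the loop is
// what cuts off recursion through loop phis.
#include <cassert>
#include <cstdint>
#include <initializer_list>
struct KB { uint32_t Zero, One; };           // simplified 32-bit KnownBits
static KB phiMeet(std::initializer_list<KB> Incoming) {
  KB Known{~0u, ~0u};                        // everything "known" initially
  for (KB In : Incoming) {
    Known.Zero &= In.Zero;                   // keep bits known in all inputs
    Known.One &= In.One;
    if (Known.One == 0 && Known.Zero == 0)
      break;                                 // nothing left to learn
  }
  return Known;
}
int main() {
  // Incoming constants 0x10 and 0x30 agree that bit 4 is one and that the
  // low four bits are zero; the phi result keeps exactly that.
  KB A{~0x10u, 0x10u}, B{~0x30u, 0x30u};
  KB R = phiMeet({A, B});
  assert((R.One & 0x10u) != 0 && (R.Zero & 0xFu) == 0xFu);
}
// -------------------------------------------------------------------------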
+ Known = KnownBits(BitWidth); + break; + } } break; } @@ -148,22 +205,17 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, break; } case TargetOpcode::G_FRAME_INDEX: { - computeKnownBitsForFrameIndex(R, Known, DemandedElts); + int FrameIdx = MI.getOperand(1).getIndex(); + TL.computeKnownBitsForFrameIndex(FrameIdx, Known, MF); break; } case TargetOpcode::G_SUB: { - // If low bits are known to be zero in both operands, then we know they are - // going to be 0 in the result. Both addition and complement operations - // preserve the low zero bits. - computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, Depth + 1); - unsigned KnownZeroLow = Known2.countMinTrailingZeros(); - if (KnownZeroLow == 0) - break; computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts, Depth + 1); - KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros()); - Known.Zero.setLowBits(KnownZeroLow); + Known = KnownBits::computeForAddSub(/*Add*/ false, /*NSW*/ false, Known, + Known2); break; } case TargetOpcode::G_XOR: { @@ -172,11 +224,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, Depth + 1); - // Output known-0 bits are known if clear or set in both the LHS & RHS. - APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); - // Output known-1 are known to be set if set in only one of the LHS, RHS. - Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); - Known.Zero = KnownZeroOut; + Known ^= Known2; break; } case TargetOpcode::G_PTR_ADD: { @@ -187,24 +235,12 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, LLVM_FALLTHROUGH; } case TargetOpcode::G_ADD: { - // Output known-0 bits are known if clear or set in both the low clear bits - // common to both LHS & RHS. For example, 8+(X<<3) is known to have the - // low 3 bits clear. - // Output known-0 bits are also known if the top bits of each input are - // known to be clear. For example, if one input has the top 10 bits clear - // and the other has the top 8 bits clear, we know the top 7 bits of the - // output must be clear. - computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, Depth + 1); - unsigned KnownZeroHigh = Known2.countMinLeadingZeros(); - unsigned KnownZeroLow = Known2.countMinTrailingZeros(); computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts, Depth + 1); - KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros()); - KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros()); - Known.Zero.setLowBits(KnownZeroLow); - if (KnownZeroHigh > 1) - Known.Zero.setHighBits(KnownZeroHigh - 1); + Known = + KnownBits::computeForAddSub(/*Add*/ true, /*NSW*/ false, Known, Known2); break; } case TargetOpcode::G_AND: { @@ -214,10 +250,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, Depth + 1); - // Output known-1 bits are only known if set in both the LHS & RHS. - Known.One &= Known2.One; - // Output known-0 are known to be clear if zero in either the LHS | RHS. 
- Known.Zero |= Known2.Zero; + Known &= Known2; break; } case TargetOpcode::G_OR: { @@ -227,10 +260,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, Depth + 1); - // Output known-0 bits are only known if clear in both the LHS & RHS. - Known.Zero &= Known2.Zero; - // Output known-1 are known to be set if set in either the LHS | RHS. - Known.One |= Known2.One; + Known |= Known2; break; } case TargetOpcode::G_MUL: { @@ -287,7 +317,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, case TargetOpcode::G_ANYEXT: { computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, Depth + 1); - Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */); + Known = Known.zext(BitWidth); break; } case TargetOpcode::G_LOAD: { @@ -353,9 +383,9 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, ? DL.getIndexSizeInBits(SrcTy.getAddressSpace()) : SrcTy.getSizeInBits(); assert(SrcBitWidth && "SrcBitWidth can't be zero"); - Known = Known.zextOrTrunc(SrcBitWidth, true); + Known = Known.zextOrTrunc(SrcBitWidth); computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); - Known = Known.zextOrTrunc(BitWidth, true); + Known = Known.zextOrTrunc(BitWidth); if (BitWidth > SrcBitWidth) Known.Zero.setBitsFrom(SrcBitWidth); break; @@ -363,14 +393,10 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, } assert(!Known.hasConflict() && "Bits known to be one AND zero?"); - LLVM_DEBUG(dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" - << Depth << "] Computed for: " << MI << "[" << Depth - << "] Known: 0x" - << (Known.Zero | Known.One).toString(16, false) << "\n" - << "[" << Depth << "] Zero: 0x" - << Known.Zero.toString(16, false) << "\n" - << "[" << Depth << "] One: 0x" - << Known.One.toString(16, false) << "\n"); + LLVM_DEBUG(dumpResult(MI, Known, Depth)); + + // Update the cache. + ComputeKnownBitsCache[R] = Known; } unsigned GISelKnownBits::computeNumSignBits(Register R, @@ -389,6 +415,7 @@ unsigned GISelKnownBits::computeNumSignBits(Register R, return 1; // No demanded elts, better to assume we don't know anything. LLT DstTy = MRI.getType(R); + const unsigned TyBits = DstTy.getScalarSizeInBits(); // Handle the case where this is called on a register that does not have a // type constraint. 
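// ---- Editor's note ------------------------------------------------------
// The G_XOR/G_AND/G_OR cases above now delegate to KnownBits operators; a
// simplified stand-in spelling out the identities the removed comments
// described (known-one/known-zero propagation through &, |, ^):
#include <cassert>
#include <cstdint>
struct KB2 { uint32_t Zero, One; };
static KB2 kbAnd(KB2 L, KB2 R) {             // ones need both; zeros either
  return {L.Zero | R.Zero, L.One & R.One};
}
static KB2 kbOr(KB2 L, KB2 R) {              // zeros need both; ones either
  return {L.Zero & R.Zero, L.One | R.One};
}
static KB2 kbXor(KB2 L, KB2 R) {             // known only where both known
  return {(L.Zero & R.Zero) | (L.One & R.One),
          (L.Zero & R.One) | (L.One & R.Zero)};
}
int main() {
  KB2 L{~0xCu, 0xCu}, R{~0xAu, 0xAu};        // exactly 0b1100 and 0b1010
  assert(kbAnd(L, R).One == 0x8u);           // 0b1000
  assert(kbOr(L, R).One == 0xEu);            // 0b1110
  assert(kbXor(L, R).One == 0x6u);           // 0b0110
}
// -------------------------------------------------------------------------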
This is unlikely to occur except by looking through copies @@ -397,6 +424,7 @@ unsigned GISelKnownBits::computeNumSignBits(Register R, if (!DstTy.isValid()) return 1; + unsigned FirstAnswer = 1; switch (Opcode) { case TargetOpcode::COPY: { MachineOperand &Src = MI.getOperand(1); @@ -414,6 +442,16 @@ unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits(); return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp; } + case TargetOpcode::G_SEXTLOAD: { + Register Dst = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Dst); + // TODO: add vector support + if (Ty.isVector()) + break; + if (MI.hasOneMemOperand()) + return Ty.getSizeInBits() - (*MI.memoperands_begin())->getSizeInBits(); + break; + } case TargetOpcode::G_TRUNC: { Register Src = MI.getOperand(1).getReg(); LLT SrcTy = MRI.getType(Src); @@ -426,13 +464,34 @@ unsigned GISelKnownBits::computeNumSignBits(Register R, return NumSrcSignBits - (NumSrcBits - DstTyBits); break; } - default: + case TargetOpcode::G_INTRINSIC: + case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: + default: { + unsigned NumBits = + TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth); + if (NumBits > 1) + FirstAnswer = std::max(FirstAnswer, NumBits); break; } + } + + // Finally, if we can prove that the top bits of the result are 0's or 1's, + // use this information. + KnownBits Known = getKnownBits(R, DemandedElts, Depth); + APInt Mask; + if (Known.isNonNegative()) { // sign bit is 0 + Mask = Known.Zero; + } else if (Known.isNegative()) { // sign bit is 1; + Mask = Known.One; + } else { + // Nothing known. + return FirstAnswer; + } - // TODO: Handle target instructions - // TODO: Fall back to known bits - return 1; + // Okay, we know that the sign bit in Mask is set. Use CLO to determine + // the number of identical bits in the top of the input value. + Mask <<= Mask.getBitWidth() - TyBits; + return std::max(FirstAnswer, Mask.countLeadingOnes()); } unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 96e794b15a44..8f6643b2f193 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -16,12 +16,13 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" +#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h" #include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -47,7 +48,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/InlineAsm.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" @@ -232,46 +232,35 @@ int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) { // Always allocate at least one byte. 
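// ---- Editor's note ------------------------------------------------------
// A worked instance of the new computeNumSignBits fallback above: when the
// known bits prove the value negative (or non-negative), shift the
// corresponding mask up to the type's top bit and count leading identical
// bits -- the Mask <<= ...; countLeadingOnes() step in the hunk.
#include <cassert>
#include <cstdint>
static unsigned signBitsFromMask(uint32_t Mask, unsigned TyBits) {
  Mask <<= (32 - TyBits);           // align the value's sign bit with MSB
  unsigned N = 0;                   // countLeadingOnes()
  while (N < 32 && (Mask & 0x80000000u)) { Mask <<= 1; ++N; }
  return N;
}
int main() {
  // A 16-bit value whose Known.One covers the top nine bits (0xFF80) has
  // nine identical leading bits, hence at least nine sign bits.
  assert(signBitsFromMask(0xFF80u, 16) == 9);
}
// -------------------------------------------------------------------------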
Size = std::max<uint64_t>(Size, 1u); - unsigned Alignment = AI.getAlignment(); - if (!Alignment) - Alignment = DL->getABITypeAlignment(AI.getAllocatedType()); - int &FI = FrameIndices[&AI]; - FI = MF->getFrameInfo().CreateStackObject(Size, Alignment, false, &AI); + FI = MF->getFrameInfo().CreateStackObject(Size, AI.getAlign(), false, &AI); return FI; } -unsigned IRTranslator::getMemOpAlignment(const Instruction &I) { - unsigned Alignment = 0; - Type *ValTy = nullptr; - if (const StoreInst *SI = dyn_cast<StoreInst>(&I)) { - Alignment = SI->getAlignment(); - ValTy = SI->getValueOperand()->getType(); - } else if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) { - Alignment = LI->getAlignment(); - ValTy = LI->getType(); - } else if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) { +Align IRTranslator::getMemOpAlign(const Instruction &I) { + if (const StoreInst *SI = dyn_cast<StoreInst>(&I)) + return SI->getAlign(); + if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) { + return LI->getAlign(); + } + if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) { // TODO(PR27168): This instruction has no alignment attribute, but unlike // the default alignment for load/store, the default here is to assume // it has NATURAL alignment, not DataLayout-specified alignment. const DataLayout &DL = AI->getModule()->getDataLayout(); - Alignment = DL.getTypeStoreSize(AI->getCompareOperand()->getType()); - ValTy = AI->getCompareOperand()->getType(); - } else if (const AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) { + return Align(DL.getTypeStoreSize(AI->getCompareOperand()->getType())); + } + if (const AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) { // TODO(PR27168): This instruction has no alignment attribute, but unlike // the default alignment for load/store, the default here is to assume // it has NATURAL alignment, not DataLayout-specified alignment. const DataLayout &DL = AI->getModule()->getDataLayout(); - Alignment = DL.getTypeStoreSize(AI->getValOperand()->getType()); - ValTy = AI->getType(); - } else { - OptimizationRemarkMissed R("gisel-irtranslator", "", &I); - R << "unable to translate memop: " << ore::NV("Opcode", &I); - reportTranslationError(*MF, *TPC, *ORE, R); - return 1; + return Align(DL.getTypeStoreSize(AI->getValOperand()->getType())); } - - return Alignment ? 
Alignment : DL->getABITypeAlignment(ValTy); + OptimizationRemarkMissed R("gisel-irtranslator", "", &I); + R << "unable to translate memop: " << ore::NV("Opcode", &I); + reportTranslationError(*MF, *TPC, *ORE, R); + return Align(1); } MachineBasicBlock &IRTranslator::getMBB(const BasicBlock &BB) { @@ -316,7 +305,7 @@ bool IRTranslator::translateFSub(const User &U, MachineIRBuilder &MIRBuilder) { Flags = MachineInstr::copyFlagsFromInstruction(I); } // Negate the last operand of the FSUB - MIRBuilder.buildInstr(TargetOpcode::G_FNEG, {Res}, {Op1}, Flags); + MIRBuilder.buildFNeg(Res, Op1, Flags); return true; } return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder); @@ -330,7 +319,7 @@ bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) { const Instruction &I = cast<Instruction>(U); Flags = MachineInstr::copyFlagsFromInstruction(I); } - MIRBuilder.buildInstr(TargetOpcode::G_FNEG, {Res}, {Op0}, Flags); + MIRBuilder.buildFNeg(Res, Op0, Flags); return true; } @@ -353,8 +342,8 @@ bool IRTranslator::translateCompare(const User &U, Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType()))); else { assert(CI && "Instruction should be CmpInst"); - MIRBuilder.buildInstr(TargetOpcode::G_FCMP, {Res}, {Pred, Op0, Op1}, - MachineInstr::copyFlagsFromInstruction(*CI)); + MIRBuilder.buildFCmp(Pred, Res, Op0, Op1, + MachineInstr::copyFlagsFromInstruction(*CI)); } return true; @@ -603,7 +592,7 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, Cond = MIB.buildICmp(CmpInst::ICMP_SLE, i1Ty, CmpOpReg, CondRHS).getReg(0); } else { - const LLT &CmpTy = MRI->getType(CmpOpReg); + const LLT CmpTy = MRI->getType(CmpOpReg); auto Sub = MIB.buildSub({CmpTy}, CmpOpReg, CondLHS); auto Diff = MIB.buildConstant(CmpTy, High - Low); Cond = MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, Sub, Diff).getReg(0); @@ -631,8 +620,7 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, if (CB.TrueBB == CB.ThisBB->getNextNode()) { std::swap(CB.TrueBB, CB.FalseBB); auto True = MIB.buildConstant(i1Ty, 1); - Cond = MIB.buildInstr(TargetOpcode::G_XOR, {i1Ty}, {Cond, True}, None) - .getReg(0); + Cond = MIB.buildXor(i1Ty, Cond, True).getReg(0); } MIB.buildBrCond(Cond, *CB.TrueBB); @@ -842,9 +830,16 @@ bool IRTranslator::translateIndirectBr(const User &U, MIRBuilder.buildBrIndirect(Tgt); // Link successors. + SmallPtrSet<const BasicBlock *, 32> AddedSuccessors; MachineBasicBlock &CurBB = MIRBuilder.getMBB(); - for (const BasicBlock *Succ : successors(&BrInst)) + for (const BasicBlock *Succ : successors(&BrInst)) { + // It's legal for indirectbr instructions to have duplicate blocks in the + // destination list. We don't allow this in MIR. Skip anything that's + // already a successor. + if (!AddedSuccessors.insert(Succ).second) + continue; CurBB.addSuccessor(&getMBB(*Succ)); + } return true; } @@ -859,11 +854,6 @@ static bool isSwiftError(const Value *V) { bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { const LoadInst &LI = cast<LoadInst>(U); - - auto Flags = LI.isVolatile() ? MachineMemOperand::MOVolatile - : MachineMemOperand::MONone; - Flags |= MachineMemOperand::MOLoad; - if (DL->getTypeStoreSize(LI.getType()) == 0) return true; @@ -882,6 +872,9 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { return true; } + auto &TLI = *MF->getSubtarget().getTargetLowering(); + MachineMemOperand::Flags Flags = TLI.getLoadMemOperandFlags(LI, *DL); + const MDNode *Ranges = Regs.size() == 1 ? 
LI.getMetadata(LLVMContext::MD_range) : nullptr; for (unsigned i = 0; i < Regs.size(); ++i) { @@ -889,12 +882,12 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8); MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8); - unsigned BaseAlign = getMemOpAlignment(LI); + Align BaseAlign = getMemOpAlign(LI); AAMDNodes AAMetadata; LI.getAAMetadata(AAMetadata); auto MMO = MF->getMachineMemOperand( - Ptr, Flags, (MRI->getType(Regs[i]).getSizeInBits() + 7) / 8, - MinAlign(BaseAlign, Offsets[i] / 8), AAMetadata, Ranges, + Ptr, Flags, MRI->getType(Regs[i]).getSizeInBytes(), + commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, Ranges, LI.getSyncScopeID(), LI.getOrdering()); MIRBuilder.buildLoad(Regs[i], Addr, *MMO); } @@ -904,10 +897,6 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { const StoreInst &SI = cast<StoreInst>(U); - auto Flags = SI.isVolatile() ? MachineMemOperand::MOVolatile - : MachineMemOperand::MONone; - Flags |= MachineMemOperand::MOStore; - if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0) return true; @@ -927,17 +916,20 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { return true; } + auto &TLI = *MF->getSubtarget().getTargetLowering(); + MachineMemOperand::Flags Flags = TLI.getStoreMemOperandFlags(SI, *DL); + for (unsigned i = 0; i < Vals.size(); ++i) { Register Addr; MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8); MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8); - unsigned BaseAlign = getMemOpAlignment(SI); + Align BaseAlign = getMemOpAlign(SI); AAMDNodes AAMetadata; SI.getAAMetadata(AAMetadata); auto MMO = MF->getMachineMemOperand( - Ptr, Flags, (MRI->getType(Vals[i]).getSizeInBits() + 7) / 8, - MinAlign(BaseAlign, Offsets[i] / 8), AAMetadata, nullptr, + Ptr, Flags, MRI->getType(Vals[i]).getSizeInBytes(), + commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, nullptr, SI.getSyncScopeID(), SI.getOrdering()); MIRBuilder.buildStore(Vals[i], Addr, *MMO); } @@ -1010,36 +1002,39 @@ bool IRTranslator::translateSelect(const User &U, ArrayRef<Register> Op0Regs = getOrCreateVRegs(*U.getOperand(1)); ArrayRef<Register> Op1Regs = getOrCreateVRegs(*U.getOperand(2)); - const SelectInst &SI = cast<SelectInst>(U); uint16_t Flags = 0; - if (const CmpInst *Cmp = dyn_cast<CmpInst>(SI.getCondition())) - Flags = MachineInstr::copyFlagsFromInstruction(*Cmp); + if (const SelectInst *SI = dyn_cast<SelectInst>(&U)) + Flags = MachineInstr::copyFlagsFromInstruction(*SI); for (unsigned i = 0; i < ResRegs.size(); ++i) { - MIRBuilder.buildInstr(TargetOpcode::G_SELECT, {ResRegs[i]}, - {Tst, Op0Regs[i], Op1Regs[i]}, Flags); + MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i], Flags); } return true; } +bool IRTranslator::translateCopy(const User &U, const Value &V, + MachineIRBuilder &MIRBuilder) { + Register Src = getOrCreateVReg(V); + auto &Regs = *VMap.getVRegs(U); + if (Regs.empty()) { + Regs.push_back(Src); + VMap.getOffsets(U)->push_back(0); + } else { + // If we already assigned a vreg for this instruction, we can't change that. + // Emit a copy to satisfy the users we already emitted. 
+ MIRBuilder.buildCopy(Regs[0], Src); + } + return true; +} + bool IRTranslator::translateBitCast(const User &U, MachineIRBuilder &MIRBuilder) { // If we're bitcasting to the source type, we can reuse the source vreg. if (getLLTForType(*U.getOperand(0)->getType(), *DL) == - getLLTForType(*U.getType(), *DL)) { - Register SrcReg = getOrCreateVReg(*U.getOperand(0)); - auto &Regs = *VMap.getVRegs(U); - // If we already assigned a vreg for this bitcast, we can't change that. - // Emit a copy to satisfy the users we already emitted. - if (!Regs.empty()) - MIRBuilder.buildCopy(Regs[0], SrcReg); - else { - Regs.push_back(SrcReg); - VMap.getOffsets(U)->push_back(0); - } - return true; - } + getLLTForType(*U.getType(), *DL)) + return translateCopy(U, *U.getOperand(0), MIRBuilder); + return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder); } @@ -1053,10 +1048,6 @@ bool IRTranslator::translateCast(unsigned Opcode, const User &U, bool IRTranslator::translateGetElementPtr(const User &U, MachineIRBuilder &MIRBuilder) { - // FIXME: support vector GEPs. - if (U.getType()->isVectorTy()) - return false; - Value &Op0 = *U.getOperand(0); Register BaseReg = getOrCreateVReg(Op0); Type *PtrIRTy = Op0.getType(); @@ -1064,6 +1055,24 @@ bool IRTranslator::translateGetElementPtr(const User &U, Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy); LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL); + // Normalize Vector GEP - all scalar operands should be converted to the + // splat vector. + unsigned VectorWidth = 0; + if (auto *VT = dyn_cast<VectorType>(U.getType())) + VectorWidth = cast<FixedVectorType>(VT)->getNumElements(); + + // We might need to splat the base pointer into a vector if the offsets + // are vectors. + if (VectorWidth && !PtrTy.isVector()) { + BaseReg = + MIRBuilder.buildSplatVector(LLT::vector(VectorWidth, PtrTy), BaseReg) + .getReg(0); + PtrIRTy = FixedVectorType::get(PtrIRTy, VectorWidth); + PtrTy = getLLTForType(*PtrIRTy, *DL); + OffsetIRTy = DL->getIntPtrType(PtrIRTy); + OffsetTy = getLLTForType(*OffsetIRTy, *DL); + } + int64_t Offset = 0; for (gep_type_iterator GTI = gep_type_begin(&U), E = gep_type_end(&U); GTI != E; ++GTI) { @@ -1083,7 +1092,6 @@ bool IRTranslator::translateGetElementPtr(const User &U, } if (Offset != 0) { - LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL); auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset); BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, OffsetMIB.getReg(0)) .getReg(0); @@ -1091,8 +1099,15 @@ bool IRTranslator::translateGetElementPtr(const User &U, } Register IdxReg = getOrCreateVReg(*Idx); - if (MRI->getType(IdxReg) != OffsetTy) + LLT IdxTy = MRI->getType(IdxReg); + if (IdxTy != OffsetTy) { + if (!IdxTy.isVector() && VectorWidth) { + IdxReg = MIRBuilder.buildSplatVector( + OffsetTy.changeElementType(IdxTy), IdxReg).getReg(0); + } + IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0); + } // N = N + Idx * ElementSize; // Avoid doing it for ElementSize of 1. 
@@ -1101,7 +1116,7 @@ bool IRTranslator::translateGetElementPtr(const User &U, auto ElementSizeMIB = MIRBuilder.buildConstant( getLLTForType(*OffsetIRTy, *DL), ElementSize); GepOffsetReg = - MIRBuilder.buildMul(OffsetTy, ElementSizeMIB, IdxReg).getReg(0); + MIRBuilder.buildMul(OffsetTy, IdxReg, ElementSizeMIB).getReg(0); } else GepOffsetReg = IdxReg; @@ -1111,7 +1126,7 @@ bool IRTranslator::translateGetElementPtr(const User &U, if (Offset != 0) { auto OffsetMIB = - MIRBuilder.buildConstant(getLLTForType(*OffsetIRTy, *DL), Offset); + MIRBuilder.buildConstant(OffsetTy, Offset); MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0)); return true; } @@ -1133,20 +1148,21 @@ bool IRTranslator::translateMemFunc(const CallInst &CI, for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) ICall.addUse(getOrCreateVReg(**AI)); - unsigned DstAlign = 0, SrcAlign = 0; + Align DstAlign; + Align SrcAlign; unsigned IsVol = cast<ConstantInt>(CI.getArgOperand(CI.getNumArgOperands() - 1)) ->getZExtValue(); if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) { - DstAlign = std::max<unsigned>(MCI->getDestAlignment(), 1); - SrcAlign = std::max<unsigned>(MCI->getSourceAlignment(), 1); + DstAlign = MCI->getDestAlign().valueOrOne(); + SrcAlign = MCI->getSourceAlign().valueOrOne(); } else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) { - DstAlign = std::max<unsigned>(MMI->getDestAlignment(), 1); - SrcAlign = std::max<unsigned>(MMI->getSourceAlignment(), 1); + DstAlign = MMI->getDestAlign().valueOrOne(); + SrcAlign = MMI->getSourceAlign().valueOrOne(); } else { auto *MSI = cast<MemSetInst>(&CI); - DstAlign = std::max<unsigned>(MSI->getDestAlignment(), 1); + DstAlign = MSI->getDestAlign().valueOrOne(); } // We need to propagate the tail call flag from the IR inst as an argument. 
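A recurring change across these hunks is the move from raw unsigned alignments to the llvm::Align family of types. A minimal sketch of the idiom, assuming only llvm/Support/Alignment.h as it stood in this merge (the helper names are illustrative, not part of the change):

    #include "llvm/Support/Alignment.h"
    using namespace llvm;

    // MaybeAlign models a possibly-unspecified alignment; valueOrOne()
    // replaces the old std::max<unsigned>(getDestAlignment(), 1) pattern.
    Align destAlign(MaybeAlign MA) { return MA.valueOrOne(); }

    // commonAlignment(Base, Offset) is the Align counterpart of the old
    // MinAlign(BaseAlign, Offset): the largest alignment guaranteed at
    // Base + Offset. For example, Align(8) at offset 4 yields Align(4).
    Align elementAlign(Align Base, uint64_t Offset) {
      return commonAlignment(Base, Offset);
    }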
@@ -1171,8 +1187,8 @@ void IRTranslator::getStackGuard(Register DstReg, MachineIRBuilder &MIRBuilder) { const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); MRI->setRegClass(DstReg, TRI->getPointerRegClass(*MF)); - auto MIB = MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD); - MIB.addDef(DstReg); + auto MIB = + MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD, {DstReg}, {}); auto &TLI = *MF->getSubtarget().getTargetLowering(); Value *Global = TLI.getSDagStackGuard(*MF->getFunction().getParent()); @@ -1184,18 +1200,16 @@ void IRTranslator::getStackGuard(Register DstReg, MachineMemOperand::MODereferenceable; MachineMemOperand *MemRef = MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8, - DL->getPointerABIAlignment(0).value()); + DL->getPointerABIAlignment(0)); MIB.setMemRefs({MemRef}); } bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op, MachineIRBuilder &MIRBuilder) { ArrayRef<Register> ResRegs = getOrCreateVRegs(CI); - MIRBuilder.buildInstr(Op) - .addDef(ResRegs[0]) - .addDef(ResRegs[1]) - .addUse(getOrCreateVReg(*CI.getOperand(0))) - .addUse(getOrCreateVReg(*CI.getOperand(1))); + MIRBuilder.buildInstr( + Op, {ResRegs[0], ResRegs[1]}, + {getOrCreateVReg(*CI.getOperand(0)), getOrCreateVReg(*CI.getOperand(1))}); return true; } @@ -1206,8 +1220,12 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { break; case Intrinsic::bswap: return TargetOpcode::G_BSWAP; - case Intrinsic::bitreverse: + case Intrinsic::bitreverse: return TargetOpcode::G_BITREVERSE; + case Intrinsic::fshl: + return TargetOpcode::G_FSHL; + case Intrinsic::fshr: + return TargetOpcode::G_FSHR; case Intrinsic::ceil: return TargetOpcode::G_FCEIL; case Intrinsic::cos: @@ -1258,6 +1276,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { return TargetOpcode::G_INTRINSIC_TRUNC; case Intrinsic::readcyclecounter: return TargetOpcode::G_READCYCLECOUNTER; + case Intrinsic::ptrmask: + return TargetOpcode::G_PTRMASK; } return Intrinsic::not_intrinsic; } @@ -1282,6 +1302,51 @@ bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI, return true; } +// TODO: Include ConstrainedOps.def when all strict instructions are defined.
+static unsigned getConstrainedOpcode(Intrinsic::ID ID) { + switch (ID) { + case Intrinsic::experimental_constrained_fadd: + return TargetOpcode::G_STRICT_FADD; + case Intrinsic::experimental_constrained_fsub: + return TargetOpcode::G_STRICT_FSUB; + case Intrinsic::experimental_constrained_fmul: + return TargetOpcode::G_STRICT_FMUL; + case Intrinsic::experimental_constrained_fdiv: + return TargetOpcode::G_STRICT_FDIV; + case Intrinsic::experimental_constrained_frem: + return TargetOpcode::G_STRICT_FREM; + case Intrinsic::experimental_constrained_fma: + return TargetOpcode::G_STRICT_FMA; + case Intrinsic::experimental_constrained_sqrt: + return TargetOpcode::G_STRICT_FSQRT; + default: + return 0; + } +} + +bool IRTranslator::translateConstrainedFPIntrinsic( + const ConstrainedFPIntrinsic &FPI, MachineIRBuilder &MIRBuilder) { + fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue(); + + unsigned Opcode = getConstrainedOpcode(FPI.getIntrinsicID()); + if (!Opcode) + return false; + + unsigned Flags = MachineInstr::copyFlagsFromInstruction(FPI); + if (EB == fp::ExceptionBehavior::ebIgnore) + Flags |= MachineInstr::NoFPExcept; + + SmallVector<llvm::SrcOp, 4> VRegs; + VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(0))); + if (!FPI.isUnaryOp()) + VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(1))); + if (FPI.isTernaryOp()) + VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(2))); + + MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(FPI)}, VRegs, Flags); + return true; +} + bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder) { @@ -1369,10 +1434,10 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8; // FIXME: Get alignment - MIRBuilder.buildInstr(TargetOpcode::G_VASTART) - .addUse(getOrCreateVReg(*Ptr)) - .addMemOperand(MF->getMachineMemOperand( - MachinePointerInfo(Ptr), MachineMemOperand::MOStore, ListSize, 1)); + MIRBuilder.buildInstr(TargetOpcode::G_VASTART, {}, {getOrCreateVReg(*Ptr)}) + .addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Ptr), + MachineMemOperand::MOStore, + ListSize, Align(1))); return true; } case Intrinsic::dbg_value: { @@ -1411,6 +1476,14 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder); case Intrinsic::smul_with_overflow: return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder); + case Intrinsic::uadd_sat: + return translateBinaryOp(TargetOpcode::G_UADDSAT, CI, MIRBuilder); + case Intrinsic::sadd_sat: + return translateBinaryOp(TargetOpcode::G_SADDSAT, CI, MIRBuilder); + case Intrinsic::usub_sat: + return translateBinaryOp(TargetOpcode::G_USUBSAT, CI, MIRBuilder); + case Intrinsic::ssub_sat: + return translateBinaryOp(TargetOpcode::G_SSUBSAT, CI, MIRBuilder); case Intrinsic::fmuladd: { const TargetMachine &TM = MF->getTarget(); const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); @@ -1423,14 +1496,14 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, TLI.getValueType(*DL, CI.getType()))) { // TODO: Revisit this to see if we should move this part of the // lowering to the combiner. 
- MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2}, - MachineInstr::copyFlagsFromInstruction(CI)); + MIRBuilder.buildFMA(Dst, Op0, Op1, Op2, + MachineInstr::copyFlagsFromInstruction(CI)); } else { LLT Ty = getLLTForType(*CI.getType(), *DL); - auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, {Ty}, {Op0, Op1}, - MachineInstr::copyFlagsFromInstruction(CI)); - MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Dst}, {FMul, Op2}, - MachineInstr::copyFlagsFromInstruction(CI)); + auto FMul = MIRBuilder.buildFMul( + Ty, Op0, Op1, MachineInstr::copyFlagsFromInstruction(CI)); + MIRBuilder.buildFAdd(Dst, FMul, Op2, + MachineInstr::copyFlagsFromInstruction(CI)); } return true; } @@ -1468,7 +1541,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, *MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, - PtrTy.getSizeInBits() / 8, 8)); + PtrTy.getSizeInBits() / 8, Align(8))); return true; } case Intrinsic::stacksave: { @@ -1508,9 +1581,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, : TargetOpcode::G_CTTZ_ZERO_UNDEF : Cst->isZero() ? TargetOpcode::G_CTLZ : TargetOpcode::G_CTLZ_ZERO_UNDEF; - MIRBuilder.buildInstr(Opcode) - .addDef(getOrCreateVReg(CI)) - .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(CI)}, + {getOrCreateVReg(*CI.getArgOperand(0))}); return true; } case Intrinsic::invariant_start: { @@ -1526,54 +1598,63 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, case Intrinsic::sideeffect: // Discard annotate attributes, assumptions, and artificial side-effects. return true; + case Intrinsic::read_volatile_register: case Intrinsic::read_register: { Value *Arg = CI.getArgOperand(0); - MIRBuilder.buildInstr(TargetOpcode::G_READ_REGISTER) - .addDef(getOrCreateVReg(CI)) - .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata())); + MIRBuilder + .buildInstr(TargetOpcode::G_READ_REGISTER, {getOrCreateVReg(CI)}, {}) + .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata())); + return true; + } + case Intrinsic::write_register: { + Value *Arg = CI.getArgOperand(0); + MIRBuilder.buildInstr(TargetOpcode::G_WRITE_REGISTER) + .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata())) + .addUse(getOrCreateVReg(*CI.getArgOperand(1))); return true; } +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ + case Intrinsic::INTRINSIC: +#include "llvm/IR/ConstrainedOps.def" + return translateConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(CI), + MIRBuilder); + } return false; } -bool IRTranslator::translateInlineAsm(const CallInst &CI, +bool IRTranslator::translateInlineAsm(const CallBase &CB, MachineIRBuilder &MIRBuilder) { - const InlineAsm &IA = cast<InlineAsm>(*CI.getCalledValue()); - if (!IA.getConstraintString().empty()) - return false; - unsigned ExtraInfo = 0; - if (IA.hasSideEffects()) - ExtraInfo |= InlineAsm::Extra_HasSideEffects; - if (IA.getDialect() == InlineAsm::AD_Intel) - ExtraInfo |= InlineAsm::Extra_AsmDialect; + const InlineAsmLowering *ALI = MF->getSubtarget().getInlineAsmLowering(); - MIRBuilder.buildInstr(TargetOpcode::INLINEASM) - .addExternalSymbol(IA.getAsmString().c_str()) - .addImm(ExtraInfo); + if (!ALI) { + LLVM_DEBUG( + dbgs() << "Inline asm lowering is not supported for this target yet\n"); + return false; + } - return true; + return ALI->lowerInlineAsm( + MIRBuilder, CB, [&](const Value &Val) { 
return getOrCreateVRegs(Val); }); } -bool IRTranslator::translateCallSite(const ImmutableCallSite &CS, +bool IRTranslator::translateCallBase(const CallBase &CB, MachineIRBuilder &MIRBuilder) { - const Instruction &I = *CS.getInstruction(); - ArrayRef<Register> Res = getOrCreateVRegs(I); + ArrayRef<Register> Res = getOrCreateVRegs(CB); SmallVector<ArrayRef<Register>, 8> Args; Register SwiftInVReg = 0; Register SwiftErrorVReg = 0; - for (auto &Arg : CS.args()) { + for (auto &Arg : CB.args()) { if (CLI->supportSwiftError() && isSwiftError(Arg)) { assert(SwiftInVReg == 0 && "Expected only one swift error argument"); LLT Ty = getLLTForType(*Arg->getType(), *DL); SwiftInVReg = MRI->createGenericVirtualRegister(Ty); MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt( - &I, &MIRBuilder.getMBB(), Arg)); + &CB, &MIRBuilder.getMBB(), Arg)); Args.emplace_back(makeArrayRef(SwiftInVReg)); SwiftErrorVReg = - SwiftError.getOrCreateVRegDefAt(&I, &MIRBuilder.getMBB(), Arg); + SwiftError.getOrCreateVRegDefAt(&CB, &MIRBuilder.getMBB(), Arg); continue; } Args.push_back(getOrCreateVRegs(*Arg)); @@ -1583,8 +1664,8 @@ bool IRTranslator::translateCallSite(const ImmutableCallSite &CS, // optimize into tail calls. Instead, we defer that to selection where a final // scan is done to check if any instructions are calls. bool Success = - CLI->lowerCall(MIRBuilder, CS, Res, Args, SwiftErrorVReg, - [&]() { return getOrCreateVReg(*CS.getCalledValue()); }); + CLI->lowerCall(MIRBuilder, CB, Res, Args, SwiftErrorVReg, + [&]() { return getOrCreateVReg(*CB.getCalledOperand()); }); // Check if we just inserted a tail call. if (Success) { @@ -1622,7 +1703,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { } if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) - return translateCallSite(&CI, MIRBuilder); + return translateCallBase(CI, MIRBuilder); assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic"); @@ -1670,14 +1751,12 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { TargetLowering::IntrinsicInfo Info; // TODO: Add a GlobalISel version of getTgtMemIntrinsic. if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) { - MaybeAlign Align = Info.align; - if (!Align) - Align = MaybeAlign( - DL->getABITypeAlignment(Info.memVT.getTypeForEVT(F->getContext()))); + Align Alignment = Info.align.getValueOr( + DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext()))); uint64_t Size = Info.memVT.getStoreSize(); - MIB.addMemOperand(MF->getMachineMemOperand( - MachinePointerInfo(Info.ptrVal), Info.flags, Size, Align->value())); + MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal), + Info.flags, Size, Alignment)); } return true; @@ -1691,9 +1770,8 @@ bool IRTranslator::translateInvoke(const User &U, const BasicBlock *ReturnBB = I.getSuccessor(0); const BasicBlock *EHPadBB = I.getSuccessor(1); - const Value *Callee = I.getCalledValue(); - const Function *Fn = dyn_cast<Function>(Callee); - if (isa<InlineAsm>(Callee)) + const Function *Fn = I.getCalledFunction(); + if (I.isInlineAsm()) return false; // FIXME: support invoking patchpoint and statepoint intrinsics. 
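The call-lowering hunks above also reflect the LLVM 11 retirement of the ImmutableCallSite wrapper in favor of the CallBase instruction class. A minimal sketch of the replacement API, assuming llvm/IR/InstrTypes.h (the free function is illustrative):

    #include "llvm/IR/InstrTypes.h" // CallBase
    using namespace llvm;

    // Old: ImmutableCallSite CS(I); if (CS) use CS.getCalledValue().
    // New: CallBase is itself an Instruction, so a plain dyn_cast works,
    // and getCalledOperand() replaces getCalledValue().
    const Value *calleeOperand(const Instruction *I) {
      if (const auto *CB = dyn_cast<CallBase>(I))
        return CB->getCalledOperand();
      return nullptr;
    }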
@@ -1717,7 +1795,7 @@ bool IRTranslator::translateInvoke(const User &U, MCSymbol *BeginSymbol = Context.createTempSymbol(); MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol); - if (!translateCallSite(&I, MIRBuilder)) + if (!translateCallBase(I, MIRBuilder)) return false; MCSymbol *EndSymbol = Context.createTempSymbol(); @@ -1817,12 +1895,7 @@ bool IRTranslator::translateAlloca(const User &U, return false; // Now we're in the harder dynamic case. - Type *Ty = AI.getAllocatedType(); - unsigned Align = - std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI.getAlignment()); - Register NumElts = getOrCreateVReg(*AI.getArraySize()); - Type *IntPtrIRTy = DL->getIntPtrType(AI.getType()); LLT IntPtrTy = getLLTForType(*IntPtrIRTy, *DL); if (MRI->getType(NumElts) != IntPtrTy) { @@ -1831,29 +1904,30 @@ bool IRTranslator::translateAlloca(const User &U, NumElts = ExtElts; } + Type *Ty = AI.getAllocatedType(); + Register AllocSize = MRI->createGenericVirtualRegister(IntPtrTy); Register TySize = getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, DL->getTypeAllocSize(Ty))); MIRBuilder.buildMul(AllocSize, NumElts, TySize); - unsigned StackAlign = - MF->getSubtarget().getFrameLowering()->getStackAlignment(); - if (Align <= StackAlign) - Align = 0; - // Round the size of the allocation up to the stack alignment size // by add SA-1 to the size. This doesn't overflow because we're computing // an address inside an alloca. - auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign - 1); + Align StackAlign = MF->getSubtarget().getFrameLowering()->getStackAlign(); + auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign.value() - 1); auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne, MachineInstr::NoUWrap); auto AlignCst = - MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign - 1)); + MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1)); auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst); - MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Align); + Align Alignment = std::max(AI.getAlign(), DL->getPrefTypeAlign(Ty)); + if (Alignment <= StackAlign) + Alignment = Align(1); + MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment); - MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, &AI); + MF->getFrameInfo().CreateVariableSizedObject(Alignment, &AI); assert(MF->getFrameInfo().hasVarSizedObjects()); return true; } @@ -1863,10 +1937,9 @@ bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) { // we're completely discarding the i64/double distinction here (amongst // others). Fortunately the ABIs I know of where that matters don't use va_arg // anyway but that's not guaranteed. - MIRBuilder.buildInstr(TargetOpcode::G_VAARG) - .addDef(getOrCreateVReg(U)) - .addUse(getOrCreateVReg(*U.getOperand(0))) - .addImm(DL->getABITypeAlignment(U.getType())); + MIRBuilder.buildInstr(TargetOpcode::G_VAARG, {getOrCreateVReg(U)}, + {getOrCreateVReg(*U.getOperand(0)), + DL->getABITypeAlign(U.getType()).value()}); return true; } @@ -1874,17 +1947,8 @@ bool IRTranslator::translateInsertElement(const User &U, MachineIRBuilder &MIRBuilder) { // If it is a <1 x Ty> vector, use the scalar as it is // not a legal vector type in LLT. 
- if (U.getType()->getVectorNumElements() == 1) { - Register Elt = getOrCreateVReg(*U.getOperand(1)); - auto &Regs = *VMap.getVRegs(U); - if (Regs.empty()) { - Regs.push_back(Elt); - VMap.getOffsets(U)->push_back(0); - } else { - MIRBuilder.buildCopy(Regs[0], Elt); - } - return true; - } + if (cast<FixedVectorType>(U.getType())->getNumElements() == 1) + return translateCopy(U, *U.getOperand(1), MIRBuilder); Register Res = getOrCreateVReg(U); Register Val = getOrCreateVReg(*U.getOperand(0)); @@ -1898,17 +1962,9 @@ bool IRTranslator::translateExtractElement(const User &U, MachineIRBuilder &MIRBuilder) { // If it is a <1 x Ty> vector, use the scalar as it is // not a legal vector type in LLT. - if (U.getOperand(0)->getType()->getVectorNumElements() == 1) { - Register Elt = getOrCreateVReg(*U.getOperand(0)); - auto &Regs = *VMap.getVRegs(U); - if (Regs.empty()) { - Regs.push_back(Elt); - VMap.getOffsets(U)->push_back(0); - } else { - MIRBuilder.buildCopy(Regs[0], Elt); - } - return true; - } + if (cast<FixedVectorType>(U.getOperand(0)->getType())->getNumElements() == 1) + return translateCopy(U, *U.getOperand(0), MIRBuilder); + Register Res = getOrCreateVReg(U); Register Val = getOrCreateVReg(*U.getOperand(0)); const auto &TLI = *MF->getSubtarget().getTargetLowering(); @@ -1924,8 +1980,8 @@ bool IRTranslator::translateExtractElement(const User &U, if (!Idx) Idx = getOrCreateVReg(*U.getOperand(1)); if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) { - const LLT &VecIdxTy = LLT::scalar(PreferredVecIdxWidth); - Idx = MIRBuilder.buildSExtOrTrunc(VecIdxTy, Idx)->getOperand(0).getReg(); + const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth); + Idx = MIRBuilder.buildSExtOrTrunc(VecIdxTy, Idx).getReg(0); } MIRBuilder.buildExtractVectorElement(Res, Val, Idx); return true; @@ -1933,13 +1989,16 @@ bool IRTranslator::translateExtractElement(const User &U, bool IRTranslator::translateShuffleVector(const User &U, MachineIRBuilder &MIRBuilder) { - SmallVector<int, 8> Mask; - ShuffleVectorInst::getShuffleMask(cast<Constant>(U.getOperand(2)), Mask); + ArrayRef<int> Mask; + if (auto *SVI = dyn_cast<ShuffleVectorInst>(&U)) + Mask = SVI->getShuffleMask(); + else + Mask = cast<ConstantExpr>(U).getShuffleMask(); ArrayRef<int> MaskAlloc = MF->allocateShuffleMask(Mask); - MIRBuilder.buildInstr(TargetOpcode::G_SHUFFLE_VECTOR) - .addDef(getOrCreateVReg(U)) - .addUse(getOrCreateVReg(*U.getOperand(0))) - .addUse(getOrCreateVReg(*U.getOperand(1))) + MIRBuilder + .buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {getOrCreateVReg(U)}, + {getOrCreateVReg(*U.getOperand(0)), + getOrCreateVReg(*U.getOperand(1))}) .addShuffleMask(MaskAlloc); return true; } @@ -1961,12 +2020,8 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U, MachineIRBuilder &MIRBuilder) { const AtomicCmpXchgInst &I = cast<AtomicCmpXchgInst>(U); - if (I.isWeak()) - return false; - - auto Flags = I.isVolatile() ? 
MachineMemOperand::MOVolatile - : MachineMemOperand::MONone; - Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + auto &TLI = *MF->getSubtarget().getTargetLowering(); + auto Flags = TLI.getAtomicMemOperandFlags(I, *DL); Type *ResType = I.getType(); Type *ValType = ResType->Type::getStructElementType(0); @@ -1983,21 +2038,18 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U, MIRBuilder.buildAtomicCmpXchgWithSuccess( OldValRes, SuccessRes, Addr, Cmp, NewVal, - *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), - Flags, DL->getTypeStoreSize(ValType), - getMemOpAlignment(I), AAMetadata, nullptr, - I.getSyncScopeID(), I.getSuccessOrdering(), - I.getFailureOrdering())); + *MF->getMachineMemOperand( + MachinePointerInfo(I.getPointerOperand()), Flags, + DL->getTypeStoreSize(ValType), getMemOpAlign(I), AAMetadata, nullptr, + I.getSyncScopeID(), I.getSuccessOrdering(), I.getFailureOrdering())); return true; } bool IRTranslator::translateAtomicRMW(const User &U, MachineIRBuilder &MIRBuilder) { const AtomicRMWInst &I = cast<AtomicRMWInst>(U); - - auto Flags = I.isVolatile() ? MachineMemOperand::MOVolatile - : MachineMemOperand::MONone; - Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + auto &TLI = *MF->getSubtarget().getTargetLowering(); + auto Flags = TLI.getAtomicMemOperandFlags(I, *DL); Type *ResType = I.getType(); @@ -2057,8 +2109,8 @@ bool IRTranslator::translateAtomicRMW(const User &U, Opcode, Res, Addr, Val, *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags, DL->getTypeStoreSize(ResType), - getMemOpAlignment(I), AAMetadata, - nullptr, I.getSyncScopeID(), I.getOrdering())); + getMemOpAlign(I), AAMetadata, nullptr, + I.getSyncScopeID(), I.getOrdering())); return true; } @@ -2070,6 +2122,21 @@ bool IRTranslator::translateFence(const User &U, return true; } +bool IRTranslator::translateFreeze(const User &U, + MachineIRBuilder &MIRBuilder) { + const ArrayRef<Register> DstRegs = getOrCreateVRegs(U); + const ArrayRef<Register> SrcRegs = getOrCreateVRegs(*U.getOperand(0)); + + assert(DstRegs.size() == SrcRegs.size() && + "Freeze with different source and destination type?"); + + for (unsigned I = 0; I < DstRegs.size(); ++I) { + MIRBuilder.buildFreeze(DstRegs[I], SrcRegs[I]); + } + + return true; +} + void IRTranslator::finishPendingPhis() { #ifndef NDEBUG DILocationVerifier Verifier; @@ -2122,6 +2189,10 @@ bool IRTranslator::translate(const Instruction &Inst) { else EntryBuilder->setDebugLoc(DebugLoc()); + auto &TLI = *MF->getSubtarget().getTargetLowering(); + if (TLI.fallBackToDAGISel(Inst)) + return false; + switch (Inst.getOpcode()) { #define HANDLE_INST(NUM, OPCODE, CLASS) \ case Instruction::OPCODE: \ @@ -2139,22 +2210,16 @@ bool IRTranslator::translate(const Constant &C, Register Reg) { EntryBuilder->buildFConstant(Reg, *CF); else if (isa<UndefValue>(C)) EntryBuilder->buildUndef(Reg); - else if (isa<ConstantPointerNull>(C)) { - // As we are trying to build a constant val of 0 into a pointer, - // insert a cast to make them correct with respect to types. 
- unsigned NullSize = DL->getTypeSizeInBits(C.getType()); - auto *ZeroTy = Type::getIntNTy(C.getContext(), NullSize); - auto *ZeroVal = ConstantInt::get(ZeroTy, 0); - Register ZeroReg = getOrCreateVReg(*ZeroVal); - EntryBuilder->buildCast(Reg, ZeroReg); - } else if (auto GV = dyn_cast<GlobalValue>(&C)) + else if (isa<ConstantPointerNull>(C)) + EntryBuilder->buildConstant(Reg, 0); + else if (auto GV = dyn_cast<GlobalValue>(&C)) EntryBuilder->buildGlobalValue(Reg, GV); else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) { if (!CAZ->getType()->isVectorTy()) return false; // Return the scalar if it is a <1 x Ty> vector. if (CAZ->getNumElements() == 1) - return translate(*CAZ->getElementValue(0u), Reg); + return translateCopy(C, *CAZ->getElementValue(0u), *EntryBuilder.get()); SmallVector<Register, 4> Ops; for (unsigned i = 0; i < CAZ->getNumElements(); ++i) { Constant &Elt = *CAZ->getElementValue(i); @@ -2164,7 +2229,8 @@ bool IRTranslator::translate(const Constant &C, Register Reg) { } else if (auto CV = dyn_cast<ConstantDataVector>(&C)) { // Return the scalar if it is a <1 x Ty> vector. if (CV->getNumElements() == 1) - return translate(*CV->getElementAsConstant(0), Reg); + return translateCopy(C, *CV->getElementAsConstant(0), + *EntryBuilder.get()); SmallVector<Register, 4> Ops; for (unsigned i = 0; i < CV->getNumElements(); ++i) { Constant &Elt = *CV->getElementAsConstant(i); @@ -2182,7 +2248,7 @@ bool IRTranslator::translate(const Constant &C, Register Reg) { } } else if (auto CV = dyn_cast<ConstantVector>(&C)) { if (CV->getNumOperands() == 1) - return translate(*CV->getOperand(0), Reg); + return translateCopy(C, *CV->getOperand(0), *EntryBuilder.get()); SmallVector<Register, 4> Ops; for (unsigned i = 0; i < CV->getNumOperands(); ++i) { Ops.push_back(getOrCreateVReg(*CV->getOperand(i))); @@ -2319,10 +2385,18 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { // Make our arguments/constants entry block fallthrough to the IR entry block. EntryBB->addSuccessor(&getMBB(F.front())); + if (CLI->fallBackToDAGISel(F)) { + OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", + F.getSubprogram(), &F.getEntryBlock()); + R << "unable to lower function: " << ore::NV("Prototype", F.getType()); + reportTranslationError(*MF, *TPC, *ORE, R); + return false; + } + // Lower the actual args into this basic block. SmallVector<ArrayRef<Register>, 8> VRegArgs; for (const Argument &Arg: F.args()) { - if (DL->getTypeStoreSize(Arg.getType()) == 0) + if (DL->getTypeStoreSize(Arg.getType()).isZero()) continue; // Don't handle zero sized types. 
ArrayRef<Register> VRegs = getOrCreateVRegs(Arg); VRegArgs.push_back(VRegs); @@ -2352,6 +2426,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { WrapperObserver.addObserver(&Verifier); #endif // ifndef NDEBUG RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver); + RAIIMFObserverInstaller ObsInstall(*MF, WrapperObserver); for (const BasicBlock *BB : RPOT) { MachineBasicBlock &MBB = getMBB(*BB); // Set the insertion point of all the following translations to diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp new file mode 100644 index 000000000000..2ce1d414e755 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp @@ -0,0 +1,667 @@ +//===-- lib/CodeGen/GlobalISel/InlineAsmLowering.cpp ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements the lowering from LLVM IR inline asm to MIR INLINEASM +/// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" + +#define DEBUG_TYPE "inline-asm-lowering" + +using namespace llvm; + +void InlineAsmLowering::anchor() {} + +namespace { + +/// GISelAsmOperandInfo - This contains information for each constraint that we +/// are lowering. +class GISelAsmOperandInfo : public TargetLowering::AsmOperandInfo { +public: + /// Regs - If this is a register or register class operand, this + /// contains the set of assigned registers corresponding to the operand. + SmallVector<Register, 1> Regs; + + explicit GISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &Info) + : TargetLowering::AsmOperandInfo(Info) {} +}; + +using GISelAsmOperandInfoVector = SmallVector<GISelAsmOperandInfo, 16>; + +class ExtraFlags { + unsigned Flags = 0; + +public: + explicit ExtraFlags(const CallBase &CB) { + const InlineAsm *IA = cast<InlineAsm>(CB.getCalledOperand()); + if (IA->hasSideEffects()) + Flags |= InlineAsm::Extra_HasSideEffects; + if (IA->isAlignStack()) + Flags |= InlineAsm::Extra_IsAlignStack; + if (CB.isConvergent()) + Flags |= InlineAsm::Extra_IsConvergent; + Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect; + } + + void update(const TargetLowering::AsmOperandInfo &OpInfo) { + // Ideally, we would only check against memory constraints. However, the + // meaning of an Other constraint can be target-specific and we can't easily + // reason about it. Therefore, be conservative and set MayLoad/MayStore + // for Other constraints as well. 
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory || + OpInfo.ConstraintType == TargetLowering::C_Other) { + if (OpInfo.Type == InlineAsm::isInput) + Flags |= InlineAsm::Extra_MayLoad; + else if (OpInfo.Type == InlineAsm::isOutput) + Flags |= InlineAsm::Extra_MayStore; + else if (OpInfo.Type == InlineAsm::isClobber) + Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); + } + } + + unsigned get() const { return Flags; } +}; + +} // namespace + +/// Assign virtual/physical registers for the specified register operand. +static void getRegistersForValue(MachineFunction &MF, + MachineIRBuilder &MIRBuilder, + GISelAsmOperandInfo &OpInfo, + GISelAsmOperandInfo &RefOpInfo) { + + const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + + // No work to do for memory operations. + if (OpInfo.ConstraintType == TargetLowering::C_Memory) + return; + + // If this is a constraint for a single physreg, or a constraint for a + // register class, find it. + Register AssignedReg; + const TargetRegisterClass *RC; + std::tie(AssignedReg, RC) = TLI.getRegForInlineAsmConstraint( + &TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT); + // RC is unset only on failure. Return immediately. + if (!RC) + return; + + // No need to allocate a matching input constraint since the constraint it's + // matching to has already been allocated. + if (OpInfo.isMatchingInputConstraint()) + return; + + // Initialize NumRegs. + unsigned NumRegs = 1; + if (OpInfo.ConstraintVT != MVT::Other) + NumRegs = + TLI.getNumRegisters(MF.getFunction().getContext(), OpInfo.ConstraintVT); + + // If this is a constraint for a specific physical register, but the type of + // the operand requires more than one register to be passed, we allocate the + // required amount of physical registers, starting from the selected physical + // register. + // For this, first retrieve a register iterator for the given register class + TargetRegisterClass::iterator I = RC->begin(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + + // Advance the iterator to the assigned register (if set) + if (AssignedReg) { + for (; *I != AssignedReg; ++I) + assert(I != RC->end() && "AssignedReg should be a member of provided RC"); + } + + // Finally, assign the registers. If the AssignedReg isn't set, create virtual + // registers with the provided register class + for (; NumRegs; --NumRegs, ++I) { + assert(I != RC->end() && "Ran out of registers to allocate!"); + Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RC); + OpInfo.Regs.push_back(R); + } +} + +/// Return an integer indicating how general CT is. +static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { + switch (CT) { + case TargetLowering::C_Immediate: + case TargetLowering::C_Other: + case TargetLowering::C_Unknown: + return 0; + case TargetLowering::C_Register: + return 1; + case TargetLowering::C_RegisterClass: + return 2; + case TargetLowering::C_Memory: + return 3; + } + llvm_unreachable("Invalid constraint type"); +} + +static void chooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, + const TargetLowering *TLI) { + assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options"); + unsigned BestIdx = 0; + TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown; + int BestGenerality = -1; + + // Loop over the options, keeping track of the most general one. 
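+ // Illustration: for a two-option constraint like "rm", 'm' is typically
+ // C_Memory (generality 3) and 'r' C_RegisterClass (generality 2), so the
+ // loop below settles on the memory form.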
+ for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) { + TargetLowering::ConstraintType CType = + TLI->getConstraintType(OpInfo.Codes[i]); + + // Indirect 'other' or 'immediate' constraints are not allowed. + if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory || + CType == TargetLowering::C_Register || + CType == TargetLowering::C_RegisterClass)) + continue; + + // If this is an 'other' or 'immediate' constraint, see if the operand is + // valid for it. For example, on X86 we might have an 'rI' constraint. If + // the operand is an integer in the range [0..31] we want to use I (saving a + // load of a register), otherwise we must use 'r'. + if (CType == TargetLowering::C_Other || + CType == TargetLowering::C_Immediate) { + assert(OpInfo.Codes[i].size() == 1 && + "Unhandled multi-letter 'other' constraint"); + // FIXME: prefer immediate constraints if the target allows it + } + + // Things with matching constraints can only be registers, per gcc + // documentation. This mainly affects "g" constraints. + if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput()) + continue; + + // This constraint letter is more general than the previous one, use it. + int Generality = getConstraintGenerality(CType); + if (Generality > BestGenerality) { + BestType = CType; + BestIdx = i; + BestGenerality = Generality; + } + } + + OpInfo.ConstraintCode = OpInfo.Codes[BestIdx]; + OpInfo.ConstraintType = BestType; +} + +static void computeConstraintToUse(const TargetLowering *TLI, + TargetLowering::AsmOperandInfo &OpInfo) { + assert(!OpInfo.Codes.empty() && "Must have at least one constraint"); + + // Single-letter constraints ('r') are very common. + if (OpInfo.Codes.size() == 1) { + OpInfo.ConstraintCode = OpInfo.Codes[0]; + OpInfo.ConstraintType = TLI->getConstraintType(OpInfo.ConstraintCode); + } else { + chooseConstraint(OpInfo, TLI); + } + + // 'X' matches anything. + if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) { + // Labels and constants are handled elsewhere ('X' is the only thing + // that matches labels). For Functions, the type here is the type of + // the result, which is not what we want to look at; leave them alone. + Value *Val = OpInfo.CallOperandVal; + if (isa<BasicBlock>(Val) || isa<ConstantInt>(Val) || isa<Function>(Val)) + return; + + // Otherwise, try to resolve it to something we know about by looking at + // the actual operand type. + if (const char *Repl = TLI->LowerXConstraint(OpInfo.ConstraintVT)) { + OpInfo.ConstraintCode = Repl; + OpInfo.ConstraintType = TLI->getConstraintType(OpInfo.ConstraintCode); + } + } +} + +static unsigned getNumOpRegs(const MachineInstr &I, unsigned OpIdx) { + unsigned Flag = I.getOperand(OpIdx).getImm(); + return InlineAsm::getNumOperandRegisters(Flag); +} + +static bool buildAnyextOrCopy(Register Dst, Register Src, + MachineIRBuilder &MIRBuilder) { + const TargetRegisterInfo *TRI = + MIRBuilder.getMF().getSubtarget().getRegisterInfo(); + MachineRegisterInfo *MRI = MIRBuilder.getMRI(); + + auto SrcTy = MRI->getType(Src); + if (!SrcTy.isValid()) { + LLVM_DEBUG(dbgs() << "Source type for copy is not valid\n"); + return false; + } + unsigned SrcSize = TRI->getRegSizeInBits(Src, *MRI); + unsigned DstSize = TRI->getRegSizeInBits(Dst, *MRI); + + if (DstSize < SrcSize) { + LLVM_DEBUG(dbgs() << "Input can't fit in destination reg class\n"); + return false; + } + + // Attempt to anyext small scalar sources. 
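+ // For example (hypothetical sizes): an s16 source feeding a 32-bit register
+ // class is widened with G_ANYEXT to s32 first, then copied into Dst below.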
+ if (DstSize > SrcSize) { + if (!SrcTy.isScalar()) { + LLVM_DEBUG(dbgs() << "Can't extend non-scalar input to size of" + "destination register class\n"); + return false; + } + Src = MIRBuilder.buildAnyExt(LLT::scalar(DstSize), Src).getReg(0); + } + + MIRBuilder.buildCopy(Dst, Src); + return true; +} + +bool InlineAsmLowering::lowerInlineAsm( + MachineIRBuilder &MIRBuilder, const CallBase &Call, + std::function<ArrayRef<Register>(const Value &Val)> GetOrCreateVRegs) + const { + const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand()); + + /// ConstraintOperands - Information about all of the constraints. + GISelAsmOperandInfoVector ConstraintOperands; + + MachineFunction &MF = MIRBuilder.getMF(); + const Function &F = MF.getFunction(); + const DataLayout &DL = F.getParent()->getDataLayout(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + + MachineRegisterInfo *MRI = MIRBuilder.getMRI(); + + TargetLowering::AsmOperandInfoVector TargetConstraints = + TLI->ParseConstraints(DL, TRI, Call); + + ExtraFlags ExtraInfo(Call); + unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. + unsigned ResNo = 0; // ResNo - The result number of the next output. + for (auto &T : TargetConstraints) { + ConstraintOperands.push_back(GISelAsmOperandInfo(T)); + GISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); + + // Compute the value type for each operand. + if (OpInfo.Type == InlineAsm::isInput || + (OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) { + + OpInfo.CallOperandVal = const_cast<Value *>(Call.getArgOperand(ArgNo++)); + + if (isa<BasicBlock>(OpInfo.CallOperandVal)) { + LLVM_DEBUG(dbgs() << "Basic block input operands not supported yet\n"); + return false; + } + + Type *OpTy = OpInfo.CallOperandVal->getType(); + + // If this is an indirect operand, the operand is a pointer to the + // accessed type. + if (OpInfo.isIndirect) { + PointerType *PtrTy = dyn_cast<PointerType>(OpTy); + if (!PtrTy) + report_fatal_error("Indirect operand for inline asm not a pointer!"); + OpTy = PtrTy->getElementType(); + } + + // FIXME: Support aggregate input operands + if (!OpTy->isSingleValueType()) { + LLVM_DEBUG( + dbgs() << "Aggregate input operands are not supported yet\n"); + return false; + } + + OpInfo.ConstraintVT = TLI->getValueType(DL, OpTy, true).getSimpleVT(); + + } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) { + assert(!Call.getType()->isVoidTy() && "Bad inline asm!"); + if (StructType *STy = dyn_cast<StructType>(Call.getType())) { + OpInfo.ConstraintVT = + TLI->getSimpleValueType(DL, STy->getElementType(ResNo)); + } else { + assert(ResNo == 0 && "Asm only has one result!"); + OpInfo.ConstraintVT = TLI->getSimpleValueType(DL, Call.getType()); + } + ++ResNo; + } else { + OpInfo.ConstraintVT = MVT::Other; + } + + // Compute the constraint code and ConstraintType to use. + computeConstraintToUse(TLI, OpInfo); + + // The selected constraint type might expose new sideeffects + ExtraInfo.update(OpInfo); + } + + // At this point, all operand types are decided. + // Create the MachineInstr, but don't insert it yet since input + // operands still need to insert instructions before this one + auto Inst = MIRBuilder.buildInstrNoInsert(TargetOpcode::INLINEASM) + .addExternalSymbol(IA->getAsmString().c_str()) + .addImm(ExtraInfo.get()); + + // Starting from this operand: flag followed by register(s) will be added as + // operands to Inst for each constraint. Used for matching input constraints. 
+ unsigned StartIdx = Inst->getNumOperands(); + + // Collects the output operands for later processing + GISelAsmOperandInfoVector OutputOperands; + + for (auto &OpInfo : ConstraintOperands) { + GISelAsmOperandInfo &RefOpInfo = + OpInfo.isMatchingInputConstraint() + ? ConstraintOperands[OpInfo.getMatchedOperand()] + : OpInfo; + + // Assign registers for register operands + getRegistersForValue(MF, MIRBuilder, OpInfo, RefOpInfo); + + switch (OpInfo.Type) { + case InlineAsm::isOutput: + if (OpInfo.ConstraintType == TargetLowering::C_Memory) { + unsigned ConstraintID = + TLI->getInlineAsmMemConstraint(OpInfo.ConstraintCode); + assert(ConstraintID != InlineAsm::Constraint_Unknown && + "Failed to convert memory constraint code to constraint id."); + + // Add information to the INLINEASM instruction to know about this + // output. + unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); + OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID); + Inst.addImm(OpFlags); + ArrayRef<Register> SourceRegs = + GetOrCreateVRegs(*OpInfo.CallOperandVal); + assert( + SourceRegs.size() == 1 && + "Expected the memory output to fit into a single virtual register"); + Inst.addReg(SourceRegs[0]); + } else { + // Otherwise, this outputs to a register (directly for C_Register / + // C_RegisterClass. Find a register that we can use. + assert(OpInfo.ConstraintType == TargetLowering::C_Register || + OpInfo.ConstraintType == TargetLowering::C_RegisterClass); + + if (OpInfo.Regs.empty()) { + LLVM_DEBUG(dbgs() + << "Couldn't allocate output register for constraint\n"); + return false; + } + + // Add information to the INLINEASM instruction to know that this + // register is set. + unsigned Flag = InlineAsm::getFlagWord( + OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber + : InlineAsm::Kind_RegDef, + OpInfo.Regs.size()); + if (OpInfo.Regs.front().isVirtual()) { + // Put the register class of the virtual registers in the flag word. + // That way, later passes can recompute register class constraints for + // inline assembly as well as normal instructions. Don't do this for + // tied operands that can use the regclass information from the def. + const TargetRegisterClass *RC = MRI->getRegClass(OpInfo.Regs.front()); + Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); + } + + Inst.addImm(Flag); + + for (Register Reg : OpInfo.Regs) { + Inst.addReg(Reg, + RegState::Define | getImplRegState(Reg.isPhysical()) | + (OpInfo.isEarlyClobber ? RegState::EarlyClobber : 0)); + } + + // Remember this output operand for later processing + OutputOperands.push_back(OpInfo); + } + + break; + case InlineAsm::isInput: { + if (OpInfo.isMatchingInputConstraint()) { + unsigned DefIdx = OpInfo.getMatchedOperand(); + // Find operand with register def that corresponds to DefIdx. + unsigned InstFlagIdx = StartIdx; + for (unsigned i = 0; i < DefIdx; ++i) + InstFlagIdx += getNumOpRegs(*Inst, InstFlagIdx) + 1; + assert(getNumOpRegs(*Inst, InstFlagIdx) == 1 && "Wrong flag"); + + unsigned MatchedOperandFlag = Inst->getOperand(InstFlagIdx).getImm(); + if (InlineAsm::isMemKind(MatchedOperandFlag)) { + LLVM_DEBUG(dbgs() << "Matching input constraint to mem operand not " + "supported. This should be target specific.\n"); + return false; + } + if (!InlineAsm::isRegDefKind(MatchedOperandFlag) && + !InlineAsm::isRegDefEarlyClobberKind(MatchedOperandFlag)) { + LLVM_DEBUG(dbgs() << "Unknown matching constraint\n"); + return false; + } + + // We want to tie input to register in next operand. 
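+ // Illustration (hypothetical operand layout): for an input "0" matching an
+ // output "=r", Inst ends up as INLINEASM ... regdef-flag, %def, ...,
+ // reguse-flag(tied to 0), %tmp, and tieOperands() below makes %tmp and %def
+ // share one register at allocation time.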
+ unsigned DefRegIdx = InstFlagIdx + 1; + Register Def = Inst->getOperand(DefRegIdx).getReg(); + + // Copy input to new vreg with same reg class as Def + const TargetRegisterClass *RC = MRI->getRegClass(Def); + ArrayRef<Register> SrcRegs = GetOrCreateVRegs(*OpInfo.CallOperandVal); + assert(SrcRegs.size() == 1 && "Single register is expected here"); + Register Tmp = MRI->createVirtualRegister(RC); + if (!buildAnyextOrCopy(Tmp, SrcRegs[0], MIRBuilder)) + return false; + + // Add Flag and input register operand (Tmp) to Inst. Tie Tmp to Def. + unsigned UseFlag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, 1); + unsigned Flag = InlineAsm::getFlagWordForMatchingOp(UseFlag, DefIdx); + Inst.addImm(Flag); + Inst.addReg(Tmp); + Inst->tieOperands(DefRegIdx, Inst->getNumOperands() - 1); + break; + } + + if (OpInfo.ConstraintType == TargetLowering::C_Other && + OpInfo.isIndirect) { + LLVM_DEBUG(dbgs() << "Indirect input operands with unknown constraint " + "not supported yet\n"); + return false; + } + + if (OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) { + + std::vector<MachineOperand> Ops; + if (!lowerAsmOperandForConstraint(OpInfo.CallOperandVal, + OpInfo.ConstraintCode, Ops, + MIRBuilder)) { + LLVM_DEBUG(dbgs() << "Don't support constraint: " + << OpInfo.ConstraintCode << " yet\n"); + return false; + } + + assert(Ops.size() > 0 && + "Expected constraint to be lowered to at least one operand"); + + // Add information to the INLINEASM node to know about this input. + unsigned OpFlags = + InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); + Inst.addImm(OpFlags); + Inst.add(Ops); + break; + } + + if (OpInfo.ConstraintType == TargetLowering::C_Memory) { + + if (!OpInfo.isIndirect) { + LLVM_DEBUG(dbgs() + << "Cannot indirectify memory input operands yet\n"); + return false; + } + + assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); + + unsigned ConstraintID = + TLI->getInlineAsmMemConstraint(OpInfo.ConstraintCode); + unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); + OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID); + Inst.addImm(OpFlags); + ArrayRef<Register> SourceRegs = + GetOrCreateVRegs(*OpInfo.CallOperandVal); + assert( + SourceRegs.size() == 1 && + "Expected the memory input to fit into a single virtual register"); + Inst.addReg(SourceRegs[0]); + break; + } + + assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || + OpInfo.ConstraintType == TargetLowering::C_Register) && + "Unknown constraint type!"); + + if (OpInfo.isIndirect) { + LLVM_DEBUG(dbgs() << "Can't handle indirect register inputs yet " + "for constraint '" + << OpInfo.ConstraintCode << "'\n"); + return false; + } + + // Copy the input into the appropriate registers. 
+ if (OpInfo.Regs.empty()) { + LLVM_DEBUG( + dbgs() + << "Couldn't allocate input register for register constraint\n"); + return false; + } + + unsigned NumRegs = OpInfo.Regs.size(); + ArrayRef<Register> SourceRegs = GetOrCreateVRegs(*OpInfo.CallOperandVal); + assert(NumRegs == SourceRegs.size() && + "Expected the number of input registers to match the number of " + "source registers"); + + if (NumRegs > 1) { + LLVM_DEBUG(dbgs() << "Input operands with multiple input registers are " + "not supported yet\n"); + return false; + } + + unsigned Flag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, NumRegs); + Inst.addImm(Flag); + if (!buildAnyextOrCopy(OpInfo.Regs[0], SourceRegs[0], MIRBuilder)) + return false; + Inst.addReg(OpInfo.Regs[0]); + break; + } + + case InlineAsm::isClobber: { + + unsigned NumRegs = OpInfo.Regs.size(); + if (NumRegs > 0) { + unsigned Flag = + InlineAsm::getFlagWord(InlineAsm::Kind_Clobber, NumRegs); + Inst.addImm(Flag); + + for (Register Reg : OpInfo.Regs) { + Inst.addReg(Reg, RegState::Define | RegState::EarlyClobber | + getImplRegState(Reg.isPhysical())); + } + } + break; + } + } + } + + if (const MDNode *SrcLoc = Call.getMetadata("srcloc")) + Inst.addMetadata(SrcLoc); + + // All inputs are handled, insert the instruction now + MIRBuilder.insertInstr(Inst); + + // Finally, copy the output operands into the output registers + ArrayRef<Register> ResRegs = GetOrCreateVRegs(Call); + if (ResRegs.size() != OutputOperands.size()) { + LLVM_DEBUG(dbgs() << "Expected the number of output registers to match the " + "number of destination registers\n"); + return false; + } + for (unsigned int i = 0, e = ResRegs.size(); i < e; i++) { + GISelAsmOperandInfo &OpInfo = OutputOperands[i]; + + if (OpInfo.Regs.empty()) + continue; + + switch (OpInfo.ConstraintType) { + case TargetLowering::C_Register: + case TargetLowering::C_RegisterClass: { + if (OpInfo.Regs.size() > 1) { + LLVM_DEBUG(dbgs() << "Output operands with multiple defining " + "registers are not supported yet\n"); + return false; + } + + Register SrcReg = OpInfo.Regs[0]; + unsigned SrcSize = TRI->getRegSizeInBits(SrcReg, *MRI); + if (MRI->getType(ResRegs[i]).getSizeInBits() < SrcSize) { + // First copy the non-typed virtual register into a generic virtual + // register + Register Tmp1Reg = + MRI->createGenericVirtualRegister(LLT::scalar(SrcSize)); + MIRBuilder.buildCopy(Tmp1Reg, SrcReg); + // Need to truncate the result of the register + MIRBuilder.buildTrunc(ResRegs[i], Tmp1Reg); + } else { + MIRBuilder.buildCopy(ResRegs[i], SrcReg); + } + break; + } + case TargetLowering::C_Immediate: + case TargetLowering::C_Other: + LLVM_DEBUG( + dbgs() << "Cannot lower target specific output constraints yet\n"); + return false; + case TargetLowering::C_Memory: + break; // Already handled. 
+ case TargetLowering::C_Unknown: + LLVM_DEBUG(dbgs() << "Unexpected unknown constraint\n"); + return false; + } + } + + return true; +} + +bool InlineAsmLowering::lowerAsmOperandForConstraint( + Value *Val, StringRef Constraint, std::vector<MachineOperand> &Ops, + MachineIRBuilder &MIRBuilder) const { + if (Constraint.size() > 1) + return false; + + char ConstraintLetter = Constraint[0]; + switch (ConstraintLetter) { + default: + return false; + case 'i': // Simple Integer or Relocatable Constant + if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { + assert(CI->getBitWidth() <= 64 && + "expected immediate to fit into 64-bits"); + // Boolean constants should be zero-extended, others are sign-extended + bool IsBool = CI->getBitWidth() == 1; + int64_t ExtVal = IsBool ? CI->getZExtValue() : CI->getSExtValue(); + Ops.push_back(MachineOperand::CreateImm(ExtVal)); + return true; + } + return false; + } +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 7c4fd2d140d3..f32278d07052 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetMachine.h" #define DEBUG_TYPE "instruction-select" @@ -175,7 +176,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { auto DstRC = MRI.getRegClass(DstReg); if (SrcRC == DstRC) { MRI.replaceRegWith(DstReg, SrcReg); - MI.eraseFromParentAndMarkDBGValuesForRemoval(); + MI.eraseFromParent(); } } } @@ -222,9 +223,6 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { return false; } #endif - auto &TLI = *MF.getSubtarget().getTargetLowering(); - TLI.finalizeLowering(MF); - // Determine if there are any calls in this machine function. Ported from // SelectionDAG. MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -240,6 +238,9 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { } } + // FIXME: FinalizeISel pass calls finalizeLowering, so it's called twice. + auto &TLI = *MF.getSubtarget().getTargetLowering(); + TLI.finalizeLowering(MF); LLVM_DEBUG({ dbgs() << "Rules covered by selecting function: " << MF.getName() << ":"; @@ -248,11 +249,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { dbgs() << "\n\n"; }); CoverageInfo.emit(CoveragePrefix, - MF.getSubtarget() - .getTargetLowering() - ->getTargetMachine() - .getTarget() - .getBackendName()); + TLI.getTargetMachine().getTarget().getBackendName()); // If we successfully selected the function nothing is going to use the vreg // types after us (otherwise MIRPrinter would need them). 
Make sure the types diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp index b9c90e69ddb2..2fedc034d315 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -42,7 +42,7 @@ bool InstructionSelector::constrainOperandRegToRegClass( MachineRegisterInfo &MRI = MF.getRegInfo(); return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, RC, - I.getOperand(OpIdx), OpIdx); + I.getOperand(OpIdx)); } bool InstructionSelector::isOperandImmEqual( diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp index 601d50e9806f..a83742f2138f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp @@ -80,22 +80,46 @@ LegalityPredicate LegalityPredicates::isPointer(unsigned TypeIdx, }; } -LegalityPredicate LegalityPredicates::narrowerThan(unsigned TypeIdx, - unsigned Size) { +LegalityPredicate LegalityPredicates::elementTypeIs(unsigned TypeIdx, + LLT EltTy) { + return [=](const LegalityQuery &Query) { + const LLT QueryTy = Query.Types[TypeIdx]; + return QueryTy.isVector() && QueryTy.getElementType() == EltTy; + }; +} + +LegalityPredicate LegalityPredicates::scalarNarrowerThan(unsigned TypeIdx, + unsigned Size) { return [=](const LegalityQuery &Query) { const LLT QueryTy = Query.Types[TypeIdx]; return QueryTy.isScalar() && QueryTy.getSizeInBits() < Size; }; } -LegalityPredicate LegalityPredicates::widerThan(unsigned TypeIdx, - unsigned Size) { +LegalityPredicate LegalityPredicates::scalarWiderThan(unsigned TypeIdx, + unsigned Size) { return [=](const LegalityQuery &Query) { const LLT QueryTy = Query.Types[TypeIdx]; return QueryTy.isScalar() && QueryTy.getSizeInBits() > Size; }; } +LegalityPredicate LegalityPredicates::smallerThan(unsigned TypeIdx0, + unsigned TypeIdx1) { + return [=](const LegalityQuery &Query) { + return Query.Types[TypeIdx0].getSizeInBits() < + Query.Types[TypeIdx1].getSizeInBits(); + }; +} + +LegalityPredicate LegalityPredicates::largerThan(unsigned TypeIdx0, + unsigned TypeIdx1) { + return [=](const LegalityQuery &Query) { + return Query.Types[TypeIdx0].getSizeInBits() > + Query.Types[TypeIdx1].getSizeInBits(); + }; +} + LegalityPredicate LegalityPredicates::scalarOrEltNarrowerThan(unsigned TypeIdx, unsigned Size) { return [=](const LegalityQuery &Query) { @@ -126,6 +150,12 @@ LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) { }; } +LegalityPredicate LegalityPredicates::sizeIs(unsigned TypeIdx, unsigned Size) { + return [=](const LegalityQuery &Query) { + return Query.Types[TypeIdx].getSizeInBits() == Size; + }; +} + LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0, unsigned TypeIdx1) { return [=](const LegalityQuery &Query) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index e789e4a333dc..1d7be54de3b0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/GlobalISel/GISelWorkList.h" #include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" 
+#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -28,6 +29,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" #include "llvm/Target/TargetMachine.h" #include <iterator> @@ -41,6 +43,29 @@ static cl::opt<bool> cl::desc("Should enable CSE in Legalizer"), cl::Optional, cl::init(false)); +enum class DebugLocVerifyLevel { + None, + Legalizations, + LegalizationsAndArtifactCombiners, +}; +#ifndef NDEBUG +static cl::opt<DebugLocVerifyLevel> VerifyDebugLocs( + "verify-legalizer-debug-locs", + cl::desc("Verify that debug locations are handled"), + cl::values( + clEnumValN(DebugLocVerifyLevel::None, "none", "No verification"), + clEnumValN(DebugLocVerifyLevel::Legalizations, "legalizations", + "Verify legalizations"), + clEnumValN(DebugLocVerifyLevel::LegalizationsAndArtifactCombiners, + "legalizations+artifactcombiners", + "Verify legalizations and artifact combines")), + cl::init(DebugLocVerifyLevel::Legalizations)); +#else +// Always disable it for release builds by preventing the observer from being +// installed. +static const DebugLocVerifyLevel VerifyDebugLocs = DebugLocVerifyLevel::None; +#endif + char Legalizer::ID = 0; INITIALIZE_PASS_BEGIN(Legalizer, DEBUG_TYPE, "Legalize the Machine IR a function's Machine IR", false, @@ -108,7 +133,6 @@ public: } void createdInstr(MachineInstr &MI) override { - LLVM_DEBUG(dbgs() << ".. .. New MI: " << MI); LLVM_DEBUG(NewMIs.push_back(&MI)); createdOrChangedInstr(MI); } @@ -143,7 +167,9 @@ public: Legalizer::MFResult Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, ArrayRef<GISelChangeObserver *> AuxObservers, + LostDebugLocObserver &LocObserver, MachineIRBuilder &MIRBuilder) { + MIRBuilder.setMF(MF); MachineRegisterInfo &MRI = MF.getRegInfo(); // Populate worklists. @@ -180,7 +206,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, // Now install the observer as the delegate to MF. // This will keep all the observers notified about new insertions/deletions. 
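
A note on the new -verify-legalizer-debug-locs switch introduced above: it follows LLVM's stock pattern for enum-valued command-line flags. A minimal self-contained sketch of that pattern, with an invented flag and enum purely for illustration (cl::opt, cl::values, clEnumValN and cl::init are the real API):

#include "llvm/Support/CommandLine.h"
using namespace llvm;

enum class CheckLevel { None, Fast, Full };

// Hypothetical flag mirroring the clEnumValN idiom used by this patch.
static cl::opt<CheckLevel> Check(
    "example-check-level", cl::desc("How much checking to do"),
    cl::values(clEnumValN(CheckLevel::None, "none", "No checking"),
               clEnumValN(CheckLevel::Fast, "fast", "Cheap checks only"),
               clEnumValN(CheckLevel::Full, "full", "All checks")),
    cl::init(CheckLevel::Fast));

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv);
  return Check == CheckLevel::Fast ? 0 : 1;
}

The #ifndef NDEBUG split in the hunk keeps release builds compiling by replacing the option with a constant, so the observer is never installed there.
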
- RAIIDelegateInstaller DelInstall(MF, &WrapperObserver); + RAIIMFObsDelInstaller Installer(MF, WrapperObserver); LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder); LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI); auto RemoveDeadInstFromLists = [&WrapperObserver](MachineInstr *DeadMI) { @@ -199,6 +225,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, if (isTriviallyDead(MI, MRI)) { LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n"); MI.eraseFromParentAndMarkDBGValuesForRemoval(); + LocObserver.checkpoint(false); continue; } @@ -224,6 +251,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, return {Changed, &MI}; } WorkListObserver.printNewInstrs(); + LocObserver.checkpoint(); Changed |= Res == LegalizerHelper::Legalized; } // Try to combine the instructions in RetryList again if there @@ -238,6 +266,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, return {Changed, RetryList.front()}; } } + LocObserver.checkpoint(); while (!ArtifactList.empty()) { MachineInstr &MI = *ArtifactList.pop_back_val(); assert(isPreISelGenericOpcode(MI.getOpcode()) && @@ -246,6 +275,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, LLVM_DEBUG(dbgs() << MI << "Is dead\n"); RemoveDeadInstFromLists(&MI); MI.eraseFromParentAndMarkDBGValuesForRemoval(); + LocObserver.checkpoint(false); continue; } SmallVector<MachineInstr *, 4> DeadInstructions; @@ -258,6 +288,9 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, RemoveDeadInstFromLists(DeadMI); DeadMI->eraseFromParentAndMarkDBGValuesForRemoval(); } + LocObserver.checkpoint( + VerifyDebugLocs == + DebugLocVerifyLevel::LegalizationsAndArtifactCombiners); Changed = true; continue; } @@ -305,9 +338,14 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // We want CSEInfo in addition to WorkListObserver to observe all changes. AuxObservers.push_back(CSEInfo); } + assert(!CSEInfo || !errorToBool(CSEInfo->verify())); + LostDebugLocObserver LocObserver(DEBUG_TYPE); + if (VerifyDebugLocs > DebugLocVerifyLevel::None) + AuxObservers.push_back(&LocObserver); const LegalizerInfo &LI = *MF.getSubtarget().getLegalizerInfo(); - MFResult Result = legalizeMachineFunction(MF, LI, AuxObservers, *MIRBuilder); + MFResult Result = + legalizeMachineFunction(MF, LI, AuxObservers, LocObserver, *MIRBuilder); if (Result.FailedOn) { reportGISelFailure(MF, TPC, MORE, "gisel-legalize", @@ -324,5 +362,33 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { reportGISelFailure(MF, TPC, MORE, R); return false; } + + if (LocObserver.getNumLostDebugLocs()) { + MachineOptimizationRemarkMissed R("gisel-legalize", "LostDebugLoc", + MF.getFunction().getSubprogram(), + /*MBB=*/&*MF.begin()); + R << "lost " + << ore::NV("NumLostDebugLocs", LocObserver.getNumLostDebugLocs()) + << " debug locations during pass"; + reportGISelWarning(MF, TPC, MORE, R); + // Example remark: + // --- !Missed + // Pass: gisel-legalize + // Name: GISelFailure + // DebugLoc: { File: '.../legalize-urem.mir', Line: 1, Column: 0 } + // Function: test_urem_s32 + // Args: + // - String: 'lost ' + // - NumLostDebugLocs: '1' + // - String: ' debug locations during pass' + // ... + } + + // If for some reason CSE was not enabled, make sure that we invalidate the + // CSEInfo object (as we currently declare that the analysis is preserved). 
+ // The next time get on the wrapper is called, it will force it to recompute + // the analysis. + if (!EnableCSE) + Wrapper.setComputed(false); return Result.Changed; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 667e1a04dc34..da519f99ad7e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -63,30 +63,48 @@ getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) { return std::make_pair(NumParts, NumLeftover); } +static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) { + + if (!Ty.isScalar()) + return nullptr; + + switch (Ty.getSizeInBits()) { + case 16: + return Type::getHalfTy(Ctx); + case 32: + return Type::getFloatTy(Ctx); + case 64: + return Type::getDoubleTy(Ctx); + case 128: + return Type::getFP128Ty(Ctx); + default: + return nullptr; + } +} + LegalizerHelper::LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &Builder) - : MIRBuilder(Builder), MRI(MF.getRegInfo()), - LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) { - MIRBuilder.setMF(MF); + : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()), + LI(*MF.getSubtarget().getLegalizerInfo()) { MIRBuilder.setChangeObserver(Observer); } LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI, GISelChangeObserver &Observer, MachineIRBuilder &B) - : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) { - MIRBuilder.setMF(MF); + : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI) { MIRBuilder.setChangeObserver(Observer); } LegalizerHelper::LegalizeResult LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { - LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs())); + LLVM_DEBUG(dbgs() << "Legalizing: " << MI); + + MIRBuilder.setInstrAndDebugLoc(MI); if (MI.getOpcode() == TargetOpcode::G_INTRINSIC || MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS) - return LI.legalizeIntrinsic(MI, MRI, MIRBuilder) ? Legalized - : UnableToLegalize; + return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize; auto Step = LI.getAction(MI, MRI); switch (Step.Action) { case Legal: @@ -101,6 +119,9 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { case WidenScalar: LLVM_DEBUG(dbgs() << ".. Widen scalar\n"); return widenScalar(MI, Step.TypeIdx, Step.NewType); + case Bitcast: + LLVM_DEBUG(dbgs() << ".. Bitcast type\n"); + return bitcast(MI, Step.TypeIdx, Step.NewType); case Lower: LLVM_DEBUG(dbgs() << ".. Lower\n"); return lower(MI, Step.TypeIdx, Step.NewType); @@ -112,8 +133,7 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { return moreElementsVector(MI, Step.TypeIdx, Step.NewType); case Custom: LLVM_DEBUG(dbgs() << ".. Custom legalization\n"); - return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized - : UnableToLegalize; + return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize; default: LLVM_DEBUG(dbgs() << ".. 
Unable to legalize\n"); return UnableToLegalize; @@ -172,26 +192,6 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy, return true; } -static LLT getGCDType(LLT OrigTy, LLT TargetTy) { - if (OrigTy.isVector() && TargetTy.isVector()) { - assert(OrigTy.getElementType() == TargetTy.getElementType()); - int GCD = greatestCommonDivisor(OrigTy.getNumElements(), - TargetTy.getNumElements()); - return LLT::scalarOrVector(GCD, OrigTy.getElementType()); - } - - if (OrigTy.isVector() && !TargetTy.isVector()) { - assert(OrigTy.getElementType() == TargetTy); - return TargetTy; - } - - assert(!OrigTy.isVector() && !TargetTy.isVector()); - - int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(), - TargetTy.getSizeInBits()); - return LLT::scalar(GCD); -} - void LegalizerHelper::insertParts(Register DstReg, LLT ResultTy, LLT PartTy, ArrayRef<Register> PartRegs, @@ -237,92 +237,222 @@ void LegalizerHelper::insertParts(Register DstReg, } } +/// Return the result registers of G_UNMERGE_VALUES \p MI in \p Regs +static void getUnmergeResults(SmallVectorImpl<Register> &Regs, + const MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES); + + const int NumResults = MI.getNumOperands() - 1; + Regs.resize(NumResults); + for (int I = 0; I != NumResults; ++I) + Regs[I] = MI.getOperand(I).getReg(); +} + +LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy, + LLT NarrowTy, Register SrcReg) { + LLT SrcTy = MRI.getType(SrcReg); + + LLT GCDTy = getGCDType(DstTy, getGCDType(SrcTy, NarrowTy)); + if (SrcTy == GCDTy) { + // If the source already evenly divides the result type, we don't need to do + // anything. + Parts.push_back(SrcReg); + } else { + // Need to split into common type sized pieces. + auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg); + getUnmergeResults(Parts, *Unmerge); + } + + return GCDTy; +} + +LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy, + SmallVectorImpl<Register> &VRegs, + unsigned PadStrategy) { + LLT LCMTy = getLCMType(DstTy, NarrowTy); + + int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits(); + int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits(); + int NumOrigSrc = VRegs.size(); + + Register PadReg; + + // Get a value we can use to pad the source value if the sources won't evenly + // cover the result type. + if (NumOrigSrc < NumParts * NumSubParts) { + if (PadStrategy == TargetOpcode::G_ZEXT) + PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0); + else if (PadStrategy == TargetOpcode::G_ANYEXT) + PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0); + else { + assert(PadStrategy == TargetOpcode::G_SEXT); + + // Shift the sign bit of the low register through the high register. + auto ShiftAmt = + MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1); + PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0); + } + } + + // Registers for the final merge to be produced. + SmallVector<Register, 4> Remerge(NumParts); + + // Registers needed for intermediate merges, which will be merged into a + // source for Remerge. + SmallVector<Register, 4> SubMerge(NumSubParts); + + // Once we've fully read off the end of the original source bits, we can reuse + // the same high bits for remaining padding elements. + Register AllPadReg; + + // Build merges to the LCM type to cover the original result type. + for (int I = 0; I != NumParts; ++I) { + bool AllMergePartsArePadding = true; + + // Build the requested merges to the requested type. 
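
The part counts computed in buildLCMMergePieces above are plain GCD/LCM arithmetic on the bit widths. A standalone sketch of that arithmetic, with the concrete sizes (an s48 value remerged through s32 pieces over an s16 GCD type) chosen purely for illustration:

#include <cstdio>
#include <numeric>

int main() {
  unsigned Dst = 48, Narrow = 32;
  unsigned GCD = std::gcd(Dst, Narrow);     // 16: the common piece size
  unsigned LCM = std::lcm(Dst, Narrow);     // 96: the merged width
  unsigned NumParts = LCM / Narrow;         // 3 s32 pieces feed the merge
  unsigned NumSubParts = Narrow / GCD;      // 2 s16 sub-pieces per s32 piece
  unsigned NumOrigSrc = Dst / GCD;          // 3 real s16 sources exist
  std::printf("%u pad pieces needed\n", NumParts * NumSubParts - NumOrigSrc);
  return 0;                                 // prints "3 pad pieces needed"
}

Those missing sub-pieces are exactly what the PadStrategy above (zero for G_ZEXT, undef for G_ANYEXT, replicated sign bits for G_SEXT) fills in.
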
+ for (int J = 0; J != NumSubParts; ++J) { + int Idx = I * NumSubParts + J; + if (Idx >= NumOrigSrc) { + SubMerge[J] = PadReg; + continue; + } + + SubMerge[J] = VRegs[Idx]; + + // There are meaningful bits here we can't reuse later. + AllMergePartsArePadding = false; + } + + // If we've filled up a complete piece with padding bits, we can directly + // emit the natural sized constant if applicable, rather than a merge of + // smaller constants. + if (AllMergePartsArePadding && !AllPadReg) { + if (PadStrategy == TargetOpcode::G_ANYEXT) + AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0); + else if (PadStrategy == TargetOpcode::G_ZEXT) + AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0); + + // If this is a sign extension, we can't materialize a trivial constant + // with the right type and have to produce a merge. + } + + if (AllPadReg) { + // Avoid creating additional instructions if we're just adding additional + // copies of padding bits. + Remerge[I] = AllPadReg; + continue; + } + + if (NumSubParts == 1) + Remerge[I] = SubMerge[0]; + else + Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0); + + // In the sign extend padding case, re-use the first all-signbit merge. + if (AllMergePartsArePadding && !AllPadReg) + AllPadReg = Remerge[I]; + } + + VRegs = std::move(Remerge); + return LCMTy; +} + +void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy, + ArrayRef<Register> RemergeRegs) { + LLT DstTy = MRI.getType(DstReg); + + // Create the merge to the widened source, and extract the relevant bits into + // the result. + + if (DstTy == LCMTy) { + MIRBuilder.buildMerge(DstReg, RemergeRegs); + return; + } + + auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs); + if (DstTy.isScalar() && LCMTy.isScalar()) { + MIRBuilder.buildTrunc(DstReg, Remerge); + return; + } + + if (LCMTy.isVector()) { + MIRBuilder.buildExtract(DstReg, Remerge, 0); + return; + } + + llvm_unreachable("unhandled case"); +} + static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { +#define RTLIBCASE(LibcallPrefix) \ + do { \ + switch (Size) { \ + case 32: \ + return RTLIB::LibcallPrefix##32; \ + case 64: \ + return RTLIB::LibcallPrefix##64; \ + case 128: \ + return RTLIB::LibcallPrefix##128; \ + default: \ + llvm_unreachable("unexpected size"); \ + } \ + } while (0) + + assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); + switch (Opcode) { case TargetOpcode::G_SDIV: - assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); - switch (Size) { - case 32: - return RTLIB::SDIV_I32; - case 64: - return RTLIB::SDIV_I64; - case 128: - return RTLIB::SDIV_I128; - default: - llvm_unreachable("unexpected size"); - } + RTLIBCASE(SDIV_I); case TargetOpcode::G_UDIV: - assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); - switch (Size) { - case 32: - return RTLIB::UDIV_I32; - case 64: - return RTLIB::UDIV_I64; - case 128: - return RTLIB::UDIV_I128; - default: - llvm_unreachable("unexpected size"); - } + RTLIBCASE(UDIV_I); case TargetOpcode::G_SREM: - assert((Size == 32 || Size == 64) && "Unsupported size"); - return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32; + RTLIBCASE(SREM_I); case TargetOpcode::G_UREM: - assert((Size == 32 || Size == 64) && "Unsupported size"); - return Size == 64 ? 
RTLIB::UREM_I64 : RTLIB::UREM_I32; + RTLIBCASE(UREM_I); case TargetOpcode::G_CTLZ_ZERO_UNDEF: - assert(Size == 32 && "Unsupported size"); - return RTLIB::CTLZ_I32; + RTLIBCASE(CTLZ_I); case TargetOpcode::G_FADD: - assert((Size == 32 || Size == 64) && "Unsupported size"); - return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32; + RTLIBCASE(ADD_F); case TargetOpcode::G_FSUB: - assert((Size == 32 || Size == 64) && "Unsupported size"); - return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32; + RTLIBCASE(SUB_F); case TargetOpcode::G_FMUL: - assert((Size == 32 || Size == 64) && "Unsupported size"); - return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32; + RTLIBCASE(MUL_F); case TargetOpcode::G_FDIV: - assert((Size == 32 || Size == 64) && "Unsupported size"); - return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32; + RTLIBCASE(DIV_F); case TargetOpcode::G_FEXP: - assert((Size == 32 || Size == 64) && "Unsupported size"); - return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32; + RTLIBCASE(EXP_F); case TargetOpcode::G_FEXP2: - assert((Size == 32 || Size == 64) && "Unsupported size"); - return Size == 64 ? RTLIB::EXP2_F64 : RTLIB::EXP2_F32; + RTLIBCASE(EXP2_F); case TargetOpcode::G_FREM: - return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32; + RTLIBCASE(REM_F); case TargetOpcode::G_FPOW: - return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32; + RTLIBCASE(POW_F); case TargetOpcode::G_FMA: - assert((Size == 32 || Size == 64) && "Unsupported size"); - return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32; + RTLIBCASE(FMA_F); case TargetOpcode::G_FSIN: - assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); - return Size == 128 ? RTLIB::SIN_F128 - : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32; + RTLIBCASE(SIN_F); case TargetOpcode::G_FCOS: - assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); - return Size == 128 ? RTLIB::COS_F128 - : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32; + RTLIBCASE(COS_F); case TargetOpcode::G_FLOG10: - assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); - return Size == 128 ? RTLIB::LOG10_F128 - : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32; + RTLIBCASE(LOG10_F); case TargetOpcode::G_FLOG: - assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); - return Size == 128 ? RTLIB::LOG_F128 - : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32; + RTLIBCASE(LOG_F); case TargetOpcode::G_FLOG2: - assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); - return Size == 128 ? RTLIB::LOG2_F128 - : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32; + RTLIBCASE(LOG2_F); case TargetOpcode::G_FCEIL: - assert((Size == 32 || Size == 64) && "Unsupported size"); - return Size == 64 ? RTLIB::CEIL_F64 : RTLIB::CEIL_F32; + RTLIBCASE(CEIL_F); case TargetOpcode::G_FFLOOR: - assert((Size == 32 || Size == 64) && "Unsupported size"); - return Size == 64 ? RTLIB::FLOOR_F64 : RTLIB::FLOOR_F32; + RTLIBCASE(FLOOR_F); + case TargetOpcode::G_FMINNUM: + RTLIBCASE(FMIN_F); + case TargetOpcode::G_FMAXNUM: + RTLIBCASE(FMAX_F); + case TargetOpcode::G_FSQRT: + RTLIBCASE(SQRT_F); + case TargetOpcode::G_FRINT: + RTLIBCASE(RINT_F); + case TargetOpcode::G_FNEARBYINT: + RTLIBCASE(NEARBYINT_F); } llvm_unreachable("Unknown libcall function"); } @@ -330,7 +460,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { /// True if an instruction is in tail position in its caller. Intended for /// legalizing libcalls as tail calls when possible. 
static bool isLibCallInTailPosition(MachineInstr &MI) { - const Function &F = MI.getParent()->getParent()->getFunction(); + MachineBasicBlock &MBB = *MI.getParent(); + const Function &F = MBB.getParent()->getFunction(); // Conservatively require the attributes of the call to match those of // the return. Ignore NoAlias and NonNull because they don't affect the @@ -349,23 +480,22 @@ static bool isLibCallInTailPosition(MachineInstr &MI) { // Only tail call if the following instruction is a standard return. auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); - MachineInstr *Next = MI.getNextNode(); - if (!Next || TII.isTailCall(*Next) || !Next->isReturn()) + auto Next = next_nodbg(MI.getIterator(), MBB.instr_end()); + if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn()) return false; return true; } LegalizerHelper::LegalizeResult -llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, +llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, - ArrayRef<CallLowering::ArgInfo> Args) { + ArrayRef<CallLowering::ArgInfo> Args, + const CallingConv::ID CC) { auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); - auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); - const char *Name = TLI.getLibcallName(Libcall); CallLowering::CallLoweringInfo Info; - Info.CallConv = TLI.getLibcallCallingConv(Libcall); + Info.CallConv = CC; Info.Callee = MachineOperand::CreateES(Name); Info.OrigRet = Result; std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs)); @@ -375,6 +505,16 @@ llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, return LegalizerHelper::Legalized; } +LegalizerHelper::LegalizeResult +llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, + const CallLowering::ArgInfo &Result, + ArrayRef<CallLowering::ArgInfo> Args) { + auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); + const char *Name = TLI.getLibcallName(Libcall); + const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall); + return createLibcall(MIRBuilder, Name, Result, Args, CC); +} + // Useful for libcalls where all operands have the same type. static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, @@ -428,7 +568,7 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, } const char *Name = TLI.getLibcallName(RTLibcall); - MIRBuilder.setInstr(MI); + MIRBuilder.setInstrAndDebugLoc(MI); CallLowering::CallLoweringInfo Info; Info.CallConv = TLI.getLibcallCallingConv(RTLibcall); @@ -443,14 +583,16 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, if (Info.LoweredTailCall) { assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?"); - // We must have a return following the call to get past + // We must have a return following the call (or debug insts) to get past // isLibCallInTailPosition. - assert(MI.getNextNode() && MI.getNextNode()->isReturn() && - "Expected instr following MI to be a return?"); - - // We lowered a tail call, so the call is now the return from the block. - // Delete the old return. - MI.getNextNode()->eraseFromParent(); + do { + MachineInstr *Next = MI.getNextNode(); + assert(Next && (Next->isReturn() || Next->isDebugInstr()) && + "Expected instr following MI to be return or debug inst?"); + // We lowered a tail call, so the call is now the return from the block. + // Delete the old return. 
+ Next->eraseFromParent(); + } while (MI.getNextNode()); } return LegalizerHelper::Legalized; @@ -492,8 +634,6 @@ LegalizerHelper::libcall(MachineInstr &MI) { unsigned Size = LLTy.getSizeInBits(); auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); - MIRBuilder.setInstr(MI); - switch (MI.getOpcode()) { default: return UnableToLegalize; @@ -523,37 +663,29 @@ LegalizerHelper::libcall(MachineInstr &MI) { case TargetOpcode::G_FEXP: case TargetOpcode::G_FEXP2: case TargetOpcode::G_FCEIL: - case TargetOpcode::G_FFLOOR: { - if (Size > 64) { - LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n"); + case TargetOpcode::G_FFLOOR: + case TargetOpcode::G_FMINNUM: + case TargetOpcode::G_FMAXNUM: + case TargetOpcode::G_FSQRT: + case TargetOpcode::G_FRINT: + case TargetOpcode::G_FNEARBYINT: { + Type *HLTy = getFloatTypeForLLT(Ctx, LLTy); + if (!HLTy || (Size != 32 && Size != 64 && Size != 128)) { + LLVM_DEBUG(dbgs() << "No libcall available for size " << Size << ".\n"); return UnableToLegalize; } - Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); if (Status != Legalized) return Status; break; } - case TargetOpcode::G_FPEXT: { - // FIXME: Support other floating point types (half, fp128 etc) - unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); - unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - if (ToSize != 64 || FromSize != 32) - return UnableToLegalize; - LegalizeResult Status = conversionLibcall( - MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx)); - if (Status != Legalized) - return Status; - break; - } + case TargetOpcode::G_FPEXT: case TargetOpcode::G_FPTRUNC: { - // FIXME: Support other floating point types (half, fp128 etc) - unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); - unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - if (ToSize != 32 || FromSize != 64) + Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg())); + Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg())); + if (!FromTy || !ToTy) return UnableToLegalize; - LegalizeResult Status = conversionLibcall( - MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx)); + LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy ); if (Status != Legalized) return Status; break; @@ -597,8 +729,6 @@ LegalizerHelper::libcall(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { - MIRBuilder.setInstr(MI); - uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); uint64_t NarrowSize = NarrowTy.getSizeInBits(); @@ -606,19 +736,34 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, default: return UnableToLegalize; case TargetOpcode::G_IMPLICIT_DEF: { - // FIXME: add support for when SizeOp0 isn't an exact multiple of - // NarrowSize. - if (SizeOp0 % NarrowSize != 0) - return UnableToLegalize; + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + + // If SizeOp0 is not an exact multiple of NarrowSize, emit + // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed. + // FIXME: Although this would also be legal for the general case, it causes + // a lot of regressions in the emitted code (superfluous COPYs, artifact + // combines not being hit). 
This seems to be a problem related to the + // artifact combiner. + if (SizeOp0 % NarrowSize != 0) { + LLT ImplicitTy = NarrowTy; + if (DstTy.isVector()) + ImplicitTy = LLT::vector(DstTy.getNumElements(), ImplicitTy); + + Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0); + MIRBuilder.buildAnyExt(DstReg, ImplicitReg); + + MI.eraseFromParent(); + return Legalized; + } + int NumParts = SizeOp0 / NarrowSize; SmallVector<Register, 2> DstRegs; for (int i = 0; i < NumParts; ++i) - DstRegs.push_back( - MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg()); + DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0)); - Register DstReg = MI.getOperand(0).getReg(); - if(MRI.getType(DstReg).isVector()) + if (DstTy.isVector()) MIRBuilder.buildBuildVector(DstReg, DstRegs); else MIRBuilder.buildMerge(DstReg, DstRegs); @@ -657,49 +802,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } - case TargetOpcode::G_SEXT: { - if (TypeIdx != 0) - return UnableToLegalize; - - Register SrcReg = MI.getOperand(1).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - - // FIXME: support the general case where the requested NarrowTy may not be - // the same as the source type. E.g. s128 = sext(s32) - if ((SrcTy.getSizeInBits() != SizeOp0 / 2) || - SrcTy.getSizeInBits() != NarrowTy.getSizeInBits()) { - LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy << "\n"); - return UnableToLegalize; - } - - // Shift the sign bit of the low register through the high register. - auto ShiftAmt = - MIRBuilder.buildConstant(LLT::scalar(64), NarrowTy.getSizeInBits() - 1); - auto Shift = MIRBuilder.buildAShr(NarrowTy, SrcReg, ShiftAmt); - MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {SrcReg, Shift.getReg(0)}); - MI.eraseFromParent(); - return Legalized; - } - case TargetOpcode::G_ZEXT: { - if (TypeIdx != 0) - return UnableToLegalize; - - LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); - uint64_t SizeOp1 = SrcTy.getSizeInBits(); - if (SizeOp0 % SizeOp1 != 0) - return UnableToLegalize; - - // Generate a merge where the bottom bits are taken from the source, and - // zero everything else. - Register ZeroReg = MIRBuilder.buildConstant(SrcTy, 0).getReg(0); - unsigned NumParts = SizeOp0 / SizeOp1; - SmallVector<Register, 4> Srcs = {MI.getOperand(1).getReg()}; - for (unsigned Part = 1; Part < NumParts; ++Part) - Srcs.push_back(ZeroReg); - MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Srcs); - MI.eraseFromParent(); - return Legalized; - } + case TargetOpcode::G_SEXT: + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_ANYEXT: + return narrowScalarExt(MI, TypeIdx, NarrowTy); case TargetOpcode::G_TRUNC: { if (TypeIdx != 1) return UnableToLegalize; @@ -710,12 +816,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return UnableToLegalize; } - auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg()); - MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Unmerge.getReg(0)); + auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1)); + MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0)); MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_FREEZE: + return reduceOperationWidth(MI, TypeIdx, NarrowTy); + case TargetOpcode::G_ADD: { // FIXME: add support for when SizeOp0 isn't an exact multiple of // NarrowSize. 
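
The G_ADD narrowing this FIXME sits in (and the G_SUB borrow chain visible in the next hunk) is the school-book multi-word scheme: each NarrowTy-sized piece is combined with G_UADDO/G_UADDE (or the USUBO/USUBE equivalents) and the carry is threaded from piece to piece. A plain C++ sketch of the idea for a 64-bit add split into 32-bit halves, not the helper itself:

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t A = 0x1ffffffffULL, B = 0x300000001ULL;
  uint32_t ALo = (uint32_t)A, AHi = (uint32_t)(A >> 32);
  uint32_t BLo = (uint32_t)B, BHi = (uint32_t)(B >> 32);
  uint32_t Lo = ALo + BLo;
  uint32_t Carry = Lo < ALo;           // the G_UADDO overflow output
  uint32_t Hi = AHi + BHi + Carry;     // the G_UADDE step consumes it
  uint64_t Sum = ((uint64_t)Hi << 32) | Lo;
  std::printf("%llx vs %llx\n", (unsigned long long)Sum,
              (unsigned long long)(A + B)); // both print 500000000
  return 0;
}
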
@@ -779,7 +888,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, DstRegs.push_back(DstReg); BorrowIn = BorrowOut; } - MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); + MIRBuilder.buildMerge(MI.getOperand(0), DstRegs); MI.eraseFromParent(); return Legalized; } @@ -800,7 +909,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, if (8 * MMO.getSize() != DstTy.getSizeInBits()) { Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); auto &MMO = **MI.memoperands_begin(); - MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO); + MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO); MIRBuilder.buildAnyExt(DstReg, TmpReg); MI.eraseFromParent(); return Legalized; @@ -819,12 +928,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, if (MMO.getSizeInBits() == NarrowSize) { MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); } else { - unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD - : TargetOpcode::G_SEXTLOAD; - MIRBuilder.buildInstr(ExtLoad) - .addDef(TmpReg) - .addUse(PtrReg) - .addMemOperand(&MMO); + MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO); } if (ZExt) @@ -853,7 +957,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); auto &MMO = **MI.memoperands_begin(); MIRBuilder.buildTrunc(TmpReg, SrcReg); - MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO); + MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO); MI.eraseFromParent(); return Legalized; } @@ -885,8 +989,19 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, case TargetOpcode::G_CTTZ: case TargetOpcode::G_CTTZ_ZERO_UNDEF: case TargetOpcode::G_CTPOP: - if (TypeIdx != 0) - return UnableToLegalize; // TODO + if (TypeIdx == 1) + switch (MI.getOpcode()) { + case TargetOpcode::G_CTLZ: + case TargetOpcode::G_CTLZ_ZERO_UNDEF: + return narrowScalarCTLZ(MI, TypeIdx, NarrowTy); + case TargetOpcode::G_CTTZ: + case TargetOpcode::G_CTTZ_ZERO_UNDEF: + return narrowScalarCTTZ(MI, TypeIdx, NarrowTy); + case TargetOpcode::G_CTPOP: + return narrowScalarCTPOP(MI, TypeIdx, NarrowTy); + default: + return UnableToLegalize; + } Observer.changingInstr(MI); narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT); @@ -910,10 +1025,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return Legalized; case TargetOpcode::G_PHI: { unsigned NumParts = SizeOp0 / NarrowSize; - SmallVector<Register, 2> DstRegs; - SmallVector<SmallVector<Register, 2>, 2> SrcRegs; - DstRegs.resize(NumParts); - SrcRegs.resize(MI.getNumOperands() / 2); + SmallVector<Register, 2> DstRegs(NumParts); + SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2); Observer.changingInstr(MI); for (unsigned i = 1; i < MI.getNumOperands(); i += 2) { MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB(); @@ -931,7 +1044,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1)); } MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI()); - MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); + MIRBuilder.buildMerge(MI.getOperand(0), DstRegs); Observer.changedInstr(MI); MI.eraseFromParent(); return Legalized; @@ -955,11 +1068,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Observer.changingInstr(MI); Register LHSL = MRI.createGenericVirtualRegister(NarrowTy); Register 
LHSH = MRI.createGenericVirtualRegister(NarrowTy); - MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2).getReg()); + MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2)); Register RHSL = MRI.createGenericVirtualRegister(NarrowTy); Register RHSH = MRI.createGenericVirtualRegister(NarrowTy); - MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3).getReg()); + MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3)); CmpInst::Predicate Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); @@ -970,14 +1083,14 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH); MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH); MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0); - MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero); + MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero); } else { MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH); MachineInstrBuilder CmpHEQ = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH); MachineInstrBuilder CmpLU = MIRBuilder.buildICmp( ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL); - MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH); + MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH); } Observer.changedInstr(MI); MI.eraseFromParent(); @@ -987,8 +1100,6 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, if (TypeIdx != 0) return UnableToLegalize; - if (!MI.getOperand(2).isImm()) - return UnableToLegalize; int64_t SizeInBits = MI.getOperand(2).getImm(); // So long as the new type has more bits than the bits we're extending we @@ -998,13 +1109,13 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, // We don't lose any non-extension bits by truncating the src and // sign-extending the dst. MachineOperand &MO1 = MI.getOperand(1); - auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1.getReg()); - MO1.setReg(TruncMIB->getOperand(0).getReg()); + auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1); + MO1.setReg(TruncMIB.getReg(0)); MachineOperand &MO2 = MI.getOperand(0); Register DstExt = MRI.createGenericVirtualRegister(NarrowTy); MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); - MIRBuilder.buildInstr(TargetOpcode::G_SEXT, {MO2.getReg()}, {DstExt}); + MIRBuilder.buildSExt(MO2, DstExt); MO2.setReg(DstExt); Observer.changedInstr(MI); return Legalized; @@ -1031,12 +1142,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, } // Explode the big arguments into smaller chunks. 
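
For reference, G_SEXT_INREG(X, B), which both this narrowing case and the lowering later in the patch manipulate, treats the low B bits of X as a signed value and smears their sign bit across the full width; the classic scalar implementation is the shift-left/arithmetic-shift-right pair that the lowering emits. A small sketch assuming a 32-bit width:

#include <cstdint>
#include <cstdio>

// G_SEXT_INREG(X, B) for a 32-bit value: shift the B interesting bits to
// the top, then arithmetic-shift them back down.
int32_t sextInReg32(uint32_t X, unsigned B) {
  unsigned Shift = 32 - B;
  return (int32_t)(X << Shift) >> Shift;
}

int main() {
  std::printf("%d %d\n", sextInReg32(0xAB, 8), sextInReg32(0x7F, 8));
  // prints "-85 127"
  return 0;
}
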
- MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1).getReg()); + MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1)); Register AshrCstReg = MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1) - ->getOperand(0) - .getReg(); + .getReg(0); Register FullExtensionReg = 0; Register PartialExtensionReg = 0; @@ -1051,11 +1161,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, DstRegs.push_back(FullExtensionReg); continue; } - DstRegs.push_back(MIRBuilder - .buildInstr(TargetOpcode::G_ASHR, {NarrowTy}, - {PartialExtensionReg, AshrCstReg}) - ->getOperand(0) - .getReg()); + DstRegs.push_back( + MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg) + .getReg(0)); FullExtensionReg = DstRegs.back(); } else { DstRegs.push_back( @@ -1063,8 +1171,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, .buildInstr( TargetOpcode::G_SEXT_INREG, {NarrowTy}, {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()}) - ->getOperand(0) - .getReg()); + .getReg(0)); PartialExtensionReg = DstRegs.back(); } } @@ -1091,28 +1198,57 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, DstRegs.push_back(DstPart.getReg(0)); } - MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); + MIRBuilder.buildMerge(MI.getOperand(0), DstRegs); Observer.changedInstr(MI); MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_PTRMASK: { + if (TypeIdx != 1) + return UnableToLegalize; + Observer.changingInstr(MI); + narrowScalarSrc(MI, NarrowTy, 2); + Observer.changedInstr(MI); + return Legalized; + } } } +Register LegalizerHelper::coerceToScalar(Register Val) { + LLT Ty = MRI.getType(Val); + if (Ty.isScalar()) + return Val; + + const DataLayout &DL = MIRBuilder.getDataLayout(); + LLT NewTy = LLT::scalar(Ty.getSizeInBits()); + if (Ty.isPointer()) { + if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace())) + return Register(); + return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0); + } + + Register NewVal = Val; + + assert(Ty.isVector()); + LLT EltTy = Ty.getElementType(); + if (EltTy.isPointer()) + NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0); + return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0); +} + void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode) { MachineOperand &MO = MI.getOperand(OpIdx); - auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()}); - MO.setReg(ExtB->getOperand(0).getReg()); + auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO}); + MO.setReg(ExtB.getReg(0)); } void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx) { MachineOperand &MO = MI.getOperand(OpIdx); - auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy}, - {MO.getReg()}); - MO.setReg(ExtB->getOperand(0).getReg()); + auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO); + MO.setReg(ExtB.getReg(0)); } void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy, @@ -1120,7 +1256,7 @@ void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy, MachineOperand &MO = MI.getOperand(OpIdx); Register DstExt = MRI.createGenericVirtualRegister(WideTy); MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); - MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt}); + MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt}); MO.setReg(DstExt); } @@ -1129,7 +1265,7 @@ void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy, MachineOperand &MO = MI.getOperand(OpIdx); 
Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy); MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); - MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc}); + MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc}); MO.setReg(DstTrunc); } @@ -1138,7 +1274,7 @@ void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy, MachineOperand &MO = MI.getOperand(OpIdx); Register DstExt = MRI.createGenericVirtualRegister(WideTy); MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); - MIRBuilder.buildExtract(MO.getReg(), DstExt, 0); + MIRBuilder.buildExtract(MO, DstExt, 0); MO.setReg(DstExt); } @@ -1172,6 +1308,19 @@ void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, MO.setReg(MoreReg); } +void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) { + MachineOperand &Op = MI.getOperand(OpIdx); + Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0)); +} + +void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) { + MachineOperand &MO = MI.getOperand(OpIdx); + Register CastDst = MRI.createGenericVirtualRegister(CastTy); + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + MIRBuilder.buildBitcast(MO, CastDst); + MO.setReg(CastDst); +} + LegalizerHelper::LegalizeResult LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { @@ -1300,10 +1449,10 @@ LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, if (TypeIdx != 0) return UnableToLegalize; - unsigned NumDst = MI.getNumOperands() - 1; + int NumDst = MI.getNumOperands() - 1; Register SrcReg = MI.getOperand(NumDst).getReg(); LLT SrcTy = MRI.getType(SrcReg); - if (!SrcTy.isScalar()) + if (SrcTy.isVector()) return UnableToLegalize; Register Dst0Reg = MI.getOperand(0).getReg(); @@ -1311,26 +1460,90 @@ LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, if (!DstTy.isScalar()) return UnableToLegalize; - unsigned NewSrcSize = NumDst * WideTy.getSizeInBits(); - LLT NewSrcTy = LLT::scalar(NewSrcSize); - unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits(); + if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) { + if (SrcTy.isPointer()) { + const DataLayout &DL = MIRBuilder.getDataLayout(); + if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) { + LLVM_DEBUG( + dbgs() << "Not casting non-integral address space integer\n"); + return UnableToLegalize; + } + + SrcTy = LLT::scalar(SrcTy.getSizeInBits()); + SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0); + } + + // Widen SrcTy to WideTy. This does not affect the result, but since the + // user requested this size, it is probably better handled than SrcTy and + // should reduce the total number of legalization artifacts + if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) { + SrcTy = WideTy; + SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0); + } - auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg); + // Theres no unmerge type to target. 
Directly extract the bits from the + // source type + unsigned DstSize = DstTy.getSizeInBits(); - for (unsigned I = 1; I != NumDst; ++I) { - auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I); - auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt); - WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl); + MIRBuilder.buildTrunc(Dst0Reg, SrcReg); + for (int I = 1; I != NumDst; ++I) { + auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I); + auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt); + MIRBuilder.buildTrunc(MI.getOperand(I), Shr); + } + + MI.eraseFromParent(); + return Legalized; } - Observer.changingInstr(MI); + // Extend the source to a wider type. + LLT LCMTy = getLCMType(SrcTy, WideTy); - MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg()); - for (unsigned I = 0; I != NumDst; ++I) - widenScalarDst(MI, WideTy, I); + Register WideSrc = SrcReg; + if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) { + // TODO: If this is an integral address space, cast to integer and anyext. + if (SrcTy.isPointer()) { + LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n"); + return UnableToLegalize; + } - Observer.changedInstr(MI); + WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0); + } + + auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc); + // Create a sequence of unmerges to the original results. since we may have + // widened the source, we will need to pad the results with dead defs to cover + // the source register. + // e.g. widen s16 to s32: + // %1:_(s16), %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0:_(s48) + // + // => + // %4:_(s64) = G_ANYEXT %0:_(s48) + // %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %4 ; Requested unmerge + // %1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %5 ; unpack to original regs + // %3:_(s16), dead %7 = G_UNMERGE_VALUES %6 ; original reg + extra dead def + + const int NumUnmerge = Unmerge->getNumOperands() - 1; + const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits(); + + for (int I = 0; I != NumUnmerge; ++I) { + auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); + + for (int J = 0; J != PartsPerUnmerge; ++J) { + int Idx = I * PartsPerUnmerge + J; + if (Idx < NumDst) + MIB.addDef(MI.getOperand(Idx).getReg()); + else { + // Create dead def for excess components. + MIB.addDef(MRI.createGenericVirtualRegister(DstTy)); + } + } + + MIB.addUse(Unmerge.getReg(I)); + } + + MI.eraseFromParent(); return Legalized; } @@ -1426,9 +1639,45 @@ LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, } LegalizerHelper::LegalizeResult -LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { - MIRBuilder.setInstr(MI); +LegalizerHelper::widenScalarAddSubSat(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { + bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT || + MI.getOpcode() == TargetOpcode::G_SSUBSAT; + // We can convert this to: + // 1. Any extend iN to iM + // 2. SHL by M-N + // 3. [US][ADD|SUB]SAT + // 4. L/ASHR by M-N + // + // It may be more efficient to lower this to a min and a max operation in + // the higher precision arithmetic if the promoted operation isn't legal, + // but this decision is up to the target's lowering request. 
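
A concrete instance of the recipe in the comment above, for G_UADDSAT on s8 widened to s16: pre-shifting both operands into the high byte makes the wide saturation point coincide with the narrow one, so the four steps reduce to shifts around a wide saturating add. A plain C++ sketch of the unsigned case only (the signed ops use an arithmetic shift in step 4 so the trunc can fold away):

#include <cstdint>
#include <cstdio>

// s8 G_UADDSAT done in s16, following the anyext/shl/sat/shr recipe.
uint8_t uaddsat8via16(uint8_t A, uint8_t B) {
  uint16_t WA = (uint16_t)A << 8, WB = (uint16_t)B << 8;  // step 2: SHL by 8
  uint32_t Wide = (uint32_t)WA + WB;
  uint16_t Sat = Wide > 0xFFFF ? 0xFFFF : (uint16_t)Wide; // step 3: s16 UADDSAT
  return (uint8_t)(Sat >> 8);                             // step 4: LSHR by 8
}

int main() {
  std::printf("%u %u\n", uaddsat8via16(200, 100), uaddsat8via16(3, 4));
  // prints "255 7"
  return 0;
}
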
+ Register DstReg = MI.getOperand(0).getReg(); + + unsigned NewBits = WideTy.getScalarSizeInBits(); + unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits(); + + auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1)); + auto RHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2)); + auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount); + auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK); + auto ShiftR = MIRBuilder.buildShl(WideTy, RHS, ShiftK); + + auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, + {ShiftL, ShiftR}, MI.getFlags()); + + // Use a shift that will preserve the number of sign bits when the trunc is + // folded away. + auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK) + : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK); + MIRBuilder.buildTrunc(DstReg, Result); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { switch (MI.getOpcode()) { default: return UnableToLegalize; @@ -1444,28 +1693,30 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_USUBO: { if (TypeIdx == 1) return UnableToLegalize; // TODO - auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy}, - {MI.getOperand(2).getReg()}); - auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy}, - {MI.getOperand(3).getReg()}); + auto LHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(2)); + auto RHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(3)); unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO ? TargetOpcode::G_ADD : TargetOpcode::G_SUB; // Do the arithmetic in the larger type. auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext}); LLT OrigTy = MRI.getType(MI.getOperand(0).getReg()); - APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits()); - auto AndOp = MIRBuilder.buildInstr( - TargetOpcode::G_AND, {WideTy}, - {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())}); + APInt Mask = + APInt::getLowBitsSet(WideTy.getSizeInBits(), OrigTy.getSizeInBits()); + auto AndOp = MIRBuilder.buildAnd( + WideTy, NewOp, MIRBuilder.buildConstant(WideTy, Mask)); // There is no overflow if the AndOp is the same as NewOp. - MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp, - AndOp); + MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, AndOp); // Now trunc the NewOp to the original result. - MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp); + MIRBuilder.buildTrunc(MI.getOperand(0), NewOp); MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_SADDSAT: + case TargetOpcode::G_SSUBSAT: + case TargetOpcode::G_UADDSAT: + case TargetOpcode::G_USUBSAT: + return widenScalarAddSubSat(MI, TypeIdx, WideTy); case TargetOpcode::G_CTTZ: case TargetOpcode::G_CTTZ_ZERO_UNDEF: case TargetOpcode::G_CTLZ: @@ -1500,9 +1751,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) { // The correct result is NewOp - (Difference in widety and current ty). 
unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits(); - MIBNewOp = MIRBuilder.buildInstr( - TargetOpcode::G_SUB, {WideTy}, - {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)}); + MIBNewOp = MIRBuilder.buildSub( + WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)); } MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp); @@ -1525,10 +1775,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { LLT Ty = MRI.getType(DstReg); unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits(); MIRBuilder.buildConstant(ShiftAmtReg, DiffBits); - MIRBuilder.buildInstr(TargetOpcode::G_LSHR) - .addDef(ShrReg) - .addUse(DstExt) - .addUse(ShiftAmtReg); + MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg); MIRBuilder.buildTrunc(DstReg, ShrReg); Observer.changedInstr(MI); @@ -1552,6 +1799,13 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_FREEZE: + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_ADD: case TargetOpcode::G_AND: case TargetOpcode::G_MUL: @@ -1844,9 +2098,10 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { // TODO: Probably should be zext widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT); Observer.changedInstr(MI); + return Legalized; } - return Legalized; + return UnableToLegalize; } case TargetOpcode::G_FADD: case TargetOpcode::G_FMUL: @@ -1932,29 +2187,162 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC); Observer.changedInstr(MI); return Legalized; + case TargetOpcode::G_PTRMASK: { + if (TypeIdx != 1) + return UnableToLegalize; + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); + return Legalized; + } + } +} + +static void getUnmergePieces(SmallVectorImpl<Register> &Pieces, + MachineIRBuilder &B, Register Src, LLT Ty) { + auto Unmerge = B.buildUnmerge(Ty, Src); + for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I) + Pieces.push_back(Unmerge.getReg(I)); +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerBitcast(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); + + if (SrcTy.isVector()) { + LLT SrcEltTy = SrcTy.getElementType(); + SmallVector<Register, 8> SrcRegs; + + if (DstTy.isVector()) { + int NumDstElt = DstTy.getNumElements(); + int NumSrcElt = SrcTy.getNumElements(); + + LLT DstEltTy = DstTy.getElementType(); + LLT DstCastTy = DstEltTy; // Intermediate bitcast result type + LLT SrcPartTy = SrcEltTy; // Original unmerge result type. + + // If there's an element size mismatch, insert intermediate casts to match + // the result element type. + if (NumSrcElt < NumDstElt) { // Source element type is larger. + // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>) + // + // => + // + // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0 + // %3:_(<2 x s8>) = G_BITCAST %2 + // %4:_(<2 x s8>) = G_BITCAST %3 + // %1:_(<4 x s16>) = G_CONCAT_VECTORS %3, %4 + DstCastTy = LLT::vector(NumDstElt / NumSrcElt, DstEltTy); + SrcPartTy = SrcEltTy; + } else if (NumSrcElt > NumDstElt) { // Source element type is smaller. 
+ // + // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>) + // + // => + // + // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0 + // %3:_(s16) = G_BITCAST %2 + // %4:_(s16) = G_BITCAST %3 + // %1:_(<2 x s16>) = G_BUILD_VECTOR %3, %4 + SrcPartTy = LLT::vector(NumSrcElt / NumDstElt, SrcEltTy); + DstCastTy = DstEltTy; + } + + getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy); + for (Register &SrcReg : SrcRegs) + SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0); + } else + getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy); + + MIRBuilder.buildMerge(Dst, SrcRegs); + MI.eraseFromParent(); + return Legalized; + } + + if (DstTy.isVector()) { + SmallVector<Register, 8> SrcRegs; + getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType()); + MIRBuilder.buildMerge(Dst, SrcRegs); + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { + switch (MI.getOpcode()) { + case TargetOpcode::G_LOAD: { + if (TypeIdx != 0) + return UnableToLegalize; + + Observer.changingInstr(MI); + bitcastDst(MI, CastTy, 0); + Observer.changedInstr(MI); + return Legalized; + } + case TargetOpcode::G_STORE: { + if (TypeIdx != 0) + return UnableToLegalize; + + Observer.changingInstr(MI); + bitcastSrc(MI, CastTy, 0); + Observer.changedInstr(MI); + return Legalized; + } + case TargetOpcode::G_SELECT: { + if (TypeIdx != 0) + return UnableToLegalize; + + if (MRI.getType(MI.getOperand(1).getReg()).isVector()) { + LLVM_DEBUG( + dbgs() << "bitcast action not implemented for vector select\n"); + return UnableToLegalize; + } + + Observer.changingInstr(MI); + bitcastSrc(MI, CastTy, 2); + bitcastSrc(MI, CastTy, 3); + bitcastDst(MI, CastTy, 0); + Observer.changedInstr(MI); + return Legalized; + } + case TargetOpcode::G_AND: + case TargetOpcode::G_OR: + case TargetOpcode::G_XOR: { + Observer.changingInstr(MI); + bitcastSrc(MI, CastTy, 1); + bitcastSrc(MI, CastTy, 2); + bitcastDst(MI, CastTy, 0); + Observer.changedInstr(MI); + return Legalized; + } + default: + return UnableToLegalize; } } LegalizerHelper::LegalizeResult LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { using namespace TargetOpcode; - MIRBuilder.setInstr(MI); switch(MI.getOpcode()) { default: return UnableToLegalize; + case TargetOpcode::G_BITCAST: + return lowerBitcast(MI); case TargetOpcode::G_SREM: case TargetOpcode::G_UREM: { - Register QuotReg = MRI.createGenericVirtualRegister(Ty); - MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV) - .addDef(QuotReg) - .addUse(MI.getOperand(1).getReg()) - .addUse(MI.getOperand(2).getReg()); - - Register ProdReg = MRI.createGenericVirtualRegister(Ty); - MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg()); - MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), - ProdReg); + auto Quot = + MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty}, + {MI.getOperand(1), MI.getOperand(2)}); + + auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2)); + MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod); MI.eraseFromParent(); return Legalized; } @@ -1970,36 +2358,30 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { Register LHS = MI.getOperand(2).getReg(); Register RHS = MI.getOperand(3).getReg(); - MIRBuilder.buildMul(Res, LHS, RHS); - unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO ? 
TargetOpcode::G_SMULH : TargetOpcode::G_UMULH; - Register HiPart = MRI.createGenericVirtualRegister(Ty); - MIRBuilder.buildInstr(Opcode) - .addDef(HiPart) - .addUse(LHS) - .addUse(RHS); + Observer.changingInstr(MI); + const auto &TII = MIRBuilder.getTII(); + MI.setDesc(TII.get(TargetOpcode::G_MUL)); + MI.RemoveOperand(1); + Observer.changedInstr(MI); - Register Zero = MRI.createGenericVirtualRegister(Ty); - MIRBuilder.buildConstant(Zero, 0); + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + + auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS}); + auto Zero = MIRBuilder.buildConstant(Ty, 0); // For *signed* multiply, overflow is detected by checking: // (hi != (lo >> bitwidth-1)) if (Opcode == TargetOpcode::G_SMULH) { - Register Shifted = MRI.createGenericVirtualRegister(Ty); - Register ShiftAmt = MRI.createGenericVirtualRegister(Ty); - MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1); - MIRBuilder.buildInstr(TargetOpcode::G_ASHR) - .addDef(Shifted) - .addUse(Res) - .addUse(ShiftAmt); + auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1); + auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt); MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted); } else { MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero); } - MI.eraseFromParent(); return Legalized; } case TargetOpcode::G_FNEG: { @@ -2008,31 +2390,16 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { if (Ty.isVector()) return UnableToLegalize; Register Res = MI.getOperand(0).getReg(); - Type *ZeroTy; LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); - switch (Ty.getSizeInBits()) { - case 16: - ZeroTy = Type::getHalfTy(Ctx); - break; - case 32: - ZeroTy = Type::getFloatTy(Ctx); - break; - case 64: - ZeroTy = Type::getDoubleTy(Ctx); - break; - case 128: - ZeroTy = Type::getFP128Ty(Ctx); - break; - default: - llvm_unreachable("unexpected floating-point type"); - } + Type *ZeroTy = getFloatTypeForLLT(Ctx, Ty); + if (!ZeroTy) + return UnableToLegalize; ConstantFP &ZeroForNegation = *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy)); auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation); Register SubByReg = MI.getOperand(1).getReg(); - Register ZeroReg = Zero->getOperand(0).getReg(); - MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg}, - MI.getFlags()); + Register ZeroReg = Zero.getReg(0); + MIRBuilder.buildFSub(Res, ZeroReg, SubByReg, MI.getFlags()); MI.eraseFromParent(); return Legalized; } @@ -2046,13 +2413,15 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); Register Neg = MRI.createGenericVirtualRegister(Ty); - MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS); - MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Res}, {LHS, Neg}, MI.getFlags()); + MIRBuilder.buildFNeg(Neg, RHS); + MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags()); MI.eraseFromParent(); return Legalized; } case TargetOpcode::G_FMAD: return lowerFMad(MI); + case TargetOpcode::G_FFLOOR: + return lowerFFloor(MI); case TargetOpcode::G_INTRINSIC_ROUND: return lowerIntrinsicRound(MI); case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { @@ -2089,7 +2458,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { // result values together, before truncating back down to the non-pow-2 // type. // E.g. 
v1 = i24 load => - // v2 = i32 load (2 byte) + // v2 = i32 zextload (2 byte) // v3 = i32 load (1 byte) // v4 = i32 shl v3, 16 // v5 = i32 or v4, v2 @@ -2110,11 +2479,11 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { LLT AnyExtTy = LLT::scalar(AnyExtSize); Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy); Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy); - auto LargeLoad = - MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO); + auto LargeLoad = MIRBuilder.buildLoadInstr( + TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO); - auto OffsetCst = - MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8); + auto OffsetCst = MIRBuilder.buildConstant( + LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); @@ -2186,8 +2555,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { // Generate the PtrAdd and truncating stores. LLT PtrTy = MRI.getType(PtrReg); - auto OffsetCst = - MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8); + auto OffsetCst = MIRBuilder.buildConstant( + LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); @@ -2226,12 +2595,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { Register LHS = MI.getOperand(2).getReg(); Register RHS = MI.getOperand(3).getReg(); Register CarryIn = MI.getOperand(4).getReg(); + LLT Ty = MRI.getType(Res); - Register TmpRes = MRI.createGenericVirtualRegister(Ty); - Register ZExtCarryIn = MRI.createGenericVirtualRegister(Ty); - - MIRBuilder.buildAdd(TmpRes, LHS, RHS); - MIRBuilder.buildZExt(ZExtCarryIn, CarryIn); + auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS); + auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn); MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn); MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS); @@ -2256,17 +2623,15 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { Register LHS = MI.getOperand(2).getReg(); Register RHS = MI.getOperand(3).getReg(); Register BorrowIn = MI.getOperand(4).getReg(); + const LLT CondTy = MRI.getType(BorrowOut); + const LLT Ty = MRI.getType(Res); - Register TmpRes = MRI.createGenericVirtualRegister(Ty); - Register ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty); - Register LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1)); - Register LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1)); - - MIRBuilder.buildSub(TmpRes, LHS, RHS); - MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn); + auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS); + auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn); MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn); - MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS); - MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS); + + auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS); + auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS); MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS); MI.eraseFromParent(); @@ -2278,6 +2643,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return lowerSITOFP(MI, TypeIdx, Ty); case G_FPTOUI: return lowerFPTOUI(MI, TypeIdx, Ty); + case G_FPTOSI: + return lowerFPTOSI(MI); + case G_FPTRUNC: + 
return lowerFPTRUNC(MI, TypeIdx, Ty); case G_SMIN: case G_SMAX: case G_UMIN: @@ -2288,6 +2657,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { case G_FMINNUM: case G_FMAXNUM: return lowerFMinNumMaxNum(MI); + case G_MERGE_VALUES: + return lowerMergeValues(MI); case G_UNMERGE_VALUES: return lowerUnmergeValues(MI); case TargetOpcode::G_SEXT_INREG: { @@ -2300,8 +2671,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { Register TmpRes = MRI.createGenericVirtualRegister(DstTy); auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits); - MIRBuilder.buildInstr(TargetOpcode::G_SHL, {TmpRes}, {SrcReg, MIBSz->getOperand(0).getReg()}); - MIRBuilder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {TmpRes, MIBSz->getOperand(0).getReg()}); + MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0)); + MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0)); MI.eraseFromParent(); return Legalized; } @@ -2318,7 +2689,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { case G_BITREVERSE: return lowerBitreverse(MI); case G_READ_REGISTER: - return lowerReadRegister(MI); + case G_WRITE_REGISTER: + return lowerReadWriteRegister(MI); } } @@ -2350,99 +2722,6 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef( return Legalized; } -LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - const unsigned Opc = MI.getOpcode(); - const unsigned NumOps = MI.getNumOperands() - 1; - const unsigned NarrowSize = NarrowTy.getSizeInBits(); - const Register DstReg = MI.getOperand(0).getReg(); - const unsigned Flags = MI.getFlags(); - const LLT DstTy = MRI.getType(DstReg); - const unsigned Size = DstTy.getSizeInBits(); - const int NumParts = Size / NarrowSize; - const LLT EltTy = DstTy.getElementType(); - const unsigned EltSize = EltTy.getSizeInBits(); - const unsigned BitsForNumParts = NarrowSize * NumParts; - - // Check if we have any leftovers. If we do, then only handle the case where - // the leftover is one element. - if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size) - return UnableToLegalize; - - if (BitsForNumParts != Size) { - Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy); - MIRBuilder.buildUndef(AccumDstReg); - - // Handle the pieces which evenly divide into the requested type with - // extract/op/insert sequence. - for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) { - SmallVector<SrcOp, 4> SrcOps; - for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { - Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy); - MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset); - SrcOps.push_back(PartOpReg); - } - - Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy); - MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags); - - Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy); - MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset); - AccumDstReg = PartInsertReg; - } - - // Handle the remaining element sized leftover piece. 
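[Note] The leftover check above, with concrete numbers: narrowing a <3 x s16> (48-bit) operation to <2 x s16> (32-bit) pieces covers 32 of 48 bits and leaves exactly one s16 element, the only leftover shape this since-replaced path handled. A minimal C++ sketch of that check; the helper name is hypothetical:

// Returns true when splitting Size bits into NarrowSize-bit parts leaves
// either nothing or exactly one EltSize-bit element, mirroring the
// BitsForNumParts check above. E.g. (48, 32, 16) -> true.
bool leftoverIsAtMostOneElement(unsigned Size, unsigned NarrowSize,
                                unsigned EltSize) {
  unsigned BitsForNumParts = NarrowSize * (Size / NarrowSize);
  return BitsForNumParts == Size || BitsForNumParts + EltSize == Size;
}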
- SmallVector<SrcOp, 4> SrcOps; - for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { - Register PartOpReg = MRI.createGenericVirtualRegister(EltTy); - MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), - BitsForNumParts); - SrcOps.push_back(PartOpReg); - } - - Register PartDstReg = MRI.createGenericVirtualRegister(EltTy); - MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags); - MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts); - MI.eraseFromParent(); - - return Legalized; - } - - SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs; - - extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs); - - if (NumOps >= 2) - extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs); - - if (NumOps >= 3) - extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs); - - for (int i = 0; i < NumParts; ++i) { - Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); - - if (NumOps == 1) - MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags); - else if (NumOps == 2) { - MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags); - } else if (NumOps == 3) { - MIRBuilder.buildInstr(Opc, {DstReg}, - {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags); - } - - DstRegs.push_back(DstReg); - } - - if (NarrowTy.isVector()) - MIRBuilder.buildConcatVectors(DstReg, DstRegs); - else - MIRBuilder.buildBuildVector(DstReg, DstRegs); - - MI.eraseFromParent(); - return Legalized; -} - // Handle splitting vector operations which need to have the same number of // elements in each type index, but each type index may have a different element // type. @@ -2482,7 +2761,6 @@ LegalizerHelper::fewerElementsVectorMultiEltType( SmallVector<Register, 4> PartRegs, LeftoverRegs; for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { - LLT LeftoverTy; Register SrcReg = MI.getOperand(I).getReg(); LLT SrcTyI = MRI.getType(SrcReg); LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType()); @@ -2571,9 +2849,8 @@ LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, for (unsigned I = 0; I < NumParts; ++I) { Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0); - MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode()) - .addDef(DstReg) - .addUse(SrcRegs[I]); + MachineInstr *NewInst = + MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]}); NewInst->setFlags(MI.getFlags()); DstRegs.push_back(DstReg); @@ -2913,6 +3190,12 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, Register AddrReg = MI.getOperand(1).getReg(); LLT ValTy = MRI.getType(ValReg); + // FIXME: Do we need a distinct NarrowMemory legalize action? 
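[Note] Width reduction of memory accesses here follows the same splitting idea as the i24 example in lowerLoad earlier (16-bit zextload + 8-bit load + shl + or). A host-side sketch of that split, modeled with byte memory on a little-endian machine; the helper is hypothetical and ignores alignment:

#include <cstdint>
#include <cstring>

// i24 load split: v2 = zextload16(p), v3 = load8(p + 2),
// result = (v3 << 16) | v2 -- matching the comment in lowerLoad.
uint32_t loadI24(const uint8_t *Ptr) {
  uint16_t Lo;
  std::memcpy(&Lo, Ptr, sizeof(Lo)); // the 16-bit G_ZEXTLOAD piece
  uint32_t Hi = Ptr[2];              // 8-bit load at LargeSplitSize/8 = 2
  return (Hi << 16) | Lo;            // G_SHL + G_OR
}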
+ if (ValTy.getSizeInBits() != 8 * MMO->getSize()) { + LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n"); + return UnableToLegalize; + } + int NumParts = -1; int NumLeftover = -1; LLT LeftoverTy; @@ -2981,14 +3264,147 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, } LegalizerHelper::LegalizeResult +LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx, + LLT NarrowTy) { + assert(TypeIdx == 0 && "only one type index expected"); + + const unsigned Opc = MI.getOpcode(); + const int NumOps = MI.getNumOperands() - 1; + const Register DstReg = MI.getOperand(0).getReg(); + const unsigned Flags = MI.getFlags(); + const unsigned NarrowSize = NarrowTy.getSizeInBits(); + const LLT NarrowScalarTy = LLT::scalar(NarrowSize); + + assert(NumOps <= 3 && "expected instruction with 1 result and 1-3 sources"); + + // First of all check whether we are narrowing (changing the element type) + // or reducing the vector elements + const LLT DstTy = MRI.getType(DstReg); + const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType(); + + SmallVector<Register, 8> ExtractedRegs[3]; + SmallVector<Register, 8> Parts; + + unsigned NarrowElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1; + + // Break down all the sources into NarrowTy pieces we can operate on. This may + // involve creating merges to a wider type, padded with undef. + for (int I = 0; I != NumOps; ++I) { + Register SrcReg = MI.getOperand(I + 1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + + // The type to narrow SrcReg to. For narrowing, this is a smaller scalar. + // For fewerElements, this is a smaller vector with the same element type. + LLT OpNarrowTy; + if (IsNarrow) { + OpNarrowTy = NarrowScalarTy; + + // In case of narrowing, we need to cast vectors to scalars for this to + // work properly + // FIXME: Can we do without the bitcast here if we're narrowing? + if (SrcTy.isVector()) { + SrcTy = LLT::scalar(SrcTy.getSizeInBits()); + SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0); + } + } else { + OpNarrowTy = LLT::scalarOrVector(NarrowElts, SrcTy.getScalarType()); + } + + LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg); + + // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand. + buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I], + TargetOpcode::G_ANYEXT); + } + + SmallVector<Register, 8> ResultRegs; + + // Input operands for each sub-instruction. + SmallVector<SrcOp, 4> InputRegs(NumOps, Register()); + + int NumParts = ExtractedRegs[0].size(); + const unsigned DstSize = DstTy.getSizeInBits(); + const LLT DstScalarTy = LLT::scalar(DstSize); + + // Narrowing needs to use scalar types + LLT DstLCMTy, NarrowDstTy; + if (IsNarrow) { + DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy); + NarrowDstTy = NarrowScalarTy; + } else { + DstLCMTy = getLCMType(DstTy, NarrowTy); + NarrowDstTy = NarrowTy; + } + + // We widened the source registers to satisfy merge/unmerge size + // constraints. We'll have some extra fully undef parts. + const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize; + + for (int I = 0; I != NumRealParts; ++I) { + // Emit this instruction on each of the split pieces. + for (int J = 0; J != NumOps; ++J) + InputRegs[J] = ExtractedRegs[J][I]; + + auto Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags); + ResultRegs.push_back(Inst.getReg(0)); + } + + // Fill out the widened result with undef instead of creating instructions + // with undef inputs. 
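[Note] The part bookkeeping in reduceOperationWidth, reduced to plain arithmetic under assumed sizes (a 96-bit result operated on in 64-bit pieces); the sizes and names here are illustrative, not taken from the patch:

#include <numeric>

// Pieces are carved at the GCD of the sizes, re-merged at their LCM, and
// any pieces past ceil(DstSize / NarrowSize) are filled with undef.
void partBookkeeping() {
  unsigned DstSize = 96, NarrowSize = 64;
  unsigned Gcd = std::gcd(DstSize, NarrowSize);                    // 32
  unsigned Lcm = std::lcm(DstSize, NarrowSize);                    // 192
  unsigned NumParts = Lcm / NarrowSize;                            // 3 total
  unsigned NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize; // 2 real
  unsigned NumUndefParts = NumParts - NumRealParts;                // 1 undef pad
  (void)Gcd; (void)NumUndefParts;
}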
+ int NumUndefParts = NumParts - NumRealParts; + if (NumUndefParts != 0) + ResultRegs.append(NumUndefParts, + MIRBuilder.buildUndef(NarrowDstTy).getReg(0)); + + // Extract the possibly padded result. Use a scratch register if we need to do + // a final bitcast, otherwise use the original result register. + Register MergeDstReg; + if (IsNarrow && DstTy.isVector()) + MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy); + else + MergeDstReg = DstReg; + + buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs); + + // Recast to vector if we narrowed a vector + if (IsNarrow && DstTy.isVector()) + MIRBuilder.buildBitcast(DstReg, MergeDstReg); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + int64_t Imm = MI.getOperand(2).getImm(); + + LLT DstTy = MRI.getType(DstReg); + + SmallVector<Register, 8> Parts; + LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg); + LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts); + + for (Register &R : Parts) + R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0); + + buildWidenedRemergeToDst(DstReg, LCMTy, Parts); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { using namespace TargetOpcode; - MIRBuilder.setInstr(MI); switch (MI.getOpcode()) { case G_IMPLICIT_DEF: return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy); + case G_TRUNC: case G_AND: case G_OR: case G_XOR: @@ -3038,7 +3454,14 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FMAXNUM_IEEE: case G_FMINIMUM: case G_FMAXIMUM: - return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy); + case G_FSHL: + case G_FSHR: + case G_FREEZE: + case G_SADDSAT: + case G_SSUBSAT: + case G_UADDSAT: + case G_USUBSAT: + return reduceOperationWidth(MI, TypeIdx, NarrowTy); case G_SHL: case G_LSHR: case G_ASHR: @@ -3076,6 +3499,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_LOAD: case G_STORE: return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); + case G_SEXT_INREG: + return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy); default: return UnableToLegalize; } @@ -3087,10 +3512,10 @@ LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, Register InL = MRI.createGenericVirtualRegister(HalfTy); Register InH = MRI.createGenericVirtualRegister(HalfTy); - MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg()); + MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1)); if (Amt.isNullValue()) { - MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH}); + MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH}); MI.eraseFromParent(); return Legalized; } @@ -3163,7 +3588,7 @@ LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, } } - MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()}); + MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi}); MI.eraseFromParent(); return Legalized; @@ -3211,7 +3636,7 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, Register InL = MRI.createGenericVirtualRegister(HalfTy); Register InH = MRI.createGenericVirtualRegister(HalfTy); - MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg()); + MIRBuilder.buildUnmerge({InL, InH}, 
MI.getOperand(1)); auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits); auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt); @@ -3302,7 +3727,6 @@ LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LegalizerHelper::LegalizeResult LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy) { - MIRBuilder.setInstr(MI); unsigned Opc = MI.getOpcode(); switch (Opc) { case TargetOpcode::G_IMPLICIT_DEF: @@ -3349,6 +3773,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_INSERT: + case TargetOpcode::G_FREEZE: if (TypeIdx != 0) return UnableToLegalize; Observer.changingInstr(MI); @@ -3479,10 +3904,10 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) { bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH; unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1); - SmallVector<Register, 2> Src1Parts, Src2Parts, DstTmpRegs; + SmallVector<Register, 2> Src1Parts, Src2Parts; + SmallVector<Register, 2> DstTmpRegs(DstTmpParts); extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts); extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts); - DstTmpRegs.resize(DstTmpParts); multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy); // Take only high half of registers if this is high mul. @@ -3550,10 +3975,12 @@ LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, } Register DstReg = MI.getOperand(0).getReg(); - if(MRI.getType(DstReg).isVector()) + if (MRI.getType(DstReg).isVector()) MIRBuilder.buildBuildVector(DstReg, DstRegs); - else + else if (DstRegs.size() > 1) MIRBuilder.buildMerge(DstReg, DstRegs); + else + MIRBuilder.buildCopy(DstReg, DstRegs[0]); MI.eraseFromParent(); return Legalized; } @@ -3657,14 +4084,14 @@ LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) { auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Src0Regs[I], Src1Regs[I]}); - DstRegs.push_back(Inst->getOperand(0).getReg()); + DstRegs.push_back(Inst.getReg(0)); } for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) { auto Inst = MIRBuilder.buildInstr( MI.getOpcode(), {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]}); - DstLeftoverRegs.push_back(Inst->getOperand(0).getReg()); + DstLeftoverRegs.push_back(Inst.getReg(0)); } insertParts(DstReg, DstTy, NarrowTy, DstRegs, @@ -3675,6 +4102,28 @@ LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, } LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + + LLT DstTy = MRI.getType(DstReg); + if (DstTy.isVector()) + return UnableToLegalize; + + SmallVector<Register, 8> Parts; + LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg); + LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode()); + buildWidenedRemergeToDst(DstReg, LCMTy, Parts); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { if (TypeIdx != 0) @@ -3704,13 +4153,13 @@ LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) { auto Select = MIRBuilder.buildSelect(NarrowTy, CondReg, Src1Regs[I], 
Src2Regs[I]); - DstRegs.push_back(Select->getOperand(0).getReg()); + DstRegs.push_back(Select.getReg(0)); } for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) { auto Select = MIRBuilder.buildSelect( LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]); - DstLeftoverRegs.push_back(Select->getOperand(0).getReg()); + DstLeftoverRegs.push_back(Select.getReg(0)); } insertParts(DstReg, DstTy, NarrowTy, DstRegs, @@ -3721,6 +4170,103 @@ LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, } LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 1) + return UnableToLegalize; + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(SrcReg); + unsigned NarrowSize = NarrowTy.getSizeInBits(); + + if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) { + const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF; + + MachineIRBuilder &B = MIRBuilder; + auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg); + // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi) + auto C_0 = B.buildConstant(NarrowTy, 0); + auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), + UnmergeSrc.getReg(1), C_0); + auto LoCTLZ = IsUndef ? + B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) : + B.buildCTLZ(DstTy, UnmergeSrc.getReg(0)); + auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize); + auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize); + auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)); + B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ); + + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 1) + return UnableToLegalize; + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(SrcReg); + unsigned NarrowSize = NarrowTy.getSizeInBits(); + + if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) { + const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF; + + MachineIRBuilder &B = MIRBuilder; + auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg); + // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo) + auto C_0 = B.buildConstant(NarrowTy, 0); + auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), + UnmergeSrc.getReg(0), C_0); + auto HiCTTZ = IsUndef ? 
+ B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) : + B.buildCTTZ(DstTy, UnmergeSrc.getReg(1)); + auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize); + auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize); + auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)); + B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ); + + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 1) + return UnableToLegalize; + + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + unsigned NarrowSize = NarrowTy.getSizeInBits(); + + if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) { + auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1)); + + auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0)); + auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1)); + MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP); + + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { unsigned Opc = MI.getOpcode(); auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); @@ -3739,18 +4285,20 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return Legalized; } case TargetOpcode::G_CTLZ: { + Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); - unsigned Len = Ty.getSizeInBits(); - if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) { + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(SrcReg); + unsigned Len = SrcTy.getSizeInBits(); + + if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) { // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero. 
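[Note] The three narrowing identities above (narrowScalarCTLZ/CTTZ/CTPOP), written out for a 64-bit value split into 32-bit halves. Plain C++ stand-ins for the emitted MIR, with GCC/Clang builtins in place of the generic opcodes; like the ZERO_UNDEF opcodes, the builtins are undefined on 0, so an all-zero input is not handled by this sketch:

#include <cstdint>

// ctlz(Hi:Lo) -> Hi == 0 ? 32 + ctlz(Lo) : ctlz(Hi)
unsigned ctlz64(uint32_t Hi, uint32_t Lo) {
  return Hi == 0 ? 32 + __builtin_clz(Lo) : __builtin_clz(Hi);
}

// cttz(Hi:Lo) -> Lo == 0 ? cttz(Hi) + 32 : cttz(Lo)
unsigned cttz64(uint32_t Hi, uint32_t Lo) {
  return Lo == 0 ? __builtin_ctz(Hi) + 32 : __builtin_ctz(Lo);
}

// ctpop(Hi:Lo) -> ctpop(Hi) + ctpop(Lo)
unsigned ctpop64(uint32_t Hi, uint32_t Lo) {
  return __builtin_popcount(Hi) + __builtin_popcount(Lo);
}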
- auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF, - {Ty}, {SrcReg}); - auto MIBZero = MIRBuilder.buildConstant(Ty, 0); - auto MIBLen = MIRBuilder.buildConstant(Ty, Len); - auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), - SrcReg, MIBZero); - MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen, - MIBCtlzZU); + auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg); + auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0); + auto ICmp = MIRBuilder.buildICmp( + CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc); + auto LenConst = MIRBuilder.buildConstant(DstTy, Len); + MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU); MI.eraseFromParent(); return Legalized; } @@ -3768,16 +4316,14 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { Register Op = SrcReg; unsigned NewLen = PowerOf2Ceil(Len); for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) { - auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i); - auto MIBOp = MIRBuilder.buildInstr( - TargetOpcode::G_OR, {Ty}, - {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty}, - {Op, MIBShiftAmt})}); - Op = MIBOp->getOperand(0).getReg(); + auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i); + auto MIBOp = MIRBuilder.buildOr( + SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt)); + Op = MIBOp.getReg(0); } - auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op}); - MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()}, - {MIRBuilder.buildConstant(Ty, Len), MIBPop}); + auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op); + MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len), + MIBPop); MI.eraseFromParent(); return Legalized; } @@ -3789,19 +4335,21 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return Legalized; } case TargetOpcode::G_CTTZ: { + Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); - unsigned Len = Ty.getSizeInBits(); - if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) { + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(SrcReg); + + unsigned Len = SrcTy.getSizeInBits(); + if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) { // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with // zero. 
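[Note] The CTPOP-based fallback above as a self-contained 32-bit helper: the shift/or loop smears the most significant set bit into every lower position, after which the leading-zero count is the word size minus the population count. Sketch only, with a builtin standing in for G_CTPOP:

#include <cstdint>

unsigned ctlz32(uint32_t X) {
  // Shifts of 1, 2, 4, 8, 16 -- the (1U << i) <= NewLen / 2 loop above.
  for (unsigned I = 0; (1u << I) <= 16; ++I)
    X |= X >> (1u << I);
  return 32 - __builtin_popcount(X); // Len - ctpop(smeared value)
}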
- auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF, - {Ty}, {SrcReg}); - auto MIBZero = MIRBuilder.buildConstant(Ty, 0); - auto MIBLen = MIRBuilder.buildConstant(Ty, Len); - auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), - SrcReg, MIBZero); - MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen, - MIBCttzZU); + auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg); + auto Zero = MIRBuilder.buildConstant(SrcTy, 0); + auto ICmp = MIRBuilder.buildICmp( + CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero); + auto LenConst = MIRBuilder.buildConstant(DstTy, Len); + MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU); MI.eraseFromParent(); return Legalized; } @@ -3810,24 +4358,70 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { // { return 32 - nlz(~x & (x-1)); } // Ref: "Hacker's Delight" by Henry Warren auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1); - auto MIBNot = - MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1}); - auto MIBTmp = MIRBuilder.buildInstr( - TargetOpcode::G_AND, {Ty}, - {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty}, - {SrcReg, MIBCstNeg1})}); + auto MIBNot = MIRBuilder.buildXor(Ty, SrcReg, MIBCstNeg1); + auto MIBTmp = MIRBuilder.buildAnd( + Ty, MIBNot, MIRBuilder.buildAdd(Ty, SrcReg, MIBCstNeg1)); if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) && isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) { auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len); - MIRBuilder.buildInstr( - TargetOpcode::G_SUB, {MI.getOperand(0).getReg()}, - {MIBCstLen, - MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})}); + MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen, + MIRBuilder.buildCTLZ(Ty, MIBTmp)); MI.eraseFromParent(); return Legalized; } MI.setDesc(TII.get(TargetOpcode::G_CTPOP)); - MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg()); + MI.getOperand(1).setReg(MIBTmp.getReg(0)); + return Legalized; + } + case TargetOpcode::G_CTPOP: { + unsigned Size = Ty.getSizeInBits(); + MachineIRBuilder &B = MIRBuilder; + + // Count set bits in blocks of 2 bits. Default approach would be + // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 } + // We use following formula instead: + // B2Count = val - { (val >> 1) & 0x55555555 } + // since it gives same result in blocks of 2 with one instruction less. + auto C_1 = B.buildConstant(Ty, 1); + auto B2Set1LoTo1Hi = B.buildLShr(Ty, MI.getOperand(1).getReg(), C_1); + APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55)); + auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0); + auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0); + auto B2Count = B.buildSub(Ty, MI.getOperand(1).getReg(), B2Count1Hi); + + // In order to get count in blocks of 4 add values from adjacent block of 2. + // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 } + auto C_2 = B.buildConstant(Ty, 2); + auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2); + APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33)); + auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0); + auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0); + auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0); + auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count); + + // For count in blocks of 8 bits we don't have to mask high 4 bits before + // addition since count value sits in range {0,...,8} and 4 bits are enough + // to hold such binary values. 
After addition high 4 bits still hold count + // of set bits in high 4 bit block, set them to zero and get 8 bit result. + // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F + auto C_4 = B.buildConstant(Ty, 4); + auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4); + auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count); + APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F)); + auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0); + auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0); + + assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm"); + // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this + // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks. + auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01))); + auto ResTmp = B.buildMul(Ty, B8Count, MulMask); + + // Shift count result from 8 high bits to low bits. + auto C_SizeM8 = B.buildConstant(Ty, Size - 8); + B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8); + + MI.eraseFromParent(); return Legalized; } } @@ -3888,6 +4482,7 @@ LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) { auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0); MIRBuilder.buildAdd(Dst, V, R); + MI.eraseFromParent(); return Legalized; } @@ -3960,6 +4555,7 @@ LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S, MIRBuilder.buildConstant(S64, 0)); MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R); + MI.eraseFromParent(); return Legalized; } @@ -4010,6 +4606,195 @@ LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return Legalized; } +LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); + const LLT S64 = LLT::scalar(64); + const LLT S32 = LLT::scalar(32); + + // FIXME: Only f32 to i64 conversions are supported. 
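[Note] The blocked population count just built above, plus the Hacker's Delight identity the G_CTTZ path uses, as a 32-bit scalar mirror of the emitted splat constants (0x55…, 0x33…, 0x0F…, 0x01…):

#include <cstdint>

unsigned ctpop32(uint32_t V) {
  V = V - ((V >> 1) & 0x55555555u);                 // counts in 2-bit blocks
  V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u); // counts in 4-bit blocks
  V = (V + (V >> 4)) & 0x0F0F0F0Fu;                 // counts in 8-bit blocks
  return (V * 0x01010101u) >> 24;                   // sum blocks; Size - 8 shift
}

unsigned cttz32(uint32_t X) {
  return ctpop32(~X & (X - 1)); // "32 - nlz(~x & (x-1))", done via ctpop
}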
+ if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64) + return UnableToLegalize; + + // Expand f32 -> i64 conversion + // This algorithm comes from compiler-rt's implementation of fixsfdi: + // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c + + unsigned SrcEltBits = SrcTy.getScalarSizeInBits(); + + auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000); + auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23); + + auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask); + auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit); + + auto SignMask = MIRBuilder.buildConstant(SrcTy, + APInt::getSignMask(SrcEltBits)); + auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask); + auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1); + auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit); + Sign = MIRBuilder.buildSExt(DstTy, Sign); + + auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF); + auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask); + auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000); + + auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K); + R = MIRBuilder.buildZExt(DstTy, R); + + auto Bias = MIRBuilder.buildConstant(SrcTy, 127); + auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias); + auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit); + auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent); + + auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent); + auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub); + + const LLT S1 = LLT::scalar(1); + auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, + S1, Exponent, ExponentLoBit); + + R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl); + + auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign); + auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign); + + auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0); + + auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, + S1, Exponent, ZeroSrcTy); + + auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0); + MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret); + + MI.eraseFromParent(); + return Legalized; +} + +// f64 -> f16 conversion using round-to-nearest-even rounding mode. +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + + if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly. + return UnableToLegalize; + + const unsigned ExpMask = 0x7ff; + const unsigned ExpBiasf64 = 1023; + const unsigned ExpBiasf16 = 15; + const LLT S32 = LLT::scalar(32); + const LLT S1 = LLT::scalar(1); + + auto Unmerge = MIRBuilder.buildUnmerge(S32, Src); + Register U = Unmerge.getReg(0); + Register UH = Unmerge.getReg(1); + + auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20)); + E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask)); + + // Subtract the fp64 exponent bias (1023) to get the real exponent and + // add the f16 bias (15) to get the biased exponent for the f16 format. 
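[Note] A host-side model of the f32 -> i64 expansion above (the fixsfdi scheme): unpack the exponent and mantissa with integer ops, restore the implicit one, shift into place, apply the sign with xor/sub, and return 0 for exponents below the bias. Illustrative only; like the MIR sequence, it makes no special provision for NaN or out-of-range inputs:

#include <cstdint>
#include <cstring>

int64_t fptosiF32ToI64(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  int32_t Exponent = (int32_t)((Bits & 0x7F800000u) >> 23) - 127; // minus Bias
  int64_t Sign = (int32_t)(Bits & 0x80000000u) >> 31;   // ashr: 0 or -1, sext'd
  uint64_t R = (Bits & 0x007FFFFFu) | 0x00800000u;      // mantissa | implicit 1
  if (Exponent < 0)                                     // ExponentLt0 -> 0
    return 0;
  R = Exponent > 23 ? R << (Exponent - 23)              // ExponentLoBit == 23
                    : R >> (23 - Exponent);
  return ((int64_t)R ^ Sign) - Sign;                    // conditional negate
}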
+ E = MIRBuilder.buildAdd( + S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16)); + + auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8)); + M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe)); + + auto MaskedSig = MIRBuilder.buildAnd(S32, UH, + MIRBuilder.buildConstant(S32, 0x1ff)); + MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U); + + auto Zero = MIRBuilder.buildConstant(S32, 0); + auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero); + auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0); + M = MIRBuilder.buildOr(S32, M, Lo40Set); + + // (M != 0 ? 0x0200 : 0) | 0x7c00; + auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200); + auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero); + auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero); + + auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00); + auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00); + + // N = M | (E << 12); + auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12)); + auto N = MIRBuilder.buildOr(S32, M, EShl12); + + // B = clamp(1-E, 0, 13); + auto One = MIRBuilder.buildConstant(S32, 1); + auto OneSubExp = MIRBuilder.buildSub(S32, One, E); + auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero); + B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13)); + + auto SigSetHigh = MIRBuilder.buildOr(S32, M, + MIRBuilder.buildConstant(S32, 0x1000)); + + auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B); + auto D0 = MIRBuilder.buildShl(S32, D, B); + + auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, + D0, SigSetHigh); + auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh); + D = MIRBuilder.buildOr(S32, D, D1); + + auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One); + auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N); + + auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7)); + V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2)); + + auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3, + MIRBuilder.buildConstant(S32, 3)); + auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3); + + auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3, + MIRBuilder.buildConstant(S32, 5)); + auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5); + + V1 = MIRBuilder.buildOr(S32, V0, V1); + V = MIRBuilder.buildAdd(S32, V, V1); + + auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, + E, MIRBuilder.buildConstant(S32, 30)); + V = MIRBuilder.buildSelect(S32, CmpEGt30, + MIRBuilder.buildConstant(S32, 0x7c00), V); + + auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, + E, MIRBuilder.buildConstant(S32, 1039)); + V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V); + + // Extract the sign bit. 
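[Note] The rounding step buried in the sequence above, isolated: once the significand carries extra low bits, the VLow3 tests drop two of them and round up exactly when the remainder is 0b011, 0b110 or 0b111, i.e. round-to-nearest with ties broken to even. A small hypothetical helper:

#include <cstdint>

uint32_t roundRNEStep(uint32_t V) {
  uint32_t Low3 = V & 7;                  // VLow3
  uint32_t Up = (Low3 == 3) | (Low3 > 5); // V0 | V1 in the code above
  return (V >> 2) + Up;                   // drop two bits, maybe increment
}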
+ auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16)); + Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000)); + + // Insert the sign bit + V = MIRBuilder.buildOr(S32, Sign, V); + + MIRBuilder.buildTrunc(Dst, V); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerFPTRUNC(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); + const LLT S64 = LLT::scalar(64); + const LLT S16 = LLT::scalar(16); + + if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64) + return lowerFPTRUNC_F64_TO_F16(MI); + + return UnableToLegalize; +} + static CmpInst::Predicate minMaxToCompare(unsigned Opc) { switch (Opc) { case TargetOpcode::G_SMIN: @@ -4063,7 +4848,7 @@ LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { MachineInstr *Or; if (Src0Ty == Src1Ty) { - auto And1 = MIRBuilder.buildAnd(Src1Ty, Src0, SignBitMask); + auto And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask); Or = MIRBuilder.buildOr(Dst, And0, And1); } else if (Src0Size > Src1Size) { auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size); @@ -4136,6 +4921,39 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) { Register DstReg = MI.getOperand(0).getReg(); + Register X = MI.getOperand(1).getReg(); + const unsigned Flags = MI.getFlags(); + const LLT Ty = MRI.getType(DstReg); + const LLT CondTy = Ty.changeElementSize(1); + + // round(x) => + // t = trunc(x); + // d = fabs(x - t); + // o = copysign(1.0f, x); + // return t + (d >= 0.5 ? o : 0.0); + + auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags); + + auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags); + auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags); + auto Zero = MIRBuilder.buildFConstant(Ty, 0.0); + auto One = MIRBuilder.buildFConstant(Ty, 1.0); + auto Half = MIRBuilder.buildFConstant(Ty, 0.5); + auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X); + + auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, + Flags); + auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags); + + MIRBuilder.buildFAdd(DstReg, T, Sel, Flags); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerFFloor(MachineInstr &MI) { + Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); unsigned Flags = MI.getFlags(); LLT Ty = MRI.getType(DstReg); @@ -4145,8 +4963,8 @@ LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) { // if (src < 0.0 && src != result) // result += -1.0. 
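[Note] Both trunc-based expansions nearby as plain float helpers: lowerIntrinsicRound's t + (|x - t| >= 0.5 ? copysign(1, x) : 0) above, and the floor recipe in the comment just quoted. Sketches only, with libm calls standing in for the generic trunc/fabs/copysign opcodes:

#include <cmath>

float roundViaTrunc(float X) {
  float T = std::trunc(X);           // G_INTRINSIC_TRUNC
  float D = std::fabs(X - T);        // G_FSUB + G_FABS
  float O = std::copysign(1.0f, X);  // G_FCOPYSIGN
  return T + (D >= 0.5f ? O : 0.0f); // G_FCMP + G_SELECT + G_FADD
}

float floorViaTrunc(float X) {
  float T = std::trunc(X);
  return (X < 0.0f && X != T) ? T - 1.0f : T; // "result += -1.0"
}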
- auto Zero = MIRBuilder.buildFConstant(Ty, 0.0); auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags); + auto Zero = MIRBuilder.buildFConstant(Ty, 0.0); auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy, SrcReg, Zero, Flags); @@ -4155,7 +4973,48 @@ LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) { auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc); auto AddVal = MIRBuilder.buildSITOFP(Ty, And); - MIRBuilder.buildFAdd(DstReg, Trunc, AddVal); + MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerMergeValues(MachineInstr &MI) { + const unsigned NumOps = MI.getNumOperands(); + Register DstReg = MI.getOperand(0).getReg(); + Register Src0Reg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(Src0Reg); + unsigned PartSize = SrcTy.getSizeInBits(); + + LLT WideTy = LLT::scalar(DstTy.getSizeInBits()); + Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0); + + for (unsigned I = 2; I != NumOps; ++I) { + const unsigned Offset = (I - 1) * PartSize; + + Register SrcReg = MI.getOperand(I).getReg(); + auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg); + + Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg : + MRI.createGenericVirtualRegister(WideTy); + + auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset); + auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt); + MIRBuilder.buildOr(NextResult, ResultReg, Shl); + ResultReg = NextResult; + } + + if (DstTy.isPointer()) { + if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace( + DstTy.getAddressSpace())) { + LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n"); + return UnableToLegalize; + } + + MIRBuilder.buildIntToPtr(DstReg, ResultReg); + } + MI.eraseFromParent(); return Legalized; } @@ -4163,34 +5022,31 @@ LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) { const unsigned NumDst = MI.getNumOperands() - 1; - const Register SrcReg = MI.getOperand(NumDst).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - + Register SrcReg = MI.getOperand(NumDst).getReg(); Register Dst0Reg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(Dst0Reg); + if (DstTy.isPointer()) + return UnableToLegalize; // TODO + SrcReg = coerceToScalar(SrcReg); + if (!SrcReg) + return UnableToLegalize; // Expand scalarizing unmerge as bitcast to integer and shift. 
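[Note] The merge expansion above and the unmerge expansion that follows, on concrete types: four s16 pieces become an s64 via zext/shl/or, and come back out via lshr/trunc. Plain C++ mirror with hypothetical helper names:

#include <cstdint>

uint64_t merge4xS16(const uint16_t Parts[4]) {
  uint64_t Result = 0;
  for (unsigned I = 0; I != 4; ++I)           // Offset = I * PartSize
    Result |= (uint64_t)Parts[I] << (I * 16); // zext + shl + or
  return Result;
}

void unmerge4xS16(uint64_t Src, uint16_t Parts[4]) {
  for (unsigned I = 0; I != 4; ++I)           // Dst0 is a plain trunc
    Parts[I] = (uint16_t)(Src >> (I * 16));   // lshr + trunc
}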
- if (!DstTy.isVector() && SrcTy.isVector() && - SrcTy.getElementType() == DstTy) { - LLT IntTy = LLT::scalar(SrcTy.getSizeInBits()); - Register Cast = MIRBuilder.buildBitcast(IntTy, SrcReg).getReg(0); - - MIRBuilder.buildTrunc(Dst0Reg, Cast); - - const unsigned DstSize = DstTy.getSizeInBits(); - unsigned Offset = DstSize; - for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) { - auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset); - auto Shift = MIRBuilder.buildLShr(IntTy, Cast, ShiftAmt); - MIRBuilder.buildTrunc(MI.getOperand(I), Shift); - } + LLT IntTy = MRI.getType(SrcReg); - MI.eraseFromParent(); - return Legalized; + MIRBuilder.buildTrunc(Dst0Reg, SrcReg); + + const unsigned DstSize = DstTy.getSizeInBits(); + unsigned Offset = DstSize; + for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) { + auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset); + auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt); + MIRBuilder.buildTrunc(MI.getOperand(I), Shift); } - return UnableToLegalize; + MI.eraseFromParent(); + return Legalized; } LegalizerHelper::LegalizeResult @@ -4251,16 +5107,19 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { + const auto &MF = *MI.getMF(); + const auto &TFI = *MF.getSubtarget().getFrameLowering(); + if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp) + return UnableToLegalize; + Register Dst = MI.getOperand(0).getReg(); Register AllocSize = MI.getOperand(1).getReg(); - unsigned Align = MI.getOperand(2).getImm(); - - const auto &MF = *MI.getMF(); - const auto &TLI = *MF.getSubtarget().getTargetLowering(); + Align Alignment = assumeAligned(MI.getOperand(2).getImm()); LLT PtrTy = MRI.getType(Dst); LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits()); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); Register SPReg = TLI.getStackPointerRegisterToSaveRestore(); auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg); SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp); @@ -4269,8 +5128,8 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { // have to generate an extra instruction to negate the alloc and then use // G_PTR_ADD to add the negative offset. 
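[Note] The downward-growing stack case described in the comment above, as pointer arithmetic: move the stack pointer down by the allocation size, then clear low bits with the negated alignment mask. A hypothetical helper over integer addresses:

#include <cstdint>

uint64_t dynAllocaDown(uint64_t SP, uint64_t AllocSize, uint64_t Alignment) {
  uint64_t Alloc = SP - AllocSize; // G_SUB on the int-typed SP copy
  if (Alignment > 1)               // Alignment is a power of two
    Alloc &= ~(Alignment - 1);     // AND with the negated AlignMask
  return Alloc;                    // written back as the new SP and result
}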
auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize); - if (Align) { - APInt AlignMask(IntPtrTy.getSizeInBits(), Align, true); + if (Alignment > Align(1)) { + APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true); AlignMask.negate(); auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask); Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst); @@ -4326,34 +5185,47 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) { LLT DstTy = MRI.getType(Src); LLT InsertTy = MRI.getType(InsertSrc); - if (InsertTy.isScalar() && - (DstTy.isScalar() || - (DstTy.isVector() && DstTy.getElementType() == InsertTy))) { - LLT IntDstTy = DstTy; - if (!DstTy.isScalar()) { - IntDstTy = LLT::scalar(DstTy.getSizeInBits()); - Src = MIRBuilder.buildBitcast(IntDstTy, Src).getReg(0); - } + if (InsertTy.isVector() || + (DstTy.isVector() && DstTy.getElementType() != InsertTy)) + return UnableToLegalize; - Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0); - if (Offset != 0) { - auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset); - ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0); - } + const DataLayout &DL = MIRBuilder.getDataLayout(); + if ((DstTy.isPointer() && + DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) || + (InsertTy.isPointer() && + DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) { + LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n"); + return UnableToLegalize; + } - APInt MaskVal = ~APInt::getBitsSet(DstTy.getSizeInBits(), Offset, - InsertTy.getSizeInBits()); + LLT IntDstTy = DstTy; - auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal); - auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask); - auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc); + if (!DstTy.isScalar()) { + IntDstTy = LLT::scalar(DstTy.getSizeInBits()); + Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0); + } - MIRBuilder.buildBitcast(Dst, Or); - MI.eraseFromParent(); - return Legalized; + if (!InsertTy.isScalar()) { + const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits()); + InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0); } - return UnableToLegalize; + Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0); + if (Offset != 0) { + auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset); + ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0); + } + + APInt MaskVal = APInt::getBitsSetWithWrap( + DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset); + + auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal); + auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask); + auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc); + + MIRBuilder.buildCast(Dst, Or); + MI.eraseFromParent(); + return Legalized; } LegalizerHelper::LegalizeResult @@ -4397,7 +5269,7 @@ LegalizerHelper::lowerBswap(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); const LLT Ty = MRI.getType(Src); - unsigned SizeInBytes = Ty.getSizeInBytes(); + unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8; unsigned BaseShiftAmt = (SizeInBytes - 1) * 8; // Swap most and least significant byte, set remaining bytes in Res to zero. 
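[Note] The shift-based byte swap being assembled below, for the 4-byte case: the outer bytes move with plain shifts, the inner bytes are masked then shifted. Scalar C++ mirror of the sequence:

#include <cstdint>

uint32_t bswap32(uint32_t X) {
  uint32_t Res = (X << 24) | (X >> 24); // swap most and least significant byte
  Res |= (X & 0x0000FF00u) << 8;        // byte 1 into byte 2
  Res |= (X & 0x00FF0000u) >> 8;        // byte 2 into byte 1
  return Res;
}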
@@ -4470,20 +5342,29 @@ LegalizerHelper::lowerBitreverse(MachineInstr &MI) { } LegalizerHelper::LegalizeResult -LegalizerHelper::lowerReadRegister(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - const LLT Ty = MRI.getType(Dst); - const MDString *RegStr = cast<MDString>( - cast<MDNode>(MI.getOperand(1).getMetadata())->getOperand(0)); - +LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) { MachineFunction &MF = MIRBuilder.getMF(); const TargetSubtargetInfo &STI = MF.getSubtarget(); const TargetLowering *TLI = STI.getTargetLowering(); - Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Ty, MF); - if (!Reg.isValid()) + + bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER; + int NameOpIdx = IsRead ? 1 : 0; + int ValRegIndex = IsRead ? 0 : 1; + + Register ValReg = MI.getOperand(ValRegIndex).getReg(); + const LLT Ty = MRI.getType(ValReg); + const MDString *RegStr = cast<MDString>( + cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0)); + + Register PhysReg = TLI->getRegisterByName(RegStr->getString().data(), Ty, MF); + if (!PhysReg.isValid()) return UnableToLegalize; - MIRBuilder.buildCopy(Dst, Reg); + if (IsRead) + MIRBuilder.buildCopy(ValReg, PhysReg); + else + MIRBuilder.buildCopy(PhysReg, ValReg); + MI.eraseFromParent(); return Legalized; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 02f6b39e0905..4abd0c4df97a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -59,6 +59,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, LegalizeAction Action) { case MoreElements: OS << "MoreElements"; break; + case Bitcast: + OS << "Bitcast"; + break; case Lower: OS << "Lower"; break; @@ -173,6 +176,9 @@ static bool mutationIsSane(const LegalizeRule &Rule, return true; } + case Bitcast: { + return OldTy != NewTy && OldTy.getSizeInBits() == NewTy.getSizeInBits(); + } default: return true; } @@ -500,8 +506,7 @@ LegalizerInfo::getAction(const MachineInstr &MI, SmallVector<LegalityQuery::MemDesc, 2> MemDescrs; for (const auto &MMO : MI.memoperands()) MemDescrs.push_back({8 * MMO->getSize() /* in bits */, - 8 * MMO->getAlignment(), - MMO->getOrdering()}); + 8 * MMO->getAlign().value(), MMO->getOrdering()}); return getAction({MI.getOpcode(), Types, MemDescrs}); } @@ -519,12 +524,6 @@ bool LegalizerInfo::isLegalOrCustom(const MachineInstr &MI, return Action == Legal || Action == Custom; } -bool LegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder, - GISelChangeObserver &Observer) const { - return false; -} - LegalizerInfo::SizeAndActionsVec LegalizerInfo::increaseToLargerTypesAndDecreaseToLargest( const SizeAndActionsVec &v, LegalizeAction IncreaseAction, @@ -575,6 +574,7 @@ LegalizerInfo::findAction(const SizeAndActionsVec &Vec, const uint32_t Size) { LegalizeAction Action = Vec[VecIdx].second; switch (Action) { case Legal: + case Bitcast: case Lower: case Libcall: case Custom: @@ -681,12 +681,6 @@ LegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const { IntermediateType.getScalarSizeInBits())}; } -bool LegalizerInfo::legalizeIntrinsic(MachineInstr &MI, - MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const { - return true; -} - unsigned LegalizerInfo::getExtOpcodeForWideningConstant(LLT SmallTy) const { return SmallTy.isByteSized() ? 
TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp index 1c4a668e5f31..a07416d08614 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" @@ -40,60 +41,6 @@ void Localizer::init(MachineFunction &MF) { TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(MF.getFunction()); } -bool Localizer::shouldLocalize(const MachineInstr &MI) { - // Assuming a spill and reload of a value has a cost of 1 instruction each, - // this helper function computes the maximum number of uses we should consider - // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We - // break even in terms of code size when the original MI has 2 users vs - // choosing to potentially spill. Any more than 2 users we we have a net code - // size increase. This doesn't take into account register pressure though. - auto maxUses = [](unsigned RematCost) { - // A cost of 1 means remats are basically free. - if (RematCost == 1) - return UINT_MAX; - if (RematCost == 2) - return 2U; - - // Remat is too expensive, only sink if there's one user. - if (RematCost > 2) - return 1U; - llvm_unreachable("Unexpected remat cost"); - }; - - // Helper to walk through uses and terminate if we've reached a limit. Saves - // us spending time traversing uses if all we want to know is if it's >= min. - auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) { - unsigned NumUses = 0; - auto UI = MRI->use_instr_nodbg_begin(Reg), UE = MRI->use_instr_nodbg_end(); - for (; UI != UE && NumUses < MaxUses; ++UI) { - NumUses++; - } - // If we haven't reached the end yet then there are more than MaxUses users. - return UI == UE; - }; - - switch (MI.getOpcode()) { - default: - return false; - // Constants-like instructions should be close to their users. - // We don't want long live-ranges for them. - case TargetOpcode::G_CONSTANT: - case TargetOpcode::G_FCONSTANT: - case TargetOpcode::G_FRAME_INDEX: - case TargetOpcode::G_INTTOPTR: - return true; - case TargetOpcode::G_GLOBAL_VALUE: { - unsigned RematCost = TTI->getGISelRematGlobalCost(); - Register Reg = MI.getOperand(0).getReg(); - unsigned MaxUses = maxUses(RematCost); - if (MaxUses == UINT_MAX) - return true; // Remats are "free" so always localize. - bool B = isUsesAtMost(Reg, MaxUses); - return B; - } - } -} - void Localizer::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetTransformInfoWrapperPass>(); getSelectionDAGFallbackAnalysisUsage(AU); @@ -119,9 +66,10 @@ bool Localizer::localizeInterBlock(MachineFunction &MF, // we only localize instructions in the entry block here. This might change if // we start doing CSE across blocks. 
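[Note] The use-count table from the shouldLocalize heuristic deleted above (judging by the new call site below, the logic now lives behind TargetLowering::shouldLocalize rather than being dropped): with spill and reload each costed at one instruction, a constant-like value that takes RematCost instructions to rebuild breaks even at these use counts. Sketch of that table:

#include <climits>

unsigned maxUsesForRemat(unsigned RematCost) {
  if (RematCost == 1)
    return UINT_MAX; // remat is basically free; always localize
  if (RematCost == 2)
    return 2;        // code-size break-even at two users
  return 1;          // too expensive; only sink single-use values
}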
auto &MBB = MF.front(); + auto &TL = *MF.getSubtarget().getTargetLowering(); for (auto RI = MBB.rbegin(), RE = MBB.rend(); RI != RE; ++RI) { MachineInstr &MI = *RI; - if (!shouldLocalize(MI)) + if (!TL.shouldLocalize(MI, TTI)) continue; LLVM_DEBUG(dbgs() << "Should localize: " << MI); assert(MI.getDesc().getNumDefs() == 1 && @@ -138,8 +86,13 @@ bool Localizer::localizeInterBlock(MachineFunction &MF, LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent(); dbgs() << "Checking use: " << MIUse << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n'); - if (isLocalUse(MOUse, MI, InsertMBB)) + if (isLocalUse(MOUse, MI, InsertMBB)) { + // Even if we're in the same block, if the block is very large we could + // still have many long live ranges. Try to do intra-block localization + // too. + LocalizedInstrs.insert(&MI); continue; + } LLVM_DEBUG(dbgs() << "Fixing non-local use\n"); Changed = true; auto MBBAndReg = std::make_pair(InsertMBB, Reg); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LostDebugLocObserver.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LostDebugLocObserver.cpp new file mode 100644 index 000000000000..6d606e5550f1 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LostDebugLocObserver.cpp @@ -0,0 +1,113 @@ +//===----- llvm/CodeGen/GlobalISel/LostDebugLocObserver.cpp -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// Tracks DebugLocs between checkpoints and verifies that they are transferred. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h" + +using namespace llvm; + +#define LOC_DEBUG(X) DEBUG_WITH_TYPE(DebugType.str().c_str(), X) + +void LostDebugLocObserver::analyzeDebugLocations() { + if (LostDebugLocs.empty()) { + LOC_DEBUG(dbgs() << ".. No debug info was present\n"); + return; + } + if (PotentialMIsForDebugLocs.empty()) { + LOC_DEBUG( + dbgs() << ".. No instructions to carry debug info (dead code?)\n"); + return; + } + + LOC_DEBUG(dbgs() << ".. Searching " << PotentialMIsForDebugLocs.size() + << " instrs for " << LostDebugLocs.size() << " locations\n"); + SmallPtrSet<MachineInstr *, 4> FoundIn; + for (MachineInstr *MI : PotentialMIsForDebugLocs) { + if (!MI->getDebugLoc()) + continue; + // Check this first in case there's a matching line-0 location on both input + // and output. + if (MI->getDebugLoc().getLine() == 0) { + LOC_DEBUG( + dbgs() << ".. Assuming line-0 location covers remainder (if any)\n"); + return; + } + if (LostDebugLocs.erase(MI->getDebugLoc())) { + LOC_DEBUG(dbgs() << ".. .. found " << MI->getDebugLoc() << " in " << *MI); + FoundIn.insert(MI); + continue; + } + } + if (LostDebugLocs.empty()) + return; + + NumLostDebugLocs += LostDebugLocs.size(); + LOC_DEBUG({ + dbgs() << ".. Lost locations:\n"; + for (const DebugLoc &Loc : LostDebugLocs) { + dbgs() << ".. .. "; + Loc.print(dbgs()); + dbgs() << "\n"; + } + dbgs() << ".. MIs with matched locations:\n"; + for (MachineInstr *MI : FoundIn) + if (PotentialMIsForDebugLocs.erase(MI)) + dbgs() << ".. .. " << *MI; + dbgs() << ".. Remaining MIs with unmatched/no locations:\n"; + for (const MachineInstr *MI : PotentialMIsForDebugLocs) + dbgs() << ".. .. 
" << *MI; + }); +} + +void LostDebugLocObserver::checkpoint(bool CheckDebugLocs) { + if (CheckDebugLocs) + analyzeDebugLocations(); + PotentialMIsForDebugLocs.clear(); + LostDebugLocs.clear(); +} + +void LostDebugLocObserver::createdInstr(MachineInstr &MI) { + PotentialMIsForDebugLocs.insert(&MI); +} + +static bool irTranslatorNeverAddsLocations(unsigned Opcode) { + switch (Opcode) { + default: + return false; + case TargetOpcode::G_CONSTANT: + case TargetOpcode::G_FCONSTANT: + case TargetOpcode::G_IMPLICIT_DEF: + case TargetOpcode::G_GLOBAL_VALUE: + return true; + } +} + +void LostDebugLocObserver::erasingInstr(MachineInstr &MI) { + if (irTranslatorNeverAddsLocations(MI.getOpcode())) + return; + + PotentialMIsForDebugLocs.erase(&MI); + if (MI.getDebugLoc()) + LostDebugLocs.insert(MI.getDebugLoc()); +} + +void LostDebugLocObserver::changingInstr(MachineInstr &MI) { + if (irTranslatorNeverAddsLocations(MI.getOpcode())) + return; + + PotentialMIsForDebugLocs.erase(&MI); + if (MI.getDebugLoc()) + LostDebugLocs.insert(MI.getDebugLoc()); +} + +void LostDebugLocObserver::changedInstr(MachineInstr &MI) { + PotentialMIsForDebugLocs.insert(&MI); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 3f6622723bdc..10f696d6a3b3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -33,48 +33,10 @@ void MachineIRBuilder::setMF(MachineFunction &MF) { State.Observer = nullptr; } -void MachineIRBuilder::setMBB(MachineBasicBlock &MBB) { - State.MBB = &MBB; - State.II = MBB.end(); - assert(&getMF() == MBB.getParent() && - "Basic block is in a different function"); -} - -void MachineIRBuilder::setInstr(MachineInstr &MI) { - assert(MI.getParent() && "Instruction is not part of a basic block"); - setMBB(*MI.getParent()); - State.II = MI.getIterator(); -} - -void MachineIRBuilder::setCSEInfo(GISelCSEInfo *Info) { State.CSEInfo = Info; } - -void MachineIRBuilder::setInsertPt(MachineBasicBlock &MBB, - MachineBasicBlock::iterator II) { - assert(MBB.getParent() == &getMF() && - "Basic block is in a different function"); - State.MBB = &MBB; - State.II = II; -} - -void MachineIRBuilder::recordInsertion(MachineInstr *InsertedInstr) const { - if (State.Observer) - State.Observer->createdInstr(*InsertedInstr); -} - -void MachineIRBuilder::setChangeObserver(GISelChangeObserver &Observer) { - State.Observer = &Observer; -} - -void MachineIRBuilder::stopObservingChanges() { State.Observer = nullptr; } - //------------------------------------------------------------------------------ // Build instruction variants. 
//------------------------------------------------------------------------------ -MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opcode) { - return insertInstr(buildInstrNoInsert(Opcode)); -} - MachineInstrBuilder MachineIRBuilder::buildInstrNoInsert(unsigned Opcode) { MachineInstrBuilder MIB = BuildMI(getMF(), getDL(), getTII().get(Opcode)); return MIB; @@ -135,7 +97,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, assert( cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) && "Expected inlined-at fields to agree"); - auto MIB = buildInstr(TargetOpcode::DBG_VALUE); + auto MIB = buildInstrNoInsert(TargetOpcode::DBG_VALUE); if (auto *CI = dyn_cast<ConstantInt>(&C)) { if (CI->getBitWidth() > 64) MIB.addCImm(CI); @@ -148,7 +110,8 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, MIB.addReg(0U); } - return MIB.addImm(0).addMetadata(Variable).addMetadata(Expr); + MIB.addImm(0).addMetadata(Variable).addMetadata(Expr); + return insertInstr(MIB); } MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) { @@ -162,12 +125,12 @@ MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) { MachineInstrBuilder MachineIRBuilder::buildDynStackAlloc(const DstOp &Res, const SrcOp &Size, - unsigned Align) { + Align Alignment) { assert(Res.getLLTTy(*getMRI()).isPointer() && "expected ptr dst type"); auto MIB = buildInstr(TargetOpcode::G_DYN_STACKALLOC); Res.addDefToMIB(*getMRI(), MIB); Size.addSrcToMIB(MIB); - MIB.addImm(Align); + MIB.addImm(Alignment.value()); return MIB; } @@ -199,14 +162,14 @@ MachineInstrBuilder MachineIRBuilder::buildJumpTable(const LLT PtrTy, .addJumpTableIndex(JTI); } -void MachineIRBuilder::validateBinaryOp(const LLT &Res, const LLT &Op0, - const LLT &Op1) { +void MachineIRBuilder::validateBinaryOp(const LLT Res, const LLT Op0, + const LLT Op1) { assert((Res.isScalar() || Res.isVector()) && "invalid operand type"); assert((Res == Op0 && Res == Op1) && "type mismatch"); } -void MachineIRBuilder::validateShiftOp(const LLT &Res, const LLT &Op0, - const LLT &Op1) { +void MachineIRBuilder::validateShiftOp(const LLT Res, const LLT Op0, + const LLT Op1) { assert((Res.isScalar() || Res.isVector()) && "invalid operand type"); assert((Res == Op0) && "type mismatch"); } @@ -214,16 +177,16 @@ void MachineIRBuilder::validateShiftOp(const LLT &Res, const LLT &Op0, MachineInstrBuilder MachineIRBuilder::buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1) { - assert(Res.getLLTTy(*getMRI()).isPointer() && + assert(Res.getLLTTy(*getMRI()).getScalarType().isPointer() && Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch"); - assert(Op1.getLLTTy(*getMRI()).isScalar() && "invalid offset type"); + assert(Op1.getLLTTy(*getMRI()).getScalarType().isScalar() && "invalid offset type"); return buildInstr(TargetOpcode::G_PTR_ADD, {Res}, {Op0, Op1}); } Optional<MachineInstrBuilder> MachineIRBuilder::materializePtrAdd(Register &Res, Register Op0, - const LLT &ValueTy, uint64_t Value) { + const LLT ValueTy, uint64_t Value) { assert(Res == 0 && "Res is a result argument"); assert(ValueTy.isScalar() && "invalid offset type"); @@ -237,17 +200,14 @@ MachineIRBuilder::materializePtrAdd(Register &Res, Register Op0, return buildPtrAdd(Res, Op0, Cst.getReg(0)); } -MachineInstrBuilder MachineIRBuilder::buildPtrMask(const DstOp &Res, - const SrcOp &Op0, - uint32_t NumBits) { - assert(Res.getLLTTy(*getMRI()).isPointer() && - Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) 
&& "type mismatch"); - - auto MIB = buildInstr(TargetOpcode::G_PTR_MASK); - Res.addDefToMIB(*getMRI(), MIB); - Op0.addSrcToMIB(MIB); - MIB.addImm(NumBits); - return MIB; +MachineInstrBuilder MachineIRBuilder::buildMaskLowPtrBits(const DstOp &Res, + const SrcOp &Op0, + uint32_t NumBits) { + LLT PtrTy = Res.getLLTTy(*getMRI()); + LLT MaskTy = LLT::scalar(PtrTy.getSizeInBits()); + Register MaskReg = getMRI()->createGenericVirtualRegister(MaskTy); + buildConstant(MaskReg, maskTrailingZeros<uint64_t>(NumBits)); + return buildPtrMask(Res, Op0, MaskReg); } MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) { @@ -290,6 +250,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res, } auto Const = buildInstr(TargetOpcode::G_CONSTANT); + Const->setDebugLoc(DebugLoc()); Res.addDefToMIB(*getMRI(), Const); Const.addCImm(&Val); return Const; @@ -323,6 +284,7 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res, } auto Const = buildInstr(TargetOpcode::G_FCONSTANT); + Const->setDebugLoc(DebugLoc()); Res.addDefToMIB(*getMRI(), Const); Const.addFPImm(&Val); return Const; @@ -377,6 +339,23 @@ MachineInstrBuilder MachineIRBuilder::buildLoadInstr(unsigned Opcode, return MIB; } +MachineInstrBuilder MachineIRBuilder::buildLoadFromOffset( + const DstOp &Dst, const SrcOp &BasePtr, + MachineMemOperand &BaseMMO, int64_t Offset) { + LLT LoadTy = Dst.getLLTTy(*getMRI()); + MachineMemOperand *OffsetMMO = + getMF().getMachineMemOperand(&BaseMMO, Offset, LoadTy.getSizeInBytes()); + + if (Offset == 0) // This may be a size or type changing load. + return buildLoad(Dst, BasePtr, *OffsetMMO); + + LLT PtrTy = BasePtr.getLLTTy(*getMRI()); + LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits()); + auto ConstOffset = buildConstant(OffsetTy, Offset); + auto Ptr = buildPtrAdd(PtrTy, BasePtr, ConstOffset); + return buildLoad(Dst, Ptr, *OffsetMMO); +} + MachineInstrBuilder MachineIRBuilder::buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO) { @@ -390,22 +369,6 @@ MachineInstrBuilder MachineIRBuilder::buildStore(const SrcOp &Val, return MIB; } -MachineInstrBuilder MachineIRBuilder::buildUAddo(const DstOp &Res, - const DstOp &CarryOut, - const SrcOp &Op0, - const SrcOp &Op1) { - return buildInstr(TargetOpcode::G_UADDO, {Res, CarryOut}, {Op0, Op1}); -} - -MachineInstrBuilder MachineIRBuilder::buildUAdde(const DstOp &Res, - const DstOp &CarryOut, - const SrcOp &Op0, - const SrcOp &Op1, - const SrcOp &CarryIn) { - return buildInstr(TargetOpcode::G_UADDE, {Res, CarryOut}, - {Op0, Op1, CarryIn}); -} - MachineInstrBuilder MachineIRBuilder::buildAnyExt(const DstOp &Res, const SrcOp &Op) { return buildInstr(TargetOpcode::G_ANYEXT, Res, Op); @@ -529,7 +492,7 @@ void MachineIRBuilder::buildSequence(Register Res, ArrayRef<Register> Ops, #ifndef NDEBUG assert(Ops.size() == Indices.size() && "incompatible args"); assert(!Ops.empty() && "invalid trivial sequence"); - assert(std::is_sorted(Indices.begin(), Indices.end()) && + assert(llvm::is_sorted(Indices) && "sequence offsets must be in ascending order"); assert(getMRI()->getType(Res).isValid() && "invalid operand type"); @@ -579,6 +542,13 @@ MachineInstrBuilder MachineIRBuilder::buildMerge(const DstOp &Res, return buildInstr(TargetOpcode::G_MERGE_VALUES, Res, TmpVec); } +MachineInstrBuilder +MachineIRBuilder::buildMerge(const DstOp &Res, + std::initializer_list<SrcOp> Ops) { + assert(Ops.size() > 1); + return buildInstr(TargetOpcode::G_MERGE_VALUES, Res, Ops); +} + MachineInstrBuilder 
MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res, const SrcOp &Op) { // Unfortunately to convert from ArrayRef<LLT> to ArrayRef<DstOp>, @@ -642,22 +612,20 @@ MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<Register> Ops) { return buildInstr(TargetOpcode::G_CONCAT_VECTORS, Res, TmpVec); } -MachineInstrBuilder MachineIRBuilder::buildInsert(Register Res, Register Src, - Register Op, unsigned Index) { - assert(Index + getMRI()->getType(Op).getSizeInBits() <= - getMRI()->getType(Res).getSizeInBits() && +MachineInstrBuilder MachineIRBuilder::buildInsert(const DstOp &Res, + const SrcOp &Src, + const SrcOp &Op, + unsigned Index) { + assert(Index + Op.getLLTTy(*getMRI()).getSizeInBits() <= + Res.getLLTTy(*getMRI()).getSizeInBits() && "insertion past the end of a register"); - if (getMRI()->getType(Res).getSizeInBits() == - getMRI()->getType(Op).getSizeInBits()) { + if (Res.getLLTTy(*getMRI()).getSizeInBits() == + Op.getLLTTy(*getMRI()).getSizeInBits()) { return buildCast(Res, Op); } - return buildInstr(TargetOpcode::G_INSERT) - .addDef(Res) - .addUse(Src) - .addUse(Op) - .addImm(Index); + return buildInstr(TargetOpcode::G_INSERT, Res, {Src, Op, uint64_t(Index)}); } MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID, @@ -907,7 +875,7 @@ MachineIRBuilder::buildBlockAddress(Register Res, const BlockAddress *BA) { return buildInstr(TargetOpcode::G_BLOCK_ADDR).addDef(Res).addBlockAddress(BA); } -void MachineIRBuilder::validateTruncExt(const LLT &DstTy, const LLT &SrcTy, +void MachineIRBuilder::validateTruncExt(const LLT DstTy, const LLT SrcTy, bool IsExtend) { #ifndef NDEBUG if (DstTy.isVector()) { @@ -926,8 +894,8 @@ void MachineIRBuilder::validateTruncExt(const LLT &DstTy, const LLT &SrcTy, #endif } -void MachineIRBuilder::validateSelectOp(const LLT &ResTy, const LLT &TstTy, - const LLT &Op0Ty, const LLT &Op1Ty) { +void MachineIRBuilder::validateSelectOp(const LLT ResTy, const LLT TstTy, + const LLT Op0Ty, const LLT Op1Ty) { #ifndef NDEBUG assert((ResTy.isScalar() || ResTy.isVector() || ResTy.isPointer()) && "invalid operand type"); @@ -970,7 +938,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, case TargetOpcode::G_SMIN: case TargetOpcode::G_SMAX: case TargetOpcode::G_UMIN: - case TargetOpcode::G_UMAX: { + case TargetOpcode::G_UMAX: + case TargetOpcode::G_UADDSAT: + case TargetOpcode::G_SADDSAT: + case TargetOpcode::G_USUBSAT: + case TargetOpcode::G_SSUBSAT: { // All these are binary ops. 
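Among the opcodes newly routed through the binary-op validation above are the saturating arithmetic ops (G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT). They type-check like any other binary op; what distinguishes them is clamping instead of wrapping. A sketch of the two add flavors at 32 bits (helper names are mine):

#include <cstdint>
#include <cstdio>
#include <limits>

static uint32_t uaddsat(uint32_t A, uint32_t B) {
  uint32_t S = A + B;                      // wraps on overflow
  return S < A ? std::numeric_limits<uint32_t>::max() : S;
}

static int32_t saddsat(int32_t A, int32_t B) {
  int64_t S = static_cast<int64_t>(A) + B; // widen, then clamp
  if (S > std::numeric_limits<int32_t>::max())
    return std::numeric_limits<int32_t>::max();
  if (S < std::numeric_limits<int32_t>::min())
    return std::numeric_limits<int32_t>::min();
  return static_cast<int32_t>(S);
}

int main() {
  std::printf("%u %d\n", uaddsat(0xFFFFFFF0u, 0x100u),
              saddsat(std::numeric_limits<int32_t>::max(), 1));
  return 0;
}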
assert(DstOps.size() == 1 && "Invalid Dst"); assert(SrcOps.size() == 2 && "Invalid Srcs"); @@ -1005,6 +977,13 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, SrcOps[0].getLLTTy(*getMRI()), false); break; } + case TargetOpcode::G_BITCAST: { + assert(DstOps.size() == 1 && "Invalid Dst"); + assert(SrcOps.size() == 1 && "Invalid Srcs"); + assert(DstOps[0].getLLTTy(*getMRI()).getSizeInBits() == + SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() && "invalid bitcast"); + break; + } case TargetOpcode::COPY: assert(DstOps.size() == 1 && "Invalid Dst"); // If the caller wants to add a subreg source it has to be done separately diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 98e48f5fc1d5..356e0e437d32 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -693,6 +693,15 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { if (isTargetSpecificOpcode(MI.getOpcode()) && !MI.isPreISelOpcode()) continue; + // Ignore inline asm instructions: they should use physical + // registers/regclasses + if (MI.isInlineAsm()) + continue; + + // Ignore debug info. + if (MI.isDebugInstr()) + continue; + if (!assignInstr(MI)) { reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect", "unable to map instruction", MI); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp index eeec2a5d536a..8a7fb4fbbf2d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -12,6 +12,7 @@ #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -27,9 +28,9 @@ using namespace llvm; -unsigned llvm::constrainRegToClass(MachineRegisterInfo &MRI, +Register llvm::constrainRegToClass(MachineRegisterInfo &MRI, const TargetInstrInfo &TII, - const RegisterBankInfo &RBI, unsigned Reg, + const RegisterBankInfo &RBI, Register Reg, const TargetRegisterClass &RegClass) { if (!RBI.constrainGenericRegister(Reg, RegClass, MRI)) return MRI.createVirtualRegister(&RegClass); @@ -37,17 +38,16 @@ unsigned llvm::constrainRegToClass(MachineRegisterInfo &MRI, return Reg; } -unsigned llvm::constrainOperandRegClass( +Register llvm::constrainOperandRegClass( const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, - const TargetRegisterClass &RegClass, const MachineOperand &RegMO, - unsigned OpIdx) { + const TargetRegisterClass &RegClass, const MachineOperand &RegMO) { Register Reg = RegMO.getReg(); // Assume physical registers are properly constrained. assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented"); - unsigned ConstrainedReg = constrainRegToClass(MRI, TII, RBI, Reg, RegClass); + Register ConstrainedReg = constrainRegToClass(MRI, TII, RBI, Reg, RegClass); // If we created a new virtual register because the class is not compatible // then create a copy between the new and the old register. 
if (ConstrainedReg != Reg) { @@ -63,11 +63,20 @@ unsigned llvm::constrainOperandRegClass( TII.get(TargetOpcode::COPY), Reg) .addReg(ConstrainedReg); } + } else { + if (GISelChangeObserver *Observer = MF.getObserver()) { + if (!RegMO.isDef()) { + MachineInstr *RegDef = MRI.getVRegDef(Reg); + Observer->changedInstr(*RegDef); + } + Observer->changingAllUsesOfReg(MRI, Reg); + Observer->finishedChangingAllUsesOfReg(); + } } return ConstrainedReg; } -unsigned llvm::constrainOperandRegClass( +Register llvm::constrainOperandRegClass( const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II, @@ -105,7 +114,7 @@ unsigned llvm::constrainOperandRegClass( return Reg; } return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt, *RegClass, - RegMO, OpIdx); + RegMO); } bool llvm::constrainSelectedInstRegOperands(MachineInstr &I, @@ -155,6 +164,20 @@ bool llvm::constrainSelectedInstRegOperands(MachineInstr &I, return true; } +bool llvm::canReplaceReg(Register DstReg, Register SrcReg, + MachineRegisterInfo &MRI) { + // Give up if either DstReg or SrcReg is a physical register. + if (DstReg.isPhysical() || SrcReg.isPhysical()) + return false; + // Give up if the types don't match. + if (MRI.getType(DstReg) != MRI.getType(SrcReg)) + return false; + // Replace if either DstReg has no constraints or the register + // constraints match. + return !MRI.getRegClassOrRegBank(DstReg) || + MRI.getRegClassOrRegBank(DstReg) == MRI.getRegClassOrRegBank(SrcReg); +} + bool llvm::isTriviallyDead(const MachineInstr &MI, const MachineRegisterInfo &MRI) { // If we can move an instruction, we can remove it. Otherwise, it has @@ -175,22 +198,37 @@ bool llvm::isTriviallyDead(const MachineInstr &MI, return true; } -void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, - MachineOptimizationRemarkEmitter &MORE, - MachineOptimizationRemarkMissed &R) { - MF.getProperties().set(MachineFunctionProperties::Property::FailedISel); - +static void reportGISelDiagnostic(DiagnosticSeverity Severity, + MachineFunction &MF, + const TargetPassConfig &TPC, + MachineOptimizationRemarkEmitter &MORE, + MachineOptimizationRemarkMissed &R) { + bool IsFatal = Severity == DS_Error && + TPC.isGlobalISelAbortEnabled(); // Print the function name explicitly if we don't have a debug location (which // makes the diagnostic less useful) or if we're going to emit a raw error. 
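The diagnostics refactor in this hunk funnels both paths through one severity-aware helper: an error aborts only when GlobalISel abort is enabled, and everything else is emitted as a remark, letting the new reportGISelWarning share the code. A simplified stand-in for that control flow (types and output are placeholders, not LLVM's):

#include <cstdio>
#include <cstdlib>

enum Severity { DS_Warning, DS_Error };

static void reportDiag(Severity S, bool AbortEnabled, const char *Msg) {
  bool IsFatal = S == DS_Error && AbortEnabled;
  if (IsFatal) {
    std::fprintf(stderr, "fatal: %s\n", Msg); // report_fatal_error analogue
    std::exit(1);
  }
  std::fprintf(stderr, "remark: %s\n", Msg);  // MORE.emit(R) analogue
}

int main() {
  reportDiag(DS_Warning, /*AbortEnabled=*/true, "cannot select G_FOO");
  return 0;
}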
- if (!R.getLocation().isValid() || TPC.isGlobalISelAbortEnabled()) + if (!R.getLocation().isValid() || IsFatal) R << (" (in function: " + MF.getName() + ")").str(); - if (TPC.isGlobalISelAbortEnabled()) + if (IsFatal) report_fatal_error(R.getMsg()); else MORE.emit(R); } +void llvm::reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC, + MachineOptimizationRemarkEmitter &MORE, + MachineOptimizationRemarkMissed &R) { + reportGISelDiagnostic(DS_Warning, MF, TPC, MORE, R); +} + +void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, + MachineOptimizationRemarkEmitter &MORE, + MachineOptimizationRemarkMissed &R) { + MF.getProperties().set(MachineFunctionProperties::Property::FailedISel); + reportGISelDiagnostic(DS_Error, MF, TPC, MORE, R); +} + void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, MachineOptimizationRemarkEmitter &MORE, const char *PassName, StringRef Msg, @@ -204,7 +242,7 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, reportGISelFailure(MF, TPC, MORE, R); } -Optional<int64_t> llvm::getConstantVRegVal(unsigned VReg, +Optional<int64_t> llvm::getConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI) { Optional<ValueAndVReg> ValAndVReg = getConstantVRegValWithLookThrough(VReg, MRI, /*LookThroughInstrs*/ false); @@ -216,7 +254,7 @@ Optional<int64_t> llvm::getConstantVRegVal(unsigned VReg, } Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( - unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs, + Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs, bool HandleFConstant) { SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes; MachineInstr *MI; @@ -292,28 +330,51 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( return ValueAndVReg{Val.getSExtValue(), VReg}; } -const llvm::ConstantFP* llvm::getConstantFPVRegVal(unsigned VReg, - const MachineRegisterInfo &MRI) { +const llvm::ConstantFP * +llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) { MachineInstr *MI = MRI.getVRegDef(VReg); if (TargetOpcode::G_FCONSTANT != MI->getOpcode()) return nullptr; return MI->getOperand(1).getFPImm(); } -llvm::MachineInstr *llvm::getDefIgnoringCopies(Register Reg, - const MachineRegisterInfo &MRI) { +namespace { +struct DefinitionAndSourceRegister { + llvm::MachineInstr *MI; + Register Reg; +}; +} // namespace + +static llvm::Optional<DefinitionAndSourceRegister> +getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) { + Register DefSrcReg = Reg; auto *DefMI = MRI.getVRegDef(Reg); auto DstTy = MRI.getType(DefMI->getOperand(0).getReg()); if (!DstTy.isValid()) - return nullptr; + return None; while (DefMI->getOpcode() == TargetOpcode::COPY) { Register SrcReg = DefMI->getOperand(1).getReg(); auto SrcTy = MRI.getType(SrcReg); if (!SrcTy.isValid() || SrcTy != DstTy) break; DefMI = MRI.getVRegDef(SrcReg); + DefSrcReg = SrcReg; } - return DefMI; + return DefinitionAndSourceRegister{DefMI, DefSrcReg}; +} + +llvm::MachineInstr *llvm::getDefIgnoringCopies(Register Reg, + const MachineRegisterInfo &MRI) { + Optional<DefinitionAndSourceRegister> DefSrcReg = + getDefSrcRegIgnoringCopies(Reg, MRI); + return DefSrcReg ? DefSrcReg->MI : nullptr; +} + +Register llvm::getSrcRegIgnoringCopies(Register Reg, + const MachineRegisterInfo &MRI) { + Optional<DefinitionAndSourceRegister> DefSrcReg = + getDefSrcRegIgnoringCopies(Reg, MRI); + return DefSrcReg ? 
DefSrcReg->Reg : Register(); } llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, Register Reg, @@ -335,54 +396,59 @@ APFloat llvm::getAPFloatFromSize(double Val, unsigned Size) { return APF; } -Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const unsigned Op1, - const unsigned Op2, +Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1, + const Register Op2, const MachineRegisterInfo &MRI) { - auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI); auto MaybeOp2Cst = getConstantVRegVal(Op2, MRI); - if (MaybeOp1Cst && MaybeOp2Cst) { - LLT Ty = MRI.getType(Op1); - APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true); - APInt C2(Ty.getSizeInBits(), *MaybeOp2Cst, true); - switch (Opcode) { - default: + if (!MaybeOp2Cst) + return None; + + auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI); + if (!MaybeOp1Cst) + return None; + + LLT Ty = MRI.getType(Op1); + APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true); + APInt C2(Ty.getSizeInBits(), *MaybeOp2Cst, true); + switch (Opcode) { + default: + break; + case TargetOpcode::G_ADD: + return C1 + C2; + case TargetOpcode::G_AND: + return C1 & C2; + case TargetOpcode::G_ASHR: + return C1.ashr(C2); + case TargetOpcode::G_LSHR: + return C1.lshr(C2); + case TargetOpcode::G_MUL: + return C1 * C2; + case TargetOpcode::G_OR: + return C1 | C2; + case TargetOpcode::G_SHL: + return C1 << C2; + case TargetOpcode::G_SUB: + return C1 - C2; + case TargetOpcode::G_XOR: + return C1 ^ C2; + case TargetOpcode::G_UDIV: + if (!C2.getBoolValue()) break; - case TargetOpcode::G_ADD: - return C1 + C2; - case TargetOpcode::G_AND: - return C1 & C2; - case TargetOpcode::G_ASHR: - return C1.ashr(C2); - case TargetOpcode::G_LSHR: - return C1.lshr(C2); - case TargetOpcode::G_MUL: - return C1 * C2; - case TargetOpcode::G_OR: - return C1 | C2; - case TargetOpcode::G_SHL: - return C1 << C2; - case TargetOpcode::G_SUB: - return C1 - C2; - case TargetOpcode::G_XOR: - return C1 ^ C2; - case TargetOpcode::G_UDIV: - if (!C2.getBoolValue()) - break; - return C1.udiv(C2); - case TargetOpcode::G_SDIV: - if (!C2.getBoolValue()) - break; - return C1.sdiv(C2); - case TargetOpcode::G_UREM: - if (!C2.getBoolValue()) - break; - return C1.urem(C2); - case TargetOpcode::G_SREM: - if (!C2.getBoolValue()) - break; - return C1.srem(C2); - } + return C1.udiv(C2); + case TargetOpcode::G_SDIV: + if (!C2.getBoolValue()) + break; + return C1.sdiv(C2); + case TargetOpcode::G_UREM: + if (!C2.getBoolValue()) + break; + return C1.urem(C2); + case TargetOpcode::G_SREM: + if (!C2.getBoolValue()) + break; + return C1.srem(C2); } + return None; } @@ -411,7 +477,19 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, return false; } -Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const unsigned Op1, +Align llvm::inferAlignFromPtrInfo(MachineFunction &MF, + const MachinePointerInfo &MPO) { + auto PSV = MPO.V.dyn_cast<const PseudoSourceValue *>(); + if (auto FSPV = dyn_cast_or_null<FixedStackPseudoSourceValue>(PSV)) { + MachineFrameInfo &MFI = MF.getFrameInfo(); + return commonAlignment(MFI.getObjectAlign(FSPV->getFrameIndex()), + MPO.Offset); + } + + return Align(1); +} + +Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1, uint64_t Imm, const MachineRegisterInfo &MRI) { auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI); @@ -431,3 +509,55 @@ Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const unsigned Op1, void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) { AU.addPreserved<StackProtector>(); } + +LLT 
llvm::getLCMType(LLT Ty0, LLT Ty1) { + if (!Ty0.isVector() && !Ty1.isVector()) { + unsigned Mul = Ty0.getSizeInBits() * Ty1.getSizeInBits(); + int GCDSize = greatestCommonDivisor(Ty0.getSizeInBits(), + Ty1.getSizeInBits()); + return LLT::scalar(Mul / GCDSize); + } + + if (Ty0.isVector() && !Ty1.isVector()) { + assert(Ty0.getElementType() == Ty1 && "not yet handled"); + return Ty0; + } + + if (Ty1.isVector() && !Ty0.isVector()) { + assert(Ty1.getElementType() == Ty0 && "not yet handled"); + return Ty1; + } + + if (Ty0.isVector() && Ty1.isVector()) { + assert(Ty0.getElementType() == Ty1.getElementType() && "not yet handled"); + + int GCDElts = greatestCommonDivisor(Ty0.getNumElements(), + Ty1.getNumElements()); + + int Mul = Ty0.getNumElements() * Ty1.getNumElements(); + return LLT::vector(Mul / GCDElts, Ty0.getElementType()); + } + + llvm_unreachable("not yet handled"); +} + +LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) { + if (OrigTy.isVector() && TargetTy.isVector()) { + assert(OrigTy.getElementType() == TargetTy.getElementType()); + int GCD = greatestCommonDivisor(OrigTy.getNumElements(), + TargetTy.getNumElements()); + return LLT::scalarOrVector(GCD, OrigTy.getElementType()); + } + + if (OrigTy.isVector() && !TargetTy.isVector()) { + assert(OrigTy.getElementType() == TargetTy); + return TargetTy; + } + + assert(!OrigTy.isVector() && !TargetTy.isVector() && + "GCD type of vector and scalar not implemented"); + + int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(), + TargetTy.getSizeInBits()); + return LLT::scalar(GCD); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp index 6e5593abb43e..1e20c02ba160 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp @@ -83,6 +83,7 @@ #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/InitializePasses.h" +#include "llvm/MC/SectionKind.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -463,7 +464,7 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals, Type *Ty = Globals[j]->getValueType(); // Make sure we use the same alignment AsmPrinter would use. 
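The GlobalMerge hunk that follows switches to DataLayout::getPreferredAlign and pads the merged blob so each member starts at its preferred alignment: Padding = alignTo(MergedSize, Alignment) - MergedSize. The arithmetic in isolation, assuming power-of-two alignments:

#include <cstdint>
#include <cstdio>

static uint64_t alignTo(uint64_t Offset, uint64_t Align) {
  return (Offset + Align - 1) & ~(Align - 1); // round up, power-of-two Align
}

int main() {
  uint64_t MergedSize = 10, Alignment = 8;
  uint64_t Padding = alignTo(MergedSize, Alignment) - MergedSize;
  std::printf("padding at offset 10, align 8: %llu\n",
              static_cast<unsigned long long>(Padding)); // 6
  return 0;
}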
- Align Alignment(DL.getPreferredAlignment(Globals[j])); + Align Alignment = DL.getPreferredAlign(Globals[j]); unsigned Padding = alignTo(MergedSize, Alignment) - MergedSize; MergedSize += Padding; MergedSize += DL.getTypeAllocSize(Ty); @@ -523,7 +524,7 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals, const StructLayout *MergedLayout = DL.getStructLayout(MergedTy); for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) { GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage(); - std::string Name = Globals[k]->getName(); + std::string Name(Globals[k]->getName()); GlobalValue::VisibilityTypes Visibility = Globals[k]->getVisibility(); GlobalValue::DLLStorageClassTypes DLLStorage = Globals[k]->getDLLStorageClass(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp index 65c2a37e5d43..0ba7e920e507 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp @@ -20,7 +20,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -35,7 +35,6 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/PassRegistry.h" -#include "llvm/PassSupport.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" @@ -43,6 +42,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #define DEBUG_TYPE "hardware-loops" @@ -245,14 +245,17 @@ bool HardwareLoops::runOnFunction(Function &F) { // converted and the parent loop doesn't support containing a hardware loop. bool HardwareLoops::TryConvertLoop(Loop *L) { // Process nested loops first. - for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { - if (TryConvertLoop(*I)) { - reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested", - ORE, L); - return true; // Stop search. - } + bool AnyChanged = false; + for (Loop *SL : *L) + AnyChanged |= TryConvertLoop(SL); + if (AnyChanged) { + reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested", + ORE, L); + return true; // Stop search. 
} + LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n"); + HardwareLoopInfo HWLoopInfo(L); if (!HWLoopInfo.canAnalyze(*LI)) { reportHWLoopFailure("cannot analyze loop, irreducible control flow", @@ -476,9 +479,7 @@ Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) { Function *DecFunc = Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg, - { EltsRem->getType(), EltsRem->getType(), - LoopDecrement->getType() - }); + { EltsRem->getType() }); Value *Ops[] = { EltsRem, LoopDecrement }; Value *Call = CondBuilder.CreateCall(DecFunc, Ops); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp index 7d64828aa482..1a5c5d685017 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp @@ -31,6 +31,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -447,7 +448,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TLI = ST.getTargetLowering(); TII = ST.getInstrInfo(); TRI = ST.getRegisterInfo(); - BranchFolder::MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>()); + MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>()); MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); ProfileSummaryInfo *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); @@ -462,10 +463,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { if (!PreRegAlloc) { // Tail merge tend to expose more if-conversion opportunities. BranchFolder BF(true, false, MBFI, *MBPI, PSI); - auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); - BFChange = BF.OptimizeFunction( - MF, TII, ST.getRegisterInfo(), - MMIWP ? &MMIWP->getMMI() : nullptr); + BFChange = BF.OptimizeFunction(MF, TII, ST.getRegisterInfo()); } LLVM_DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'" @@ -604,10 +602,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { if (MadeChange && IfCvtBranchFold) { BranchFolder BF(false, false, MBFI, *MBPI, PSI); - auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); - BF.OptimizeFunction( - MF, TII, MF.getSubtarget().getRegisterInfo(), - MMIWP ? &MMIWP->getMMI() : nullptr); + BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo()); } MadeChange |= BFChange; @@ -972,6 +967,11 @@ bool IfConverter::ValidDiamond( FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone) return false; + // If the True and False BBs are equal we're dealing with a degenerate case + // that we don't treat as a diamond. + if (TrueBBI.BB == FalseBBI.BB) + return false; + MachineBasicBlock *TT = TrueBBI.TrueBB; MachineBasicBlock *FT = FalseBBI.TrueBB; @@ -1851,7 +1851,7 @@ bool IfConverter::IfConvertDiamondCommon( while (NumDups1 != 0) { // Since this instruction is going to be deleted, update call // site info state if the instruction is call instruction. - if (DI2->isCall(MachineInstr::IgnoreBundle)) + if (DI2->shouldUpdateCallSiteInfo()) MBB2.getParent()->eraseCallSiteInfo(&*DI2); ++DI2; @@ -1900,7 +1900,7 @@ bool IfConverter::IfConvertDiamondCommon( // Since this instruction is going to be deleted, update call // site info state if the instruction is call instruction. 
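Several hunks in this file (and in InlineSpiller further down) exist to keep the function's call-site-info table consistent when calls are erased, cloned, or folded, while the predicate tightens from isCall to shouldUpdateCallSiteInfo / isCandidateForCallSiteEntry. A toy model of the erase/copy/move bookkeeping, keyed by an integer id instead of a MachineInstr pointer:

#include <cstdio>
#include <map>
#include <string>

struct CallSiteInfo { std::string ArgRegs; };

struct Fn {
  std::map<int, CallSiteInfo> CSInfo;           // keyed by instruction id
  void erase(int MI) { CSInfo.erase(MI); }      // eraseCallSiteInfo analogue
  void copy(int Old, int New) {                 // copyCallSiteInfo analogue
    auto It = CSInfo.find(Old);
    if (It != CSInfo.end())
      CSInfo[New] = It->second;
  }
  void move(int Old, int New) { copy(Old, New); erase(Old); }
};

int main() {
  Fn F;
  F.CSInfo[1] = {"x0,x1"};
  F.move(1, 2);                                 // e.g. a call folded away
  std::printf("entries: %zu\n", F.CSInfo.size());
  return 0;
}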
- if (DI1->isCall(MachineInstr::IgnoreBundle)) + if (DI1->shouldUpdateCallSiteInfo()) MBB1.getParent()->eraseCallSiteInfo(&*DI1); // skip dbg_value instructions @@ -2188,8 +2188,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, MachineInstr *MI = MF.CloneMachineInstr(&I); // Make a copy of the call site info. - if (MI->isCall(MachineInstr::IgnoreBundle)) - MF.copyCallSiteInfo(&I,MI); + if (I.isCandidateForCallSiteEntry()) + MF.copyCallSiteInfo(&I, MI); ToBBI.BB->insert(ToBBI.BB->end(), MI); ToBBI.NonPredSize++; @@ -2237,10 +2237,10 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, } /// Move all instructions from FromBB to the end of ToBB. This will leave -/// FromBB as an empty block, so remove all of its successor edges except for -/// the fall-through edge. If AddEdges is true, i.e., when FromBBI's branch is -/// being moved, add those successor edges to ToBBI and remove the old edge -/// from ToBBI to FromBBI. +/// FromBB as an empty block, so remove all of its successor edges and move it +/// to the end of the function. If AddEdges is true, i.e., when FromBBI's +/// branch is being moved, add those successor edges to ToBBI and remove the old +/// edge from ToBBI to FromBBI. void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { MachineBasicBlock &FromMBB = *FromBBI.BB; assert(!FromMBB.hasAddressTaken() && @@ -2280,8 +2280,10 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { for (MachineBasicBlock *Succ : FromSuccs) { // Fallthrough edge can't be transferred. - if (Succ == FallThrough) + if (Succ == FallThrough) { + FromMBB.removeSuccessor(Succ); continue; + } auto NewProb = BranchProbability::getZero(); if (AddEdges) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp index 0bbedb0a5ea6..16c9bfc672af 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -364,12 +364,18 @@ ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI, unsigned PointerReg, ArrayRef<MachineInstr *> PrevInsts) { int64_t Offset; + bool OffsetIsScalable; const MachineOperand *BaseOp; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) || + + if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI) || !BaseOp->isReg() || BaseOp->getReg() != PointerReg) return SR_Unsuitable; + // FIXME: This algorithm assumes instructions have fixed-size offsets. + if (OffsetIsScalable) + return SR_Unsuitable; + // We want the mem access to be issued at a sane offset from PointerReg, // so that if PointerReg is null then the access reliably page faults. 
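The scalable-offset bail-out added above feeds this "sane offset" requirement: a memory access can stand in for an explicit null check only if base-plus-offset is guaranteed to hit an unmapped page when the base is null, which is unknowable for a scalable offset. A rough illustration, assuming a 4 KiB guard page at address zero (the real pass uses its own bound, this just shows the shape of the test):

#include <cstdint>
#include <cstdio>

static bool offsetFaultsOnNull(int64_t Offset, bool OffsetIsScalable,
                               uint64_t PageSize = 4096) {
  if (OffsetIsScalable)  // size unknown at compile time: give up
    return false;
  return Offset >= 0 && static_cast<uint64_t>(Offset) < PageSize;
}

int main() {
  std::printf("%d %d\n", offsetFaultsOnNull(8, false),
              offsetFaultsOnNull(1 << 20, false)); // 1 0
  return 0;
}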
if (!(MI.mayLoadOrStore() && !MI.isPredicable() && diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp index ed3e159ac566..41eef2fed840 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "Spiller.h" #include "SplitKit.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -24,8 +23,8 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervalCalc.h" #include "llvm/CodeGen/LiveIntervals.h" -#include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -40,6 +39,8 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/Spiller.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -113,10 +114,10 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate { /// This is the map from original register to a set containing all its /// siblings. To hoist a spill to another BB, we need to find out a live /// sibling there and use it as the source of the new spill. - DenseMap<unsigned, SmallSetVector<unsigned, 16>> Virt2SiblingsMap; + DenseMap<Register, SmallSetVector<Register, 16>> Virt2SiblingsMap; bool isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI, - MachineBasicBlock &BB, unsigned &LiveReg); + MachineBasicBlock &BB, Register &LiveReg); void rmRedundantSpills( SmallPtrSet<MachineInstr *, 16> &Spills, @@ -175,7 +176,7 @@ class InlineSpiller : public Spiller { unsigned Original; // All registers to spill to StackSlot, including the main register. - SmallVector<unsigned, 8> RegsToSpill; + SmallVector<Register, 8> RegsToSpill; // All COPY instructions to/from snippets. // They are ignored since both operands refer to the same stack slot. 
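The next hunk's isFullCopyOf now returns a Register, exploiting that a default-constructed Register converts to false; given a full COPY touching Reg, it yields the opposite operand. The same shape in a self-contained form, with plain unsigned 0 standing in for the null Register:

#include <cstdio>

struct Copy { unsigned Dst, Src; bool Full; };

static unsigned otherEndOfCopy(const Copy &C, unsigned Reg) {
  if (!C.Full)
    return 0;            // not a full copy: no sibling
  if (C.Dst == Reg)
    return C.Src;
  if (C.Src == Reg)
    return C.Dst;
  return 0;              // copy doesn't touch Reg
}

int main() {
  Copy C{5, 7, true};
  std::printf("%u\n", otherEndOfCopy(C, 5)); // 7
  return 0;
}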
@@ -211,24 +212,24 @@ private: bool isSnippet(const LiveInterval &SnipLI); void collectRegsToSpill(); - bool isRegToSpill(unsigned Reg) { return is_contained(RegsToSpill, Reg); } + bool isRegToSpill(Register Reg) { return is_contained(RegsToSpill, Reg); } - bool isSibling(unsigned Reg); + bool isSibling(Register Reg); bool hoistSpillInsideBB(LiveInterval &SpillLI, MachineInstr &CopyMI); void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI); void markValueUsed(LiveInterval*, VNInfo*); - bool canGuaranteeAssignmentAfterRemat(unsigned VReg, MachineInstr &MI); + bool canGuaranteeAssignmentAfterRemat(Register VReg, MachineInstr &MI); bool reMaterializeFor(LiveInterval &, MachineInstr &MI); void reMaterializeAll(); - bool coalesceStackAccess(MachineInstr *MI, unsigned Reg); + bool coalesceStackAccess(MachineInstr *MI, Register Reg); bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>>, MachineInstr *LoadMI = nullptr); - void insertReload(unsigned VReg, SlotIndex, MachineBasicBlock::iterator MI); - void insertSpill(unsigned VReg, bool isKill, MachineBasicBlock::iterator MI); + void insertReload(Register VReg, SlotIndex, MachineBasicBlock::iterator MI); + void insertSpill(Register VReg, bool isKill, MachineBasicBlock::iterator MI); - void spillAroundUses(unsigned Reg); + void spillAroundUses(Register Reg); void spillAll(); }; @@ -258,21 +259,21 @@ Spiller *llvm::createInlineSpiller(MachineFunctionPass &pass, /// isFullCopyOf - If MI is a COPY to or from Reg, return the other register, /// otherwise return 0. -static unsigned isFullCopyOf(const MachineInstr &MI, unsigned Reg) { +static Register isFullCopyOf(const MachineInstr &MI, Register Reg) { if (!MI.isFullCopy()) - return 0; + return Register(); if (MI.getOperand(0).getReg() == Reg) return MI.getOperand(1).getReg(); if (MI.getOperand(1).getReg() == Reg) return MI.getOperand(0).getReg(); - return 0; + return Register(); } /// isSnippet - Identify if a live interval is a snippet that should be spilled. /// It is assumed that SnipLI is a virtual register with the same original as /// Edit->getReg(). bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) { - unsigned Reg = Edit->getReg(); + Register Reg = Edit->getReg(); // A snippet is a tiny live range with only a single instruction using it // besides copies to/from Reg or spills/fills. We accept: @@ -316,7 +317,7 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) { /// collectRegsToSpill - Collect live range snippets that only have a single /// real use. void InlineSpiller::collectRegsToSpill() { - unsigned Reg = Edit->getReg(); + Register Reg = Edit->getReg(); // Main register always spills. 
RegsToSpill.assign(1, Reg); @@ -330,7 +331,7 @@ void InlineSpiller::collectRegsToSpill() { for (MachineRegisterInfo::reg_instr_iterator RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); RI != E; ) { MachineInstr &MI = *RI++; - unsigned SnipReg = isFullCopyOf(MI, Reg); + Register SnipReg = isFullCopyOf(MI, Reg); if (!isSibling(SnipReg)) continue; LiveInterval &SnipLI = LIS.getInterval(SnipReg); @@ -345,8 +346,8 @@ void InlineSpiller::collectRegsToSpill() { } } -bool InlineSpiller::isSibling(unsigned Reg) { - return Register::isVirtualRegister(Reg) && VRM.getOriginal(Reg) == Original; +bool InlineSpiller::isSibling(Register Reg) { + return Reg.isVirtual() && VRM.getOriginal(Reg) == Original; } /// It is beneficial to spill to earlier place in the same BB in case @@ -431,7 +432,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { do { LiveInterval *LI; std::tie(LI, VNI) = WorkList.pop_back_val(); - unsigned Reg = LI->reg; + Register Reg = LI->reg; LLVM_DEBUG(dbgs() << "Checking redundant spills for " << VNI->id << '@' << VNI->def << " in " << *LI << '\n'); @@ -455,7 +456,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { continue; // Follow sibling copies down the dominator tree. - if (unsigned DstReg = isFullCopyOf(MI, Reg)) { + if (Register DstReg = isFullCopyOf(MI, Reg)) { if (isSibling(DstReg)) { LiveInterval &DstLI = LIS.getInterval(DstReg); VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot()); @@ -517,7 +518,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { } while (!WorkList.empty()); } -bool InlineSpiller::canGuaranteeAssignmentAfterRemat(unsigned VReg, +bool InlineSpiller::canGuaranteeAssignmentAfterRemat(Register VReg, MachineInstr &MI) { if (!RestrictStatepointRemat) return true; @@ -536,7 +537,19 @@ bool InlineSpiller::canGuaranteeAssignmentAfterRemat(unsigned VReg, // At the moment, we only handle this for STATEPOINTs since they're the only // pseudo op where we've seen this. If we start seeing other instructions // with the same problem, we need to revisit this. - return (MI.getOpcode() != TargetOpcode::STATEPOINT); + if (MI.getOpcode() != TargetOpcode::STATEPOINT) + return true; + // For STATEPOINTs we allow re-materialization for fixed arguments only hoping + // that number of physical registers is enough to cover all fixed arguments. + // If it is not true we need to revisit it. + for (unsigned Idx = StatepointOpers(&MI).getVarIdx(), + EndIdx = MI.getNumOperands(); + Idx < EndIdx; ++Idx) { + MachineOperand &MO = MI.getOperand(Idx); + if (MO.isReg() && MO.getReg() == VReg) + return false; + } + return true; } /// reMaterializeFor - Attempt to rematerialize before MI instead of reloading. @@ -602,7 +615,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { } // Allocate a new register for the remat. - unsigned NewVReg = Edit->createFrom(Original); + Register NewVReg = Edit->createFrom(Original); // Finally we can rematerialize OrigMI before MI. SlotIndex DefIdx = @@ -641,7 +654,7 @@ void InlineSpiller::reMaterializeAll() { // Try to remat before all uses of snippets. bool anyRemat = false; - for (unsigned Reg : RegsToSpill) { + for (Register Reg : RegsToSpill) { LiveInterval &LI = LIS.getInterval(Reg); for (MachineRegisterInfo::reg_bundle_iterator RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end(); @@ -662,7 +675,7 @@ void InlineSpiller::reMaterializeAll() { return; // Remove any values that were completely rematted. 
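The STATEPOINT change above is the interesting rematerialization guard: remat stays allowed only when the register is confined to the statepoint's fixed arguments, since those are what the target can reasonably cover with physical registers. A toy version of the scan over the variable-operand tail (VarIdx marks where the fixed arguments end):

#include <cstdio>
#include <vector>

static bool canRematForStatepoint(const std::vector<unsigned> &Ops,
                                  unsigned VarIdx, unsigned VReg) {
  for (unsigned I = VarIdx; I < Ops.size(); ++I)
    if (Ops[I] == VReg)
      return false;  // used past the fixed-argument section: refuse remat
  return true;
}

int main() {
  std::vector<unsigned> Ops{1, 2, 3, 4};
  std::printf("%d\n", canRematForStatepoint(Ops, 2, 3)); // 0
  return 0;
}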
- for (unsigned Reg : RegsToSpill) { + for (Register Reg : RegsToSpill) { LiveInterval &LI = LIS.getInterval(Reg); for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end(); I != E; ++I) { @@ -692,7 +705,7 @@ void InlineSpiller::reMaterializeAll() { // So to get rid of unused reg, we need to check whether it has non-dbg // reference instead of whether it has non-empty interval. unsigned ResultPos = 0; - for (unsigned Reg : RegsToSpill) { + for (Register Reg : RegsToSpill) { if (MRI.reg_nodbg_empty(Reg)) { Edit->eraseVirtReg(Reg); continue; @@ -714,9 +727,9 @@ void InlineSpiller::reMaterializeAll() { //===----------------------------------------------------------------------===// /// If MI is a load or store of StackSlot, it can be removed. -bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) { +bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, Register Reg) { int FI = 0; - unsigned InstrReg = TII.isLoadFromStackSlot(*MI, FI); + Register InstrReg = TII.isLoadFromStackSlot(*MI, FI); bool IsLoad = InstrReg; if (!IsLoad) InstrReg = TII.isStoreToStackSlot(*MI, FI); @@ -750,7 +763,7 @@ static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E, LiveIntervals const &LIS, const char *const header, - unsigned VReg =0) { + Register VReg = Register()) { char NextLine = '\n'; char SlotIndent = '\t'; @@ -795,7 +808,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, return false; bool WasCopy = MI->isCopy(); - unsigned ImpReg = 0; + Register ImpReg; // Spill subregs if the target allows it. // We always want to spill subregs for stackmap/patchpoint pseudos. @@ -864,7 +877,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, HSpiller.rmFromMergeableSpills(*MI, FI)) --NumSpills; LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI); - if (MI->isCall()) + // Update the call site info. + if (MI->isCandidateForCallSiteEntry()) MI->getMF()->moveCallSiteInfo(MI, FoldMI); MI->eraseFromParent(); @@ -898,7 +912,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, return true; } -void InlineSpiller::insertReload(unsigned NewVReg, +void InlineSpiller::insertReload(Register NewVReg, SlotIndex Idx, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); @@ -917,47 +931,51 @@ void InlineSpiller::insertReload(unsigned NewVReg, /// Check if \p Def fully defines a VReg with an undefined value. /// If that's the case, that means the value of VReg is actually /// not relevant. -static bool isFullUndefDef(const MachineInstr &Def) { +static bool isRealSpill(const MachineInstr &Def) { if (!Def.isImplicitDef()) - return false; + return true; assert(Def.getNumOperands() == 1 && "Implicit def with more than one definition"); // We can say that the VReg defined by Def is undef, only if it is // fully defined by Def. Otherwise, some of the lanes may not be // undef and the value of the VReg matters. - return !Def.getOperand(0).getSubReg(); + return Def.getOperand(0).getSubReg(); } /// insertSpill - Insert a spill of NewVReg after MI. -void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill, +void InlineSpiller::insertSpill(Register NewVReg, bool isKill, MachineBasicBlock::iterator MI) { + // Spill are not terminators, so inserting spills after terminators will + // violate invariants in MachineVerifier. 
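isFullUndefDef is inverted above into isRealSpill: a register wholly defined by IMPLICIT_DEF carries no meaningful value, so the spiller emits a KILL instead of a store, and only a subregister implicit def (a partial undef, where some lanes may matter) still forces a real spill. In miniature:

#include <cstdio>

struct Def { bool ImplicitDef; unsigned SubReg; };

static bool isRealSpill(const Def &D) {
  if (!D.ImplicitDef)
    return true;          // normal def: the value must be stored
  return D.SubReg != 0;   // partial undef: remaining lanes may matter
}

int main() {
  std::printf("%d %d\n", isRealSpill({true, 0}),   // fully undef -> 0
              isRealSpill({false, 0}));            // real def    -> 1
  return 0;
}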
+ assert(!MI->isTerminator() && "Inserting a spill after a terminator"); MachineBasicBlock &MBB = *MI->getParent(); MachineInstrSpan MIS(MI, &MBB); - bool IsRealSpill = true; - if (isFullUndefDef(*MI)) { + MachineBasicBlock::iterator SpillBefore = std::next(MI); + bool IsRealSpill = isRealSpill(*MI); + if (IsRealSpill) + TII.storeRegToStackSlot(MBB, SpillBefore, NewVReg, isKill, StackSlot, + MRI.getRegClass(NewVReg), &TRI); + else // Don't spill undef value. // Anything works for undef, in particular keeping the memory // uninitialized is a viable option and it saves code size and // run time. - BuildMI(MBB, std::next(MI), MI->getDebugLoc(), TII.get(TargetOpcode::KILL)) + BuildMI(MBB, SpillBefore, MI->getDebugLoc(), TII.get(TargetOpcode::KILL)) .addReg(NewVReg, getKillRegState(isKill)); - IsRealSpill = false; - } else - TII.storeRegToStackSlot(MBB, std::next(MI), NewVReg, isKill, StackSlot, - MRI.getRegClass(NewVReg), &TRI); - LIS.InsertMachineInstrRangeInMaps(std::next(MI), MIS.end()); + MachineBasicBlock::iterator Spill = std::next(MI); + LIS.InsertMachineInstrRangeInMaps(Spill, MIS.end()); - LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS, - "spill")); + LLVM_DEBUG( + dumpMachineInstrRangeWithSlotIndex(Spill, MIS.end(), LIS, "spill")); ++NumSpills; if (IsRealSpill) - HSpiller.addToMergeableSpills(*std::next(MI), StackSlot, Original); + HSpiller.addToMergeableSpills(*Spill, StackSlot, Original); } /// spillAroundUses - insert spill code around each use of Reg. -void InlineSpiller::spillAroundUses(unsigned Reg) { +void InlineSpiller::spillAroundUses(Register Reg) { LLVM_DEBUG(dbgs() << "spillAroundUses " << printReg(Reg) << '\n'); LiveInterval &OldLI = LIS.getInterval(Reg); @@ -1000,7 +1018,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { Idx = VNI->def; // Check for a sibling copy. - unsigned SibReg = isFullCopyOf(*MI, Reg); + Register SibReg = isFullCopyOf(*MI, Reg); if (SibReg && isSibling(SibReg)) { // This may actually be a copy between snippets. if (isRegToSpill(SibReg)) { @@ -1029,7 +1047,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Create a new virtual register for spill/fill. // FIXME: Infer regclass from instruction alone. - unsigned NewVReg = Edit->createFrom(Reg); + Register NewVReg = Edit->createFrom(Reg); if (RI.Reads) insertReload(NewVReg, Idx, MI); @@ -1070,13 +1088,13 @@ void InlineSpiller::spillAll() { VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot); assert(StackInt->getNumValNums() == 1 && "Bad stack interval values"); - for (unsigned Reg : RegsToSpill) + for (Register Reg : RegsToSpill) StackInt->MergeSegmentsInAsValue(LIS.getInterval(Reg), StackInt->getValNumInfo(0)); LLVM_DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n'); // Spill around uses of all RegsToSpill. - for (unsigned Reg : RegsToSpill) + for (Register Reg : RegsToSpill) spillAroundUses(Reg); // Hoisted spills may cause dead code. @@ -1086,7 +1104,7 @@ void InlineSpiller::spillAll() { } // Finally delete the SnippetCopies. - for (unsigned Reg : RegsToSpill) { + for (Register Reg : RegsToSpill) { for (MachineRegisterInfo::reg_instr_iterator RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); RI != E; ) { @@ -1099,7 +1117,7 @@ void InlineSpiller::spillAll() { } // Delete all spilled registers. 
- for (unsigned Reg : RegsToSpill) + for (Register Reg : RegsToSpill) Edit->eraseVirtReg(Reg); } @@ -1168,18 +1186,18 @@ bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr &Spill, /// Check BB to see if it is a possible target BB to place a hoisted spill, /// i.e., there should be a living sibling of OrigReg at the insert point. bool HoistSpillHelper::isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI, - MachineBasicBlock &BB, unsigned &LiveReg) { + MachineBasicBlock &BB, Register &LiveReg) { SlotIndex Idx; - unsigned OrigReg = OrigLI.reg; + Register OrigReg = OrigLI.reg; MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, BB); if (MI != BB.end()) Idx = LIS.getInstructionIndex(*MI); else Idx = LIS.getMBBEndIdx(&BB).getPrevSlot(); - SmallSetVector<unsigned, 16> &Siblings = Virt2SiblingsMap[OrigReg]; + SmallSetVector<Register, 16> &Siblings = Virt2SiblingsMap[OrigReg]; assert(OrigLI.getVNInfoAt(Idx) == &OrigVNI && "Unexpected VNI"); - for (auto const SibReg : Siblings) { + for (const Register &SibReg : Siblings) { LiveInterval &LI = LIS.getInterval(SibReg); VNInfo *VNI = LI.getVNInfoAt(Idx); if (VNI) { @@ -1288,10 +1306,7 @@ void HoistSpillHelper::getVisitOrders( Orders.push_back(MDT.getBase().getNode(Root)); do { MachineDomTreeNode *Node = Orders[idx++]; - const std::vector<MachineDomTreeNode *> &Children = Node->getChildren(); - unsigned NumChildren = Children.size(); - for (unsigned i = 0; i != NumChildren; ++i) { - MachineDomTreeNode *Child = Children[i]; + for (MachineDomTreeNode *Child : Node->children()) { if (WorkSet.count(Child)) Orders.push_back(Child); } @@ -1359,10 +1374,7 @@ void HoistSpillHelper::runHoistSpills( // Collect spills in subtree of current node (*RIt) to // SpillsInSubTreeMap[*RIt].first. - const std::vector<MachineDomTreeNode *> &Children = (*RIt)->getChildren(); - unsigned NumChildren = Children.size(); - for (unsigned i = 0; i != NumChildren; ++i) { - MachineDomTreeNode *Child = Children[i]; + for (MachineDomTreeNode *Child : (*RIt)->children()) { if (SpillsInSubTreeMap.find(Child) == SpillsInSubTreeMap.end()) continue; // The stmt "SpillsInSubTree = SpillsInSubTreeMap[*RIt].first" below @@ -1388,7 +1400,7 @@ void HoistSpillHelper::runHoistSpills( continue; // Check whether Block is a possible candidate to insert spill. - unsigned LiveReg = 0; + Register LiveReg; if (!isSpillCandBB(OrigLI, OrigVNI, *Block, LiveReg)) continue; @@ -1450,12 +1462,12 @@ void HoistSpillHelper::runHoistSpills( /// inside its subtree to that node. In this way, we can get benefit locally /// even if hoisting all the equal spills to one cold place is impossible. void HoistSpillHelper::hoistAllSpills() { - SmallVector<unsigned, 4> NewVRegs; + SmallVector<Register, 4> NewVRegs; LiveRangeEdit Edit(nullptr, NewVRegs, MF, LIS, &VRM, this); for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { - unsigned Reg = Register::index2VirtReg(i); - unsigned Original = VRM.getPreSplitReg(Reg); + Register Reg = Register::index2VirtReg(i); + Register Original = VRM.getPreSplitReg(Reg); if (!MRI.def_empty(Reg)) Virt2SiblingsMap[Original].insert(Reg); } @@ -1503,7 +1515,7 @@ void HoistSpillHelper::hoistAllSpills() { // Insert hoisted spills. 
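The traversal cleanups above (Node->children() replacing the old getChildren() vector) all drive the same worklist pattern: seed the vector with the root, then append each visited node's children, so the vector itself becomes the visit order. Reduced to a toy tree:

#include <cstdio>
#include <vector>

struct Node { int Id; std::vector<Node *> Kids; };

int main() {
  Node C{2, {}}, B{1, {&C}}, A{0, {&B}};
  std::vector<Node *> Orders{&A};
  for (size_t I = 0; I < Orders.size(); ++I)  // grows while iterating
    for (Node *Child : Orders[I]->Kids)
      Orders.push_back(Child);
  for (Node *N : Orders)
    std::printf("%d ", N->Id);                // 0 1 2
  std::printf("\n");
  return 0;
}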
for (auto const &Insert : SpillsToIns) { MachineBasicBlock *BB = Insert.first; - unsigned LiveReg = Insert.second; + Register LiveReg = Insert.second; MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, *BB); TII.storeRegToStackSlot(*BB, MI, LiveReg, false, Slot, MRI.getRegClass(LiveReg), &TRI); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h index 50c6ac62d194..9019e9f61fa0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h @@ -157,8 +157,6 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache { Entry *get(unsigned PhysReg); public: - friend class Cursor; - InterferenceCache() = default; ~InterferenceCache() { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 1f9b436378d2..c4d83547a06c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -280,7 +280,7 @@ static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor, bool InterleavedAccess::lowerInterleavedLoad( LoadInst *LI, SmallVector<Instruction *, 32> &DeadInsts) { - if (!LI->isSimple()) + if (!LI->isSimple() || isa<ScalableVectorType>(LI->getType())) return false; SmallVector<ShuffleVectorInst *, 4> Shuffles; @@ -308,7 +308,8 @@ bool InterleavedAccess::lowerInterleavedLoad( unsigned Factor, Index; - unsigned NumLoadElements = LI->getType()->getVectorNumElements(); + unsigned NumLoadElements = + cast<FixedVectorType>(LI->getType())->getNumElements(); // Check if the first shufflevector is DE-interleave shuffle. if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index, MaxFactor, NumLoadElements)) @@ -421,12 +422,13 @@ bool InterleavedAccess::lowerInterleavedStore( return false; ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand()); - if (!SVI || !SVI->hasOneUse()) + if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType())) return false; // Check if the shufflevector is RE-interleave shuffle. unsigned Factor; - unsigned OpNumElts = SVI->getOperand(0)->getType()->getVectorNumElements(); + unsigned OpNumElts = + cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements(); if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts)) return false; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp index 42691b8a6154..f7131926ee65 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -673,9 +673,9 @@ public: ElementInfo *EI; /// Vector Type - VectorType *const VTy; + FixedVectorType *const VTy; - VectorInfo(VectorType *VTy) + VectorInfo(FixedVectorType *VTy) : BB(nullptr), PV(nullptr), LIs(), Is(), SVI(nullptr), VTy(VTy) { EI = new ElementInfo[VTy->getNumElements()]; } @@ -735,7 +735,7 @@ public: if (!Op) return false; - VectorType *VTy = dyn_cast<VectorType>(Op->getType()); + FixedVectorType *VTy = dyn_cast<FixedVectorType>(Op->getType()); if (!VTy) return false; @@ -785,8 +785,8 @@ public: /// \returns false if no sensible information can be gathered. 
static bool computeFromSVI(ShuffleVectorInst *SVI, VectorInfo &Result, const DataLayout &DL) { - VectorType *ArgTy = dyn_cast<VectorType>(SVI->getOperand(0)->getType()); - assert(ArgTy && "ShuffleVector Operand is not a VectorType"); + FixedVectorType *ArgTy = + cast<FixedVectorType>(SVI->getOperand(0)->getType()); // Compute the left hand vector information. VectorInfo LHS(ArgTy); @@ -1200,14 +1200,15 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad, IRBuilder<> Builder(InsertionPoint); Type *ETy = InterleavedLoad.front().SVI->getType()->getElementType(); unsigned ElementsPerSVI = - InterleavedLoad.front().SVI->getType()->getNumElements(); - VectorType *ILTy = VectorType::get(ETy, Factor * ElementsPerSVI); + cast<FixedVectorType>(InterleavedLoad.front().SVI->getType()) + ->getNumElements(); + FixedVectorType *ILTy = FixedVectorType::get(ETy, Factor * ElementsPerSVI); SmallVector<unsigned, 4> Indices; for (unsigned i = 0; i < Factor; i++) Indices.push_back(i); InterleavedCost = TTI.getInterleavedMemoryOpCost( - Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlignment(), + Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlign(), InsertionPoint->getPointerAddressSpace()); if (InterleavedCost >= InstructionCost) { @@ -1220,7 +1221,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad, "interleaved.wide.ptrcast"); // Create the wide load and update the MemorySSA. - auto LI = Builder.CreateAlignedLoad(ILTy, CI, InsertionPoint->getAlignment(), + auto LI = Builder.CreateAlignedLoad(ILTy, CI, InsertionPoint->getAlign(), "interleaved.wide.load"); auto MSSAU = MemorySSAUpdater(&MSSA); MemoryUse *MSSALoad = cast<MemoryUse>(MSSAU.createMemoryAccessBefore( @@ -1230,7 +1231,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad, // Create the final SVIs and replace all uses. int i = 0; for (auto &VI : InterleavedLoad) { - SmallVector<uint32_t, 4> Mask; + SmallVector<int, 4> Mask; for (unsigned j = 0; j < ElementsPerSVI; j++) Mask.push_back(i + j * Factor); @@ -1265,8 +1266,11 @@ bool InterleavedLoadCombineImpl::run() { for (BasicBlock &BB : F) { for (Instruction &I : BB) { if (auto SVI = dyn_cast<ShuffleVectorInst>(&I)) { + // We don't support scalable vectors in this pass. 
+ if (isa<ScalableVectorType>(SVI->getType())) + continue; - Candidates.emplace_back(SVI->getType()); + Candidates.emplace_back(cast<FixedVectorType>(SVI->getType())); if (!VectorInfo::computeFromSVI(SVI, Candidates.back(), DL)) { Candidates.pop_back(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp index 4461a235d6c1..e37c21e76597 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -12,7 +12,6 @@ #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -203,22 +202,21 @@ static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) { static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname, const char *Dname, const char *LDname) { - CallSite CS(CI); switch (CI->getArgOperand(0)->getType()->getTypeID()) { default: llvm_unreachable("Invalid type in intrinsic"); case Type::FloatTyID: - ReplaceCallWith(Fname, CI, CS.arg_begin(), CS.arg_end(), - Type::getFloatTy(CI->getContext())); + ReplaceCallWith(Fname, CI, CI->arg_begin(), CI->arg_end(), + Type::getFloatTy(CI->getContext())); break; case Type::DoubleTyID: - ReplaceCallWith(Dname, CI, CS.arg_begin(), CS.arg_end(), - Type::getDoubleTy(CI->getContext())); + ReplaceCallWith(Dname, CI, CI->arg_begin(), CI->arg_end(), + Type::getDoubleTy(CI->getContext())); break; case Type::X86_FP80TyID: case Type::FP128TyID: case Type::PPC_FP128TyID: - ReplaceCallWith(LDname, CI, CS.arg_begin(), CS.arg_end(), - CI->getArgOperand(0)->getType()); + ReplaceCallWith(LDname, CI, CI->arg_begin(), CI->arg_end(), + CI->getArgOperand(0)->getType()); break; } } @@ -230,7 +228,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { const Function *Callee = CI->getCalledFunction(); assert(Callee && "Cannot lower an indirect call!"); - CallSite CS(CI); switch (Callee->getIntrinsicID()) { case Intrinsic::not_intrinsic: report_fatal_error("Cannot lower a call to a non-intrinsic function '"+ @@ -424,6 +421,10 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { ReplaceFPIntrinsicWithCall(CI, "roundf", "round", "roundl"); break; } + case Intrinsic::roundeven: { + ReplaceFPIntrinsicWithCall(CI, "roundevenf", "roundeven", "roundevenl"); + break; + } case Intrinsic::copysign: { ReplaceFPIntrinsicWithCall(CI, "copysignf", "copysign", "copysignl"); break; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp index 50c178ff7598..b485f2cf7261 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -157,9 +157,6 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, if (!MCE || !MAB) return true; - // Don't waste memory on names of temp labels. 
- Context.setUseNamesOnTempLabels(false); - Triple T(getTargetTriple().str()); AsmStreamer.reset(getTarget().createMCObjectStreamer( T, Context, std::unique_ptr<MCAsmBackend>(MAB), diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp index ac3ef0e709f3..690b429832a5 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp @@ -44,6 +44,7 @@ void LexicalScopes::reset() { AbstractScopeMap.clear(); InlinedLexicalScopeMap.clear(); AbstractScopesList.clear(); + DominatedBlocks.clear(); } /// initialize - Scan machine function and constuct lexical scope nest. @@ -229,24 +230,24 @@ LexicalScopes::getOrCreateAbstractScope(const DILocalScope *Scope) { return &I->second; } -/// constructScopeNest +/// constructScopeNest - Traverse the Scope tree depth-first, storing +/// traversal state in WorkStack and recording the depth-first +/// numbering (setDFSIn, setDFSOut) for edge classification. void LexicalScopes::constructScopeNest(LexicalScope *Scope) { assert(Scope && "Unable to calculate scope dominance graph!"); - SmallVector<LexicalScope *, 4> WorkStack; - WorkStack.push_back(Scope); + SmallVector<std::pair<LexicalScope *, size_t>, 4> WorkStack; + WorkStack.push_back(std::make_pair(Scope, 0)); unsigned Counter = 0; while (!WorkStack.empty()) { - LexicalScope *WS = WorkStack.back(); + auto &ScopePosition = WorkStack.back(); + LexicalScope *WS = ScopePosition.first; + size_t ChildNum = ScopePosition.second++; const SmallVectorImpl<LexicalScope *> &Children = WS->getChildren(); - bool visitedChildren = false; - for (auto &ChildScope : Children) - if (!ChildScope->getDFSOut()) { - WorkStack.push_back(ChildScope); - visitedChildren = true; - ChildScope->setDFSIn(++Counter); - break; - } - if (!visitedChildren) { + if (ChildNum < Children.size()) { + auto &ChildScope = Children[ChildNum]; + WorkStack.push_back(std::make_pair(ChildScope, 0)); + ChildScope->setDFSIn(++Counter); + } else { WorkStack.pop_back(); WS->setDFSOut(++Counter); } @@ -291,13 +292,17 @@ void LexicalScopes::getMachineBasicBlocks( return; } + // The scope ranges can cover multiple basic blocks in each span. Iterate over + // all blocks (in the order they are in the function) until we reach the one + // containing the end of the span. SmallVectorImpl<InsnRange> &InsnRanges = Scope->getRanges(); for (auto &R : InsnRanges) - MBBs.insert(R.first->getParent()); + for (auto CurMBBIt = R.first->getParent()->getIterator(), + EndBBIt = std::next(R.second->getParent()->getIterator()); + CurMBBIt != EndBBIt; CurMBBIt++) + MBBs.insert(&*CurMBBIt); } -/// dominates - Return true if DebugLoc's lexical scope dominates at least one -/// machine instruction's lexical scope in a given machine basic block. bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) { assert(MF && "Unexpected uninitialized LexicalScopes object!"); LexicalScope *Scope = getOrCreateLexicalScope(DL); @@ -308,14 +313,18 @@ bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) { if (Scope == CurrentFnLexicalScope && MBB->getParent() == MF) return true; - bool Result = false; - for (auto &I : *MBB) { - if (const DILocation *IDL = I.getDebugLoc()) - if (LexicalScope *IScope = getOrCreateLexicalScope(IDL)) - if (Scope->dominates(IScope)) - return true; + // Fetch all the blocks in DLs scope. 
Because the range / block list also + // contain any subscopes, any instruction that DL dominates can be found in + // the block set. + // + // Cache the set of fetched blocks to avoid repeatedly recomputing the set in + // the LiveDebugValues pass. + std::unique_ptr<BlockSetT> &Set = DominatedBlocks[DL]; + if (!Set) { + Set = std::make_unique<BlockSetT>(); + getMachineBasicBlocks(DL, *Set); } - return Result; + return Set->count(MBB) != 0; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues.cpp index 2226c10b49a4..07a275b546f6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues.cpp @@ -6,32 +6,107 @@ // //===----------------------------------------------------------------------===// /// -/// This pass implements a data flow analysis that propagates debug location -/// information by inserting additional DBG_VALUE insts into the machine -/// instruction stream. Before running, each DBG_VALUE inst corresponds to a -/// source assignment of a variable. Afterwards, a DBG_VALUE inst specifies a -/// variable location for the current basic block (see SourceLevelDebugging.rst). +/// \file LiveDebugValues.cpp /// -/// This is a separate pass from DbgValueHistoryCalculator to facilitate -/// testing and improve modularity. +/// LiveDebugValues is an optimistic "available expressions" dataflow +/// algorithm. The set of expressions is the set of machine locations +/// (registers, spill slots, constants) that a variable fragment might be +/// located, qualified by a DIExpression and indirect-ness flag, while each +/// variable is identified by a DebugVariable object. The availability of an +/// expression begins when a DBG_VALUE instruction specifies the location of a +/// DebugVariable, and continues until that location is clobbered or +/// re-specified by a different DBG_VALUE for the same DebugVariable. /// -/// Each variable location is represented by a VarLoc object that identifies the -/// source variable, its current machine-location, and the DBG_VALUE inst that -/// specifies the location. Each VarLoc is indexed in the (function-scope) -/// VarLocMap, giving each VarLoc a unique index. Rather than operate directly -/// on machine locations, the dataflow analysis in this pass identifies -/// locations by their index in the VarLocMap, meaning all the variable -/// locations in a block can be described by a sparse vector of VarLocMap -/// indexes. +/// The cannonical "available expressions" problem doesn't have expression +/// clobbering, instead when a variable is re-assigned, any expressions using +/// that variable get invalidated. LiveDebugValues can map onto "available +/// expressions" by having every register represented by a variable, which is +/// used in an expression that becomes available at a DBG_VALUE instruction. +/// When the register is clobbered, its variable is effectively reassigned, and +/// expressions computed from it become unavailable. A similar construct is +/// needed when a DebugVariable has its location re-specified, to invalidate +/// all other locations for that DebugVariable. +/// +/// Using the dataflow analysis to compute the available expressions, we create +/// a DBG_VALUE at the beginning of each block where the expression is +/// live-in. 
This propagates variable locations into every basic block where +/// the location can be determined, rather than only having DBG_VALUEs in blocks +/// where locations are specified due to an assignment or some optimization. +/// Movements of values between registers and spill slots are annotated with +/// DBG_VALUEs too to track variable values bewteen locations. All this allows +/// DbgEntityHistoryCalculator to focus on only the locations within individual +/// blocks, facilitating testing and improving modularity. +/// +/// We follow an optimisic dataflow approach, with this lattice: +/// +/// \verbatim +/// ┬ "Unknown" +/// | +/// v +/// True +/// | +/// v +/// ⊥ False +/// \endverbatim With "True" signifying that the expression is available (and +/// thus a DebugVariable's location is the corresponding register), while +/// "False" signifies that the expression is unavailable. "Unknown"s never +/// survive to the end of the analysis (see below). +/// +/// Formally, all DebugVariable locations that are live-out of a block are +/// initialized to \top. A blocks live-in values take the meet of the lattice +/// value for every predecessors live-outs, except for the entry block, where +/// all live-ins are \bot. The usual dataflow propagation occurs: the transfer +/// function for a block assigns an expression for a DebugVariable to be "True" +/// if a DBG_VALUE in the block specifies it; "False" if the location is +/// clobbered; or the live-in value if it is unaffected by the block. We +/// visit each block in reverse post order until a fixedpoint is reached. The +/// solution produced is maximal. +/// +/// Intuitively, we start by assuming that every expression / variable location +/// is at least "True", and then propagate "False" from the entry block and any +/// clobbers until there are no more changes to make. This gives us an accurate +/// solution because all incorrect locations will have a "False" propagated into +/// them. It also gives us a solution that copes well with loops by assuming +/// that variable locations are live-through every loop, and then removing those +/// that are not through dataflow. +/// +/// Within LiveDebugValues: each variable location is represented by a +/// VarLoc object that identifies the source variable, its current +/// machine-location, and the DBG_VALUE inst that specifies the location. Each +/// VarLoc is indexed in the (function-scope) \p VarLocMap, giving each VarLoc a +/// unique index. Rather than operate directly on machine locations, the +/// dataflow analysis in this pass identifies locations by their index in the +/// VarLocMap, meaning all the variable locations in a block can be described +/// by a sparse vector of VarLocMap indicies. +/// +/// All the storage for the dataflow analysis is local to the ExtendRanges +/// method and passed down to helper methods. "OutLocs" and "InLocs" record the +/// in and out lattice values for each block. "OpenRanges" maintains a list of +/// variable locations and, with the "process" method, evaluates the transfer +/// function of each block. "flushPendingLocs" installs DBG_VALUEs for each +/// live-in location at the start of blocks, while "Transfers" records +/// transfers of values between machine-locations. +/// +/// We avoid explicitly representing the "Unknown" (\top) lattice value in the +/// implementation. Instead, unvisited blocks implicitly have all lattice +/// values set as "Unknown". 
After being visited, there will be path back to +/// the entry block where the lattice value is "False", and as the transfer +/// function cannot make new "Unknown" locations, there are no scenarios where +/// a block can have an "Unknown" location after being visited. Similarly, we +/// don't enumerate all possible variable locations before exploring the +/// function: when a new location is discovered, all blocks previously explored +/// were implicitly "False" but unrecorded, and become explicitly "False" when +/// a new VarLoc is created with its bit not set in predecessor InLocs or +/// OutLocs. /// //===----------------------------------------------------------------------===// +#include "llvm/ADT/CoalescingBitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/UniqueVector.h" #include "llvm/CodeGen/LexicalScopes.h" @@ -64,6 +139,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -78,7 +154,18 @@ using namespace llvm; #define DEBUG_TYPE "livedebugvalues" STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted"); -STATISTIC(NumRemoved, "Number of DBG_VALUE instructions removed"); + +// Options to prevent pathological compile-time behavior. If InputBBLimit and +// InputDbgValueLimit are both exceeded, range extension is disabled. +static cl::opt<unsigned> InputBBLimit( + "livedebugvalues-input-bb-limit", + cl::desc("Maximum input basic blocks before DBG_VALUE limit applies"), + cl::init(10000), cl::Hidden); +static cl::opt<unsigned> InputDbgValueLimit( + "livedebugvalues-input-dbg-value-limit", + cl::desc( + "Maximum input DBG_VALUE insts supported by debug range extension"), + cl::init(50000), cl::Hidden); // If @MI is a DBG_VALUE with debug value described by a defined // register, returns the number of this register. In the other case, returns 0. @@ -87,7 +174,8 @@ static Register isDbgValueDescribedByReg(const MachineInstr &MI) { assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE"); // If location of variable is described using a register (directly // or indirectly), this register is always a first operand. - return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : Register(); + return MI.getDebugOperand(0).isReg() ? MI.getDebugOperand(0).getReg() + : Register(); } /// If \p Op is a stack or frame register return true, otherwise return false. @@ -101,7 +189,7 @@ static bool isRegOtherThanSPAndFP(const MachineOperand &Op, const MachineFunction *MF = MI.getParent()->getParent(); const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); - unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + Register SP = TLI->getStackPointerRegisterToSaveRestore(); Register FP = TRI->getFrameRegister(*MF); Register Reg = Op.getReg(); @@ -110,8 +198,72 @@ static bool isRegOtherThanSPAndFP(const MachineOperand &Op, namespace { +// Max out the number of statically allocated elements in DefinedRegsSet, as +// this prevents fallback to std::set::count() operations. using DefinedRegsSet = SmallSet<Register, 32>; +using VarLocSet = CoalescingBitVector<uint64_t>; + +/// A type-checked pair of {Register Location (or 0), Index}, used to index +/// into a \ref VarLocMap. 
This can be efficiently converted to a 64-bit int +/// for insertion into a \ref VarLocSet, and efficiently converted back. The +/// type-checker helps ensure that the conversions aren't lossy. +/// +/// Why encode a location /into/ the VarLocMap index? This makes it possible +/// to find the open VarLocs killed by a register def very quickly. This is a +/// performance-critical operation for LiveDebugValues. +struct LocIndex { + using u32_location_t = uint32_t; + using u32_index_t = uint32_t; + + u32_location_t Location; // Physical registers live in the range [1;2^30) (see + // \ref MCRegister), so we have plenty of range left + // here to encode non-register locations. + u32_index_t Index; + + /// The first location greater than 0 that is not reserved for VarLocs of + /// kind RegisterKind. + static constexpr u32_location_t kFirstInvalidRegLocation = 1 << 30; + + /// A special location reserved for VarLocs of kind SpillLocKind. + static constexpr u32_location_t kSpillLocation = kFirstInvalidRegLocation; + + /// A special location reserved for VarLocs of kind EntryValueBackupKind and + /// EntryValueCopyBackupKind. + static constexpr u32_location_t kEntryValueBackupLocation = + kFirstInvalidRegLocation + 1; + + LocIndex(u32_location_t Location, u32_index_t Index) + : Location(Location), Index(Index) {} + + uint64_t getAsRawInteger() const { + return (static_cast<uint64_t>(Location) << 32) | Index; + } + + template<typename IntT> static LocIndex fromRawInteger(IntT ID) { + static_assert(std::is_unsigned<IntT>::value && + sizeof(ID) == sizeof(uint64_t), + "Cannot convert raw integer to LocIndex"); + return {static_cast<u32_location_t>(ID >> 32), + static_cast<u32_index_t>(ID)}; + } + + /// Get the start of the interval reserved for VarLocs of kind RegisterKind + /// which reside in \p Reg. The end is at rawIndexForReg(Reg+1)-1. + static uint64_t rawIndexForReg(uint32_t Reg) { + return LocIndex(Reg, 0).getAsRawInteger(); + } + + /// Return a range covering all set indices in the interval reserved for + /// \p Location in \p Set. + static auto indexRangeForLocation(const VarLocSet &Set, + u32_location_t Location) { + uint64_t Start = LocIndex(Location, 0).getAsRawInteger(); + uint64_t End = LocIndex(Location + 1, 0).getAsRawInteger(); + return Set.half_open_range(Start, End); + } +}; + class LiveDebugValues : public MachineFunctionPass { private: const TargetRegisterInfo *TRI; @@ -119,28 +271,10 @@ private: const TargetFrameLowering *TFI; BitVector CalleeSavedRegs; LexicalScopes LS; + VarLocSet::Allocator Alloc; enum struct TransferKind { TransferCopy, TransferSpill, TransferRestore }; - /// Keeps track of lexical scopes associated with a user value's source - /// location. - class UserValueScopes { - DebugLoc DL; - LexicalScopes &LS; - SmallPtrSet<const MachineBasicBlock *, 4> LBlocks; - - public: - UserValueScopes(DebugLoc D, LexicalScopes &L) : DL(std::move(D)), LS(L) {} - - /// Return true if current scope dominates at least one machine - /// instruction in a given machine basic block. 
- bool dominates(MachineBasicBlock *MBB) { - if (LBlocks.empty()) - LS.getMachineBasicBlocks(DL, LBlocks); - return LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB); - } - }; - using FragmentInfo = DIExpression::FragmentInfo; using OptFragmentInfo = Optional<DIExpression::FragmentInfo>; @@ -154,6 +288,9 @@ private: bool operator==(const SpillLoc &Other) const { return SpillBase == Other.SpillBase && SpillOffset == Other.SpillOffset; } + bool operator!=(const SpillLoc &Other) const { + return !(*this == Other); + } }; /// Identity of the variable at this location. @@ -166,7 +303,6 @@ private: /// is moved. const MachineInstr &MI; - mutable UserValueScopes UVS; enum VarLocKind { InvalidKind = 0, RegisterKind, @@ -191,7 +327,7 @@ private: VarLoc(const MachineInstr &MI, LexicalScopes &LS) : Var(MI.getDebugVariable(), MI.getDebugExpression(), MI.getDebugLoc()->getInlinedAt()), - Expr(MI.getDebugExpression()), MI(MI), UVS(MI.getDebugLoc(), LS) { + Expr(MI.getDebugExpression()), MI(MI) { static_assert((sizeof(Loc) == sizeof(uint64_t)), "hash does not cover all members of Loc"); assert(MI.isDebugValue() && "not a DBG_VALUE"); @@ -199,15 +335,15 @@ private: if (int RegNo = isDbgValueDescribedByReg(MI)) { Kind = RegisterKind; Loc.RegNo = RegNo; - } else if (MI.getOperand(0).isImm()) { + } else if (MI.getDebugOperand(0).isImm()) { Kind = ImmediateKind; - Loc.Immediate = MI.getOperand(0).getImm(); - } else if (MI.getOperand(0).isFPImm()) { + Loc.Immediate = MI.getDebugOperand(0).getImm(); + } else if (MI.getDebugOperand(0).isFPImm()) { Kind = ImmediateKind; - Loc.FPImm = MI.getOperand(0).getFPImm(); - } else if (MI.getOperand(0).isCImm()) { + Loc.FPImm = MI.getDebugOperand(0).getFPImm(); + } else if (MI.getDebugOperand(0).isCImm()) { Kind = ImmediateKind; - Loc.CImm = MI.getOperand(0).getCImm(); + Loc.CImm = MI.getDebugOperand(0).getCImm(); } // We create the debug entry values from the factory functions rather than @@ -218,7 +354,7 @@ private: /// Take the variable and machine-location in DBG_VALUE MI, and build an /// entry location using the given expression. static VarLoc CreateEntryLoc(const MachineInstr &MI, LexicalScopes &LS, - const DIExpression *EntryExpr, unsigned Reg) { + const DIExpression *EntryExpr, Register Reg) { VarLoc VL(MI, LS); assert(VL.Kind == RegisterKind); VL.Kind = EntryValueKind; @@ -247,7 +383,7 @@ private: static VarLoc CreateEntryCopyBackupLoc(const MachineInstr &MI, LexicalScopes &LS, const DIExpression *EntryExpr, - unsigned NewReg) { + Register NewReg) { VarLoc VL(MI, LS); assert(VL.Kind == RegisterKind); VL.Kind = EntryValueCopyBackupKind; @@ -259,7 +395,7 @@ private: /// Copy the register location in DBG_VALUE MI, updating the register to /// be NewReg. static VarLoc CreateCopyLoc(const MachineInstr &MI, LexicalScopes &LS, - unsigned NewReg) { + Register NewReg) { VarLoc VL(MI, LS); assert(VL.Kind == RegisterKind); VL.Loc.RegNo = NewReg; @@ -287,6 +423,7 @@ private: const auto &IID = MI.getDesc(); const DILocalVariable *Var = MI.getDebugVariable(); const DIExpression *DIExpr = MI.getDebugExpression(); + NumInserted++; switch (Kind) { case EntryValueKind: @@ -294,8 +431,8 @@ private: // expression. The register location of such DBG_VALUE is always the one // from the entry DBG_VALUE, it does not matter if the entry value was // copied in to another register due to some optimizations. 
- return BuildMI(MF, DbgLoc, IID, Indirect, MI.getOperand(0).getReg(), - Var, Expr); + return BuildMI(MF, DbgLoc, IID, Indirect, + MI.getDebugOperand(0).getReg(), Var, Expr); case RegisterKind: // Register locations are like the source DBG_VALUE, but with the // register number from this VarLoc. @@ -311,7 +448,7 @@ private: return BuildMI(MF, DbgLoc, IID, true, Base, Var, SpillExpr); } case ImmediateKind: { - MachineOperand MO = MI.getOperand(0); + MachineOperand MO = MI.getDebugOperand(0); return BuildMI(MF, DbgLoc, IID, Indirect, MO, Var, DIExpr); } case EntryValueBackupKind: @@ -357,41 +494,42 @@ private: /// Determine whether the lexical scope of this value's debug location /// dominates MBB. - bool dominates(MachineBasicBlock &MBB) const { return UVS.dominates(&MBB); } + bool dominates(LexicalScopes &LS, MachineBasicBlock &MBB) const { + return LS.dominates(MI.getDebugLoc().get(), &MBB); + } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) // TRI can be null. void dump(const TargetRegisterInfo *TRI, raw_ostream &Out = dbgs()) const { - dbgs() << "VarLoc("; + Out << "VarLoc("; switch (Kind) { case RegisterKind: case EntryValueKind: case EntryValueBackupKind: case EntryValueCopyBackupKind: - dbgs() << printReg(Loc.RegNo, TRI); + Out << printReg(Loc.RegNo, TRI); break; case SpillLocKind: - dbgs() << printReg(Loc.SpillLocation.SpillBase, TRI); - dbgs() << "[" << Loc.SpillLocation.SpillOffset << "]"; + Out << printReg(Loc.SpillLocation.SpillBase, TRI); + Out << "[" << Loc.SpillLocation.SpillOffset << "]"; break; case ImmediateKind: - dbgs() << Loc.Immediate; + Out << Loc.Immediate; break; case InvalidKind: llvm_unreachable("Invalid VarLoc in dump method"); } - dbgs() << ", \"" << Var.getVariable()->getName() << "\", " << *Expr - << ", "; + Out << ", \"" << Var.getVariable()->getName() << "\", " << *Expr << ", "; if (Var.getInlinedAt()) - dbgs() << "!" << Var.getInlinedAt()->getMetadataID() << ")\n"; + Out << "!" << Var.getInlinedAt()->getMetadataID() << ")\n"; else - dbgs() << "(null))"; + Out << "(null))"; if (isEntryBackupLoc()) - dbgs() << " (backup loc)\n"; + Out << " (backup loc)\n"; else - dbgs() << "\n"; + Out << "\n"; } #endif @@ -407,12 +545,62 @@ private: } }; - using VarLocMap = UniqueVector<VarLoc>; - using VarLocSet = SparseBitVector<>; - using VarLocInMBB = SmallDenseMap<const MachineBasicBlock *, VarLocSet>; + /// VarLocMap is used for two things: + /// 1) Assigning a unique LocIndex to a VarLoc. This LocIndex can be used to + /// virtually insert a VarLoc into a VarLocSet. + /// 2) Given a LocIndex, look up the unique associated VarLoc. + class VarLocMap { + /// Map a VarLoc to an index within the vector reserved for its location + /// within Loc2Vars. + std::map<VarLoc, LocIndex::u32_index_t> Var2Index; + + /// Map a location to a vector which holds VarLocs which live in that + /// location. + SmallDenseMap<LocIndex::u32_location_t, std::vector<VarLoc>> Loc2Vars; + + /// Determine the 32-bit location reserved for \p VL, based on its kind. + static LocIndex::u32_location_t getLocationForVar(const VarLoc &VL) { + switch (VL.Kind) { + case VarLoc::RegisterKind: + assert((VL.Loc.RegNo < LocIndex::kFirstInvalidRegLocation) && + "Physreg out of range?"); + return VL.Loc.RegNo; + case VarLoc::SpillLocKind: + return LocIndex::kSpillLocation; + case VarLoc::EntryValueBackupKind: + case VarLoc::EntryValueCopyBackupKind: + return LocIndex::kEntryValueBackupLocation; + default: + return 0; + } + } + + public: + /// Retrieve a unique LocIndex for \p VL. 
+ LocIndex insert(const VarLoc &VL) { + LocIndex::u32_location_t Location = getLocationForVar(VL); + LocIndex::u32_index_t &Index = Var2Index[VL]; + if (!Index) { + auto &Vars = Loc2Vars[Location]; + Vars.push_back(VL); + Index = Vars.size(); + } + return {Location, Index - 1}; + } + + /// Retrieve the unique VarLoc associated with \p ID. + const VarLoc &operator[](LocIndex ID) const { + auto LocIt = Loc2Vars.find(ID.Location); + assert(LocIt != Loc2Vars.end() && "Location not tracked"); + return LocIt->second[ID.Index]; + } + }; + + using VarLocInMBB = + SmallDenseMap<const MachineBasicBlock *, std::unique_ptr<VarLocSet>>; struct TransferDebugPair { - MachineInstr *TransferInst; /// Instruction where this transfer occurs. - unsigned LocationID; /// Location number for the transfer dest. + MachineInstr *TransferInst; ///< Instruction where this transfer occurs. + LocIndex LocationID; ///< Location number for the transfer dest. }; using TransferMap = SmallVector<TransferDebugPair, 4>; @@ -441,13 +629,14 @@ private: class OpenRangesSet { VarLocSet VarLocs; // Map the DebugVariable to recent primary location ID. - SmallDenseMap<DebugVariable, unsigned, 8> Vars; + SmallDenseMap<DebugVariable, LocIndex, 8> Vars; // Map the DebugVariable to recent backup location ID. - SmallDenseMap<DebugVariable, unsigned, 8> EntryValuesBackupVars; + SmallDenseMap<DebugVariable, LocIndex, 8> EntryValuesBackupVars; OverlapMap &OverlappingFragments; public: - OpenRangesSet(OverlapMap &_OLapMap) : OverlappingFragments(_OLapMap) {} + OpenRangesSet(VarLocSet::Allocator &Alloc, OverlapMap &_OLapMap) + : VarLocs(Alloc), OverlappingFragments(_OLapMap) {} const VarLocSet &getVarLocs() const { return VarLocs; } @@ -459,17 +648,18 @@ private: void erase(const VarLocSet &KillSet, const VarLocMap &VarLocIDs); /// Insert a new range into the set. - void insert(unsigned VarLocID, const VarLoc &VL); + void insert(LocIndex VarLocID, const VarLoc &VL); /// Insert a set of ranges. void insertFromLocSet(const VarLocSet &ToLoad, const VarLocMap &Map) { - for (unsigned Id : ToLoad) { - const VarLoc &VarL = Map[Id]; - insert(Id, VarL); + for (uint64_t ID : ToLoad) { + LocIndex Idx = LocIndex::fromRawInteger(ID); + const VarLoc &VarL = Map[Idx]; + insert(Idx, VarL); } } - llvm::Optional<unsigned> getEntryValueBackup(DebugVariable Var); + llvm::Optional<LocIndex> getEntryValueBackup(DebugVariable Var); /// Empty the set. void clear() { @@ -485,8 +675,57 @@ private: "open ranges are inconsistent"); return VarLocs.empty(); } + + /// Get an empty range of VarLoc IDs. + auto getEmptyVarLocRange() const { + return iterator_range<VarLocSet::const_iterator>(getVarLocs().end(), + getVarLocs().end()); + } + + /// Get all set IDs for VarLocs of kind RegisterKind in \p Reg. + auto getRegisterVarLocs(Register Reg) const { + return LocIndex::indexRangeForLocation(getVarLocs(), Reg); + } + + /// Get all set IDs for VarLocs of kind SpillLocKind. + auto getSpillVarLocs() const { + return LocIndex::indexRangeForLocation(getVarLocs(), + LocIndex::kSpillLocation); + } + + /// Get all set IDs for VarLocs of kind EntryValueBackupKind or + /// EntryValueCopyBackupKind. + auto getEntryValueBackupVarLocs() const { + return LocIndex::indexRangeForLocation( + getVarLocs(), LocIndex::kEntryValueBackupLocation); + } }; + /// Collect all VarLoc IDs from \p CollectFrom for VarLocs of kind + /// RegisterKind which are located in any reg in \p Regs. Insert collected IDs + /// into \p Collected. 
+ void collectIDsForRegs(VarLocSet &Collected, const DefinedRegsSet &Regs, + const VarLocSet &CollectFrom) const; + + /// Get the registers which are used by VarLocs of kind RegisterKind tracked + /// by \p CollectFrom. + void getUsedRegs(const VarLocSet &CollectFrom, + SmallVectorImpl<uint32_t> &UsedRegs) const; + + VarLocSet &getVarLocsInMBB(const MachineBasicBlock *MBB, VarLocInMBB &Locs) { + std::unique_ptr<VarLocSet> &VLS = Locs[MBB]; + if (!VLS) + VLS = std::make_unique<VarLocSet>(Alloc); + return *VLS.get(); + } + + const VarLocSet &getVarLocsInMBB(const MachineBasicBlock *MBB, + const VarLocInMBB &Locs) const { + auto It = Locs.find(MBB); + assert(It != Locs.end() && "MBB not in map"); + return *It->second.get(); + } + /// Tests whether this instruction is a spill to a stack location. bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF); @@ -497,7 +736,7 @@ private: /// TODO: Store optimization can fold spills into other stores (including /// other spills). We do not handle this yet (more than one memory operand). bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF, - unsigned &Reg); + Register &Reg); /// Returns true if the given machine instruction is a debug value which we /// can emit entry values for. @@ -511,14 +750,14 @@ private: /// and set \p Reg to the spilled register. Optional<VarLoc::SpillLoc> isRestoreInstruction(const MachineInstr &MI, MachineFunction *MF, - unsigned &Reg); + Register &Reg); /// Given a spill instruction, extract the register and offset used to /// address the spill location in a target independent way. VarLoc::SpillLoc extractSpillBaseRegAndOffset(const MachineInstr &MI); void insertTransferDebugPair(MachineInstr &MI, OpenRangesSet &OpenRanges, TransferMap &Transfers, VarLocMap &VarLocIDs, - unsigned OldVarID, TransferKind Kind, - unsigned NewReg = 0); + LocIndex OldVarID, TransferKind Kind, + Register NewReg = Register()); void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs); @@ -528,7 +767,7 @@ private: VarLocMap &VarLocIDs, const VarLoc &EntryVL); void emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers, - SparseBitVector<> &KillSet); + VarLocSet &KillSet); void recordEntryValue(const MachineInstr &MI, const DefinedRegsSet &DefinedRegs, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs); @@ -548,8 +787,7 @@ private: bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs, const VarLocMap &VarLocIDs, SmallPtrSet<const MachineBasicBlock *, 16> &Visited, - SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks, - VarLocInMBB &PendingInLocs); + SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks); /// Create DBG_VALUE insts for inlocs that have been propagated but /// had their instruction creation deferred. @@ -617,8 +855,8 @@ void LiveDebugValues::OpenRangesSet::erase(const VarLoc &VL) { auto *EraseFrom = VL.isEntryBackupLoc() ? 
&EntryValuesBackupVars : &Vars; auto It = EraseFrom->find(VarToErase); if (It != EraseFrom->end()) { - unsigned ID = It->second; - VarLocs.reset(ID); + LocIndex ID = It->second; + VarLocs.reset(ID.getAsRawInteger()); EraseFrom->erase(It); } }; @@ -648,23 +886,23 @@ void LiveDebugValues::OpenRangesSet::erase(const VarLoc &VL) { void LiveDebugValues::OpenRangesSet::erase(const VarLocSet &KillSet, const VarLocMap &VarLocIDs) { VarLocs.intersectWithComplement(KillSet); - for (unsigned ID : KillSet) { - const VarLoc *VL = &VarLocIDs[ID]; + for (uint64_t ID : KillSet) { + const VarLoc *VL = &VarLocIDs[LocIndex::fromRawInteger(ID)]; auto *EraseFrom = VL->isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars; EraseFrom->erase(VL->Var); } } -void LiveDebugValues::OpenRangesSet::insert(unsigned VarLocID, +void LiveDebugValues::OpenRangesSet::insert(LocIndex VarLocID, const VarLoc &VL) { auto *InsertInto = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars; - VarLocs.set(VarLocID); + VarLocs.set(VarLocID.getAsRawInteger()); InsertInto->insert({VL.Var, VarLocID}); } /// Return the Loc ID of an entry value backup location, if it exists for the /// variable. -llvm::Optional<unsigned> +llvm::Optional<LocIndex> LiveDebugValues::OpenRangesSet::getEntryValueBackup(DebugVariable Var) { auto It = EntryValuesBackupVars.find(Var); if (It != EntryValuesBackupVars.end()) @@ -673,6 +911,57 @@ LiveDebugValues::OpenRangesSet::getEntryValueBackup(DebugVariable Var) { return llvm::None; } +void LiveDebugValues::collectIDsForRegs(VarLocSet &Collected, + const DefinedRegsSet &Regs, + const VarLocSet &CollectFrom) const { + assert(!Regs.empty() && "Nothing to collect"); + SmallVector<uint32_t, 32> SortedRegs; + for (Register Reg : Regs) + SortedRegs.push_back(Reg); + array_pod_sort(SortedRegs.begin(), SortedRegs.end()); + auto It = CollectFrom.find(LocIndex::rawIndexForReg(SortedRegs.front())); + auto End = CollectFrom.end(); + for (uint32_t Reg : SortedRegs) { + // The half-open interval [FirstIndexForReg, FirstInvalidIndex) contains all + // possible VarLoc IDs for VarLocs of kind RegisterKind which live in Reg. + uint64_t FirstIndexForReg = LocIndex::rawIndexForReg(Reg); + uint64_t FirstInvalidIndex = LocIndex::rawIndexForReg(Reg + 1); + It.advanceToLowerBound(FirstIndexForReg); + + // Iterate through that half-open interval and collect all the set IDs. + for (; It != End && *It < FirstInvalidIndex; ++It) + Collected.set(*It); + + if (It == End) + return; + } +} + +void LiveDebugValues::getUsedRegs(const VarLocSet &CollectFrom, + SmallVectorImpl<uint32_t> &UsedRegs) const { + // All register-based VarLocs are assigned indices greater than or equal to + // FirstRegIndex. + uint64_t FirstRegIndex = LocIndex::rawIndexForReg(1); + uint64_t FirstInvalidIndex = + LocIndex::rawIndexForReg(LocIndex::kFirstInvalidRegLocation); + for (auto It = CollectFrom.find(FirstRegIndex), + End = CollectFrom.find(FirstInvalidIndex); + It != End;) { + // We found a VarLoc ID for a VarLoc that lives in a register. Figure out + // which register and add it to UsedRegs. + uint32_t FoundReg = LocIndex::fromRawInteger(*It).Location; + assert((UsedRegs.empty() || FoundReg != UsedRegs.back()) && + "Duplicate used reg"); + UsedRegs.push_back(FoundReg); + + // Skip to the next /set/ register. Note that this finds a lower bound, so + // even if there aren't any VarLocs living in `FoundReg+1`, we're still + // guaranteed to move on to the next register (or to end()). 
+ uint64_t NextRegIndex = LocIndex::rawIndexForReg(FoundReg + 1); + It.advanceToLowerBound(NextRegIndex); + } +} + //===----------------------------------------------------------------------===// // Debug Range Extension Implementation //===----------------------------------------------------------------------===// @@ -685,12 +974,14 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF, raw_ostream &Out) const { Out << '\n' << msg << '\n'; for (const MachineBasicBlock &BB : MF) { - const VarLocSet &L = V.lookup(&BB); + if (!V.count(&BB)) + continue; + const VarLocSet &L = getVarLocsInMBB(&BB, V); if (L.empty()) continue; Out << "MBB: " << BB.getNumber() << ":\n"; - for (unsigned VLL : L) { - const VarLoc &VL = VarLocIDs[VLL]; + for (uint64_t VLL : L) { + const VarLoc &VL = VarLocIDs[LocIndex::fromRawInteger(VLL)]; Out << " Var: " << VL.Var.getVariable()->getName(); Out << " MI: "; VL.dump(TRI, Out); @@ -710,7 +1001,7 @@ LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI) { "Inconsistent memory operand in spill instruction"); int FI = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex(); const MachineBasicBlock *MBB = MI.getParent(); - unsigned Reg; + Register Reg; int Offset = TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg); return {Reg, Offset}; } @@ -730,7 +1021,7 @@ bool LiveDebugValues::removeEntryValue(const MachineInstr &MI, // the entry value any more. In addition, if the debug expression from the // DBG_VALUE is not empty, we can assume the parameter's value has changed // indicating that we should stop tracking its entry value as well. - if (!MI.getOperand(0).isReg() || + if (!MI.getDebugOperand(0).isReg() || MI.getDebugExpression()->getNumElements() != 0) return true; @@ -738,7 +1029,7 @@ bool LiveDebugValues::removeEntryValue(const MachineInstr &MI, // it means the parameter's value has not changed and we should be able to use // its entry value. bool TrySalvageEntryValue = false; - Register Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getDebugOperand(0).getReg(); auto I = std::next(MI.getReverseIterator()); const MachineOperand *SrcRegOp, *DestRegOp; if (I != MI.getParent()->rend()) { @@ -757,13 +1048,10 @@ bool LiveDebugValues::removeEntryValue(const MachineInstr &MI, } if (TrySalvageEntryValue) { - for (unsigned ID : OpenRanges.getVarLocs()) { - const VarLoc &VL = VarLocIDs[ID]; - if (!VL.isEntryBackupLoc()) - continue; - + for (uint64_t ID : OpenRanges.getEntryValueBackupVarLocs()) { + const VarLoc &VL = VarLocIDs[LocIndex::fromRawInteger(ID)]; if (VL.getEntryValueCopyBackupReg() == Reg && - VL.MI.getOperand(0).getReg() == SrcRegOp->getReg()) + VL.MI.getDebugOperand(0).getReg() == SrcRegOp->getReg()) return false; } } @@ -801,23 +1089,25 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI, } } - unsigned ID; - if (isDbgValueDescribedByReg(MI) || MI.getOperand(0).isImm() || - MI.getOperand(0).isFPImm() || MI.getOperand(0).isCImm()) { + if (isDbgValueDescribedByReg(MI) || MI.getDebugOperand(0).isImm() || + MI.getDebugOperand(0).isFPImm() || MI.getDebugOperand(0).isCImm()) { // Use normal VarLoc constructor for registers and immediates. VarLoc VL(MI, LS); // End all previous ranges of VL.Var. OpenRanges.erase(VL); - ID = VarLocIDs.insert(VL); + LocIndex ID = VarLocIDs.insert(VL); // Add the VarLoc to OpenRanges from this DBG_VALUE. 
OpenRanges.insert(ID, VL); } else if (MI.hasOneMemOperand()) { llvm_unreachable("DBG_VALUE with mem operand encountered after regalloc?"); } else { - // This must be an undefined location. We should leave OpenRanges closed. - assert(MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == 0 && + // This must be an undefined location. If it has an open range, erase it. + assert(MI.getDebugOperand(0).isReg() && + MI.getDebugOperand(0).getReg() == 0 && "Unexpected non-undef DBG_VALUE encountered"); + VarLoc VL(MI, LS); + OpenRanges.erase(VL); } } @@ -826,13 +1116,20 @@ void LiveDebugValues::emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers, - SparseBitVector<> &KillSet) { - for (unsigned ID : KillSet) { - if (!VarLocIDs[ID].Var.getVariable()->isParameter()) + VarLocSet &KillSet) { + // Do not insert entry value locations after a terminator. + if (MI.isTerminator()) + return; + + for (uint64_t ID : KillSet) { + LocIndex Idx = LocIndex::fromRawInteger(ID); + const VarLoc &VL = VarLocIDs[Idx]; + if (!VL.Var.getVariable()->isParameter()) continue; - auto DebugVar = VarLocIDs[ID].Var; - auto EntryValBackupID = OpenRanges.getEntryValueBackup(DebugVar); + auto DebugVar = VL.Var; + Optional<LocIndex> EntryValBackupID = + OpenRanges.getEntryValueBackup(DebugVar); // If the parameter has the entry value backup, it means we should // be able to use its entry value. @@ -842,7 +1139,7 @@ void LiveDebugValues::emitEntryValues(MachineInstr &MI, const VarLoc &EntryVL = VarLocIDs[*EntryValBackupID]; VarLoc EntryLoc = VarLoc::CreateEntryLoc(EntryVL.MI, LS, EntryVL.Expr, EntryVL.Loc.RegNo); - unsigned EntryValueID = VarLocIDs.insert(EntryLoc); + LocIndex EntryValueID = VarLocIDs.insert(EntryLoc); Transfers.push_back({&MI, EntryValueID}); OpenRanges.insert(EntryValueID, EntryLoc); } @@ -855,12 +1152,12 @@ void LiveDebugValues::emitEntryValues(MachineInstr &MI, /// otherwise it is variable's location on the stack. void LiveDebugValues::insertTransferDebugPair( MachineInstr &MI, OpenRangesSet &OpenRanges, TransferMap &Transfers, - VarLocMap &VarLocIDs, unsigned OldVarID, TransferKind Kind, - unsigned NewReg) { + VarLocMap &VarLocIDs, LocIndex OldVarID, TransferKind Kind, + Register NewReg) { const MachineInstr *DebugInstr = &VarLocIDs[OldVarID].MI; auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &VarLocIDs](VarLoc &VL) { - unsigned LocId = VarLocIDs.insert(VL); + LocIndex LocId = VarLocIDs.insert(VL); // Close this variable's previous location range. OpenRanges.erase(VL); @@ -868,6 +1165,7 @@ void LiveDebugValues::insertTransferDebugPair( // Record the new location as an open range, and a postponed transfer // inserting a DBG_VALUE for this location. OpenRanges.insert(LocId, VL); + assert(!MI.isTerminator() && "Cannot insert DBG_VALUE after terminator"); TransferDebugPair MIP = {&MI, LocId}; Transfers.push_back(MIP); }; @@ -922,39 +1220,67 @@ void LiveDebugValues::insertTransferDebugPair( void LiveDebugValues::transferRegisterDef( MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers) { + + // Meta Instructions do not affect the debug liveness of any register they + // define. 
+ if (MI.isMetaInstruction()) + return; + MachineFunction *MF = MI.getMF(); const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); - unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); - SparseBitVector<> KillSet; + Register SP = TLI->getStackPointerRegisterToSaveRestore(); + + // Find the regs killed by MI, and find regmasks of preserved regs. + DefinedRegsSet DeadRegs; + SmallVector<const uint32_t *, 4> RegMasks; for (const MachineOperand &MO : MI.operands()) { - // Determine whether the operand is a register def. Assume that call - // instructions never clobber SP, because some backends (e.g., AArch64) - // never list SP in the regmask. + // Determine whether the operand is a register def. if (MO.isReg() && MO.isDef() && MO.getReg() && Register::isPhysicalRegister(MO.getReg()) && !(MI.isCall() && MO.getReg() == SP)) { // Remove ranges of all aliased registers. for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI) - for (unsigned ID : OpenRanges.getVarLocs()) - if (VarLocIDs[ID].isDescribedByReg() == *RAI) - KillSet.set(ID); + // FIXME: Can we break out of this loop early if no insertion occurs? + DeadRegs.insert(*RAI); } else if (MO.isRegMask()) { + RegMasks.push_back(MO.getRegMask()); + } + } + + // Erase VarLocs which reside in one of the dead registers. For performance + // reasons, it's critical to not iterate over the full set of open VarLocs. + // Iterate over the set of dying/used regs instead. + if (!RegMasks.empty()) { + SmallVector<uint32_t, 32> UsedRegs; + getUsedRegs(OpenRanges.getVarLocs(), UsedRegs); + for (uint32_t Reg : UsedRegs) { // Remove ranges of all clobbered registers. Register masks don't usually - // list SP as preserved. While the debug info may be off for an - // instruction or two around callee-cleanup calls, transferring the - // DEBUG_VALUE across the call is still a better user experience. - for (unsigned ID : OpenRanges.getVarLocs()) { - unsigned Reg = VarLocIDs[ID].isDescribedByReg(); - if (Reg && Reg != SP && MO.clobbersPhysReg(Reg)) - KillSet.set(ID); - } + // list SP as preserved. Assume that call instructions never clobber SP, + // because some backends (e.g., AArch64) never list SP in the regmask. + // While the debug info may be off for an instruction or two around + // callee-cleanup calls, transferring the DEBUG_VALUE across the call is + // still a better user experience. 
+ if (Reg == SP) + continue; + bool AnyRegMaskKillsReg = + any_of(RegMasks, [Reg](const uint32_t *RegMask) { + return MachineOperand::clobbersPhysReg(RegMask, Reg); + }); + if (AnyRegMaskKillsReg) + DeadRegs.insert(Reg); } } + + if (DeadRegs.empty()) + return; + + VarLocSet KillSet(Alloc); + collectIDsForRegs(KillSet, DeadRegs, OpenRanges.getVarLocs()); OpenRanges.erase(KillSet, VarLocIDs); if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) { auto &TM = TPC->getTM<TargetMachine>(); - if (TM.Options.EnableDebugEntryValues) + if (TM.Options.ShouldEmitDebugEntryValues()) emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, KillSet); } } @@ -973,11 +1299,11 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI, } bool LiveDebugValues::isLocationSpill(const MachineInstr &MI, - MachineFunction *MF, unsigned &Reg) { + MachineFunction *MF, Register &Reg) { if (!isSpillInstruction(MI, MF)) return false; - auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) { + auto isKilledReg = [&](const MachineOperand MO, Register &Reg) { if (!MO.isReg() || !MO.isUse()) { Reg = 0; return false; @@ -999,7 +1325,7 @@ bool LiveDebugValues::isLocationSpill(const MachineInstr &MI, // Skip next instruction that points to basic block end iterator. if (MI.getParent()->end() == NextI) continue; - unsigned RegNext; + Register RegNext; for (const MachineOperand &MONext : NextI->operands()) { // Return true if we came across the register from the // previous spill instruction that is killed in NextI. @@ -1014,7 +1340,7 @@ bool LiveDebugValues::isLocationSpill(const MachineInstr &MI, Optional<LiveDebugValues::VarLoc::SpillLoc> LiveDebugValues::isRestoreInstruction(const MachineInstr &MI, - MachineFunction *MF, unsigned &Reg) { + MachineFunction *MF, Register &Reg) { if (!MI.hasOneMemOperand()) return None; @@ -1040,7 +1366,7 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI, TransferMap &Transfers) { MachineFunction *MF = MI.getMF(); TransferKind TKind; - unsigned Reg; + Register Reg; Optional<VarLoc::SpillLoc> Loc; LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump();); @@ -1048,12 +1374,14 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI, // First, if there are any DBG_VALUEs pointing at a spill slot that is // written to, then close the variable location. The value in memory // will have changed. - VarLocSet KillSet; + VarLocSet KillSet(Alloc); if (isSpillInstruction(MI, MF)) { Loc = extractSpillBaseRegAndOffset(MI); - for (unsigned ID : OpenRanges.getVarLocs()) { - const VarLoc &VL = VarLocIDs[ID]; - if (VL.Kind == VarLoc::SpillLocKind && VL.Loc.SpillLocation == *Loc) { + for (uint64_t ID : OpenRanges.getSpillVarLocs()) { + LocIndex Idx = LocIndex::fromRawInteger(ID); + const VarLoc &VL = VarLocIDs[Idx]; + assert(VL.Kind == VarLoc::SpillLocKind && "Broken VarLocSet?"); + if (VL.Loc.SpillLocation == *Loc) { // This location is overwritten by the current instruction -- terminate // the open range, and insert an explicit DBG_VALUE $noreg. // @@ -1066,7 +1394,7 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI, // where they are located; it's best to fix handle overwrites now. 
KillSet.set(ID); VarLoc UndefVL = VarLoc::CreateCopyLoc(VL.MI, LS, 0); - unsigned UndefLocID = VarLocIDs.insert(UndefVL); + LocIndex UndefLocID = VarLocIDs.insert(UndefVL); Transfers.push_back({&MI, UndefLocID}); } } @@ -1089,20 +1417,31 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI, << "\n"); } // Check if the register or spill location is the location of a debug value. - for (unsigned ID : OpenRanges.getVarLocs()) { - if (TKind == TransferKind::TransferSpill && - VarLocIDs[ID].isDescribedByReg() == Reg) { + auto TransferCandidates = OpenRanges.getEmptyVarLocRange(); + if (TKind == TransferKind::TransferSpill) + TransferCandidates = OpenRanges.getRegisterVarLocs(Reg); + else if (TKind == TransferKind::TransferRestore) + TransferCandidates = OpenRanges.getSpillVarLocs(); + for (uint64_t ID : TransferCandidates) { + LocIndex Idx = LocIndex::fromRawInteger(ID); + const VarLoc &VL = VarLocIDs[Idx]; + if (TKind == TransferKind::TransferSpill) { + assert(VL.isDescribedByReg() == Reg && "Broken VarLocSet?"); LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '(' - << VarLocIDs[ID].Var.getVariable()->getName() << ")\n"); - } else if (TKind == TransferKind::TransferRestore && - VarLocIDs[ID].Kind == VarLoc::SpillLocKind && - VarLocIDs[ID].Loc.SpillLocation == *Loc) { + << VL.Var.getVariable()->getName() << ")\n"); + } else { + assert(TKind == TransferKind::TransferRestore && + VL.Kind == VarLoc::SpillLocKind && "Broken VarLocSet?"); + if (VL.Loc.SpillLocation != *Loc) + // The spill location is not the location of a debug value. + continue; LLVM_DEBUG(dbgs() << "Restoring Register " << printReg(Reg, TRI) << '(' - << VarLocIDs[ID].Var.getVariable()->getName() << ")\n"); - } else - continue; - insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID, TKind, + << VL.Var.getVariable()->getName() << ")\n"); + } + insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, Idx, TKind, Reg); + // FIXME: A comment should explain why it's correct to return early here, + // if that is in fact correct. return; } } @@ -1124,7 +1463,7 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI, if (!DestRegOp->isDef()) return; - auto isCalleeSavedReg = [&](unsigned Reg) { + auto isCalleeSavedReg = [&](Register Reg) { for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI) if (CalleeSavedRegs.test(*RAI)) return true; @@ -1146,17 +1485,19 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI, // a parameter describing only a moving of the value around, rather then // modifying it, we are still able to use the entry value if needed. if (isRegOtherThanSPAndFP(*DestRegOp, MI, TRI)) { - for (unsigned ID : OpenRanges.getVarLocs()) { - if (VarLocIDs[ID].getEntryValueBackupReg() == SrcReg) { + for (uint64_t ID : OpenRanges.getEntryValueBackupVarLocs()) { + LocIndex Idx = LocIndex::fromRawInteger(ID); + const VarLoc &VL = VarLocIDs[Idx]; + if (VL.getEntryValueBackupReg() == SrcReg) { LLVM_DEBUG(dbgs() << "Copy of the entry value: "; MI.dump();); - VarLoc EntryValLocCopyBackup = VarLoc::CreateEntryCopyBackupLoc( - VarLocIDs[ID].MI, LS, VarLocIDs[ID].Expr, DestReg); + VarLoc EntryValLocCopyBackup = + VarLoc::CreateEntryCopyBackupLoc(VL.MI, LS, VL.Expr, DestReg); // Stop tracking the original entry value. - OpenRanges.erase(VarLocIDs[ID]); + OpenRanges.erase(VL); // Start tracking the entry value copy. 
- unsigned EntryValCopyLocID = VarLocIDs.insert(EntryValLocCopyBackup); + LocIndex EntryValCopyLocID = VarLocIDs.insert(EntryValLocCopyBackup); OpenRanges.insert(EntryValCopyLocID, EntryValLocCopyBackup); break; } @@ -1166,12 +1507,14 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI, if (!SrcRegOp->isKill()) return; - for (unsigned ID : OpenRanges.getVarLocs()) { - if (VarLocIDs[ID].isDescribedByReg() == SrcReg) { - insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID, - TransferKind::TransferCopy, DestReg); - return; - } + for (uint64_t ID : OpenRanges.getRegisterVarLocs(SrcReg)) { + LocIndex Idx = LocIndex::fromRawInteger(ID); + assert(VarLocIDs[Idx].isDescribedByReg() == SrcReg && "Broken VarLocSet?"); + insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, Idx, + TransferKind::TransferCopy, DestReg); + // FIXME: A comment should explain why it's correct to return early here, + // if that is in fact correct. + return; } } @@ -1182,13 +1525,13 @@ bool LiveDebugValues::transferTerminator(MachineBasicBlock *CurMBB, const VarLocMap &VarLocIDs) { bool Changed = false; - LLVM_DEBUG(for (unsigned ID + LLVM_DEBUG(for (uint64_t ID : OpenRanges.getVarLocs()) { // Copy OpenRanges to OutLocs, if not already present. dbgs() << "Add to OutLocs in MBB #" << CurMBB->getNumber() << ": "; - VarLocIDs[ID].dump(TRI); + VarLocIDs[LocIndex::fromRawInteger(ID)].dump(TRI); }); - VarLocSet &VLS = OutLocs[CurMBB]; + VarLocSet &VLS = getVarLocsInMBB(CurMBB, OutLocs); Changed = VLS != OpenRanges.getVarLocs(); // New OutLocs set may be different due to spill, restore or register // copy instruction processing. @@ -1275,12 +1618,10 @@ bool LiveDebugValues::join( MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs, const VarLocMap &VarLocIDs, SmallPtrSet<const MachineBasicBlock *, 16> &Visited, - SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks, - VarLocInMBB &PendingInLocs) { + SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks) { LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n"); - bool Changed = false; - VarLocSet InLocsT; // Temporary incoming locations. + VarLocSet InLocsT(Alloc); // Temporary incoming locations. // For all predecessors of this MBB, find the set of VarLocs that // can be joined. @@ -1303,16 +1644,20 @@ bool LiveDebugValues::join( // Just copy over the Out locs to incoming locs for the first visited // predecessor, and for all other predecessors join the Out locs. + VarLocSet &OutLocVLS = *OL->second.get(); if (!NumVisited) - InLocsT = OL->second; + InLocsT = OutLocVLS; else - InLocsT &= OL->second; + InLocsT &= OutLocVLS; LLVM_DEBUG({ if (!InLocsT.empty()) { - for (auto ID : InLocsT) + for (uint64_t ID : InLocsT) dbgs() << " gathered candidate incoming var: " - << VarLocIDs[ID].Var.getVariable()->getName() << "\n"; + << VarLocIDs[LocIndex::fromRawInteger(ID)] + .Var.getVariable() + ->getName() + << "\n"; } }); @@ -1320,14 +1665,15 @@ bool LiveDebugValues::join( } // Filter out DBG_VALUES that are out of scope. 
- VarLocSet KillSet; + VarLocSet KillSet(Alloc); bool IsArtificial = ArtificialBlocks.count(&MBB); if (!IsArtificial) { - for (auto ID : InLocsT) { - if (!VarLocIDs[ID].dominates(MBB)) { + for (uint64_t ID : InLocsT) { + LocIndex Idx = LocIndex::fromRawInteger(ID); + if (!VarLocIDs[Idx].dominates(LS, MBB)) { KillSet.set(ID); LLVM_DEBUG({ - auto Name = VarLocIDs[ID].Var.getVariable()->getName(); + auto Name = VarLocIDs[Idx].Var.getVariable()->getName(); dbgs() << " killing " << Name << ", it doesn't dominate MBB\n"; }); } @@ -1341,30 +1687,10 @@ bool LiveDebugValues::join( assert((NumVisited || MBB.pred_empty()) && "Should have processed at least one predecessor"); - VarLocSet &ILS = InLocs[&MBB]; - VarLocSet &Pending = PendingInLocs[&MBB]; - - // New locations will have DBG_VALUE insts inserted at the start of the - // block, after location propagation has finished. Record the insertions - // that we need to perform in the Pending set. - VarLocSet Diff = InLocsT; - Diff.intersectWithComplement(ILS); - for (auto ID : Diff) { - Pending.set(ID); - ILS.set(ID); - ++NumInserted; - Changed = true; - } - - // We may have lost locations by learning about a predecessor that either - // loses or moves a variable. Find any locations in ILS that are not in the - // new in-locations, and delete those. - VarLocSet Removed = ILS; - Removed.intersectWithComplement(InLocsT); - for (auto ID : Removed) { - Pending.reset(ID); - ILS.reset(ID); - ++NumRemoved; + VarLocSet &ILS = getVarLocsInMBB(&MBB, InLocs); + bool Changed = false; + if (ILS != InLocsT) { + ILS = InLocsT; Changed = true; } @@ -1378,12 +1704,12 @@ void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs, for (auto &Iter : PendingInLocs) { // Map is keyed on a constant pointer, unwrap it so we can insert insts. auto &MBB = const_cast<MachineBasicBlock &>(*Iter.first); - VarLocSet &Pending = Iter.second; + VarLocSet &Pending = *Iter.second.get(); - for (unsigned ID : Pending) { + for (uint64_t ID : Pending) { // The ID location is live-in to MBB -- work out what kind of machine // location it is and create a DBG_VALUE. - const VarLoc &DiffIt = VarLocIDs[ID]; + const VarLoc &DiffIt = VarLocIDs[LocIndex::fromRawInteger(ID)]; if (DiffIt.isEntryBackupLoc()) continue; MachineInstr *MI = DiffIt.BuildDbgValue(*MBB.getParent()); @@ -1411,25 +1737,21 @@ bool LiveDebugValues::isEntryValueCandidate( if (MI.getDebugLoc()->getInlinedAt()) return false; - // Do not consider indirect debug values (TODO: explain why). - if (MI.isIndirectDebugValue()) - return false; - // Only consider parameters that are described using registers. Parameters // that are passed on the stack are not yet supported, so ignore debug // values that are described by the frame or stack pointer. - if (!isRegOtherThanSPAndFP(MI.getOperand(0), MI, TRI)) + if (!isRegOtherThanSPAndFP(MI.getDebugOperand(0), MI, TRI)) return false; // If a parameter's value has been propagated from the caller, then the // parameter's DBG_VALUE may be described using a register defined by some // instruction in the entry block, in which case we shouldn't create an // entry value. - if (DefinedRegs.count(MI.getOperand(0).getReg())) + if (DefinedRegs.count(MI.getDebugOperand(0).getReg())) return false; // TODO: Add support for parameters that have a pre-existing debug expressions - // (e.g. fragments, or indirect parameters using DW_OP_deref). + // (e.g. fragments). 
if (MI.getDebugExpression()->getNumElements() > 0) return false; @@ -1454,7 +1776,7 @@ void LiveDebugValues::recordEntryValue(const MachineInstr &MI, VarLocMap &VarLocIDs) { if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) { auto &TM = TPC->getTM<TargetMachine>(); - if (!TM.Options.EnableDebugEntryValues) + if (!TM.Options.ShouldEmitDebugEntryValues()) return; } @@ -1472,7 +1794,7 @@ void LiveDebugValues::recordEntryValue(const MachineInstr &MI, DIExpression *NewExpr = DIExpression::prepend(MI.getDebugExpression(), DIExpression::EntryValue); VarLoc EntryValLocAsBackup = VarLoc::CreateEntryBackupLoc(MI, LS, NewExpr); - unsigned EntryValLocID = VarLocIDs.insert(EntryValLocAsBackup); + LocIndex EntryValLocID = VarLocIDs.insert(EntryValLocAsBackup); OpenRanges.insert(EntryValLocID, EntryValLocAsBackup); } @@ -1487,15 +1809,12 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { VarLocMap VarLocIDs; // Map VarLoc<>unique ID for use in bitvectors. OverlapMap OverlapFragments; // Map of overlapping variable fragments. - OpenRangesSet OpenRanges(OverlapFragments); + OpenRangesSet OpenRanges(Alloc, OverlapFragments); // Ranges that are open until end of bb. VarLocInMBB OutLocs; // Ranges that exist beyond bb. VarLocInMBB InLocs; // Ranges that are incoming after joining. TransferMap Transfers; // DBG_VALUEs associated with transfers (such as // spills, copies and restores). - VarLocInMBB PendingInLocs; // Ranges that are incoming after joining, but - // that we have deferred creating DBG_VALUE insts - // for immediately. VarToFragments SeenFragments; @@ -1526,14 +1845,10 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { } // Initialize per-block structures and scan for fragment overlaps. - for (auto &MBB : MF) { - PendingInLocs[&MBB] = VarLocSet(); - - for (auto &MI : MBB) { + for (auto &MBB : MF) + for (auto &MI : MBB) if (MI.isDebugValue()) accumulateFragmentMap(MI, SeenFragments, OverlapFragments); - } - } auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool { if (const DebugLoc &DL = MI.getDebugLoc()) @@ -1555,6 +1870,22 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { Worklist.push(RPONumber); ++RPONumber; } + + if (RPONumber > InputBBLimit) { + unsigned NumInputDbgValues = 0; + for (auto &MBB : MF) + for (auto &MI : MBB) + if (MI.isDebugValue()) + ++NumInputDbgValues; + if (NumInputDbgValues > InputDbgValueLimit) { + LLVM_DEBUG(dbgs() << "Disabling LiveDebugValues: " << MF.getName() + << " has " << RPONumber << " basic blocks and " + << NumInputDbgValues + << " input DBG_VALUEs, exceeding limits.\n"); + return false; + } + } + // This is a standard "union of predecessor outs" dataflow problem. // To solve it, we perform join() and process() using the two worklist method // until the ranges converge. @@ -1570,7 +1901,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { MachineBasicBlock *MBB = OrderToBB[Worklist.top()]; Worklist.pop(); MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs, Visited, - ArtificialBlocks, PendingInLocs); + ArtificialBlocks); MBBJoined |= Visited.insert(MBB).second; if (MBBJoined) { MBBJoined = false; @@ -1579,7 +1910,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { // examine spill, copy and restore instructions to see whether they // operate with registers that correspond to user variables. // First load any pending inlocs. 
- OpenRanges.insertFromLocSet(PendingInLocs[MBB], VarLocIDs); + OpenRanges.insertFromLocSet(getVarLocsInMBB(MBB, InLocs), VarLocIDs); for (auto &MI : *MBB) process(MI, OpenRanges, VarLocIDs, Transfers); OLChanged |= transferTerminator(MBB, OpenRanges, OutLocs, VarLocIDs); @@ -1606,6 +1937,8 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { // Add any DBG_VALUE instructions created by location transfers. for (auto &TR : Transfers) { + assert(!TR.TransferInst->isTerminator() && + "Cannot insert DBG_VALUE after terminator"); MachineBasicBlock *MBB = TR.TransferInst->getParent(); const VarLoc &VL = VarLocIDs[TR.LocationID]; MachineInstr *MI = VL.BuildDbgValue(MF); @@ -1615,7 +1948,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { // Deferred inlocs will not have had any DBG_VALUE insts created; do // that now. - flushPendingLocs(PendingInLocs, VarLocIDs); + flushPendingLocs(InLocs, VarLocIDs); LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "Final OutLocs", dbgs())); LLVM_DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs, "Final InLocs", dbgs())); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp index 5b20a2482b7b..158e873370b1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -96,46 +96,49 @@ LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID) { enum : unsigned { UndefLocNo = ~0U }; -/// Describes a location by number along with some flags about the original -/// usage of the location. -class DbgValueLocation { +/// Describes a debug variable value by location number and expression along +/// with some flags about the original usage of the location. +class DbgVariableValue { public: - DbgValueLocation(unsigned LocNo, bool WasIndirect) - : LocNo(LocNo), WasIndirect(WasIndirect) { - static_assert(sizeof(*this) == sizeof(unsigned), "bad bitfield packing"); - assert(locNo() == LocNo && "location truncation"); + DbgVariableValue(unsigned LocNo, bool WasIndirect, + const DIExpression &Expression) + : LocNo(LocNo), WasIndirect(WasIndirect), Expression(&Expression) { + assert(getLocNo() == LocNo && "location truncation"); } - DbgValueLocation() : LocNo(0), WasIndirect(0) {} + DbgVariableValue() : LocNo(0), WasIndirect(0) {} - unsigned locNo() const { + const DIExpression *getExpression() const { return Expression; } + unsigned getLocNo() const { // Fix up the undef location number, which gets truncated. return LocNo == INT_MAX ? 
UndefLocNo : LocNo; } - bool wasIndirect() const { return WasIndirect; } - bool isUndef() const { return locNo() == UndefLocNo; } + bool getWasIndirect() const { return WasIndirect; } + bool isUndef() const { return getLocNo() == UndefLocNo; } - DbgValueLocation changeLocNo(unsigned NewLocNo) const { - return DbgValueLocation(NewLocNo, WasIndirect); + DbgVariableValue changeLocNo(unsigned NewLocNo) const { + return DbgVariableValue(NewLocNo, WasIndirect, *Expression); } - friend inline bool operator==(const DbgValueLocation &LHS, - const DbgValueLocation &RHS) { - return LHS.LocNo == RHS.LocNo && LHS.WasIndirect == RHS.WasIndirect; + friend inline bool operator==(const DbgVariableValue &LHS, + const DbgVariableValue &RHS) { + return LHS.LocNo == RHS.LocNo && LHS.WasIndirect == RHS.WasIndirect && + LHS.Expression == RHS.Expression; } - friend inline bool operator!=(const DbgValueLocation &LHS, - const DbgValueLocation &RHS) { + friend inline bool operator!=(const DbgVariableValue &LHS, + const DbgVariableValue &RHS) { return !(LHS == RHS); } private: unsigned LocNo : 31; unsigned WasIndirect : 1; + const DIExpression *Expression = nullptr; }; -/// Map of where a user value is live, and its location. -using LocMap = IntervalMap<SlotIndex, DbgValueLocation, 4>; +/// Map of where a user value is live to that value. +using LocMap = IntervalMap<SlotIndex, DbgVariableValue, 4>; /// Map of stack slot offsets for spilled locations. /// Non-spilled locations are not added to the map. @@ -151,12 +154,12 @@ class LDVImpl; /// holds part of a user variable. The part is identified by a byte offset. /// /// UserValues are grouped into equivalence classes for easier searching. Two -/// user values are related if they refer to the same variable, or if they are -/// held by the same virtual register. The equivalence class is the transitive -/// closure of that relation. +/// user values are related if they are held by the same virtual register. The +/// equivalence class is the transitive closure of that relation. class UserValue { const DILocalVariable *Variable; ///< The debug info variable we are part of. - const DIExpression *Expression; ///< Any complex address expression. + /// The part of the variable we describe. + const Optional<DIExpression::FragmentInfo> Fragment; DebugLoc dl; ///< The debug location for the variable. This is ///< used by dwarf writer to find lexical scope. UserValue *leader; ///< Equivalence class leader. @@ -172,23 +175,24 @@ class UserValue { /// lexical scope. SmallSet<SlotIndex, 2> trimmedDefs; - /// Insert a DBG_VALUE into MBB at Idx for LocNo. + /// Insert a DBG_VALUE into MBB at Idx for DbgValue. void insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, - SlotIndex StopIdx, DbgValueLocation Loc, bool Spilled, - unsigned SpillOffset, LiveIntervals &LIS, + SlotIndex StopIdx, DbgVariableValue DbgValue, + bool Spilled, unsigned SpillOffset, LiveIntervals &LIS, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI); /// Replace OldLocNo ranges with NewRegs ranges where NewRegs /// is live. Returns true if any changes were made. - bool splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, + bool splitLocation(unsigned OldLocNo, ArrayRef<Register> NewRegs, LiveIntervals &LIS); public: /// Create a new UserValue. 
- UserValue(const DILocalVariable *var, const DIExpression *expr, DebugLoc L, + UserValue(const DILocalVariable *var, + Optional<DIExpression::FragmentInfo> Fragment, DebugLoc L, LocMap::Allocator &alloc) - : Variable(var), Expression(expr), dl(std::move(L)), leader(this), + : Variable(var), Fragment(Fragment), dl(std::move(L)), leader(this), locInts(alloc) {} /// Get the leader of this value's equivalence class. @@ -202,14 +206,6 @@ public: /// Return the next UserValue in the equivalence class. UserValue *getNext() const { return next; } - /// Does this UserValue match the parameters? - bool match(const DILocalVariable *Var, const DIExpression *Expr, - const DILocation *IA) const { - // FIXME: The fragment should be part of the equivalence class, but not - // other things in the expression like stack values. - return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA; - } - /// Merge equivalence classes. static UserValue *merge(UserValue *L1, UserValue *L2) { L2 = L2->getLeader(); @@ -267,33 +263,34 @@ public: void removeLocationIfUnused(unsigned LocNo) { // Bail out if LocNo still is used. for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) { - DbgValueLocation Loc = I.value(); - if (Loc.locNo() == LocNo) + DbgVariableValue DbgValue = I.value(); + if (DbgValue.getLocNo() == LocNo) return; } // Remove the entry in the locations vector, and adjust all references to // location numbers above the removed entry. locations.erase(locations.begin() + LocNo); for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) { - DbgValueLocation Loc = I.value(); - if (!Loc.isUndef() && Loc.locNo() > LocNo) - I.setValueUnchecked(Loc.changeLocNo(Loc.locNo() - 1)); + DbgVariableValue DbgValue = I.value(); + if (!DbgValue.isUndef() && DbgValue.getLocNo() > LocNo) + I.setValueUnchecked(DbgValue.changeLocNo(DbgValue.getLocNo() - 1)); } } /// Ensure that all virtual register locations are mapped. void mapVirtRegs(LDVImpl *LDV); - /// Add a definition point to this value. - void addDef(SlotIndex Idx, const MachineOperand &LocMO, bool IsIndirect) { - DbgValueLocation Loc(getLocationNo(LocMO), IsIndirect); - // Add a singular (Idx,Idx) -> Loc mapping. + /// Add a definition point to this user value. + void addDef(SlotIndex Idx, const MachineOperand &LocMO, bool IsIndirect, + const DIExpression &Expr) { + DbgVariableValue DbgValue(getLocationNo(LocMO), IsIndirect, Expr); + // Add a singular (Idx,Idx) -> value mapping. LocMap::iterator I = locInts.find(Idx); if (!I.valid() || I.start() != Idx) - I.insert(Idx, Idx.getNextSlot(), Loc); + I.insert(Idx, Idx.getNextSlot(), DbgValue); else // A later DBG_VALUE at the same SlotIndex overrides the old location. - I.setValue(Loc); + I.setValue(DbgValue); } /// Extend the current definition as far as possible down. @@ -305,29 +302,27 @@ public: /// data-flow analysis to propagate them beyond basic block boundaries. /// /// \param Idx Starting point for the definition. - /// \param Loc Location number to propagate. + /// \param DbgValue value to propagate. /// \param LR Restrict liveness to where LR has the value VNI. May be null. /// \param VNI When LR is not null, this is the value to restrict to. /// \param [out] Kills Append end points of VNI's live range to Kills. /// \param LIS Live intervals analysis. 
- void extendDef(SlotIndex Idx, DbgValueLocation Loc, - LiveRange *LR, const VNInfo *VNI, - SmallVectorImpl<SlotIndex> *Kills, + void extendDef(SlotIndex Idx, DbgVariableValue DbgValue, LiveRange *LR, + const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS); - /// The value in LI/LocNo may be copies to other registers. Determine if + /// The value in LI may be copies to other registers. Determine if /// any of the copies are available at the kill points, and add defs if /// possible. /// /// \param LI Scan for copies of the value in LI->reg. - /// \param LocNo Location number of LI->reg. - /// \param WasIndirect Indicates if the original use of LI->reg was indirect - /// \param Kills Points where the range of LocNo could be extended. - /// \param [in,out] NewDefs Append (Idx, LocNo) of inserted defs here. + /// \param DbgValue Location number of LI->reg, and DIExpression. + /// \param Kills Points where the range of DbgValue could be extended. + /// \param [in,out] NewDefs Append (Idx, DbgValue) of inserted defs here. void addDefsFromCopies( - LiveInterval *LI, unsigned LocNo, bool WasIndirect, + LiveInterval *LI, DbgVariableValue DbgValue, const SmallVectorImpl<SlotIndex> &Kills, - SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs, + SmallVectorImpl<std::pair<SlotIndex, DbgVariableValue>> &NewDefs, MachineRegisterInfo &MRI, LiveIntervals &LIS); /// Compute the live intervals of all locations after collecting all their @@ -337,7 +332,7 @@ public: /// Replace OldReg ranges with NewRegs ranges where NewRegs is /// live. Returns true if any changes were made. - bool splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, + bool splitRegister(Register OldReg, ArrayRef<Register> NewRegs, LiveIntervals &LIS); /// Rewrite virtual register locations according to the provided virtual @@ -377,7 +372,7 @@ public: : Label(label), dl(std::move(L)), loc(Idx) {} /// Does this UserLabel match the parameters? - bool match(const DILabel *L, const DILocation *IA, + bool matches(const DILabel *L, const DILocation *IA, const SlotIndex Index) const { return Label == L && dl->getInlinedAt() == IA && loc == Index; } @@ -415,16 +410,17 @@ class LDVImpl { using VRMap = DenseMap<unsigned, UserValue *>; VRMap virtRegToEqClass; - /// Map user variable to eq class leader. - using UVMap = DenseMap<const DILocalVariable *, UserValue *>; + /// Map to find existing UserValue instances. + using UVMap = DenseMap<DebugVariable, UserValue *>; UVMap userVarMap; /// Find or create a UserValue. - UserValue *getUserValue(const DILocalVariable *Var, const DIExpression *Expr, + UserValue *getUserValue(const DILocalVariable *Var, + Optional<DIExpression::FragmentInfo> Fragment, const DebugLoc &DL); /// Find the EC leader for VirtReg or null. - UserValue *lookupVirtReg(unsigned VirtReg); + UserValue *lookupVirtReg(Register VirtReg); /// Add DBG_VALUE instruction to our maps. /// @@ -474,10 +470,10 @@ public: } /// Map virtual register to an equivalence class. - void mapVirtReg(unsigned VirtReg, UserValue *EC); + void mapVirtReg(Register VirtReg, UserValue *EC); /// Replace all references to OldReg with NewRegs. - void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs); + void splitRegister(Register OldReg, ArrayRef<Register> NewRegs); /// Recreate DBG_VALUE instruction from data structures. 
void emitDebugValues(VirtRegMap *VRM); @@ -544,8 +540,8 @@ void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { if (I.value().isUndef()) OS << "undef"; else { - OS << I.value().locNo(); - if (I.value().wasIndirect()) + OS << I.value().getLocNo(); + if (I.value().getWasIndirect()) OS << " ind"; } } @@ -583,30 +579,27 @@ void UserValue::mapVirtRegs(LDVImpl *LDV) { } UserValue *LDVImpl::getUserValue(const DILocalVariable *Var, - const DIExpression *Expr, const DebugLoc &DL) { - UserValue *&Leader = userVarMap[Var]; - if (Leader) { - UserValue *UV = Leader->getLeader(); - Leader = UV; - for (; UV; UV = UV->getNext()) - if (UV->match(Var, Expr, DL->getInlinedAt())) - return UV; + Optional<DIExpression::FragmentInfo> Fragment, + const DebugLoc &DL) { + // FIXME: Handle partially overlapping fragments. See + // https://reviews.llvm.org/D70121#1849741. + DebugVariable ID(Var, Fragment, DL->getInlinedAt()); + UserValue *&UV = userVarMap[ID]; + if (!UV) { + userValues.push_back( + std::make_unique<UserValue>(Var, Fragment, DL, allocator)); + UV = userValues.back().get(); } - - userValues.push_back( - std::make_unique<UserValue>(Var, Expr, DL, allocator)); - UserValue *UV = userValues.back().get(); - Leader = UserValue::merge(Leader, UV); return UV; } -void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) { +void LDVImpl::mapVirtReg(Register VirtReg, UserValue *EC) { assert(Register::isVirtualRegister(VirtReg) && "Only map VirtRegs"); UserValue *&Leader = virtRegToEqClass[VirtReg]; Leader = UserValue::merge(Leader, EC); } -UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) { +UserValue *LDVImpl::lookupVirtReg(Register VirtReg) { if (UserValue *UV = virtRegToEqClass.lookup(VirtReg)) return UV->getLeader(); return nullptr; @@ -615,8 +608,8 @@ UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) { bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { // DBG_VALUE loc, offset, variable if (MI.getNumOperands() != 4 || - !(MI.getOperand(1).isReg() || MI.getOperand(1).isImm()) || - !MI.getOperand(2).isMetadata()) { + !(MI.getDebugOffset().isReg() || MI.getDebugOffset().isImm()) || + !MI.getDebugVariableOp().isMetadata()) { LLVM_DEBUG(dbgs() << "Can't handle " << MI); return false; } @@ -629,9 +622,9 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { // (and if the machine verifier is improved to catch this), then these checks // could be removed or replaced by asserts. bool Discard = false; - if (MI.getOperand(0).isReg() && - Register::isVirtualRegister(MI.getOperand(0).getReg())) { - const Register Reg = MI.getOperand(0).getReg(); + if (MI.getDebugOperand(0).isReg() && + Register::isVirtualRegister(MI.getDebugOperand(0).getReg())) { + const Register Reg = MI.getDebugOperand(0).getReg(); if (!LIS->hasInterval(Reg)) { // The DBG_VALUE is described by a virtual register that does not have a // live interval. Discard the DBG_VALUE. @@ -655,19 +648,19 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { } // Get or create the UserValue for (variable,offset) here. 
- bool IsIndirect = MI.getOperand(1).isImm(); + bool IsIndirect = MI.isDebugOffsetImm(); if (IsIndirect) - assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset"); + assert(MI.getDebugOffset().getImm() == 0 && + "DBG_VALUE with nonzero offset"); const DILocalVariable *Var = MI.getDebugVariable(); const DIExpression *Expr = MI.getDebugExpression(); - UserValue *UV = - getUserValue(Var, Expr, MI.getDebugLoc()); + UserValue *UV = getUserValue(Var, Expr->getFragmentInfo(), MI.getDebugLoc()); if (!Discard) - UV->addDef(Idx, MI.getOperand(0), IsIndirect); + UV->addDef(Idx, MI.getDebugOperand(0), IsIndirect, *Expr); else { MachineOperand MO = MachineOperand::CreateReg(0U, false); MO.setIsDebug(); - UV->addDef(Idx, MO, false); + UV->addDef(Idx, MO, false, *Expr); } return true; } @@ -684,7 +677,7 @@ bool LDVImpl::handleDebugLabel(MachineInstr &MI, SlotIndex Idx) { const DebugLoc &DL = MI.getDebugLoc(); bool Found = false; for (auto const &L : userLabels) { - if (L->match(Label, DL->getInlinedAt(), Idx)) { + if (L->matches(Label, DL->getInlinedAt(), Idx)) { Found = true; break; } @@ -730,7 +723,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { return Changed; } -void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR, +void UserValue::extendDef(SlotIndex Idx, DbgVariableValue DbgValue, LiveRange *LR, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS) { SlotIndex Start = Idx; @@ -757,7 +750,7 @@ void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR, if (I.valid() && I.start() <= Start) { // Stop when meeting a different location or an already extended interval. Start = Start.getNextSlot(); - if (I.value() != Loc || I.stop() != Start) + if (I.value() != DbgValue || I.stop() != Start) return; // This is a one-slot placeholder. Just skip it. ++I; @@ -771,13 +764,13 @@ void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR, Kills->push_back(Stop); if (Start < Stop) - I.insert(Start, Stop, Loc); + I.insert(Start, Stop, DbgValue); } void UserValue::addDefsFromCopies( - LiveInterval *LI, unsigned LocNo, bool WasIndirect, + LiveInterval *LI, DbgVariableValue DbgValue, const SmallVectorImpl<SlotIndex> &Kills, - SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs, + SmallVectorImpl<std::pair<SlotIndex, DbgVariableValue>> &NewDefs, MachineRegisterInfo &MRI, LiveIntervals &LIS) { if (Kills.empty()) return; @@ -801,11 +794,11 @@ void UserValue::addDefsFromCopies( if (!Register::isVirtualRegister(DstReg)) continue; - // Is LocNo extended to reach this copy? If not, another def may be blocking - // it, or we are looking at a wrong value of LI. + // Is the value extended to reach this copy? If not, another def may be + // blocking it, or we are looking at a wrong value of LI. 
SlotIndex Idx = LIS.getInstructionIndex(*MI); LocMap::iterator I = locInts.find(Idx.getRegSlot(true)); - if (!I.valid() || I.value().locNo() != LocNo) + if (!I.valid() || I.value() != DbgValue) continue; if (!LIS.hasInterval(DstReg)) @@ -839,9 +832,9 @@ void UserValue::addDefsFromCopies( MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def); assert(CopyMI && CopyMI->isCopy() && "Bad copy value"); unsigned LocNo = getLocationNo(CopyMI->getOperand(0)); - DbgValueLocation NewLoc(LocNo, WasIndirect); - I.insert(Idx, Idx.getNextSlot(), NewLoc); - NewDefs.push_back(std::make_pair(Idx, NewLoc)); + DbgVariableValue NewValue = DbgValue.changeLocNo(LocNo); + I.insert(Idx, Idx.getNextSlot(), NewValue); + NewDefs.push_back(std::make_pair(Idx, NewValue)); break; } } @@ -850,7 +843,7 @@ void UserValue::addDefsFromCopies( void UserValue::computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, LiveIntervals &LIS, LexicalScopes &LS) { - SmallVector<std::pair<SlotIndex, DbgValueLocation>, 16> Defs; + SmallVector<std::pair<SlotIndex, DbgVariableValue>, 16> Defs; // Collect all defs to be extended (Skipping undefs). for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) @@ -860,11 +853,11 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, // Extend all defs, and possibly add new ones along the way. for (unsigned i = 0; i != Defs.size(); ++i) { SlotIndex Idx = Defs[i].first; - DbgValueLocation Loc = Defs[i].second; - const MachineOperand &LocMO = locations[Loc.locNo()]; + DbgVariableValue DbgValue = Defs[i].second; + const MachineOperand &LocMO = locations[DbgValue.getLocNo()]; if (!LocMO.isReg()) { - extendDef(Idx, Loc, nullptr, nullptr, nullptr, LIS); + extendDef(Idx, DbgValue, nullptr, nullptr, nullptr, LIS); continue; } @@ -877,7 +870,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, VNI = LI->getVNInfoAt(Idx); } SmallVector<SlotIndex, 16> Kills; - extendDef(Idx, Loc, LI, VNI, &Kills, LIS); + extendDef(Idx, DbgValue, LI, VNI, &Kills, LIS); // FIXME: Handle sub-registers in addDefsFromCopies. The problem is that // if the original location for example is %vreg0:sub_hi, and we find a // full register copy in addDefsFromCopies (at the moment it only handles @@ -887,8 +880,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, // sub-register in that regclass). For now, simply skip handling copies if // a sub-register is involved. if (LI && !LocMO.getSubReg()) - addDefsFromCopies(LI, Loc.locNo(), Loc.wasIndirect(), Kills, Defs, MRI, - LIS); + addDefsFromCopies(LI, DbgValue, Kills, Defs, MRI, LIS); continue; } @@ -930,7 +922,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, // I.stop() >= PrevEnd. Check for overlap. if (PrevEnd && I.start() < PrevEnd) { SlotIndex IStop = I.stop(); - DbgValueLocation Loc = I.value(); + DbgVariableValue DbgValue = I.value(); // Stop overlaps previous end - trim the end of the interval to // the scope range. @@ -941,7 +933,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, // current) range create a new interval for the remainder (which // may be further trimmed). if (RStart < IStop) - I.insert(RStart, IStop, Loc); + I.insert(RStart, IStop, DbgValue); } // Advance I so that I.stop() >= RStart, and check for overlap. 
@@ -1038,7 +1030,7 @@ LiveDebugVariables::~LiveDebugVariables() { //===----------------------------------------------------------------------===// bool -UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, +UserValue::splitLocation(unsigned OldLocNo, ArrayRef<Register> NewRegs, LiveIntervals& LIS) { LLVM_DEBUG({ dbgs() << "Splitting Loc" << OldLocNo << '\t'; @@ -1068,7 +1060,8 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, break; // Now LII->end > LocMapI.start(). Do we have an overlap? - if (LocMapI.value().locNo() == OldLocNo && LII->start < LocMapI.stop()) { + if (LocMapI.value().getLocNo() == OldLocNo && + LII->start < LocMapI.stop()) { // Overlapping correct location. Allocate NewLocNo now. if (NewLocNo == UndefLocNo) { MachineOperand MO = MachineOperand::CreateReg(LI->reg, false); @@ -1078,8 +1071,8 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, } SlotIndex LStart = LocMapI.start(); - SlotIndex LStop = LocMapI.stop(); - DbgValueLocation OldLoc = LocMapI.value(); + SlotIndex LStop = LocMapI.stop(); + DbgVariableValue OldDbgValue = LocMapI.value(); // Trim LocMapI down to the LII overlap. if (LStart < LII->start) @@ -1088,17 +1081,17 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, LocMapI.setStopUnchecked(LII->end); // Change the value in the overlap. This may trigger coalescing. - LocMapI.setValue(OldLoc.changeLocNo(NewLocNo)); + LocMapI.setValue(OldDbgValue.changeLocNo(NewLocNo)); - // Re-insert any removed OldLocNo ranges. + // Re-insert any removed OldDbgValue ranges. if (LStart < LocMapI.start()) { - LocMapI.insert(LStart, LocMapI.start(), OldLoc); + LocMapI.insert(LStart, LocMapI.start(), OldDbgValue); ++LocMapI; assert(LocMapI.valid() && "Unexpected coalescing"); } if (LStop > LocMapI.stop()) { ++LocMapI; - LocMapI.insert(LII->end, LStop, OldLoc); + LocMapI.insert(LII->end, LStop, OldDbgValue); --LocMapI; } } @@ -1124,6 +1117,9 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, // register to the spill slot). So for a while we can have locations that map // to virtual registers that have been removed from both the MachineFunction // and from LiveIntervals. + // + // We may also just be using the location for a value with a different + // expression. removeLocationIfUnused(OldLocNo); LLVM_DEBUG({ @@ -1134,7 +1130,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, } bool -UserValue::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, +UserValue::splitRegister(Register OldReg, ArrayRef<Register> NewRegs, LiveIntervals &LIS) { bool DidChange = false; // Split locations referring to OldReg. 
Iterate backwards so splitLocation can @@ -1149,7 +1145,7 @@ UserValue::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, return DidChange; } -void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs) { +void LDVImpl::splitRegister(Register OldReg, ArrayRef<Register> NewRegs) { bool DidChange = false; for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext()) DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS); @@ -1164,7 +1160,7 @@ void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs) { } void LiveDebugVariables:: -splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, LiveIntervals &LIS) { +splitRegister(Register OldReg, ArrayRef<Register> NewRegs, LiveIntervals &LIS) { if (pImpl) static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs); } @@ -1242,13 +1238,13 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const MachineFunction &MF, // DBG_VALUE intervals with different vregs that were allocated to the same // physical register. for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) { - DbgValueLocation Loc = I.value(); + DbgVariableValue DbgValue = I.value(); // Undef values don't exist in locations (and thus not in LocNoMap either) // so skip over them. See getLocationNo(). - if (Loc.isUndef()) + if (DbgValue.isUndef()) continue; - unsigned NewLocNo = LocNoMap[Loc.locNo()]; - I.setValueUnchecked(Loc.changeLocNo(NewLocNo)); + unsigned NewLocNo = LocNoMap[DbgValue.getLocNo()]; + I.setValueUnchecked(DbgValue.changeLocNo(NewLocNo)); I.setStart(I.start()); } } @@ -1302,7 +1298,7 @@ findNextInsertLocation(MachineBasicBlock *MBB, } void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, - SlotIndex StopIdx, DbgValueLocation Loc, + SlotIndex StopIdx, DbgVariableValue DbgValue, bool Spilled, unsigned SpillOffset, LiveIntervals &LIS, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) { @@ -1312,12 +1308,14 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, MachineBasicBlock::iterator I = findInsertLocation(MBB, StartIdx, LIS); // Undef values don't exist in locations so create new "noreg" register MOs // for them. See getLocationNo(). - MachineOperand MO = !Loc.isUndef() ? - locations[Loc.locNo()] : - MachineOperand::CreateReg(/* Reg */ 0, /* isDef */ false, /* isImp */ false, - /* isKill */ false, /* isDead */ false, - /* isUndef */ false, /* isEarlyClobber */ false, - /* SubReg */ 0, /* isDebug */ true); + MachineOperand MO = + !DbgValue.isUndef() + ? locations[DbgValue.getLocNo()] + : MachineOperand::CreateReg( + /* Reg */ 0, /* isDef */ false, /* isImp */ false, + /* isKill */ false, /* isDead */ false, + /* isUndef */ false, /* isEarlyClobber */ false, + /* SubReg */ 0, /* isDebug */ true); ++NumInsertedDebugValues; @@ -1329,9 +1327,9 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, // original DBG_VALUE was indirect, we need to add DW_OP_deref to indicate // that the original virtual register was a pointer. Also, add the stack slot // offset for the spilled register to the expression. 
- const DIExpression *Expr = Expression; + const DIExpression *Expr = DbgValue.getExpression(); uint8_t DIExprFlags = DIExpression::ApplyOffset; - bool IsIndirect = Loc.wasIndirect(); + bool IsIndirect = DbgValue.getWasIndirect(); if (Spilled) { if (IsIndirect) DIExprFlags |= DIExpression::DerefAfter; @@ -1370,9 +1368,9 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, for (LocMap::const_iterator I = locInts.begin(); I.valid();) { SlotIndex Start = I.start(); SlotIndex Stop = I.stop(); - DbgValueLocation Loc = I.value(); - auto SpillIt = - !Loc.isUndef() ? SpillOffsets.find(Loc.locNo()) : SpillOffsets.end(); + DbgVariableValue DbgValue = I.value(); + auto SpillIt = !DbgValue.isUndef() ? SpillOffsets.find(DbgValue.getLocNo()) + : SpillOffsets.end(); bool Spilled = SpillIt != SpillOffsets.end(); unsigned SpillOffset = Spilled ? SpillIt->second : 0; @@ -1382,13 +1380,14 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, if (trimmedDefs.count(Start)) Start = Start.getPrevIndex(); - LLVM_DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << Loc.locNo()); + LLVM_DEBUG(dbgs() << "\t[" << Start << ';' << Stop + << "):" << DbgValue.getLocNo()); MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator(); SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB); LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd); - insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, SpillOffset, LIS, TII, - TRI); + insertDebugValue(&*MBB, Start, Stop, DbgValue, Spilled, SpillOffset, LIS, + TII, TRI); // This interval may span multiple basic blocks. // Insert a DBG_VALUE into each one. while (Stop > MBBEnd) { @@ -1398,8 +1397,8 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, break; MBBEnd = LIS.getMBBEndIdx(&*MBB); LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd); - insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, SpillOffset, LIS, TII, - TRI); + insertDebugValue(&*MBB, Start, Stop, DbgValue, Spilled, SpillOffset, LIS, + TII, TRI); } LLVM_DEBUG(dbgs() << '\n'); if (MBB == MFEnd) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h index 0cbe10c6a422..74e738ec3e56 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h @@ -41,7 +41,7 @@ public: /// splitRegister - Move any user variables in OldReg to the live ranges in /// NewRegs where they are live. Mark the values as unavailable where no new /// register is live. - void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, + void splitRegister(Register OldReg, ArrayRef<Register> NewRegs, LiveIntervals &LIS); /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp new file mode 100644 index 000000000000..30c2d74a71c5 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp @@ -0,0 +1,205 @@ +//===- LiveIntervalCalc.cpp - Calculate live interval --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation of the LiveIntervalCalc class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LiveIntervalCalc.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <iterator> +#include <tuple> +#include <utility> + +using namespace llvm; + +#define DEBUG_TYPE "regalloc" + +// Reserve an address that indicates a value that is known to be "undef". +static VNInfo UndefVNI(0xbad, SlotIndex()); + +static void createDeadDef(SlotIndexes &Indexes, VNInfo::Allocator &Alloc, + LiveRange &LR, const MachineOperand &MO) { + const MachineInstr &MI = *MO.getParent(); + SlotIndex DefIdx = + Indexes.getInstructionIndex(MI).getRegSlot(MO.isEarlyClobber()); + + // Create the def in LR. This may find an existing def. + LR.createDeadDef(DefIdx, Alloc); +} + +void LiveIntervalCalc::calculate(LiveInterval &LI, bool TrackSubRegs) { + const MachineRegisterInfo *MRI = getRegInfo(); + SlotIndexes *Indexes = getIndexes(); + VNInfo::Allocator *Alloc = getVNAlloc(); + + assert(MRI && Indexes && "call reset() first"); + + // Step 1: Create minimal live segments for every definition of Reg. + // Visit all def operands. If the same instruction has multiple defs of Reg, + // createDeadDef() will deduplicate. + const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); + unsigned Reg = LI.reg; + for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) { + if (!MO.isDef() && !MO.readsReg()) + continue; + + unsigned SubReg = MO.getSubReg(); + if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) { + LaneBitmask SubMask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg) + : MRI->getMaxLaneMaskForVReg(Reg); + // If this is the first time we see a subregister def, initialize + // subranges by creating a copy of the main range. + if (!LI.hasSubRanges() && !LI.empty()) { + LaneBitmask ClassMask = MRI->getMaxLaneMaskForVReg(Reg); + LI.createSubRangeFrom(*Alloc, ClassMask, LI); + } + + LI.refineSubRanges( + *Alloc, SubMask, + [&MO, Indexes, Alloc](LiveInterval::SubRange &SR) { + if (MO.isDef()) + createDeadDef(*Indexes, *Alloc, SR, MO); + }, + *Indexes, TRI); + } + + // Create the def in the main liverange. We do not have to do this if + // subranges are tracked as we recreate the main range later in this case. + if (MO.isDef() && !LI.hasSubRanges()) + createDeadDef(*Indexes, *Alloc, LI, MO); + } + + // We may have created empty live ranges for partially undefined uses, we + // can't keep them because we won't find defs in them later. + LI.removeEmptySubRanges(); + + const MachineFunction *MF = getMachineFunction(); + MachineDominatorTree *DomTree = getDomTree(); + // Step 2: Extend live segments to all uses, constructing SSA form as + // necessary. 
+ if (LI.hasSubRanges()) { + for (LiveInterval::SubRange &S : LI.subranges()) { + LiveIntervalCalc SubLIC; + SubLIC.reset(MF, Indexes, DomTree, Alloc); + SubLIC.extendToUses(S, Reg, S.LaneMask, &LI); + } + LI.clear(); + constructMainRangeFromSubranges(LI); + } else { + resetLiveOutMap(); + extendToUses(LI, Reg, LaneBitmask::getAll()); + } +} + +void LiveIntervalCalc::constructMainRangeFromSubranges(LiveInterval &LI) { + // First create dead defs at all defs found in subranges. + LiveRange &MainRange = LI; + assert(MainRange.segments.empty() && MainRange.valnos.empty() && + "Expect empty main liverange"); + + VNInfo::Allocator *Alloc = getVNAlloc(); + for (const LiveInterval::SubRange &SR : LI.subranges()) { + for (const VNInfo *VNI : SR.valnos) { + if (!VNI->isUnused() && !VNI->isPHIDef()) + MainRange.createDeadDef(VNI->def, *Alloc); + } + } + resetLiveOutMap(); + extendToUses(MainRange, LI.reg, LaneBitmask::getAll(), &LI); +} + +void LiveIntervalCalc::createDeadDefs(LiveRange &LR, Register Reg) { + const MachineRegisterInfo *MRI = getRegInfo(); + SlotIndexes *Indexes = getIndexes(); + VNInfo::Allocator *Alloc = getVNAlloc(); + assert(MRI && Indexes && "call reset() first"); + + // Visit all def operands. If the same instruction has multiple defs of Reg, + // LR.createDeadDef() will deduplicate. + for (MachineOperand &MO : MRI->def_operands(Reg)) + createDeadDef(*Indexes, *Alloc, LR, MO); +} + +void LiveIntervalCalc::extendToUses(LiveRange &LR, Register Reg, + LaneBitmask Mask, LiveInterval *LI) { + const MachineRegisterInfo *MRI = getRegInfo(); + SlotIndexes *Indexes = getIndexes(); + SmallVector<SlotIndex, 4> Undefs; + if (LI != nullptr) + LI->computeSubRangeUndefs(Undefs, Mask, *MRI, *Indexes); + + // Visit all operands that read Reg. This may include partial defs. + bool IsSubRange = !Mask.all(); + const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); + for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) { + // Clear all kill flags. They will be reinserted after register allocation + // by LiveIntervals::addKillFlags(). + if (MO.isUse()) + MO.setIsKill(false); + // MO::readsReg returns "true" for subregister defs. This is for keeping + // liveness of the entire register (i.e. for the main range of the live + // interval). For subranges, definitions of non-overlapping subregisters + // do not count as uses. + if (!MO.readsReg() || (IsSubRange && MO.isDef())) + continue; + + unsigned SubReg = MO.getSubReg(); + if (SubReg != 0) { + LaneBitmask SLM = TRI.getSubRegIndexLaneMask(SubReg); + if (MO.isDef()) + SLM = ~SLM; + // Ignore uses not reading the current (sub)range. + if ((SLM & Mask).none()) + continue; + } + + // Determine the actual place of the use. + const MachineInstr *MI = MO.getParent(); + unsigned OpNo = (&MO - &MI->getOperand(0)); + SlotIndex UseIdx; + if (MI->isPHI()) { + assert(!MO.isDef() && "Cannot handle PHI def of partial register."); + // The actual place where a phi operand is used is the end of the pred + // MBB. PHI operands are paired: (Reg, PredMBB). + UseIdx = Indexes->getMBBEndIdx(MI->getOperand(OpNo + 1).getMBB()); + } else { + // Check for early-clobber redefs. + bool isEarlyClobber = false; + unsigned DefIdx; + if (MO.isDef()) + isEarlyClobber = MO.isEarlyClobber(); + else if (MI->isRegTiedToDefOperand(OpNo, &DefIdx)) { + // FIXME: This would be a lot easier if tied early-clobber uses also + // had an early-clobber flag. 
+ isEarlyClobber = MI->getOperand(DefIdx).isEarlyClobber(); + } + UseIdx = Indexes->getInstructionIndex(*MI).getRegSlot(isEarlyClobber); + } + + // MI is reading Reg. We may have visited MI before if it happens to be + // reading Reg multiple times. That is OK, extend() is idempotent. + extend(LR, UseIdx, Reg, Undefs); + } +}
\ No newline at end of file diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp index 9c80282bc59e..e8ee0599e1a2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp @@ -21,7 +21,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveRangeCalc.h" +#include "llvm/CodeGen/LiveIntervalCalc.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -101,9 +101,7 @@ LiveIntervals::LiveIntervals() : MachineFunctionPass(ID) { initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); } -LiveIntervals::~LiveIntervals() { - delete LRCalc; -} +LiveIntervals::~LiveIntervals() { delete LICalc; } void LiveIntervals::releaseMemory() { // Free the live intervals themselves. @@ -131,8 +129,8 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { Indexes = &getAnalysis<SlotIndexes>(); DomTree = &getAnalysis<MachineDominatorTree>(); - if (!LRCalc) - LRCalc = new LiveRangeCalc(); + if (!LICalc) + LICalc = new LiveIntervalCalc(); // Allocate space for all virtual registers. VirtRegIntervals.resize(MRI->getNumVirtRegs()); @@ -192,10 +190,10 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) { /// Compute the live interval of a virtual register, based on defs and uses. bool LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { - assert(LRCalc && "LRCalc not initialized."); + assert(LICalc && "LICalc not initialized."); assert(LI.empty() && "Should only compute empty intervals."); - LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); - LRCalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg)); + LICalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); + LICalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg)); return computeDeadValues(LI, nullptr); } @@ -266,8 +264,8 @@ void LiveIntervals::computeRegMasks() { /// aliasing registers. The range should be empty, or contain only dead /// phi-defs from ABI blocks. void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) { - assert(LRCalc && "LRCalc not initialized."); - LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); + assert(LICalc && "LICalc not initialized."); + LICalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); // The physregs aliasing Unit are the roots and their super-registers. // Create all values as dead defs before extending to uses. Note that roots @@ -281,7 +279,7 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) { Super.isValid(); ++Super) { unsigned Reg = *Super; if (!MRI->reg_empty(Reg)) - LRCalc->createDeadDefs(LR, Reg); + LICalc->createDeadDefs(LR, Reg); // A register unit is considered reserved if all its roots and all their // super registers are reserved. 
if (!MRI->isReserved(Reg)) @@ -300,7 +298,7 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) { Super.isValid(); ++Super) { unsigned Reg = *Super; if (!MRI->reg_empty(Reg)) - LRCalc->extendToUses(LR, Reg); + LICalc->extendToUses(LR, Reg); } } } @@ -623,10 +621,10 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) { void LiveIntervals::extendToIndices(LiveRange &LR, ArrayRef<SlotIndex> Indices, ArrayRef<SlotIndex> Undefs) { - assert(LRCalc && "LRCalc not initialized."); - LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); + assert(LICalc && "LICalc not initialized."); + LICalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); for (SlotIndex Idx : Indices) - LRCalc->extend(LR, Idx, /*PhysReg=*/0, Undefs); + LICalc->extend(LR, Idx, /*PhysReg=*/0, Undefs); } void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill, @@ -1013,6 +1011,20 @@ public: } } updateRange(LI, Reg, LaneBitmask::getNone()); + // If main range has a hole and we are moving a subrange use across + // the hole updateRange() cannot properly handle it since it only + // gets the LiveRange and not the whole LiveInterval. As a result + // we may end up with a main range not covering all subranges. + // This is extremely rare case, so let's check and reconstruct the + // main range. + for (LiveInterval::SubRange &S : LI.subranges()) { + if (LI.covers(S)) + continue; + LI.clear(); + LIS.constructMainRangeFromSubranges(LI); + break; + } + continue; } @@ -1344,7 +1356,7 @@ private: OldIdxOut->start = NewIdxDef; OldIdxVNI->def = NewIdxDef; if (OldIdxIn != E && SlotIndex::isEarlierInstr(NewIdx, OldIdxIn->end)) - OldIdxIn->end = NewIdx.getRegSlot(); + OldIdxIn->end = NewIdxDef; } } else if (OldIdxIn != E && SlotIndex::isEarlierInstr(NewIdxOut->start, NewIdx) @@ -1480,13 +1492,43 @@ void LiveIntervals::handleMove(MachineInstr &MI, bool UpdateFlags) { HME.updateAllRanges(&MI); } -void LiveIntervals::handleMoveIntoBundle(MachineInstr &MI, - MachineInstr &BundleStart, - bool UpdateFlags) { - SlotIndex OldIndex = Indexes->getInstructionIndex(MI); - SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart); - HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags); - HME.updateAllRanges(&MI); +void LiveIntervals::handleMoveIntoNewBundle(MachineInstr &BundleStart, + bool UpdateFlags) { + assert((BundleStart.getOpcode() == TargetOpcode::BUNDLE) && + "Bundle start is not a bundle"); + SmallVector<SlotIndex, 16> ToProcess; + const SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(BundleStart); + auto BundleEnd = getBundleEnd(BundleStart.getIterator()); + + auto I = BundleStart.getIterator(); + I++; + while (I != BundleEnd) { + if (!Indexes->hasIndex(*I)) + continue; + SlotIndex OldIndex = Indexes->getInstructionIndex(*I, true); + ToProcess.push_back(OldIndex); + Indexes->removeMachineInstrFromMaps(*I, true); + I++; + } + for (SlotIndex OldIndex : ToProcess) { + HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags); + HME.updateAllRanges(&BundleStart); + } + + // Fix up dead defs + const SlotIndex Index = getInstructionIndex(BundleStart); + for (unsigned Idx = 0, E = BundleStart.getNumOperands(); Idx != E; ++Idx) { + MachineOperand &MO = BundleStart.getOperand(Idx); + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + if (Reg.isVirtual() && hasInterval(Reg) && !MO.isUndef()) { + LiveInterval &LI = getInterval(Reg); + LiveQueryResult LRQ = LI.Query(Index); + if (LRQ.isDeadDef()) + MO.setIsDead(); + } + } } void 
LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, @@ -1587,7 +1629,7 @@ void LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, - ArrayRef<unsigned> OrigRegs) { + ArrayRef<Register> OrigRegs) { // Find anchor points, which are at the beginning/end of blocks or at // instructions that already have indexes. while (Begin != MBB->begin() && !Indexes->hasIndex(*Begin)) @@ -1618,8 +1660,8 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, } } - for (unsigned Reg : OrigRegs) { - if (!Register::isVirtualRegister(Reg)) + for (Register Reg : OrigRegs) { + if (!Reg.isVirtual()) continue; LiveInterval &LI = getInterval(Reg); @@ -1678,7 +1720,7 @@ void LiveIntervals::splitSeparateComponents(LiveInterval &LI, } void LiveIntervals::constructMainRangeFromSubranges(LiveInterval &LI) { - assert(LRCalc && "LRCalc not initialized."); - LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); - LRCalc->constructMainRangeFromSubranges(LI); + assert(LICalc && "LICalc not initialized."); + LICalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); + LICalc->constructMainRangeFromSubranges(LI); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp index 7a5cffca3470..547970e7ab5d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -276,6 +276,7 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); // We walk through the block backwards and start with the live outs. LivePhysRegs LiveRegs; @@ -294,6 +295,18 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) { assert(Register::isPhysicalRegister(Reg)); bool IsNotLive = LiveRegs.available(MRI, Reg); + + // Special-case return instructions for cases when a return is not + // the last instruction in the block. + if (MI.isReturn() && MFI.isCalleeSavedInfoValid()) { + for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) { + if (Info.getReg() == Reg) { + IsNotLive = !Info.isRestored(); + break; + } + } + } + MO->setIsDead(IsNotLive); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp index 24b57be0da00..e9c9b70d29a9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp @@ -1,4 +1,4 @@ -//===- LiveRangeCalc.cpp - Calculate live ranges --------------------------===// +//===- LiveRangeCalc.cpp - Calculate live ranges -------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -61,158 +61,6 @@ void LiveRangeCalc::reset(const MachineFunction *mf, LiveIn.clear(); } -static void createDeadDef(SlotIndexes &Indexes, VNInfo::Allocator &Alloc, - LiveRange &LR, const MachineOperand &MO) { - const MachineInstr &MI = *MO.getParent(); - SlotIndex DefIdx = - Indexes.getInstructionIndex(MI).getRegSlot(MO.isEarlyClobber()); - - // Create the def in LR. This may find an existing def. 
- LR.createDeadDef(DefIdx, Alloc); -} - -void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) { - assert(MRI && Indexes && "call reset() first"); - - // Step 1: Create minimal live segments for every definition of Reg. - // Visit all def operands. If the same instruction has multiple defs of Reg, - // createDeadDef() will deduplicate. - const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); - unsigned Reg = LI.reg; - for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) { - if (!MO.isDef() && !MO.readsReg()) - continue; - - unsigned SubReg = MO.getSubReg(); - if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) { - LaneBitmask SubMask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg) - : MRI->getMaxLaneMaskForVReg(Reg); - // If this is the first time we see a subregister def, initialize - // subranges by creating a copy of the main range. - if (!LI.hasSubRanges() && !LI.empty()) { - LaneBitmask ClassMask = MRI->getMaxLaneMaskForVReg(Reg); - LI.createSubRangeFrom(*Alloc, ClassMask, LI); - } - - LI.refineSubRanges(*Alloc, SubMask, - [&MO, this](LiveInterval::SubRange &SR) { - if (MO.isDef()) - createDeadDef(*Indexes, *Alloc, SR, MO); - }, - *Indexes, TRI); - } - - // Create the def in the main liverange. We do not have to do this if - // subranges are tracked as we recreate the main range later in this case. - if (MO.isDef() && !LI.hasSubRanges()) - createDeadDef(*Indexes, *Alloc, LI, MO); - } - - // We may have created empty live ranges for partially undefined uses, we - // can't keep them because we won't find defs in them later. - LI.removeEmptySubRanges(); - - // Step 2: Extend live segments to all uses, constructing SSA form as - // necessary. - if (LI.hasSubRanges()) { - for (LiveInterval::SubRange &S : LI.subranges()) { - LiveRangeCalc SubLRC; - SubLRC.reset(MF, Indexes, DomTree, Alloc); - SubLRC.extendToUses(S, Reg, S.LaneMask, &LI); - } - LI.clear(); - constructMainRangeFromSubranges(LI); - } else { - resetLiveOutMap(); - extendToUses(LI, Reg, LaneBitmask::getAll()); - } -} - -void LiveRangeCalc::constructMainRangeFromSubranges(LiveInterval &LI) { - // First create dead defs at all defs found in subranges. - LiveRange &MainRange = LI; - assert(MainRange.segments.empty() && MainRange.valnos.empty() && - "Expect empty main liverange"); - - for (const LiveInterval::SubRange &SR : LI.subranges()) { - for (const VNInfo *VNI : SR.valnos) { - if (!VNI->isUnused() && !VNI->isPHIDef()) - MainRange.createDeadDef(VNI->def, *Alloc); - } - } - resetLiveOutMap(); - extendToUses(MainRange, LI.reg, LaneBitmask::getAll(), &LI); -} - -void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) { - assert(MRI && Indexes && "call reset() first"); - - // Visit all def operands. If the same instruction has multiple defs of Reg, - // LR.createDeadDef() will deduplicate. - for (MachineOperand &MO : MRI->def_operands(Reg)) - createDeadDef(*Indexes, *Alloc, LR, MO); -} - -void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask Mask, - LiveInterval *LI) { - SmallVector<SlotIndex, 4> Undefs; - if (LI != nullptr) - LI->computeSubRangeUndefs(Undefs, Mask, *MRI, *Indexes); - - // Visit all operands that read Reg. This may include partial defs. - bool IsSubRange = !Mask.all(); - const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); - for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) { - // Clear all kill flags. They will be reinserted after register allocation - // by LiveIntervals::addKillFlags(). 
- if (MO.isUse()) - MO.setIsKill(false); - // MO::readsReg returns "true" for subregister defs. This is for keeping - // liveness of the entire register (i.e. for the main range of the live - // interval). For subranges, definitions of non-overlapping subregisters - // do not count as uses. - if (!MO.readsReg() || (IsSubRange && MO.isDef())) - continue; - - unsigned SubReg = MO.getSubReg(); - if (SubReg != 0) { - LaneBitmask SLM = TRI.getSubRegIndexLaneMask(SubReg); - if (MO.isDef()) - SLM = ~SLM; - // Ignore uses not reading the current (sub)range. - if ((SLM & Mask).none()) - continue; - } - - // Determine the actual place of the use. - const MachineInstr *MI = MO.getParent(); - unsigned OpNo = (&MO - &MI->getOperand(0)); - SlotIndex UseIdx; - if (MI->isPHI()) { - assert(!MO.isDef() && "Cannot handle PHI def of partial register."); - // The actual place where a phi operand is used is the end of the pred - // MBB. PHI operands are paired: (Reg, PredMBB). - UseIdx = Indexes->getMBBEndIdx(MI->getOperand(OpNo+1).getMBB()); - } else { - // Check for early-clobber redefs. - bool isEarlyClobber = false; - unsigned DefIdx; - if (MO.isDef()) - isEarlyClobber = MO.isEarlyClobber(); - else if (MI->isRegTiedToDefOperand(OpNo, &DefIdx)) { - // FIXME: This would be a lot easier if tied early-clobber uses also - // had an early-clobber flag. - isEarlyClobber = MI->getOperand(DefIdx).isEarlyClobber(); - } - UseIdx = Indexes->getInstructionIndex(*MI).getRegSlot(isEarlyClobber); - } - - // MI is reading Reg. We may have visited MI before if it happens to be - // reading Reg multiple times. That is OK, extend() is idempotent. - extend(LR, UseIdx, Reg, Undefs); - } -} - void LiveRangeCalc::updateFromLiveIns() { LiveRangeUpdater Updater; for (const LiveInBlock &I : LiveIn) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp index 34bac082bcd7..9de77c19a23a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -12,6 +12,7 @@ #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -30,7 +31,7 @@ STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE"); void LiveRangeEdit::Delegate::anchor() { } -LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg, +LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(Register OldReg, bool createSubRanges) { Register VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); if (VRM) @@ -51,7 +52,7 @@ LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg, return LI; } -unsigned LiveRangeEdit::createFrom(unsigned OldReg) { +Register LiveRangeEdit::createFrom(Register OldReg) { Register VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); if (VRM) { VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); @@ -69,7 +70,7 @@ unsigned LiveRangeEdit::createFrom(unsigned OldReg) { bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, - AliasAnalysis *aa) { + AAResults *aa) { assert(DefMI && "Missing instruction"); ScannedRemattable = true; if (!TII.isTriviallyReMaterializable(*DefMI, aa)) @@ -78,7 +79,7 @@ bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, return true; } -void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) { +void 
LiveRangeEdit::scanRemattable(AAResults *aa) { for (VNInfo *VNI : getParent().valnos) { if (VNI->isUnused()) continue; @@ -95,7 +96,7 @@ void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) { ScannedRemattable = true; } -bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) { +bool LiveRangeEdit::anyRematerializable(AAResults *aa) { if (!ScannedRemattable) scanRemattable(aa); return !Remattable.empty(); @@ -177,7 +178,7 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB, return LIS.getSlotIndexes()->insertMachineInstrInMaps(*MI, Late).getRegSlot(); } -void LiveRangeEdit::eraseVirtReg(unsigned Reg) { +void LiveRangeEdit::eraseVirtReg(Register Reg) { if (TheDelegate && TheDelegate->LRE_CanEraseVirtReg(Reg)) LIS.removeInterval(Reg); } @@ -231,7 +232,8 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, return false; LLVM_DEBUG(dbgs() << " folded: " << *FoldMI); LIS.ReplaceMachineInstrInMaps(*UseMI, *FoldMI); - if (UseMI->isCall()) + // Update the call site info. + if (UseMI->shouldUpdateCallSiteInfo()) UseMI->getMF()->moveCallSiteInfo(UseMI, FoldMI); UseMI->eraseFromParent(); DefMI->addRegisterDead(LI->reg, nullptr); @@ -258,7 +260,7 @@ bool LiveRangeEdit::useIsKill(const LiveInterval &LI, /// Find all live intervals that need to shrink, then remove the instruction. void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, - AliasAnalysis *AA) { + AAResults *AA) { assert(MI->allDefsAreDead() && "Def isn't really dead"); SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot(); @@ -381,7 +383,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, // Erase any virtregs that are now empty and unused. There may be <undef> // uses around. Keep the empty live range in that case. for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) { - unsigned Reg = RegsToErase[i]; + Register Reg = RegsToErase[i]; if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) { ToShrink.remove(&LIS.getInterval(Reg)); eraseVirtReg(Reg); @@ -390,8 +392,8 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, } void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead, - ArrayRef<unsigned> RegsBeingSpilled, - AliasAnalysis *AA) { + ArrayRef<Register> RegsBeingSpilled, + AAResults *AA) { ToShrinkSet ToShrink; for (;;) { @@ -450,8 +452,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead, // Keep track of new virtual registers created via // MachineRegisterInfo::createVirtualRegister. 
void -LiveRangeEdit::MRI_NoteNewVirtualRegister(unsigned VReg) -{ +LiveRangeEdit::MRI_NoteNewVirtualRegister(Register VReg) { if (VRM) VRM->grow(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp index 2ebc8d7576d1..26439a656917 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp @@ -234,8 +234,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { MachineBasicBlock::iterator EndIter = std::next(MI.getIterator()); if (MI.getOperand(0).isReg()) for (; EndIter != MBB.end() && EndIter->isDebugValue() && - EndIter->getOperand(0).isReg() && - EndIter->getOperand(0).getReg() == MI.getOperand(0).getReg(); + EndIter->getDebugOperandForReg(MI.getOperand(0).getReg()); ++EndIter, ++Next) IOM[&*EndIter] = NewOrder; MBB.splice(I, &MBB, MI.getIterator(), EndIter); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp index 9bd55c6f750f..6610491dd111 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp @@ -806,3 +806,31 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB, VI.AliveBlocks.set(NumNew); } } + +/// addNewBlock - Add a new basic block BB as an empty succcessor to DomBB. All +/// variables that are live out of DomBB will be marked as passing live through +/// BB. LiveInSets[BB] is *not* updated (because it is not needed during +/// PHIElimination). +void LiveVariables::addNewBlock(MachineBasicBlock *BB, + MachineBasicBlock *DomBB, + MachineBasicBlock *SuccBB, + std::vector<SparseBitVector<>> &LiveInSets) { + const unsigned NumNew = BB->getNumber(); + + SparseBitVector<> &BV = LiveInSets[SuccBB->getNumber()]; + for (auto R = BV.begin(), E = BV.end(); R != E; R++) { + unsigned VirtReg = Register::index2VirtReg(*R); + LiveVariables::VarInfo &VI = getVarInfo(VirtReg); + VI.AliveBlocks.set(NumNew); + } + // All registers used by PHI nodes in SuccBB must be live through BB. + for (MachineBasicBlock::iterator BBI = SuccBB->begin(), + BBE = SuccBB->end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) + if (BBI->getOperand(i + 1).getMBB() == BB && + BBI->getOperand(i).readsReg()) + getVarInfo(BBI->getOperand(i).getReg()) + .AliveBlocks.set(NumNew); + } +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index 5022726dc70a..6c5ef0255a08 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -79,11 +79,11 @@ namespace { using StackObjSet = SmallSetVector<int, 8>; void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, int64_t &Offset, - bool StackGrowsDown, unsigned &MaxAlign); + bool StackGrowsDown, Align &MaxAlign); void AssignProtectedObjSet(const StackObjSet &UnassignedObjs, SmallSet<int, 16> &ProtectedObjs, MachineFrameInfo &MFI, bool StackGrowsDown, - int64_t &Offset, unsigned &MaxAlign); + int64_t &Offset, Align &MaxAlign); void calculateFrameObjectOffsets(MachineFunction &Fn); bool insertFrameReferenceRegisters(MachineFunction &Fn); @@ -140,22 +140,21 @@ bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { } /// AdjustStackOffset - Helper function used to adjust the stack frame offset. 
-void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo &MFI, - int FrameIdx, int64_t &Offset, - bool StackGrowsDown, - unsigned &MaxAlign) { +void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, + int64_t &Offset, bool StackGrowsDown, + Align &MaxAlign) { // If the stack grows down, add the object size to find the lowest address. if (StackGrowsDown) Offset += MFI.getObjectSize(FrameIdx); - unsigned Align = MFI.getObjectAlignment(FrameIdx); + Align Alignment = MFI.getObjectAlign(FrameIdx); // If the alignment of this object is greater than that of the stack, then // increase the stack alignment to match. - MaxAlign = std::max(MaxAlign, Align); + MaxAlign = std::max(MaxAlign, Alignment); // Adjust to alignment boundary. - Offset = (Offset + Align - 1) / Align * Align; + Offset = alignTo(Offset, Alignment); int64_t LocalOffset = StackGrowsDown ? -Offset : Offset; LLVM_DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset " @@ -173,11 +172,10 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo &MFI, /// AssignProtectedObjSet - Helper function to assign large stack objects (i.e., /// those required to be close to the Stack Protector) to stack offsets. -void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs, - SmallSet<int, 16> &ProtectedObjs, - MachineFrameInfo &MFI, - bool StackGrowsDown, int64_t &Offset, - unsigned &MaxAlign) { +void LocalStackSlotPass::AssignProtectedObjSet( + const StackObjSet &UnassignedObjs, SmallSet<int, 16> &ProtectedObjs, + MachineFrameInfo &MFI, bool StackGrowsDown, int64_t &Offset, + Align &MaxAlign) { for (StackObjSet::const_iterator I = UnassignedObjs.begin(), E = UnassignedObjs.end(); I != E; ++I) { int i = *I; @@ -195,7 +193,7 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; int64_t Offset = 0; - unsigned MaxAlign = 0; + Align MaxAlign; // Make sure that the stack protector comes before the local variables on the // stack. 
@@ -262,7 +260,7 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // Remember how big this blob of stack space is MFI.setLocalFrameSize(Offset); - MFI.setLocalFrameMaxAlign(assumeAligned(MaxAlign)); + MFI.setLocalFrameMaxAlign(MaxAlign); } static inline bool diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp index 40dfa696a2b9..33752a1f9230 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp @@ -19,7 +19,7 @@ using namespace llvm; LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) { if (auto VTy = dyn_cast<VectorType>(&Ty)) { - auto NumElements = VTy->getNumElements(); + auto NumElements = cast<FixedVectorType>(VTy)->getNumElements(); LLT ScalarTy = getLLTForType(*VTy->getElementType(), DL); if (NumElements == 1) return ScalarTy; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp index 529d478756d4..36b863178b47 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp @@ -21,6 +21,7 @@ #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -127,12 +128,7 @@ bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) { return true; Type *GVType = GV->getValueType(); - unsigned GVAlignment = GV->getAlignment(); - if (!GVAlignment) { - // When LLVM IL declares a variable without alignment, use - // the ABI default alignment for the type. - GVAlignment = DL.getABITypeAlignment(GVType); - } + Align GVAlignment = DL.getValueOrABITypeAlignment(GV->getAlign(), GVType); // Define "__emutls_t.*" if there is InitValue GlobalVariable *EmuTlsTmplVar = nullptr; @@ -143,21 +139,20 @@ bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) { assert(EmuTlsTmplVar && "Failed to create emualted TLS initializer"); EmuTlsTmplVar->setConstant(true); EmuTlsTmplVar->setInitializer(const_cast<Constant*>(InitValue)); - EmuTlsTmplVar->setAlignment(Align(GVAlignment)); + EmuTlsTmplVar->setAlignment(GVAlignment); copyLinkageVisibility(M, GV, EmuTlsTmplVar); } // Define "__emutls_v.*" with initializer and alignment. Constant *ElementValues[4] = { ConstantInt::get(WordType, DL.getTypeStoreSize(GVType)), - ConstantInt::get(WordType, GVAlignment), - NullPtr, EmuTlsTmplVar ? EmuTlsTmplVar : NullPtr - }; + ConstantInt::get(WordType, GVAlignment.value()), NullPtr, + EmuTlsTmplVar ? EmuTlsTmplVar : NullPtr}; ArrayRef<Constant*> ElementValueArray(ElementValues, 4); EmuTlsVar->setInitializer( ConstantStruct::get(EmuTlsVarType, ElementValueArray)); - Align MaxAlignment(std::max(DL.getABITypeAlignment(WordType), - DL.getABITypeAlignment(VoidPtrType))); + Align MaxAlignment = + std::max(DL.getABITypeAlign(WordType), DL.getABITypeAlign(VoidPtrType)); EmuTlsVar->setAlignment(MaxAlignment); return true; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp new file mode 100644 index 000000000000..5110f75ebb42 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp @@ -0,0 +1,49 @@ +//===- MBFIWrapper.cpp - MachineBlockFrequencyInfo wrapper ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class keeps track of branch frequencies of newly created blocks and +// tail-merged blocks. Used by the TailDuplication and MachineBlockPlacement. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MBFIWrapper.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" + +using namespace llvm; + +BlockFrequency MBFIWrapper::getBlockFreq(const MachineBasicBlock *MBB) const { + auto I = MergedBBFreq.find(MBB); + + if (I != MergedBBFreq.end()) + return I->second; + + return MBFI.getBlockFreq(MBB); +} + +void MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB, + BlockFrequency F) { + MergedBBFreq[MBB] = F; +} + +raw_ostream & MBFIWrapper::printBlockFreq(raw_ostream &OS, + const MachineBasicBlock *MBB) const { + return MBFI.printBlockFreq(OS, getBlockFreq(MBB)); +} + +raw_ostream & MBFIWrapper::printBlockFreq(raw_ostream &OS, + const BlockFrequency Freq) const { + return MBFI.printBlockFreq(OS, Freq); +} + +void MBFIWrapper::view(const Twine &Name, bool isSimple) { + MBFI.view(Name, isSimple); +} + +uint64_t MBFIWrapper::getEntryFreq() const { + return MBFI.getEntryFreq(); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp index 5ef907b88315..9eddb8626f60 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -107,7 +107,7 @@ rescheduleLexographically(std::vector<MachineInstr *> instructions, II->print(OS); OS.flush(); - // Trim the assignment, or start from the begining in the case of a store. + // Trim the assignment, or start from the beginning in the case of a store. const size_t i = S.find("="); StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II}); } @@ -138,7 +138,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, bool Changed = false; - // Calculates the distance of MI from the begining of its parent BB. + // Calculates the distance of MI from the beginning of its parent BB. auto getInstrIdx = [](const MachineInstr &MI) { unsigned i = 0; for (auto &CurMI : *MI.getParent()) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 5976f5da1569..98af46dc4872 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -11,12 +11,9 @@ //===----------------------------------------------------------------------===// #include "MILexer.h" -#include "llvm/ADT/APSInt.h" #include "llvm/ADT/None.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include <algorithm> #include <cassert> @@ -104,6 +101,20 @@ static Cursor skipComment(Cursor C) { return C; } +/// Machine operands can have comments, enclosed between /* and */. +/// This eats up all tokens, including /* and */. 
+static Cursor skipMachineOperandComment(Cursor C) { + if (C.peek() != '/' || C.peek(1) != '*') + return C; + + while (C.peek() != '*' || C.peek(1) != '/') + C.advance(); + + C.advance(); + C.advance(); + return C; +} + /// Return true if the given character satisfies the following regular /// expression: [-a-zA-Z$._0-9] static bool isIdentifierChar(char C) { @@ -246,6 +257,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("liveout", MIToken::kw_liveout) .Case("address-taken", MIToken::kw_address_taken) .Case("landing-pad", MIToken::kw_landing_pad) + .Case("ehfunclet-entry", MIToken::kw_ehfunclet_entry) .Case("liveins", MIToken::kw_liveins) .Case("successors", MIToken::kw_successors) .Case("floatpred", MIToken::kw_floatpred) @@ -254,6 +266,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol) .Case("post-instr-symbol", MIToken::kw_post_instr_symbol) .Case("heap-alloc-marker", MIToken::kw_heap_alloc_marker) + .Case("bbsections", MIToken::kw_bbsections) .Case("unknown-size", MIToken::kw_unknown_size) .Default(MIToken::Identifier); } @@ -518,7 +531,7 @@ static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token, } static bool isValidHexFloatingPointPrefix(char C) { - return C == 'H' || C == 'K' || C == 'L' || C == 'M'; + return C == 'H' || C == 'K' || C == 'L' || C == 'M' || C == 'R'; } static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) { @@ -691,6 +704,8 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, return C.remaining(); } + C = skipMachineOperandComment(C); + if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) return R.remaining(); if (Cursor R = maybeLexIdentifier(C, Token)) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h index aaffe4a4c91b..ef16da94d21b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -15,7 +15,6 @@ #define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H #include "llvm/ADT/APSInt.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include <string> @@ -114,6 +113,7 @@ struct MIToken { kw_liveout, kw_address_taken, kw_landing_pad, + kw_ehfunclet_entry, kw_liveins, kw_successors, kw_floatpred, @@ -122,6 +122,7 @@ struct MIToken { kw_pre_instr_symbol, kw_post_instr_symbol, kw_heap_alloc_marker, + kw_bbsections, kw_unknown_size, // Named metadata keywords diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 076ca943788b..ded31cd08fb5 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -122,7 +122,7 @@ void PerTargetMIParsingState::initNames2Regs() { } bool PerTargetMIParsingState::getRegisterByName(StringRef RegName, - unsigned &Reg) { + Register &Reg) { initNames2Regs(); auto RegInfo = Names2Regs.find(RegName); if (RegInfo == Names2Regs.end()) @@ -321,7 +321,7 @@ PerFunctionMIParsingState::PerFunctionMIParsingState(MachineFunction &MF, : MF(MF), SM(&SM), IRSlots(IRSlots), Target(T) { } -VRegInfo &PerFunctionMIParsingState::getVRegInfo(unsigned Num) { +VRegInfo &PerFunctionMIParsingState::getVRegInfo(Register Num) { auto I = VRegInfos.insert(std::make_pair(Num, nullptr)); if (I.second) { MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -426,9 +426,9 @@ public: bool 
parseBasicBlocks(); bool parse(MachineInstr *&MI); bool parseStandaloneMBB(MachineBasicBlock *&MBB); - bool parseStandaloneNamedRegister(unsigned &Reg); + bool parseStandaloneNamedRegister(Register &Reg); bool parseStandaloneVirtualRegister(VRegInfo *&Info); - bool parseStandaloneRegister(unsigned &Reg); + bool parseStandaloneRegister(Register &Reg); bool parseStandaloneStackObject(int &FI); bool parseStandaloneMDNode(MDNode *&Node); @@ -439,10 +439,10 @@ public: bool parseBasicBlockLiveins(MachineBasicBlock &MBB); bool parseBasicBlockSuccessors(MachineBasicBlock &MBB); - bool parseNamedRegister(unsigned &Reg); + bool parseNamedRegister(Register &Reg); bool parseVirtualRegister(VRegInfo *&Info); bool parseNamedVirtualRegister(VRegInfo *&Info); - bool parseRegister(unsigned &Reg, VRegInfo *&VRegInfo); + bool parseRegister(Register &Reg, VRegInfo *&VRegInfo); bool parseRegisterFlag(unsigned &Flags); bool parseRegisterClassOrBank(VRegInfo &RegInfo); bool parseSubRegisterIndex(unsigned &SubReg); @@ -474,7 +474,7 @@ public: bool parseDILocation(MDNode *&Expr); bool parseMetadataOperand(MachineOperand &Dest); bool parseCFIOffset(int &Offset); - bool parseCFIRegister(unsigned &Reg); + bool parseCFIRegister(Register &Reg); bool parseCFIEscapeValues(std::string& Values); bool parseCFIOperand(MachineOperand &Dest); bool parseIRBlock(BasicBlock *&BB, const Function &F); @@ -495,6 +495,7 @@ public: bool parseOffset(int64_t &Offset); bool parseAlignment(unsigned &Alignment); bool parseAddrspace(unsigned &Addrspace); + bool parseSectionID(Optional<MBBSectionID> &SID); bool parseOperandsOffset(MachineOperand &Op); bool parseIRValue(const Value *&V); bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags); @@ -562,7 +563,7 @@ MIParser::MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, void MIParser::lex(unsigned SkipChar) { CurrentSource = lexMIToken( - CurrentSource.data() + SkipChar, Token, + CurrentSource.slice(SkipChar, StringRef::npos), Token, [this](StringRef::iterator Loc, const Twine &Msg) { error(Loc, Msg); }); } @@ -619,6 +620,28 @@ bool MIParser::consumeIfPresent(MIToken::TokenKind TokenKind) { return true; } +// Parse Machine Basic Block Section ID. 
+bool MIParser::parseSectionID(Optional<MBBSectionID> &SID) { + assert(Token.is(MIToken::kw_bbsections)); + lex(); + if (Token.is(MIToken::IntegerLiteral)) { + unsigned Value = 0; + if (getUnsigned(Value)) + return error("Unknown Section ID"); + SID = MBBSectionID{Value}; + } else { + const StringRef &S = Token.stringValue(); + if (S == "Exception") + SID = MBBSectionID::ExceptionSectionID; + else if (S == "Cold") + SID = MBBSectionID::ColdSectionID; + else + return error("Unknown Section ID"); + } + lex(); + return false; +} + bool MIParser::parseBasicBlockDefinition( DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) { assert(Token.is(MIToken::MachineBasicBlockLabel)); @@ -630,6 +653,8 @@ bool MIParser::parseBasicBlockDefinition( lex(); bool HasAddressTaken = false; bool IsLandingPad = false; + bool IsEHFuncletEntry = false; + Optional<MBBSectionID> SectionID; unsigned Alignment = 0; BasicBlock *BB = nullptr; if (consumeIfPresent(MIToken::lparen)) { @@ -644,6 +669,10 @@ bool MIParser::parseBasicBlockDefinition( IsLandingPad = true; lex(); break; + case MIToken::kw_ehfunclet_entry: + IsEHFuncletEntry = true; + lex(); + break; case MIToken::kw_align: if (parseAlignment(Alignment)) return true; @@ -654,6 +683,10 @@ bool MIParser::parseBasicBlockDefinition( return true; lex(); break; + case MIToken::kw_bbsections: + if (parseSectionID(SectionID)) + return true; + break; default: break; } @@ -683,6 +716,11 @@ bool MIParser::parseBasicBlockDefinition( if (HasAddressTaken) MBB->setHasAddressTaken(); MBB->setIsEHPad(IsLandingPad); + MBB->setIsEHFuncletEntry(IsEHFuncletEntry); + if (SectionID.hasValue()) { + MBB->setSectionID(SectionID.getValue()); + MF.setBBSectionsType(BasicBlockSection::List); + } return false; } @@ -740,7 +778,7 @@ bool MIParser::parseBasicBlockLiveins(MachineBasicBlock &MBB) { do { if (Token.isNot(MIToken::NamedRegister)) return error("expected a named register"); - unsigned Reg = 0; + Register Reg; if (parseNamedRegister(Reg)) return true; lex(); @@ -750,10 +788,10 @@ bool MIParser::parseBasicBlockLiveins(MachineBasicBlock &MBB) { if (Token.isNot(MIToken::IntegerLiteral) && Token.isNot(MIToken::HexLiteral)) return error("expected a lane mask"); - static_assert(sizeof(LaneBitmask::Type) == sizeof(unsigned), + static_assert(sizeof(LaneBitmask::Type) == sizeof(uint64_t), "Use correct get-function for lane mask"); LaneBitmask::Type V; - if (getUnsigned(V)) + if (getUint64(V)) return error("invalid lane mask value"); Mask = LaneBitmask(V); lex(); @@ -1048,7 +1086,7 @@ bool MIParser::parseStandaloneMBB(MachineBasicBlock *&MBB) { return false; } -bool MIParser::parseStandaloneNamedRegister(unsigned &Reg) { +bool MIParser::parseStandaloneNamedRegister(Register &Reg) { lex(); if (Token.isNot(MIToken::NamedRegister)) return error("expected a named register"); @@ -1072,7 +1110,7 @@ bool MIParser::parseStandaloneVirtualRegister(VRegInfo *&Info) { return false; } -bool MIParser::parseStandaloneRegister(unsigned &Reg) { +bool MIParser::parseStandaloneRegister(Register &Reg) { lex(); if (Token.isNot(MIToken::NamedRegister) && Token.isNot(MIToken::VirtualRegister)) @@ -1123,7 +1161,7 @@ static const char *printImplicitRegisterFlag(const MachineOperand &MO) { } static std::string getRegisterName(const TargetRegisterInfo *TRI, - unsigned Reg) { + Register Reg) { assert(Register::isPhysicalRegister(Reg) && "expected phys reg"); return StringRef(TRI->getName(Reg)).lower(); } @@ -1223,7 +1261,7 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { return false; } -bool 
MIParser::parseNamedRegister(unsigned &Reg) { +bool MIParser::parseNamedRegister(Register &Reg) { assert(Token.is(MIToken::NamedRegister) && "Needs NamedRegister token"); StringRef Name = Token.stringValue(); if (PFS.Target.getRegisterByName(Name, Reg)) @@ -1251,7 +1289,7 @@ bool MIParser::parseVirtualRegister(VRegInfo *&Info) { return false; } -bool MIParser::parseRegister(unsigned &Reg, VRegInfo *&Info) { +bool MIParser::parseRegister(Register &Reg, VRegInfo *&Info) { switch (Token.kind()) { case MIToken::underscore: Reg = 0; @@ -1445,7 +1483,7 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest, } if (!Token.isRegister()) return error("expected a register after register flags"); - unsigned Reg; + Register Reg; VRegInfo *RegInfo; if (parseRegister(Reg, RegInfo)) return true; @@ -2138,10 +2176,10 @@ bool MIParser::parseCFIOffset(int &Offset) { return false; } -bool MIParser::parseCFIRegister(unsigned &Reg) { +bool MIParser::parseCFIRegister(Register &Reg) { if (Token.isNot(MIToken::NamedRegister)) return error("expected a cfi register"); - unsigned LLVMReg; + Register LLVMReg; if (parseNamedRegister(LLVMReg)) return true; const auto *TRI = MF.getSubtarget().getRegisterInfo(); @@ -2173,7 +2211,7 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) { auto Kind = Token.kind(); lex(); int Offset; - unsigned Reg; + Register Reg; unsigned CFIIndex; switch (Kind) { case MIToken::kw_cfi_same_value: @@ -2204,9 +2242,8 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) { case MIToken::kw_cfi_def_cfa_offset: if (parseCFIOffset(Offset)) return true; - // NB: MCCFIInstruction::createDefCfaOffset negates the offset. - CFIIndex = MF.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, -Offset)); + CFIIndex = + MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset)); break; case MIToken::kw_cfi_adjust_cfa_offset: if (parseCFIOffset(Offset)) @@ -2218,9 +2255,8 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) { if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || parseCFIOffset(Offset)) return true; - // NB: MCCFIInstruction::createDefCfa negates the offset. 
CFIIndex = - MF.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset)); + MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, Offset)); break; case MIToken::kw_cfi_remember_state: CFIIndex = MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr)); @@ -2239,7 +2275,7 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) { CFIIndex = MF.addFrameInst(MCCFIInstruction::createUndefined(nullptr, Reg)); break; case MIToken::kw_cfi_register: { - unsigned Reg2; + Register Reg2; if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || parseCFIRegister(Reg2)) return true; @@ -2334,7 +2370,7 @@ bool MIParser::parseIntrinsicOperand(MachineOperand &Dest) { if (Token.isNot(MIToken::NamedGlobalValue)) return error("expected syntax intrinsic(@llvm.whatever)"); - std::string Name = Token.stringValue(); + std::string Name = std::string(Token.stringValue()); lex(); if (expectAndConsume(MIToken::rparen)) @@ -2469,7 +2505,7 @@ bool MIParser::parseCustomRegisterMaskOperand(MachineOperand &Dest) { while (true) { if (Token.isNot(MIToken::NamedRegister)) return error("expected a named register"); - unsigned Reg; + Register Reg; if (parseNamedRegister(Reg)) return true; lex(); @@ -2495,7 +2531,7 @@ bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) { while (true) { if (Token.isNot(MIToken::NamedRegister)) return error("expected a named register"); - unsigned Reg; + Register Reg; if (parseNamedRegister(Reg)) return true; lex(); @@ -3060,8 +3096,8 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { } if (expectAndConsume(MIToken::rparen)) return true; - Dest = MF.getMachineMemOperand(Ptr, Flags, Size, BaseAlignment, AAInfo, Range, - SSID, Order, FailureOrder); + Dest = MF.getMachineMemOperand(Ptr, Flags, Size, Align(BaseAlignment), AAInfo, + Range, SSID, Order, FailureOrder); return false; } @@ -3149,7 +3185,7 @@ MCSymbol *MIParser::getOrCreateMCSymbol(StringRef Name) { bool MIParser::parseStringConstant(std::string &Result) { if (Token.isNot(MIToken::StringConstant)) return error("expected string constant"); - Result = Token.stringValue(); + Result = std::string(Token.stringValue()); lex(); return false; } @@ -3172,13 +3208,13 @@ bool llvm::parseMBBReference(PerFunctionMIParsingState &PFS, } bool llvm::parseRegisterReference(PerFunctionMIParsingState &PFS, - unsigned &Reg, StringRef Src, + Register &Reg, StringRef Src, SMDiagnostic &Error) { return MIParser(PFS, Error, Src).parseStandaloneRegister(Reg); } bool llvm::parseNamedRegisterReference(PerFunctionMIParsingState &PFS, - unsigned &Reg, StringRef Src, + Register &Reg, StringRef Src, SMDiagnostic &Error) { return MIParser(PFS, Error, Src).parseStandaloneNamedRegister(Reg); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 10157c746b46..2e0b0e745e9e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -93,7 +93,8 @@ public: /// file. /// /// Return null if an error occurred. - std::unique_ptr<Module> parseIRModule(); + std::unique_ptr<Module> + parseIRModule(DataLayoutCallbackTy DataLayoutCallback); /// Create an empty function with the given name. 
Function *createDummyFunction(StringRef Name, Module &M); @@ -216,13 +217,17 @@ void MIRParserImpl::reportDiagnostic(const SMDiagnostic &Diag) { Context.diagnose(DiagnosticInfoMIRParser(Kind, Diag)); } -std::unique_ptr<Module> MIRParserImpl::parseIRModule() { +std::unique_ptr<Module> +MIRParserImpl::parseIRModule(DataLayoutCallbackTy DataLayoutCallback) { if (!In.setCurrentDocument()) { if (In.error()) return nullptr; // Create an empty module when the MIR file is empty. NoMIRDocuments = true; - return std::make_unique<Module>(Filename, Context); + auto M = std::make_unique<Module>(Filename, Context); + if (auto LayoutOverride = DataLayoutCallback(M->getTargetTriple())) + M->setDataLayout(*LayoutOverride); + return M; } std::unique_ptr<Module> M; @@ -232,7 +237,7 @@ std::unique_ptr<Module> MIRParserImpl::parseIRModule() { dyn_cast_or_null<yaml::BlockScalarNode>(In.getCurrentNode())) { SMDiagnostic Error; M = parseAssembly(MemoryBufferRef(BSN->getValue(), Filename), Error, - Context, &IRSlots, /*UpgradeDebugInfo=*/false); + Context, &IRSlots, DataLayoutCallback); if (!M) { reportDiagnostic(diagFromBlockStringDiag(Error, BSN->getSourceRange())); return nullptr; @@ -243,6 +248,8 @@ std::unique_ptr<Module> MIRParserImpl::parseIRModule() { } else { // Create an new, empty module. M = std::make_unique<Module>(Filename, Context); + if (auto LayoutOverride = DataLayoutCallback(M->getTargetTriple())) + M->setDataLayout(*LayoutOverride); NoLLVMIR = true; } return M; @@ -375,17 +382,17 @@ bool MIRParserImpl::initializeCallSiteInfo( " is not a call instruction"); MachineFunction::CallSiteInfo CSInfo; for (auto ArgRegPair : YamlCSInfo.ArgForwardingRegs) { - unsigned Reg = 0; + Register Reg; if (parseNamedRegisterReference(PFS, Reg, ArgRegPair.Reg.Value, Error)) return error(Error, ArgRegPair.Reg.SourceRange); CSInfo.emplace_back(Reg, ArgRegPair.ArgNo); } - if (TM.Options.EnableDebugEntryValues) + if (TM.Options.EmitCallSiteInfo) MF.addCallArgsForwardingRegs(&*CallI, std::move(CSInfo)); } - if (YamlMF.CallSitesInfo.size() && !TM.Options.EnableDebugEntryValues) + if (YamlMF.CallSitesInfo.size() && !TM.Options.EmitCallSiteInfo) return error(Twine("Call site info provided but not used")); return false; } @@ -401,8 +408,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, Target.reset(new PerTargetMIParsingState(MF.getSubtarget())); } - if (YamlMF.Alignment) - MF.setAlignment(Align(YamlMF.Alignment)); + MF.setAlignment(YamlMF.Alignment.valueOrOne()); MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice); MF.setHasWinCFI(YamlMF.HasWinCFI); @@ -438,6 +444,14 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange)); return true; } + // Check Basic Block Section Flags. + if (MF.getTarget().getBBSectionsType() == BasicBlockSection::Labels) { + MF.createBBLabels(); + MF.setBBSectionsType(BasicBlockSection::Labels); + } else if (MF.hasBBSections()) { + MF.createBBLabels(); + MF.assignBeginEndSections(); + } PFS.SM = &SM; // Initialize the frame information after creating all the MBBs so that the @@ -550,10 +564,10 @@ bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS, // Parse the liveins. 
for (const auto &LiveIn : YamlMF.LiveIns) { - unsigned Reg = 0; + Register Reg; if (parseNamedRegisterReference(PFS, Reg, LiveIn.Register.Value, Error)) return error(Error, LiveIn.Register.SourceRange); - unsigned VReg = 0; + Register VReg; if (!LiveIn.VirtualRegister.Value.empty()) { VRegInfo *Info; if (parseVirtualRegisterReference(PFS, Info, LiveIn.VirtualRegister.Value, @@ -569,7 +583,7 @@ bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS, if (YamlMF.CalleeSavedRegisters) { SmallVector<MCPhysReg, 16> CalleeSavedRegisters; for (const auto &RegSource : YamlMF.CalleeSavedRegisters.getValue()) { - unsigned Reg = 0; + Register Reg; if (parseNamedRegisterReference(PFS, Reg, RegSource.Value, Error)) return error(Error, RegSource.SourceRange); CalleeSavedRegisters.push_back(Reg); @@ -587,7 +601,7 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS, bool Error = false; // Create VRegs auto populateVRegInfo = [&] (const VRegInfo &Info, Twine Name) { - unsigned Reg = Info.VReg; + Register Reg = Info.VReg; switch (Info.Kind) { case VRegInfo::UNKNOWN: error(Twine("Cannot determine class/bank of virtual register ") + @@ -646,7 +660,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, MFI.setStackSize(YamlMFI.StackSize); MFI.setOffsetAdjustment(YamlMFI.OffsetAdjustment); if (YamlMFI.MaxAlignment) - MFI.ensureMaxAlignment(YamlMFI.MaxAlignment); + MFI.ensureMaxAlignment(Align(YamlMFI.MaxAlignment)); MFI.setAdjustsStack(YamlMFI.AdjustsStack); MFI.setHasCalls(YamlMFI.HasCalls); if (YamlMFI.MaxCallFrameSize != ~0u) @@ -683,7 +697,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, return error(Object.ID.SourceRange.Start, Twine("StackID is not supported by target")); MFI.setStackID(ObjectIdx, Object.StackID); - MFI.setObjectAlignment(ObjectIdx, Object.Alignment); + MFI.setObjectAlignment(ObjectIdx, Object.Alignment.valueOrOne()); if (!PFS.FixedStackObjectSlots.insert(std::make_pair(Object.ID.Value, ObjectIdx)) .second) @@ -715,10 +729,11 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, return error(Object.ID.SourceRange.Start, Twine("StackID is not supported by target")); if (Object.Type == yaml::MachineStackObject::VariableSized) - ObjectIdx = MFI.CreateVariableSizedObject(Object.Alignment, Alloca); + ObjectIdx = + MFI.CreateVariableSizedObject(Object.Alignment.valueOrOne(), Alloca); else ObjectIdx = MFI.CreateStackObject( - Object.Size, Object.Alignment, + Object.Size, Object.Alignment.valueOrOne(), Object.Type == yaml::MachineStackObject::SpillSlot, Alloca, Object.StackID); MFI.setObjectOffset(ObjectIdx, Object.Offset); @@ -757,7 +772,7 @@ bool MIRParserImpl::parseCalleeSavedRegister(PerFunctionMIParsingState &PFS, const yaml::StringValue &RegisterSource, bool IsRestored, int FrameIdx) { if (RegisterSource.Value.empty()) return false; - unsigned Reg = 0; + Register Reg; SMDiagnostic Error; if (parseNamedRegisterReference(PFS, Reg, RegisterSource.Value, Error)) return error(Error, RegisterSource.SourceRange); @@ -830,10 +845,9 @@ bool MIRParserImpl::initializeConstantPool(PerFunctionMIParsingState &PFS, parseConstantValue(YamlConstant.Value.Value, Error, M)); if (!Value) return error(Error, YamlConstant.Value.SourceRange); - unsigned Alignment = - YamlConstant.Alignment - ? 
YamlConstant.Alignment - : M.getDataLayout().getPrefTypeAlignment(Value->getType()); + const Align PrefTypeAlign = + M.getDataLayout().getPrefTypeAlign(Value->getType()); + const Align Alignment = YamlConstant.Alignment.getValueOr(PrefTypeAlign); unsigned Index = ConstantPool.getConstantPoolIndex(Value, Alignment); if (!ConstantPoolSlots.insert(std::make_pair(YamlConstant.ID.Value, Index)) .second) @@ -926,8 +940,9 @@ MIRParser::MIRParser(std::unique_ptr<MIRParserImpl> Impl) MIRParser::~MIRParser() {} -std::unique_ptr<Module> MIRParser::parseIRModule() { - return Impl->parseIRModule(); +std::unique_ptr<Module> +MIRParser::parseIRModule(DataLayoutCallbackTy DataLayoutCallback) { + return Impl->parseIRModule(DataLayoutCallback); } bool MIRParser::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp index e8cd3d60ccb1..fa23df6288e9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp @@ -79,6 +79,9 @@ static cl::opt<bool> SimplifyMIR( "simplify-mir", cl::Hidden, cl::desc("Leave out unnecessary information when printing MIR")); +static cl::opt<bool> PrintLocations("mir-debug-loc", cl::Hidden, cl::init(true), + cl::desc("Print MIR debug-locations")); + namespace { /// This structure describes how to print out stack object references. @@ -162,8 +165,9 @@ public: void print(const MachineInstr &MI); void printStackObjectReference(int FrameIndex); void print(const MachineInstr &MI, unsigned OpIdx, - const TargetRegisterInfo *TRI, bool ShouldPrintRegisterTies, - LLT TypeToPrint, bool PrintDef = true); + const TargetRegisterInfo *TRI, const TargetInstrInfo *TII, + bool ShouldPrintRegisterTies, LLT TypeToPrint, + bool PrintDef = true); }; } // end namespace llvm @@ -197,7 +201,7 @@ void MIRPrinter::print(const MachineFunction &MF) { yaml::MachineFunction YamlMF; YamlMF.Name = MF.getName(); - YamlMF.Alignment = MF.getAlignment().value(); + YamlMF.Alignment = MF.getAlignment(); YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice(); YamlMF.HasWinCFI = MF.hasWinCFI(); @@ -333,7 +337,7 @@ void MIRPrinter::convert(ModuleSlotTracker &MST, YamlMFI.HasPatchPoint = MFI.hasPatchPoint(); YamlMFI.StackSize = MFI.getStackSize(); YamlMFI.OffsetAdjustment = MFI.getOffsetAdjustment(); - YamlMFI.MaxAlignment = MFI.getMaxAlignment(); + YamlMFI.MaxAlignment = MFI.getMaxAlign().value(); YamlMFI.AdjustsStack = MFI.adjustsStack(); YamlMFI.HasCalls = MFI.hasCalls(); YamlMFI.MaxCallFrameSize = MFI.isMaxCallFrameSizeComputed() @@ -372,7 +376,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, : yaml::FixedMachineStackObject::DefaultType; YamlObject.Offset = MFI.getObjectOffset(I); YamlObject.Size = MFI.getObjectSize(I); - YamlObject.Alignment = MFI.getObjectAlignment(I); + YamlObject.Alignment = MFI.getObjectAlign(I); YamlObject.StackID = (TargetStackID::Value)MFI.getStackID(I); YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I); YamlObject.IsAliased = MFI.isAliasedObjectIndex(I); @@ -390,8 +394,8 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, yaml::MachineStackObject YamlObject; YamlObject.ID = ID; if (const auto *Alloca = MFI.getObjectAllocation(I)) - YamlObject.Name.Value = - Alloca->hasName() ? Alloca->getName() : "<unnamed alloca>"; + YamlObject.Name.Value = std::string( + Alloca->hasName() ? Alloca->getName() : "<unnamed alloca>"); YamlObject.Type = MFI.isSpillSlotObjectIndex(I) ? 
yaml::MachineStackObject::SpillSlot : MFI.isVariableSizedObjectIndex(I) @@ -399,7 +403,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, : yaml::MachineStackObject::DefaultType; YamlObject.Offset = MFI.getObjectOffset(I); YamlObject.Size = MFI.getObjectSize(I); - YamlObject.Alignment = MFI.getObjectAlignment(I); + YamlObject.Alignment = MFI.getObjectAlign(I); YamlObject.StackID = (TargetStackID::Value)MFI.getStackID(I); YMF.StackObjects.push_back(YamlObject); @@ -513,7 +517,7 @@ void MIRPrinter::convert(yaml::MachineFunction &MF, yaml::MachineConstantPoolValue YamlConstant; YamlConstant.ID = ID++; YamlConstant.Value = StrOS.str(); - YamlConstant.Alignment = Constant.getAlignment(); + YamlConstant.Alignment = Constant.getAlign(); YamlConstant.IsTargetSpecific = Constant.isMachineConstantPoolEntry(); MF.Constants.push_back(YamlConstant); @@ -629,11 +633,31 @@ void MIPrinter::print(const MachineBasicBlock &MBB) { OS << "landing-pad"; HasAttributes = true; } - if (MBB.getAlignment() != Align::None()) { + if (MBB.isEHFuncletEntry()) { + OS << (HasAttributes ? ", " : " ("); + OS << "ehfunclet-entry"; + HasAttributes = true; + } + if (MBB.getAlignment() != Align(1)) { OS << (HasAttributes ? ", " : " ("); OS << "align " << MBB.getAlignment().value(); HasAttributes = true; } + if (MBB.getSectionID() != MBBSectionID(0)) { + OS << (HasAttributes ? ", " : " ("); + OS << "bbsections "; + switch (MBB.getSectionID().Type) { + case MBBSectionID::SectionType::Exception: + OS << "Exception"; + break; + case MBBSectionID::SectionType::Cold: + OS << "Cold"; + break; + default: + OS << MBB.getSectionID().Number; + } + HasAttributes = true; + } if (HasAttributes) OS << ")"; OS << ":\n"; @@ -721,7 +745,7 @@ void MIPrinter::print(const MachineInstr &MI) { ++I) { if (I) OS << ", "; - print(MI, I, TRI, ShouldPrintRegisterTies, + print(MI, I, TRI, TII, ShouldPrintRegisterTies, MI.getTypeToPrint(I, PrintedTypes, MRI), /*PrintDef=*/false); } @@ -754,6 +778,8 @@ void MIPrinter::print(const MachineInstr &MI) { OS << "exact "; if (MI.getFlag(MachineInstr::NoFPExcept)) OS << "nofpexcept "; + if (MI.getFlag(MachineInstr::NoMerge)) + OS << "nomerge "; OS << TII->getName(MI.getOpcode()); if (I < E) @@ -763,7 +789,7 @@ void MIPrinter::print(const MachineInstr &MI) { for (; I < E; ++I) { if (NeedComma) OS << ", "; - print(MI, I, TRI, ShouldPrintRegisterTies, + print(MI, I, TRI, TII, ShouldPrintRegisterTies, MI.getTypeToPrint(I, PrintedTypes, MRI)); NeedComma = true; } @@ -792,11 +818,13 @@ void MIPrinter::print(const MachineInstr &MI) { NeedComma = true; } - if (const DebugLoc &DL = MI.getDebugLoc()) { - if (NeedComma) - OS << ','; - OS << " debug-location "; - DL->printAsOperand(OS, MST); + if (PrintLocations) { + if (const DebugLoc &DL = MI.getDebugLoc()) { + if (NeedComma) + OS << ','; + OS << " debug-location "; + DL->printAsOperand(OS, MST); + } } if (!MI.memoperands_empty()) { @@ -822,11 +850,20 @@ void MIPrinter::printStackObjectReference(int FrameIndex) { Operand.Name); } +static std::string formatOperandComment(std::string Comment) { + if (Comment.empty()) + return Comment; + return std::string(" /* " + Comment + " */"); +} + void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx, const TargetRegisterInfo *TRI, + const TargetInstrInfo *TII, bool ShouldPrintRegisterTies, LLT TypeToPrint, bool PrintDef) { const MachineOperand &Op = MI.getOperand(OpIdx); + std::string MOComment = TII->createMIROperandComment(MI, Op, OpIdx, TRI); + switch (Op.getType()) { case MachineOperand::MO_Immediate: if 
(MI.isOperandSubregIdx(OpIdx)) { @@ -858,6 +895,7 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx, const TargetIntrinsicInfo *TII = MI.getMF()->getTarget().getIntrinsicInfo(); Op.print(OS, MST, TypeToPrint, OpIdx, PrintDef, /*IsStandalone=*/false, ShouldPrintRegisterTies, TiedOperandIdx, TRI, TII); + OS << formatOperandComment(MOComment); break; } case MachineOperand::MO_FrameIndex: diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp index fcc40b26c527..54441301d65b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "MIRVRegNamerUtils.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Constants.h" #include "llvm/Support/Debug.h" using namespace llvm; @@ -69,6 +71,8 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) { case MachineOperand::MO_TargetIndex: return MO.getOffset() | (MO.getTargetFlags() << 16); case MachineOperand::MO_FrameIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_JumpTableIndex: return llvm::hash_value(MO); // We could explicitly handle all the types of the MachineOperand, @@ -79,8 +83,6 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) { // TODO: Handle the following Index/ID/Predicate cases. They can // be hashed on in a stable manner. - case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_JumpTableIndex: case MachineOperand::MO_CFIIndex: case MachineOperand::MO_IntrinsicID: case MachineOperand::MO_Predicate: @@ -112,7 +114,7 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) { MIOperands.push_back((unsigned)Op->getOrdering()); MIOperands.push_back((unsigned)Op->getAddrSpace()); MIOperands.push_back((unsigned)Op->getSyncScopeID()); - MIOperands.push_back((unsigned)Op->getBaseAlignment()); + MIOperands.push_back((unsigned)Op->getBaseAlign().value()); MIOperands.push_back((unsigned)Op->getFailureOrdering()); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.h b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.h index 0c0a71a13248..a059bc5333c6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.h @@ -17,15 +17,18 @@ #ifndef LLVM_LIB_CODEGEN_MIRVREGNAMERUTILS_H #define LLVM_LIB_CODEGEN_MIRVREGNAMERUTILS_H -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/CodeGen/Register.h" +#include <map> +#include <vector> +#include <string> namespace llvm { + +class MachineBasicBlock; +class MachineInstr; +class MachineRegisterInfo; +class StringRef; + /// VRegRenamer - This class is used for renaming vregs in a machine basic /// block according to semantics of the instruction. class VRegRenamer { @@ -71,6 +74,7 @@ class VRegRenamer { /// Create a vreg with name and return it. unsigned createVirtualRegisterWithLowerName(unsigned VReg, StringRef Name); + /// Linearly traverse the MachineBasicBlock and rename each instruction's /// vreg definition based on the semantics of the instruction. 
/// Names are as follows bb<BBNum>_hash_[0-9]+ diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp index f433c4b6c90b..2d4b60435d96 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -61,12 +61,42 @@ MCSymbol *MachineBasicBlock::getSymbol() const { const MachineFunction *MF = getParent(); MCContext &Ctx = MF->getContext(); auto Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix(); + assert(getNumber() >= 0 && "cannot get label for unreachable MBB"); - CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" + - Twine(MF->getFunctionNumber()) + - "_" + Twine(getNumber())); - } + // We emit a non-temporary symbol for every basic block if we have BBLabels + // or -- with basic block sections -- when a basic block begins a section. + // With basic block symbols, we use a unary encoding which can + // compress the symbol names significantly. For basic block sections where + // this block is the first in a cluster, we use a non-temp descriptive name. + // Otherwise we fall back to use temp label. + if (MF->hasBBLabels()) { + auto Iter = MF->getBBSectionsSymbolPrefix().begin(); + if (getNumber() < 0 || + getNumber() >= (int)MF->getBBSectionsSymbolPrefix().size()) + report_fatal_error("Unreachable MBB: " + Twine(getNumber())); + // The basic blocks for function foo are named a.BB.foo, aa.BB.foo, and + // so on. + std::string Prefix(Iter + 1, Iter + getNumber() + 1); + std::reverse(Prefix.begin(), Prefix.end()); + CachedMCSymbol = + Ctx.getOrCreateSymbol(Twine(Prefix) + ".BB." + Twine(MF->getName())); + } else if (MF->hasBBSections() && isBeginSection()) { + SmallString<5> Suffix; + if (SectionID == MBBSectionID::ColdSectionID) { + Suffix += ".cold"; + } else if (SectionID == MBBSectionID::ExceptionSectionID) { + Suffix += ".eh"; + } else { + Suffix += "." + std::to_string(SectionID.Number); + } + CachedMCSymbol = Ctx.getOrCreateSymbol(MF->getName() + Suffix); + } else { + CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" + + Twine(MF->getFunctionNumber()) + + "_" + Twine(getNumber())); + } + } return CachedMCSymbol; } @@ -247,8 +277,16 @@ LLVM_DUMP_METHOD void MachineBasicBlock::dump() const { } #endif +bool MachineBasicBlock::mayHaveInlineAsmBr() const { + for (const MachineBasicBlock *Succ : successors()) { + if (Succ->isInlineAsmBrIndirectTarget()) + return true; + } + return false; +} + bool MachineBasicBlock::isLegalToHoistInto() const { - if (isReturnBlock() || hasEHPadSuccessor()) + if (isReturnBlock() || hasEHPadSuccessor() || mayHaveInlineAsmBr()) return false; return true; } @@ -326,7 +364,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "landing-pad"; HasAttributes = true; } - if (getAlignment() != Align::None()) { + if (getAlignment() != Align(1)) { OS << (HasAttributes ? 
", " : " ("); OS << "align " << Log2(getAlignment()); HasAttributes = true; @@ -479,7 +517,7 @@ void MachineBasicBlock::sortUniqueLiveIns() { LiveInVector::const_iterator J; LiveInVector::iterator Out = LiveIns.begin(); for (; I != LiveIns.end(); ++Out, I = J) { - unsigned PhysReg = I->PhysReg; + MCRegister PhysReg = I->PhysReg; LaneBitmask LaneMask = I->LaneMask; for (J = std::next(I); J != LiveIns.end() && J->PhysReg == PhysReg; ++J) LaneMask |= J->LaneMask; @@ -489,7 +527,7 @@ void MachineBasicBlock::sortUniqueLiveIns() { LiveIns.erase(Out, LiveIns.end()); } -unsigned +Register MachineBasicBlock::addLiveIn(MCRegister PhysReg, const TargetRegisterClass *RC) { assert(getParent() && "MBB must be inserted in function"); assert(PhysReg.isPhysical() && "Expected physreg"); @@ -529,7 +567,11 @@ void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) { getParent()->splice(++NewBefore->getIterator(), getIterator()); } -void MachineBasicBlock::updateTerminator() { +void MachineBasicBlock::updateTerminator( + MachineBasicBlock *PreviousLayoutSuccessor) { + LLVM_DEBUG(dbgs() << "Updating terminators on " << printMBBReference(*this) + << "\n"); + const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo(); // A block with no successors has no concerns with fall-through edges. if (this->succ_empty()) @@ -548,25 +590,21 @@ void MachineBasicBlock::updateTerminator() { if (isLayoutSuccessor(TBB)) TII->removeBranch(*this); } else { - // The block has an unconditional fallthrough. If its successor is not its - // layout successor, insert a branch. First we have to locate the only - // non-landing-pad successor, as that is the fallthrough block. - for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) { - if ((*SI)->isEHPad()) - continue; - assert(!TBB && "Found more than one non-landing-pad successor!"); - TBB = *SI; - } - - // If there is no non-landing-pad successor, the block has no fall-through - // edges to be concerned with. - if (!TBB) + // The block has an unconditional fallthrough, or the end of the block is + // unreachable. + + // Unfortunately, whether the end of the block is unreachable is not + // immediately obvious; we must fall back to checking the successor list, + // and assuming that if the passed in block is in the succesor list and + // not an EHPad, it must be the intended target. + if (!PreviousLayoutSuccessor || !isSuccessor(PreviousLayoutSuccessor) || + PreviousLayoutSuccessor->isEHPad()) return; - // Finally update the unconditional successor to be reached via a branch - // if it would not be reached by fallthrough. - if (!isLayoutSuccessor(TBB)) - TII->insertBranch(*this, TBB, nullptr, Cond, DL); + // If the unconditional successor block is not the current layout + // successor, insert a branch to jump to it. + if (!isLayoutSuccessor(PreviousLayoutSuccessor)) + TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL); } return; } @@ -587,38 +625,20 @@ void MachineBasicBlock::updateTerminator() { return; } - // Walk through the successors and find the successor which is not a landing - // pad and is not the conditional branch destination (in TBB) as the - // fallthrough successor. 
- MachineBasicBlock *FallthroughBB = nullptr; - for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) { - if ((*SI)->isEHPad() || *SI == TBB) - continue; - assert(!FallthroughBB && "Found more than one fallthrough successor."); - FallthroughBB = *SI; - } - - if (!FallthroughBB) { - if (canFallThrough()) { - // We fallthrough to the same basic block as the conditional jump targets. - // Remove the conditional jump, leaving unconditional fallthrough. - // FIXME: This does not seem like a reasonable pattern to support, but it - // has been seen in the wild coming out of degenerate ARM test cases. - TII->removeBranch(*this); - - // Finally update the unconditional successor to be reached via a branch if - // it would not be reached by fallthrough. - if (!isLayoutSuccessor(TBB)) - TII->insertBranch(*this, TBB, nullptr, Cond, DL); - return; - } + // We now know we're going to fallthrough to PreviousLayoutSuccessor. + assert(PreviousLayoutSuccessor); + assert(!PreviousLayoutSuccessor->isEHPad()); + assert(isSuccessor(PreviousLayoutSuccessor)); - // We enter here iff exactly one successor is TBB which cannot fallthrough - // and the rest successors if any are EHPads. In this case, we need to - // change the conditional branch into unconditional branch. + if (PreviousLayoutSuccessor == TBB) { + // We had a fallthrough to the same basic block as the conditional jump + // targets. Remove the conditional jump, leaving an unconditional + // fallthrough or an unconditional jump. TII->removeBranch(*this); - Cond.clear(); - TII->insertBranch(*this, TBB, nullptr, Cond, DL); + if (!isLayoutSuccessor(TBB)) { + Cond.clear(); + TII->insertBranch(*this, TBB, nullptr, Cond, DL); + } return; } @@ -627,14 +647,14 @@ void MachineBasicBlock::updateTerminator() { if (TII->reverseBranchCondition(Cond)) { // We can't reverse the condition, add an unconditional branch. Cond.clear(); - TII->insertBranch(*this, FallthroughBB, nullptr, Cond, DL); + TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL); return; } TII->removeBranch(*this); - TII->insertBranch(*this, FallthroughBB, nullptr, Cond, DL); - } else if (!isLayoutSuccessor(FallthroughBB)) { + TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL); + } else if (!isLayoutSuccessor(PreviousLayoutSuccessor)) { TII->removeBranch(*this); - TII->insertBranch(*this, TBB, FallthroughBB, Cond, DL); + TII->insertBranch(*this, TBB, PreviousLayoutSuccessor, Cond, DL); } } @@ -871,12 +891,14 @@ bool MachineBasicBlock::canFallThrough() { return getFallThrough() != nullptr; } -MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, - Pass &P) { +MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( + MachineBasicBlock *Succ, Pass &P, + std::vector<SparseBitVector<>> *LiveInSets) { if (!canSplitCriticalEdge(Succ)) return nullptr; MachineFunction *MF = getParent(); + MachineBasicBlock *PrevFallthrough = getNextNode(); DebugLoc DL; // FIXME: this is nowhere MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); @@ -898,7 +920,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, LiveVariables *LV = P.getAnalysisIfAvailable<LiveVariables>(); // Collect a list of virtual registers killed by the terminators. 
- SmallVector<unsigned, 4> KilledRegs; + SmallVector<Register, 4> KilledRegs; if (LV) for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); I != E; ++I) { @@ -918,7 +940,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, } } - SmallVector<unsigned, 4> UsedRegs; + SmallVector<Register, 4> UsedRegs; if (LIS) { for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); I != E; ++I) { @@ -947,7 +969,11 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Terminators.push_back(&*I); } - updateTerminator(); + // Since we replaced all uses of Succ with NMBB, that should also be treated + // as the fallthrough successor + if (Succ == PrevFallthrough) + PrevFallthrough = NMBB; + updateTerminator(PrevFallthrough); if (Indexes) { SmallVector<MachineInstr*, 4> NewTerminators; @@ -992,7 +1018,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, if (LV) { // Restore kills of virtual registers that were killed by the terminators. while (!KilledRegs.empty()) { - unsigned Reg = KilledRegs.pop_back_val(); + Register Reg = KilledRegs.pop_back_val(); for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) { if (!(--I)->addRegisterKilled(Reg, TRI, /* AddIfNotFound= */ false)) continue; @@ -1003,7 +1029,10 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, } } // Update relevant live-through information. - LV->addNewBlock(NMBB, this, Succ); + if (LiveInSets != nullptr) + LV->addNewBlock(NMBB, this, Succ, *LiveInSets); + else + LV->addNewBlock(NMBB, this, Succ); } if (LIS) { @@ -1022,7 +1051,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, SlotIndex EndIndex = Indexes->getMBBEndIdx(NMBB); // Find the registers used from NMBB in PHIs in Succ. - SmallSet<unsigned, 8> PHISrcRegs; + SmallSet<Register, 8> PHISrcRegs; for (MachineBasicBlock::instr_iterator I = Succ->instr_begin(), E = Succ->instr_end(); I != E && I->isPHI(); ++I) { @@ -1045,7 +1074,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, MachineRegisterInfo *MRI = &getParent()->getRegInfo(); for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = Register::index2VirtReg(i); + Register Reg = Register::index2VirtReg(i); if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg)) continue; @@ -1109,15 +1138,19 @@ bool MachineBasicBlock::canSplitCriticalEdge( if (Succ->isEHPad()) return false; - const MachineFunction *MF = getParent(); + // Splitting the critical edge to a callbr's indirect block isn't advised. + // Don't do it in this generic function. + if (Succ->isInlineAsmBrIndirectTarget()) + return false; + const MachineFunction *MF = getParent(); // Performance might be harmed on HW that implements branching using exec mask // where both sides of the branches are always executed. if (MF->getTarget().requiresStructuredCFG()) return false; // We may need to update this's terminator, but we can't do that if - // AnalyzeBranch fails. If this uses a jump table, we won't touch it. + // analyzeBranch fails. If this uses a jump table, we won't touch it. const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; @@ -1223,68 +1256,6 @@ void MachineBasicBlock::replacePhiUsesWith(MachineBasicBlock *Old, } } -/// Various pieces of code can cause excess edges in the CFG to be inserted. 
If -/// we have proven that MBB can only branch to DestA and DestB, remove any other -/// MBB successors from the CFG. DestA and DestB can be null. -/// -/// Besides DestA and DestB, retain other edges leading to LandingPads -/// (currently there can be only one; we don't check or require that here). -/// Note it is possible that DestA and/or DestB are LandingPads. -bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, - MachineBasicBlock *DestB, - bool IsCond) { - // The values of DestA and DestB frequently come from a call to the - // 'TargetInstrInfo::AnalyzeBranch' method. We take our meaning of the initial - // values from there. - // - // 1. If both DestA and DestB are null, then the block ends with no branches - // (it falls through to its successor). - // 2. If DestA is set, DestB is null, and IsCond is false, then the block ends - // with only an unconditional branch. - // 3. If DestA is set, DestB is null, and IsCond is true, then the block ends - // with a conditional branch that falls through to a successor (DestB). - // 4. If DestA and DestB is set and IsCond is true, then the block ends with a - // conditional branch followed by an unconditional branch. DestA is the - // 'true' destination and DestB is the 'false' destination. - - bool Changed = false; - - MachineBasicBlock *FallThru = getNextNode(); - - if (!DestA && !DestB) { - // Block falls through to successor. - DestA = FallThru; - DestB = FallThru; - } else if (DestA && !DestB) { - if (IsCond) - // Block ends in conditional jump that falls through to successor. - DestB = FallThru; - } else { - assert(DestA && DestB && IsCond && - "CFG in a bad state. Cannot correct CFG edges"); - } - - // Remove superfluous edges. I.e., those which aren't destinations of this - // basic block, duplicate edges, or landing pads. - SmallPtrSet<const MachineBasicBlock*, 8> SeenMBBs; - MachineBasicBlock::succ_iterator SI = succ_begin(); - while (SI != succ_end()) { - const MachineBasicBlock *MBB = *SI; - if (!SeenMBBs.insert(MBB).second || - (MBB != DestA && MBB != DestB && !MBB->isEHPad())) { - // This is a superfluous edge, remove it. - SI = removeSuccessor(SI); - Changed = true; - } else { - ++SI; - } - } - - if (Changed) - normalizeSuccProbs(); - return Changed; -} - /// Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE /// instructions. Return UnknownLoc if there is none. DebugLoc @@ -1300,8 +1271,8 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { /// instructions. Return UnknownLoc if there is none. DebugLoc MachineBasicBlock::findPrevDebugLoc(instr_iterator MBBI) { if (MBBI == instr_begin()) return {}; - // Skip debug declarations, we don't want a DebugLoc from them. - MBBI = skipDebugInstructionsBackward(std::prev(MBBI), instr_begin()); + // Skip debug instructions, we don't want a DebugLoc from them. + MBBI = prev_nodbg(MBBI, instr_begin()); if (!MBBI->isDebugInstr()) return MBBI->getDebugLoc(); return {}; } @@ -1383,7 +1354,7 @@ MachineBasicBlock::getProbabilityIterator(MachineBasicBlock::succ_iterator I) { /// instructions after (searching just for defs) MI. 
MachineBasicBlock::LivenessQueryResult MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, - unsigned Reg, const_iterator Before, + MCRegister Reg, const_iterator Before, unsigned Neighborhood) const { unsigned N = Neighborhood; @@ -1503,3 +1474,7 @@ MachineBasicBlock::livein_iterator MachineBasicBlock::livein_begin() const { "Liveness information is accurate"); return LiveIns.begin(); } + +const MBBSectionID MBBSectionID::ColdSectionID(MBBSectionID::SectionType::Cold); +const MBBSectionID + MBBSectionID::ExceptionSectionID(MBBSectionID::SectionType::Exception); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index d8ea3e0b9cf6..1168b01a835f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -247,6 +247,12 @@ MachineBlockFrequencyInfo::isIrrLoopHeader(const MachineBasicBlock *MBB) { return MBFI->isIrrLoopHeader(MBB); } +void MachineBlockFrequencyInfo::setBlockFreq(const MachineBasicBlock *MBB, + uint64_t Freq) { + assert(MBFI && "Expected analysis to be available"); + MBFI->setBlockFreq(MBB, Freq); +} + const MachineFunction *MachineBlockFrequencyInfo::getFunction() const { return MBFI ? MBFI->getFunction() : nullptr; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 30b98ec88c24..783d22fafee9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -346,7 +346,7 @@ class MachineBlockPlacement : public MachineFunctionPass { const MachineBranchProbabilityInfo *MBPI; /// A handle to the function-wide block frequency pass. - std::unique_ptr<BranchFolder::MBFIWrapper> MBFI; + std::unique_ptr<MBFIWrapper> MBFI; /// A handle to the loop info. MachineLoopInfo *MLI; @@ -374,6 +374,9 @@ class MachineBlockPlacement : public MachineFunctionPass { /// must be done inline. TailDuplicator TailDup; + /// Partial tail duplication threshold. + BlockFrequency DupThreshold; + /// Allocator and owner of BlockChain structures. /// /// We build BlockChains lazily while processing the loop structure of @@ -399,6 +402,10 @@ class MachineBlockPlacement : public MachineFunctionPass { SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits; #endif + /// Scale the DupThreshold according to basic block size. + BlockFrequency scaleThreshold(MachineBasicBlock *BB); + void initDupThreshold(); + /// Decrease the UnscheduledPredecessors count for all blocks in chain, and /// if the count goes to 0, add them to the appropriate work list. 
void markChainSuccessors( @@ -421,6 +428,11 @@ class MachineBlockPlacement : public MachineFunctionPass { const MachineBasicBlock *BB, const MachineBasicBlock *Succ, const BlockChain &Chain, const BlockFilterSet *BlockFilter, BranchProbability SuccProb, BranchProbability HotProb); + bool isBestSuccessor(MachineBasicBlock *BB, MachineBasicBlock *Pred, + BlockFilterSet *BlockFilter); + void findDuplicateCandidates(SmallVectorImpl<MachineBasicBlock *> &Candidates, + MachineBasicBlock *BB, + BlockFilterSet *BlockFilter); bool repeatedlyTailDuplicateBlock( MachineBasicBlock *BB, MachineBasicBlock *&LPred, const MachineBasicBlock *LoopHeaderBB, @@ -1141,6 +1153,11 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds( if (NumDup == 0) return false; + // If profile information is available, findDuplicateCandidates can do more + // precise benefit analysis. + if (F->getFunction().hasProfileData()) + return true; + // This is mainly for function exit BB. // The integrated tail duplication is really designed for increasing // fallthrough from predecessors from Succ to its successors. We may need @@ -1169,9 +1186,6 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds( // // A small number of extra duplication may not hurt too much. We need a better // heuristic to handle it. - // - // FIXME: we should selectively tail duplicate a BB into part of its - // predecessors. if ((NumDup > Succ->succ_size()) || !Duplicate) return false; @@ -1556,7 +1570,7 @@ MachineBlockPlacement::selectBestSuccessor( // For blocks with CFG violations, we may be able to lay them out anyway with // tail-duplication. We keep this vector so we can perform the probability // calculations the minimum number of times. - SmallVector<std::tuple<BranchProbability, MachineBasicBlock *>, 4> + SmallVector<std::pair<BranchProbability, MachineBasicBlock *>, 4> DupCandidates; for (MachineBasicBlock *Succ : Successors) { auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ); @@ -1570,7 +1584,7 @@ MachineBlockPlacement::selectBestSuccessor( Chain, BlockFilter)) { // If tail duplication would make Succ profitable, place it. if (allowTailDupPlacement() && shouldTailDuplicate(Succ)) - DupCandidates.push_back(std::make_tuple(SuccProb, Succ)); + DupCandidates.emplace_back(SuccProb, Succ); continue; } @@ -1799,11 +1813,11 @@ void MachineBlockPlacement::buildChain( // Placement may have changed tail duplication opportunities. // Check for that now. if (allowTailDupPlacement() && BestSucc && ShouldTailDup) { - // If the chosen successor was duplicated into all its predecessors, - // don't bother laying it out, just go round the loop again with BB as - // the chain end. - if (repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain, - BlockFilter, PrevUnplacedBlockIt)) + repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain, + BlockFilter, PrevUnplacedBlockIt); + // If the chosen successor was duplicated into BB, don't bother laying + // it out, just go round the loop again with BB as the chain end. + if (!BB->isSuccessor(BestSucc)) continue; } @@ -2082,8 +2096,7 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, // In practice this never happens though: there always seems to be a preheader // that can fallthrough and that is also placed before the header. 
bool OptForSize = F->getFunction().hasOptSize() || - llvm::shouldOptimizeForSize(L.getHeader(), PSI, - &MBFI->getMBFI()); + llvm::shouldOptimizeForSize(L.getHeader(), PSI, MBFI.get()); if (OptForSize) return L.getHeader(); @@ -2616,7 +2629,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) { void MachineBlockPlacement::buildCFGChains() { // Ensure that every BB in the function has an associated chain to simplify // the assumptions of the remaining algorithm. - SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch. + SmallVector<MachineOperand, 4> Cond; // For analyzeBranch. for (MachineFunction::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) { MachineBasicBlock *BB = &*FI; @@ -2626,7 +2639,7 @@ void MachineBlockPlacement::buildCFGChains() { // the exact fallthrough behavior for. while (true) { Cond.clear(); - MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch. if (!TII->analyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough()) break; @@ -2690,6 +2703,20 @@ void MachineBlockPlacement::buildCFGChains() { assert(!BadFunc && "Detected problems with the block placement."); }); + // Remember original layout ordering, so we can update terminators after + // reordering to point to the original layout successor. + SmallVector<MachineBasicBlock *, 4> OriginalLayoutSuccessors( + F->getNumBlockIDs()); + { + MachineBasicBlock *LastMBB = nullptr; + for (auto &MBB : *F) { + if (LastMBB != nullptr) + OriginalLayoutSuccessors[LastMBB->getNumber()] = &MBB; + LastMBB = &MBB; + } + OriginalLayoutSuccessors[F->back().getNumber()] = nullptr; + } + // Splice the blocks into place. MachineFunction::iterator InsertPos = F->begin(); LLVM_DEBUG(dbgs() << "[MBP] Function: " << F->getName() << "\n"); @@ -2711,7 +2738,7 @@ void MachineBlockPlacement::buildCFGChains() { // than assert when the branch cannot be analyzed in order to remove this // boiler plate. Cond.clear(); - MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch. #ifndef NDEBUG if (!BlocksWithUnanalyzableExits.count(PrevBB)) { @@ -2747,15 +2774,18 @@ void MachineBlockPlacement::buildCFGChains() { // TBB = FBB = nullptr; // } // } - if (!TII->analyzeBranch(*PrevBB, TBB, FBB, Cond)) - PrevBB->updateTerminator(); + if (!TII->analyzeBranch(*PrevBB, TBB, FBB, Cond)) { + PrevBB->updateTerminator(OriginalLayoutSuccessors[PrevBB->getNumber()]); + } } // Fixup the last block. Cond.clear(); - MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. - if (!TII->analyzeBranch(F->back(), TBB, FBB, Cond)) - F->back().updateTerminator(); + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch. + if (!TII->analyzeBranch(F->back(), TBB, FBB, Cond)) { + MachineBasicBlock *PrevBB = &F->back(); + PrevBB->updateTerminator(OriginalLayoutSuccessors[PrevBB->getNumber()]); + } BlockWorkList.clear(); EHPadWorkList.clear(); @@ -2763,17 +2793,17 @@ void MachineBlockPlacement::buildCFGChains() { void MachineBlockPlacement::optimizeBranches() { BlockChain &FunctionChain = *BlockToChain[&F->front()]; - SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch. + SmallVector<MachineOperand, 4> Cond; // For analyzeBranch. // Now that all the basic blocks in the chain have the proper layout, - // make a final call to AnalyzeBranch with AllowModify set. + // make a final call to analyzeBranch with AllowModify set. 
// Indeed, the target may be able to optimize the branches in a way we // cannot because all branches may not be analyzable. // E.g., the target may be able to remove an unconditional branch to // a fallthrough when it occurs after predicated terminators. for (MachineBasicBlock *ChainBB : FunctionChain) { Cond.clear(); - MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch. if (!TII->analyzeBranch(*ChainBB, TBB, FBB, Cond, /*AllowModify*/ true)) { // If PrevBB has a two-way branch, try to re-order the branches // such that we branch to the successor with higher probability first. @@ -2789,7 +2819,6 @@ void MachineBlockPlacement::optimizeBranches() { DebugLoc dl; // FIXME: this is nowhere TII->removeBranch(*ChainBB); TII->insertBranch(*ChainBB, FBB, TBB, Cond, dl); - ChainBB->updateTerminator(); } } } @@ -2841,7 +2870,7 @@ void MachineBlockPlacement::alignBlocks() { continue; // If the global profiles indicates so, don't align it. - if (llvm::shouldOptimizeForSize(ChainBB, PSI, &MBFI->getMBFI()) && + if (llvm::shouldOptimizeForSize(ChainBB, PSI, MBFI.get()) && !TLI->alignLoopsWithOptSize()) continue; @@ -2901,10 +2930,7 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock( // duplicated into is still small enough to be duplicated again. // No need to call markBlockSuccessors in this case, as the blocks being // duplicated from here on are already scheduled. - // Note that DuplicatedToLPred always implies Removed. - while (DuplicatedToLPred) { - assert(Removed && "Block must have been removed to be duplicated into its " - "layout predecessor."); + while (DuplicatedToLPred && Removed) { MachineBasicBlock *DupBB, *DupPred; // The removal callback causes Chain.end() to be updated when a block is // removed. On the first pass through the loop, the chain end should be the @@ -2943,8 +2969,7 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock( /// chosen in the given order due to unnatural CFG /// only needed if \p BB is removed and /// \p PrevUnplacedBlockIt pointed to \p BB. -/// \p DuplicatedToLPred - True if the block was duplicated into LPred. Will -/// only be true if the block was removed. +/// \p DuplicatedToLPred - True if the block was duplicated into LPred. /// \return - True if the block was duplicated into all preds and removed. bool MachineBlockPlacement::maybeTailDuplicateBlock( MachineBasicBlock *BB, MachineBasicBlock *LPred, @@ -3012,8 +3037,18 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock( SmallVector<MachineBasicBlock *, 8> DuplicatedPreds; bool IsSimple = TailDup.isSimpleBB(BB); - TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred, - &DuplicatedPreds, &RemovalCallbackRef); + SmallVector<MachineBasicBlock *, 8> CandidatePreds; + SmallVectorImpl<MachineBasicBlock *> *CandidatePtr = nullptr; + if (F->getFunction().hasProfileData()) { + // We can do partial duplication with precise profile information. + findDuplicateCandidates(CandidatePreds, BB, BlockFilter); + if (CandidatePreds.size() == 0) + return false; + if (CandidatePreds.size() < BB->pred_size()) + CandidatePtr = &CandidatePreds; + } + TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred, &DuplicatedPreds, + &RemovalCallbackRef, CandidatePtr); // Update UnscheduledPredecessors to reflect tail-duplication. DuplicatedToLPred = false; @@ -3036,6 +3071,191 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock( return Removed; } +// Count the number of actual machine instructions. 
+static uint64_t countMBBInstruction(MachineBasicBlock *MBB) { + uint64_t InstrCount = 0; + for (MachineInstr &MI : *MBB) { + if (!MI.isPHI() && !MI.isMetaInstruction()) + InstrCount += 1; + } + return InstrCount; +} + +// The size cost of duplication is the instruction size of the duplicated block. +// So we should scale the threshold accordingly. But the instruction size is not +// available on all targets, so we use the number of instructions instead. +BlockFrequency MachineBlockPlacement::scaleThreshold(MachineBasicBlock *BB) { + return DupThreshold.getFrequency() * countMBBInstruction(BB); +} + +// Returns true if BB is Pred's best successor. +bool MachineBlockPlacement::isBestSuccessor(MachineBasicBlock *BB, + MachineBasicBlock *Pred, + BlockFilterSet *BlockFilter) { + if (BB == Pred) + return false; + if (BlockFilter && !BlockFilter->count(Pred)) + return false; + BlockChain *PredChain = BlockToChain[Pred]; + if (PredChain && (Pred != *std::prev(PredChain->end()))) + return false; + + // Find the successor with largest probability excluding BB. + BranchProbability BestProb = BranchProbability::getZero(); + for (MachineBasicBlock *Succ : Pred->successors()) + if (Succ != BB) { + if (BlockFilter && !BlockFilter->count(Succ)) + continue; + BlockChain *SuccChain = BlockToChain[Succ]; + if (SuccChain && (Succ != *SuccChain->begin())) + continue; + BranchProbability SuccProb = MBPI->getEdgeProbability(Pred, Succ); + if (SuccProb > BestProb) + BestProb = SuccProb; + } + + BranchProbability BBProb = MBPI->getEdgeProbability(Pred, BB); + if (BBProb <= BestProb) + return false; + + // Compute the number of reduced taken branches if Pred falls through to BB + // instead of another successor. Then compare it with threshold. + BlockFrequency PredFreq = MBFI->getBlockFreq(Pred); + BlockFrequency Gain = PredFreq * (BBProb - BestProb); + return Gain > scaleThreshold(BB); +} + +// Find out the predecessors of BB and BB can be beneficially duplicated into +// them. +void MachineBlockPlacement::findDuplicateCandidates( + SmallVectorImpl<MachineBasicBlock *> &Candidates, + MachineBasicBlock *BB, + BlockFilterSet *BlockFilter) { + MachineBasicBlock *Fallthrough = nullptr; + BranchProbability DefaultBranchProb = BranchProbability::getZero(); + BlockFrequency BBDupThreshold(scaleThreshold(BB)); + SmallVector<MachineBasicBlock *, 8> Preds(BB->pred_begin(), BB->pred_end()); + SmallVector<MachineBasicBlock *, 8> Succs(BB->succ_begin(), BB->succ_end()); + + // Sort for highest frequency. + auto CmpSucc = [&](MachineBasicBlock *A, MachineBasicBlock *B) { + return MBPI->getEdgeProbability(BB, A) > MBPI->getEdgeProbability(BB, B); + }; + auto CmpPred = [&](MachineBasicBlock *A, MachineBasicBlock *B) { + return MBFI->getBlockFreq(A) > MBFI->getBlockFreq(B); + }; + llvm::stable_sort(Succs, CmpSucc); + llvm::stable_sort(Preds, CmpPred); + + auto SuccIt = Succs.begin(); + if (SuccIt != Succs.end()) { + DefaultBranchProb = MBPI->getEdgeProbability(BB, *SuccIt).getCompl(); + } + + // For each predecessors of BB, compute the benefit of duplicating BB, + // if it is larger than the threshold, add it into Candidates. + // + // If we have following control flow. 
+ // + // PB1 PB2 PB3 PB4 + // \ | / /\ + // \ | / / \ + // \ |/ / \ + // BB----/ OB + // /\ + // / \ + // SB1 SB2 + // + // And it can be partially duplicated as + // + // PB2+BB + // | PB1 PB3 PB4 + // | | / /\ + // | | / / \ + // | |/ / \ + // | BB----/ OB + // |\ /| + // | X | + // |/ \| + // SB2 SB1 + // + // The benefit of duplicating into a predecessor is defined as + // Orig_taken_branch - Duplicated_taken_branch + // + // The Orig_taken_branch is computed with the assumption that predecessor + // jumps to BB and the most possible successor is laid out after BB. + // + // The Duplicated_taken_branch is computed with the assumption that BB is + // duplicated into PB, and one successor is layout after it (SB1 for PB1 and + // SB2 for PB2 in our case). If there is no available successor, the combined + // block jumps to all BB's successor, like PB3 in this example. + // + // If a predecessor has multiple successors, so BB can't be duplicated into + // it. But it can beneficially fall through to BB, and duplicate BB into other + // predecessors. + for (MachineBasicBlock *Pred : Preds) { + BlockFrequency PredFreq = MBFI->getBlockFreq(Pred); + + if (!TailDup.canTailDuplicate(BB, Pred)) { + // BB can't be duplicated into Pred, but it is possible to be layout + // below Pred. + if (!Fallthrough && isBestSuccessor(BB, Pred, BlockFilter)) { + Fallthrough = Pred; + if (SuccIt != Succs.end()) + SuccIt++; + } + continue; + } + + BlockFrequency OrigCost = PredFreq + PredFreq * DefaultBranchProb; + BlockFrequency DupCost; + if (SuccIt == Succs.end()) { + // Jump to all successors; + if (Succs.size() > 0) + DupCost += PredFreq; + } else { + // Fallthrough to *SuccIt, jump to all other successors; + DupCost += PredFreq; + DupCost -= PredFreq * MBPI->getEdgeProbability(BB, *SuccIt); + } + + assert(OrigCost >= DupCost); + OrigCost -= DupCost; + if (OrigCost > BBDupThreshold) { + Candidates.push_back(Pred); + if (SuccIt != Succs.end()) + SuccIt++; + } + } + + // No predecessors can optimally fallthrough to BB. + // So we can change one duplication into fallthrough. + if (!Fallthrough) { + if ((Candidates.size() < Preds.size()) && (Candidates.size() > 0)) { + Candidates[0] = Candidates.back(); + Candidates.pop_back(); + } + } +} + +void MachineBlockPlacement::initDupThreshold() { + DupThreshold = 0; + if (!F->getFunction().hasProfileData()) + return; + + BlockFrequency MaxFreq = 0; + for (MachineBasicBlock &MBB : *F) { + BlockFrequency Freq = MBFI->getBlockFreq(&MBB); + if (Freq > MaxFreq) + MaxFreq = Freq; + } + + // FIXME: we may use profile count instead of frequency, + // and need more fine tuning. + BranchProbability ThresholdProb(TailDupPlacementPenalty, 100); + DupThreshold = MaxFreq * ThresholdProb; +} + bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -3046,7 +3266,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { F = &MF; MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); - MBFI = std::make_unique<BranchFolder::MBFIWrapper>( + MBFI = std::make_unique<MBFIWrapper>( getAnalysis<MachineBlockFrequencyInfo>()); MLI = &getAnalysis<MachineLoopInfo>(); TII = MF.getSubtarget().getInstrInfo(); @@ -3054,6 +3274,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { MPDT = nullptr; PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + initDupThreshold(); + // Initialize PreferredLoopExit to nullptr here since it may never be set if // there are no MachineLoops. 
PreferredLoopExit = nullptr; @@ -3088,7 +3310,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (OptForSize) TailDupSize = 1; bool PreRegAlloc = false; - TailDup.initMF(MF, PreRegAlloc, MBPI, &MBFI->getMBFI(), PSI, + TailDup.initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI, /* LayoutMode */ true, TailDupSize); precomputeTriangleChains(); } @@ -3107,9 +3329,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI, *MBPI, PSI, TailMergeSize); - auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); - if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), - MMIWP ? &MMIWP->getMMI() : nullptr, MLI, + if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), MLI, /*AfterPlacement=*/true)) { // Redo the layout if tail merging creates/removes/moves blocks. BlockToChain.clear(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp index 9561a06ce8df..09531276bc10 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp @@ -747,9 +747,8 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { do { Node = WorkList.pop_back_val(); Scopes.push_back(Node); - const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); - OpenChildren[Node] = Children.size(); - for (MachineDomTreeNode *Child : Children) + OpenChildren[Node] = Node->getNumChildren(); + for (MachineDomTreeNode *Child : Node->children()) WorkList.push_back(Child); } while (!WorkList.empty()); @@ -831,6 +830,13 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT, continue; MachineInstr &NewMI = TII->duplicate(*CMBB, CMBB->getFirstTerminator(), *MI); + + // When hoisting, make sure we don't carry the debug location of + // the original instruction, as that's not correct and can cause + // unexpected jumps when debugging optimized code. + auto EmptyDL = DebugLoc(); + NewMI.setDebugLoc(EmptyDL); + NewMI.getOperand(0).setReg(NewReg); PREMap[MI] = CMBB; @@ -855,8 +861,7 @@ bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) { BBs.push_back(DT->getRootNode()); do { auto Node = BBs.pop_back_val(); - const std::vector<MachineDomTreeNode *> &Children = Node->getChildren(); - for (MachineDomTreeNode *Child : Children) + for (MachineDomTreeNode *Child : Node->children()) BBs.push_back(Child); MachineBasicBlock *MBB = Node->getBlock(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp index 73895bdf834f..f241435a0482 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp @@ -269,6 +269,8 @@ static CombinerObjective getCombinerObjective(MachineCombinerPattern P) { case MachineCombinerPattern::REASSOC_AX_YB: case MachineCombinerPattern::REASSOC_XA_BY: case MachineCombinerPattern::REASSOC_XA_YB: + case MachineCombinerPattern::REASSOC_XY_AMM_BMM: + case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: return CombinerObjective::MustReduceDepth; default: return CombinerObjective::Default; @@ -406,12 +408,14 @@ bool MachineCombiner::preservesResourceLen( << ResLenBeforeCombine << " and after: " << ResLenAfterCombine << "\n";); LLVM_DEBUG( - ResLenAfterCombine <= ResLenBeforeCombine + ResLenAfterCombine <= + ResLenBeforeCombine + TII->getExtendResourceLenLimit() ? 
dbgs() << "\t\t As result it IMPROVES/PRESERVES Resource Length\n" : dbgs() << "\t\t As result it DOES NOT improve/preserve Resource " "Length\n"); - return ResLenAfterCombine <= ResLenBeforeCombine; + return ResLenAfterCombine <= + ResLenBeforeCombine + TII->getExtendResourceLenLimit(); } /// \returns true when new instruction sequence should be generated diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp index c316b167059b..70d6dcc2e3e2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -51,6 +51,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/iterator_range.h" @@ -113,7 +114,8 @@ public: // Since Reg might be a subreg of some registers, only invalidate Reg is not // enough. We have to find the COPY defines Reg or registers defined by Reg // and invalidate all of them. - DenseSet<unsigned> RegsToInvalidate{Reg}; + SmallSet<unsigned, 8> RegsToInvalidate; + RegsToInvalidate.insert(Reg); for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) { auto I = Copies.find(*RUI); if (I != Copies.end()) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp new file mode 100644 index 000000000000..bf57ec0e8c28 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp @@ -0,0 +1,172 @@ +//===- MachineDebugify.cpp - Attach synthetic debug info to everything ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file This pass attaches synthetic debug info to everything. It can be used +/// to create targeted tests for debug info preservation, or test for CodeGen +/// differences with vs. without debug info. +/// +/// This isn't intended to have feature parity with Debugify. 
+//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Utils/Debugify.h" + +#define DEBUG_TYPE "mir-debugify" + +using namespace llvm; + +namespace { +bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI, + DIBuilder &DIB, Function &F) { + MachineFunction *MaybeMF = MMI.getMachineFunction(F); + if (!MaybeMF) + return false; + MachineFunction &MF = *MaybeMF; + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + + DISubprogram *SP = F.getSubprogram(); + assert(SP && "IR Debugify just created it?"); + + Module &M = *F.getParent(); + LLVMContext &Ctx = M.getContext(); + + unsigned NextLine = SP->getLine(); + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + // This will likely emit line numbers beyond the end of the imagined + // source function and into subsequent ones. We don't do anything about + // that as it doesn't really matter to the compiler where the line is in + // the imaginary source code. + MI.setDebugLoc(DILocation::get(Ctx, NextLine++, 1, SP)); + } + } + + // Find local variables defined by debugify. No attempt is made to match up + // MIR-level regs to the 'correct' IR-level variables: there isn't a simple + // way to do that, and it isn't necessary to find interesting CodeGen bugs. + // Instead, simply keep track of one variable per line. Later, we can insert + // DBG_VALUE insts that point to these local variables. Emitting DBG_VALUEs + // which cover a wide range of lines can help stress the debug info passes: + // if we can't do that, fall back to using the local variable which precedes + // all the others. + Function *DbgValF = M.getFunction("llvm.dbg.value"); + DbgValueInst *EarliestDVI = nullptr; + DenseMap<unsigned, DILocalVariable *> Line2Var; + DIExpression *Expr = nullptr; + if (DbgValF) { + for (const Use &U : DbgValF->uses()) { + auto *DVI = dyn_cast<DbgValueInst>(U.getUser()); + if (!DVI || DVI->getFunction() != &F) + continue; + unsigned Line = DVI->getDebugLoc().getLine(); + assert(Line != 0 && "debugify should not insert line 0 locations"); + Line2Var[Line] = DVI->getVariable(); + if (!EarliestDVI || Line < EarliestDVI->getDebugLoc().getLine()) + EarliestDVI = DVI; + Expr = DVI->getExpression(); + } + } + if (Line2Var.empty()) + return true; + + // Now, try to insert a DBG_VALUE instruction after each real instruction. + // Do this by introducing debug uses of each register definition. If that is + // not possible (e.g. we have a phi or a meta instruction), emit a constant. + uint64_t NextImm = 0; + const MCInstrDesc &DbgValDesc = TII.get(TargetOpcode::DBG_VALUE); + for (MachineBasicBlock &MBB : MF) { + MachineBasicBlock::iterator FirstNonPHIIt = MBB.getFirstNonPHI(); + for (auto I = MBB.begin(), E = MBB.end(); I != E; ) { + MachineInstr &MI = *I; + ++I; + + // `I` may point to a DBG_VALUE created in the previous loop iteration. + if (MI.isDebugInstr()) + continue; + + // It's not allowed to insert DBG_VALUEs after a terminator. 
+ if (MI.isTerminator()) + continue; + + // Find a suitable insertion point for the DBG_VALUE. + auto InsertBeforeIt = MI.isPHI() ? FirstNonPHIIt : I; + + // Find a suitable local variable for the DBG_VALUE. + unsigned Line = MI.getDebugLoc().getLine(); + if (!Line2Var.count(Line)) + Line = EarliestDVI->getDebugLoc().getLine(); + DILocalVariable *LocalVar = Line2Var[Line]; + assert(LocalVar && "No variable for current line?"); + + // Emit DBG_VALUEs for register definitions. + SmallVector<MachineOperand *, 4> RegDefs; + for (MachineOperand &MO : MI.operands()) + if (MO.isReg() && MO.isDef() && MO.getReg()) + RegDefs.push_back(&MO); + for (MachineOperand *MO : RegDefs) + BuildMI(MBB, InsertBeforeIt, MI.getDebugLoc(), DbgValDesc, + /*IsIndirect=*/false, *MO, LocalVar, Expr); + + // OK, failing that, emit a constant DBG_VALUE. + if (RegDefs.empty()) { + auto ImmOp = MachineOperand::CreateImm(NextImm++); + BuildMI(MBB, InsertBeforeIt, MI.getDebugLoc(), DbgValDesc, + /*IsIndirect=*/false, ImmOp, LocalVar, Expr); + } + } + } + + return true; +} + +/// ModulePass for attaching synthetic debug info to everything, used with the +/// legacy module pass manager. +struct DebugifyMachineModule : public ModulePass { + bool runOnModule(Module &M) override { + MachineModuleInfo &MMI = + getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); + return applyDebugifyMetadata( + M, M.functions(), + "ModuleDebugify: ", [&](DIBuilder &DIB, Function &F) -> bool { + return applyDebugifyMetadataToMachineFunction(MMI, DIB, F); + }); + } + + DebugifyMachineModule() : ModulePass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineModuleInfoWrapperPass>(); + AU.addPreserved<MachineModuleInfoWrapperPass>(); + AU.setPreservesCFG(); + } + + static char ID; // Pass identification. +}; +char DebugifyMachineModule::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS_BEGIN(DebugifyMachineModule, DEBUG_TYPE, + "Machine Debugify Module", false, false) +INITIALIZE_PASS_END(DebugifyMachineModule, DEBUG_TYPE, + "Machine Debugify Module", false, false) + +ModulePass *llvm::createDebugifyMachineModulePass() { + return new DebugifyMachineModule(); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp index 22ab2c7a6d77..7ba27ff1c856 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp @@ -41,8 +41,9 @@ static inline Align clampStackAlignment(bool ShouldClamp, Align Alignment, Align StackAlignment) { if (!ShouldClamp || Alignment <= StackAlignment) return Alignment; - LLVM_DEBUG(dbgs() << "Warning: requested alignment " << Alignment.value() - << " exceeds the stack alignment " << StackAlignment.value() + LLVM_DEBUG(dbgs() << "Warning: requested alignment " << DebugStr(Alignment) + << " exceeds the stack alignment " + << DebugStr(StackAlignment) << " when stack realignment is off" << '\n'); return StackAlignment; } @@ -89,7 +90,7 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, // stack needs realignment, we can't assume that the stack will in fact be // aligned. Align Alignment = - commonAlignment(ForcedRealign ? Align::None() : StackAlignment, SPOffset); + commonAlignment(ForcedRealign ? 
Align(1) : StackAlignment, SPOffset); Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); Objects.insert(Objects.begin(), StackObject(Size, Alignment, SPOffset, IsImmutable, @@ -102,7 +103,7 @@ int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, bool IsImmutable) { Align Alignment = - commonAlignment(ForcedRealign ? Align::None() : StackAlignment, SPOffset); + commonAlignment(ForcedRealign ? Align(1) : StackAlignment, SPOffset); Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); Objects.insert(Objects.begin(), StackObject(Size, Alignment, SPOffset, IsImmutable, @@ -136,7 +137,7 @@ BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const { uint64_t MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); - unsigned MaxAlign = getMaxAlignment(); + Align MaxAlign = getMaxAlign(); int64_t Offset = 0; // This code is very, very similar to PEI::calculateFrameObjectOffsets(). @@ -155,11 +156,11 @@ uint64_t MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { if (isDeadObjectIndex(i) || getStackID(i) != TargetStackID::Default) continue; Offset += getObjectSize(i); - unsigned Align = getObjectAlignment(i); + Align Alignment = getObjectAlign(i); // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; + Offset = alignTo(Offset, Alignment); - MaxAlign = std::max(Align, MaxAlign); + MaxAlign = std::max(Alignment, MaxAlign); } if (adjustsStack() && TFI->hasReservedCallFrame(MF)) @@ -170,20 +171,17 @@ uint64_t MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { // ensure that the callee's frame or the alloca data is suitably aligned; // otherwise, for leaf functions, align to the TransientStackAlignment // value. - unsigned StackAlign; + Align StackAlign; if (adjustsStack() || hasVarSizedObjects() || (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0)) - StackAlign = TFI->getStackAlignment(); + StackAlign = TFI->getStackAlign(); else - StackAlign = TFI->getTransientStackAlignment(); + StackAlign = TFI->getTransientStackAlign(); // If the frame pointer is eliminated, all frame offsets will be relative to // SP not FP. Align to MaxAlign so this works. 
StackAlign = std::max(StackAlign, MaxAlign); - unsigned AlignMask = StackAlign - 1; - Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); - - return (uint64_t)Offset; + return alignTo(Offset, StackAlign); } void MachineFrameInfo::computeMaxCallFrameSize(const MachineFunction &MF) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp index 4612690644fe..6d45f08804ed 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -71,6 +72,7 @@ #include <cstdint> #include <iterator> #include <string> +#include <type_traits> #include <utility> #include <vector> @@ -96,6 +98,7 @@ static const char *getPropertyName(MachineFunctionProperties::Property Prop) { case P::RegBankSelected: return "RegBankSelected"; case P::Selected: return "Selected"; case P::TracksLiveness: return "TracksLiveness"; + case P::TiedOpsRewritten: return "TiedOpsRewritten"; } llvm_unreachable("Invalid machine function property"); } @@ -128,11 +131,10 @@ static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI, const Function &F) { if (F.hasFnAttribute(Attribute::StackAlignment)) return F.getFnStackAlignment(); - return STI->getFrameLowering()->getStackAlignment(); + return STI->getFrameLowering()->getStackAlign().value(); } -MachineFunction::MachineFunction(const Function &F, - const LLVMTargetMachine &Target, +MachineFunction::MachineFunction(Function &F, const LLVMTargetMachine &Target, const TargetSubtargetInfo &STI, unsigned FunctionNum, MachineModuleInfo &mmi) : F(F), Target(Target), STI(&STI), Ctx(mmi.getContext()), MMI(mmi) { @@ -170,7 +172,7 @@ void MachineFunction::init() { F.hasFnAttribute(Attribute::StackAlignment)); if (F.hasFnAttribute(Attribute::StackAlignment)) - FrameInfo->ensureMaxAlignment(F.getFnStackAlignment()); + FrameInfo->ensureMaxAlignment(*F.getFnStackAlign()); ConstantPool = new (Allocator) MachineConstantPool(getDataLayout()); Alignment = STI->getTargetLowering()->getMinFunctionAlignment(); @@ -271,18 +273,20 @@ getOrCreateJumpTableInfo(unsigned EntryKind) { } DenormalMode MachineFunction::getDenormalMode(const fltSemantics &FPType) const { + if (&FPType == &APFloat::IEEEsingle()) { + Attribute Attr = F.getFnAttribute("denormal-fp-math-f32"); + StringRef Val = Attr.getValueAsString(); + if (!Val.empty()) + return parseDenormalFPAttribute(Val); + + // If the f32 variant of the attribute isn't specified, try to use the + // generic one. + } + // TODO: Should probably avoid the connection to the IR and store directly // in the MachineFunction. Attribute Attr = F.getFnAttribute("denormal-fp-math"); - - // FIXME: This should assume IEEE behavior on an unspecified - // attribute. However, the one current user incorrectly assumes a non-IEEE - // target by default. 
- StringRef Val = Attr.getValueAsString(); - if (Val.empty()) - return DenormalMode::Invalid; - - return parseDenormalFPAttribute(Val); + return parseDenormalFPAttribute(Attr.getValueAsString()); } /// Should we be emitting segmented stack stuff for the function @@ -337,6 +341,49 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { MBBNumbering.resize(BlockNo); } +/// This is used with -fbasic-block-sections or -fbasicblock-labels option. +/// A unary encoding of basic block labels is done to keep ".strtab" sizes +/// small. +void MachineFunction::createBBLabels() { + const TargetInstrInfo *TII = getSubtarget().getInstrInfo(); + this->BBSectionsSymbolPrefix.resize(getNumBlockIDs(), 'a'); + for (auto MBBI = begin(), E = end(); MBBI != E; ++MBBI) { + assert( + (MBBI->getNumber() >= 0 && MBBI->getNumber() < (int)getNumBlockIDs()) && + "BasicBlock number was out of range!"); + // 'a' - Normal block. + // 'r' - Return block. + // 'l' - Landing Pad. + // 'L' - Return and landing pad. + bool isEHPad = MBBI->isEHPad(); + bool isRetBlock = MBBI->isReturnBlock() && !TII->isTailCall(MBBI->back()); + char type = 'a'; + if (isEHPad && isRetBlock) + type = 'L'; + else if (isEHPad) + type = 'l'; + else if (isRetBlock) + type = 'r'; + BBSectionsSymbolPrefix[MBBI->getNumber()] = type; + } +} + +/// This method iterates over the basic blocks and assigns their IsBeginSection +/// and IsEndSection fields. This must be called after MBB layout is finalized +/// and the SectionID's are assigned to MBBs. +void MachineFunction::assignBeginEndSections() { + front().setIsBeginSection(); + auto CurrentSectionID = front().getSectionID(); + for (auto MBBI = std::next(begin()), E = end(); MBBI != E; ++MBBI) { + if (MBBI->getSectionID() == CurrentSectionID) + continue; + MBBI->setIsBeginSection(); + std::prev(MBBI)->setIsEndSection(); + CurrentSectionID = MBBI->getSectionID(); + } + back().setIsEndSection(); +} + /// Allocate a new MachineInstr. Use this instead of `new MachineInstr'. MachineInstr *MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID, const DebugLoc &DL, @@ -370,6 +417,11 @@ MachineInstr &MachineFunction::CloneMachineInstrBundle(MachineBasicBlock &MBB, break; ++I; } + // Copy over call site info to the cloned instruction if needed. If Orig is in + // a bundle, copyCallSiteInfo takes care of finding the call instruction in + // the bundle. + if (Orig.shouldUpdateCallSiteInfo()) + copyCallSiteInfo(&Orig, FirstClone); return *FirstClone; } @@ -383,7 +435,7 @@ MachineFunction::DeleteMachineInstr(MachineInstr *MI) { // be triggered during the implementation of support for the // call site info of a new architecture. If the assertion is triggered, // back trace will tell where to insert a call to updateCallSiteInfo(). - assert((!MI->isCall(MachineInstr::IgnoreBundle) || + assert((!MI->isCandidateForCallSiteEntry() || CallSitesInfo.find(MI) == CallSitesInfo.end()) && "Call site info was not updated!"); // Strip it for parts. 
The operand array and the MI object itself are @@ -414,7 +466,7 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { MachineMemOperand *MachineFunction::getMachineMemOperand( MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, - unsigned base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges, + Align base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges, SyncScope::ID SSID, AtomicOrdering Ordering, AtomicOrdering FailureOrdering) { return new (Allocator) @@ -429,13 +481,13 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, // If there is no pointer value, the offset isn't tracked so we need to adjust // the base alignment. - unsigned Align = PtrInfo.V.isNull() - ? MinAlign(MMO->getBaseAlignment(), Offset) - : MMO->getBaseAlignment(); + Align Alignment = PtrInfo.V.isNull() + ? commonAlignment(MMO->getBaseAlign(), Offset) + : MMO->getBaseAlign(); return new (Allocator) MachineMemOperand(PtrInfo.getWithOffset(Offset), MMO->getFlags(), Size, - Align, AAMDNodes(), nullptr, MMO->getSyncScopeID(), + Alignment, AAMDNodes(), nullptr, MMO->getSyncScopeID(), MMO->getOrdering(), MMO->getFailureOrdering()); } @@ -446,18 +498,17 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, MachinePointerInfo(MMO->getValue(), MMO->getOffset()) : MachinePointerInfo(MMO->getPseudoValue(), MMO->getOffset()); - return new (Allocator) - MachineMemOperand(MPI, MMO->getFlags(), MMO->getSize(), - MMO->getBaseAlignment(), AAInfo, - MMO->getRanges(), MMO->getSyncScopeID(), - MMO->getOrdering(), MMO->getFailureOrdering()); + return new (Allocator) MachineMemOperand( + MPI, MMO->getFlags(), MMO->getSize(), MMO->getBaseAlign(), AAInfo, + MMO->getRanges(), MMO->getSyncScopeID(), MMO->getOrdering(), + MMO->getFailureOrdering()); } MachineMemOperand * MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, MachineMemOperand::Flags Flags) { return new (Allocator) MachineMemOperand( - MMO->getPointerInfo(), Flags, MMO->getSize(), MMO->getBaseAlignment(), + MMO->getPointerInfo(), Flags, MMO->getSize(), MMO->getBaseAlign(), MMO->getAAInfo(), MMO->getRanges(), MMO->getSyncScopeID(), MMO->getOrdering(), MMO->getFailureOrdering()); } @@ -608,10 +659,10 @@ void MachineFunction::viewCFGOnly() const /// Add the specified physical register as a live-in value and /// create a corresponding virtual register for it. -unsigned MachineFunction::addLiveIn(unsigned PReg, +Register MachineFunction::addLiveIn(MCRegister PReg, const TargetRegisterClass *RC) { MachineRegisterInfo &MRI = getRegInfo(); - unsigned VReg = MRI.getLiveInVirtReg(PReg); + Register VReg = MRI.getLiveInVirtReg(PReg); if (VReg) { const TargetRegisterClass *VRegRC = MRI.getRegClass(VReg); (void)VRegRC; @@ -853,28 +904,34 @@ try_next:; MachineFunction::CallSiteInfoMap::iterator MachineFunction::getCallSiteInfo(const MachineInstr *MI) { - assert(MI->isCall() && "Call site info refers only to call instructions!"); + assert(MI->isCandidateForCallSiteEntry() && + "Call site info refers only to call (MI) candidates"); - if (!Target.Options.EnableDebugEntryValues) + if (!Target.Options.EmitCallSiteInfo) return CallSitesInfo.end(); return CallSitesInfo.find(MI); } -void MachineFunction::moveCallSiteInfo(const MachineInstr *Old, - const MachineInstr *New) { - assert(New->isCall() && "Call site info refers only to call instructions!"); +/// Return the call machine instruction or find a call within bundle. 
+static const MachineInstr *getCallInstr(const MachineInstr *MI) { + if (!MI->isBundle()) + return MI; - CallSiteInfoMap::iterator CSIt = getCallSiteInfo(Old); - if (CSIt == CallSitesInfo.end()) - return; + for (auto &BMI : make_range(getBundleStart(MI->getIterator()), + getBundleEnd(MI->getIterator()))) + if (BMI.isCandidateForCallSiteEntry()) + return &BMI; - CallSiteInfo CSInfo = std::move(CSIt->second); - CallSitesInfo.erase(CSIt); - CallSitesInfo[New] = CSInfo; + llvm_unreachable("Unexpected bundle without a call site candidate"); } void MachineFunction::eraseCallSiteInfo(const MachineInstr *MI) { - CallSiteInfoMap::iterator CSIt = getCallSiteInfo(MI); + assert(MI->shouldUpdateCallSiteInfo() && + "Call site info refers only to call (MI) candidates or " + "candidates inside bundles"); + + const MachineInstr *CallMI = getCallInstr(MI); + CallSiteInfoMap::iterator CSIt = getCallSiteInfo(CallMI); if (CSIt == CallSitesInfo.end()) return; CallSitesInfo.erase(CSIt); @@ -882,9 +939,15 @@ void MachineFunction::eraseCallSiteInfo(const MachineInstr *MI) { void MachineFunction::copyCallSiteInfo(const MachineInstr *Old, const MachineInstr *New) { - assert(New->isCall() && "Call site info refers only to call instructions!"); + assert(Old->shouldUpdateCallSiteInfo() && + "Call site info refers only to call (MI) candidates or " + "candidates inside bundles"); + + if (!New->isCandidateForCallSiteEntry()) + return eraseCallSiteInfo(Old); - CallSiteInfoMap::iterator CSIt = getCallSiteInfo(Old); + const MachineInstr *OldCallMI = getCallInstr(Old); + CallSiteInfoMap::iterator CSIt = getCallSiteInfo(OldCallMI); if (CSIt == CallSitesInfo.end()) return; @@ -892,6 +955,25 @@ void MachineFunction::copyCallSiteInfo(const MachineInstr *Old, CallSitesInfo[New] = CSInfo; } +void MachineFunction::moveCallSiteInfo(const MachineInstr *Old, + const MachineInstr *New) { + assert(Old->shouldUpdateCallSiteInfo() && + "Call site info refers only to call (MI) candidates or " + "candidates inside bundles"); + + if (!New->isCandidateForCallSiteEntry()) + return eraseCallSiteInfo(Old); + + const MachineInstr *OldCallMI = getCallInstr(Old); + CallSiteInfoMap::iterator CSIt = getCallSiteInfo(OldCallMI); + if (CSIt == CallSitesInfo.end()) + return; + + CallSiteInfo CSInfo = std::move(CSIt->second); + CallSitesInfo.erase(CSIt); + CallSitesInfo[New] = CSInfo; +} + /// \} //===----------------------------------------------------------------------===// @@ -1095,8 +1177,7 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, /// Create a new entry in the constant pool or return an existing one. /// User must specify the log2 of the minimum required alignment for the object. unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, - unsigned Alignment) { - assert(Alignment && "Alignment must be specified!"); + Align Alignment) { if (Alignment > PoolAlignment) PoolAlignment = Alignment; // Check to see if we already have this constant. 
@@ -1105,7 +1186,7 @@ unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, for (unsigned i = 0, e = Constants.size(); i != e; ++i) if (!Constants[i].isMachineConstantPoolEntry() && CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, DL)) { - if ((unsigned)Constants[i].getAlignment() < Alignment) + if (Constants[i].getAlign() < Alignment) Constants[i].Alignment = Alignment; return i; } @@ -1115,8 +1196,7 @@ unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, } unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V, - unsigned Alignment) { - assert(Alignment && "Alignment must be specified!"); + Align Alignment) { if (Alignment > PoolAlignment) PoolAlignment = Alignment; // Check to see if we already have this constant. @@ -1142,7 +1222,7 @@ void MachineConstantPool::print(raw_ostream &OS) const { Constants[i].Val.MachineCPVal->print(OS); else Constants[i].Val.ConstVal->printAsOperand(OS, /*PrintType=*/false); - OS << ", align=" << Constants[i].getAlignment(); + OS << ", align=" << Constants[i].getAlign().value(); OS << "\n"; } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp index 08d786f8f12c..d4181591deab 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp @@ -61,6 +61,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -696,6 +697,26 @@ void MachineInstr::eraseFromBundle() { getParent()->erase_instr(this); } +bool MachineInstr::isCandidateForCallSiteEntry(QueryType Type) const { + if (!isCall(Type)) + return false; + switch (getOpcode()) { + case TargetOpcode::PATCHABLE_EVENT_CALL: + case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL: + case TargetOpcode::PATCHPOINT: + case TargetOpcode::STACKMAP: + case TargetOpcode::STATEPOINT: + return false; + } + return true; +} + +bool MachineInstr::shouldUpdateCallSiteInfo() const { + if (isBundle()) + return isCandidateForCallSiteEntry(MachineInstr::AnyInBundle); + return isCandidateForCallSiteEntry(); +} + unsigned MachineInstr::getNumExplicitOperands() const { unsigned NumOperands = MCID->getNumOperands(); if (!MCID->isVariadic()) @@ -813,11 +834,26 @@ const DILabel *MachineInstr::getDebugLabel() const { return cast<DILabel>(getOperand(0).getMetadata()); } +const MachineOperand &MachineInstr::getDebugVariableOp() const { + assert(isDebugValue() && "not a DBG_VALUE"); + return getOperand(2); +} + +MachineOperand &MachineInstr::getDebugVariableOp() { + assert(isDebugValue() && "not a DBG_VALUE"); + return getOperand(2); +} + const DILocalVariable *MachineInstr::getDebugVariable() const { assert(isDebugValue() && "not a DBG_VALUE"); return cast<DILocalVariable>(getOperand(2).getMetadata()); } +MachineOperand &MachineInstr::getDebugExpressionOp() { + assert(isDebugValue() && "not a DBG_VALUE"); + return getOperand(3); +} + const DIExpression *MachineInstr::getDebugExpression() const { assert(isDebugValue() && "not a DBG_VALUE"); return cast<DIExpression>(getOperand(3).getMetadata()); @@ -1199,6 +1235,10 @@ bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other, if (!mayStore() && !Other.mayStore()) return false; + // Both instructions must be memory operations to be able to alias. 
+ if (!mayLoadOrStore() || !Other.mayLoadOrStore()) + return false; + // Let the target decide if memory accesses cannot possibly overlap. if (TII->areMemAccessesTriviallyDisjoint(*this, Other)) return false; @@ -1449,6 +1489,37 @@ LLVM_DUMP_METHOD void MachineInstr::dump() const { dbgs() << " "; print(dbgs()); } + +LLVM_DUMP_METHOD void MachineInstr::dumprImpl( + const MachineRegisterInfo &MRI, unsigned Depth, unsigned MaxDepth, + SmallPtrSetImpl<const MachineInstr *> &AlreadySeenInstrs) const { + if (Depth >= MaxDepth) + return; + if (!AlreadySeenInstrs.insert(this).second) + return; + // PadToColumn always inserts at least one space. + // Don't mess up the alignment if we don't want any space. + if (Depth) + fdbgs().PadToColumn(Depth * 2); + print(fdbgs()); + for (const MachineOperand &MO : operands()) { + if (!MO.isReg() || MO.isDef()) + continue; + Register Reg = MO.getReg(); + if (Reg.isPhysical()) + continue; + const MachineInstr *NewMI = MRI.getUniqueVRegDef(Reg); + if (NewMI == nullptr) + continue; + NewMI->dumprImpl(MRI, Depth + 1, MaxDepth, AlreadySeenInstrs); + } +} + +LLVM_DUMP_METHOD void MachineInstr::dumpr(const MachineRegisterInfo &MRI, + unsigned MaxDepth) const { + SmallPtrSet<const MachineInstr *, 16> AlreadySeenInstrs; + dumprImpl(MRI, 0, MaxDepth, AlreadySeenInstrs); +} #endif void MachineInstr::print(raw_ostream &OS, bool IsStandalone, bool SkipOpers, @@ -1473,7 +1544,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, bool IsStandalone, bool SkipOpers, bool SkipDebugLoc, bool AddNewLine, const TargetInstrInfo *TII) const { // We can be a bit tidier if we know the MachineFunction. - const MachineFunction *MF = nullptr; const TargetRegisterInfo *TRI = nullptr; const MachineRegisterInfo *MRI = nullptr; const TargetIntrinsicInfo *IntrinsicInfo = nullptr; @@ -1540,6 +1610,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "exact "; if (getFlag(MachineInstr::NoFPExcept)) OS << "nofpexcept "; + if (getFlag(MachineInstr::NoMerge)) + OS << "nomerge "; // Print the opcode name. if (TII) @@ -1618,15 +1690,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, // Pretty print the inline asm operand descriptor. OS << '$' << AsmOpCount++; unsigned Flag = MO.getImm(); - switch (InlineAsm::getKind(Flag)) { - case InlineAsm::Kind_RegUse: OS << ":[reguse"; break; - case InlineAsm::Kind_RegDef: OS << ":[regdef"; break; - case InlineAsm::Kind_RegDefEarlyClobber: OS << ":[regdef-ec"; break; - case InlineAsm::Kind_Clobber: OS << ":[clobber"; break; - case InlineAsm::Kind_Imm: OS << ":[imm"; break; - case InlineAsm::Kind_Mem: OS << ":[mem"; break; - default: OS << ":[??" 
<< InlineAsm::getKind(Flag); break; - } + OS << ":["; + OS << InlineAsm::getKindName(InlineAsm::getKind(Flag)); unsigned RCID = 0; if (!InlineAsm::isImmKind(Flag) && !InlineAsm::isMemKind(Flag) && @@ -1639,29 +1704,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, if (InlineAsm::isMemKind(Flag)) { unsigned MCID = InlineAsm::getMemoryConstraintID(Flag); - switch (MCID) { - case InlineAsm::Constraint_es: OS << ":es"; break; - case InlineAsm::Constraint_i: OS << ":i"; break; - case InlineAsm::Constraint_m: OS << ":m"; break; - case InlineAsm::Constraint_o: OS << ":o"; break; - case InlineAsm::Constraint_v: OS << ":v"; break; - case InlineAsm::Constraint_Q: OS << ":Q"; break; - case InlineAsm::Constraint_R: OS << ":R"; break; - case InlineAsm::Constraint_S: OS << ":S"; break; - case InlineAsm::Constraint_T: OS << ":T"; break; - case InlineAsm::Constraint_Um: OS << ":Um"; break; - case InlineAsm::Constraint_Un: OS << ":Un"; break; - case InlineAsm::Constraint_Uq: OS << ":Uq"; break; - case InlineAsm::Constraint_Us: OS << ":Us"; break; - case InlineAsm::Constraint_Ut: OS << ":Ut"; break; - case InlineAsm::Constraint_Uv: OS << ":Uv"; break; - case InlineAsm::Constraint_Uy: OS << ":Uy"; break; - case InlineAsm::Constraint_X: OS << ":X"; break; - case InlineAsm::Constraint_Z: OS << ":Z"; break; - case InlineAsm::Constraint_ZC: OS << ":ZC"; break; - case InlineAsm::Constraint_Zy: OS << ":Zy"; break; - default: OS << ":?"; break; - } + OS << ":" << InlineAsm::getMemConstraintName(MCID); } unsigned TiedTo = 0; @@ -1758,21 +1801,13 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, } // Print extra comments for DEBUG_VALUE. - if (isDebugValue() && getOperand(e - 2).isMetadata()) { + if (isDebugValue() && getDebugVariableOp().isMetadata()) { if (!HaveSemi) { OS << ";"; HaveSemi = true; } - auto *DV = cast<DILocalVariable>(getOperand(e - 2).getMetadata()); + auto *DV = getDebugVariable(); OS << " line no:" << DV->getLine(); - if (auto *InlinedAt = debugLoc->getInlinedAt()) { - DebugLoc InlinedAtDL(InlinedAt); - if (InlinedAtDL && MF) { - OS << " inlined @[ "; - InlinedAtDL.print(OS); - OS << " ]"; - } - } if (isIndirectDebugValue()) OS << " indirect"; } @@ -2077,7 +2112,8 @@ static const DIExpression *computeExprForSpill(const MachineInstr &MI) { const DIExpression *Expr = MI.getDebugExpression(); if (MI.isIndirectDebugValue()) { - assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset"); + assert(MI.getDebugOffset().getImm() == 0 && + "DBG_VALUE with nonzero offset"); Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore); } return Expr; @@ -2097,9 +2133,9 @@ MachineInstr *llvm::buildDbgValueForSpill(MachineBasicBlock &BB, void llvm::updateDbgValueForSpill(MachineInstr &Orig, int FrameIndex) { const DIExpression *Expr = computeExprForSpill(Orig); - Orig.getOperand(0).ChangeToFrameIndex(FrameIndex); - Orig.getOperand(1).ChangeToImmediate(0U); - Orig.getOperand(3).setMetadata(Expr); + Orig.getDebugOperand(0).ChangeToFrameIndex(FrameIndex); + Orig.getDebugOffset().ChangeToImmediate(0U); + Orig.getDebugExpressionOp().setMetadata(Expr); } void MachineInstr::collectDebugValues( @@ -2113,8 +2149,7 @@ void MachineInstr::collectDebugValues( DI != DE; ++DI) { if (!DI->isDebugValue()) return; - if (DI->getOperand(0).isReg() && - DI->getOperand(0).getReg() == MI.getOperand(0).getReg()) + if (DI->getDebugOperandForReg(MI.getOperand(0).getReg())) DbgValues.push_back(&*DI); } } @@ -2126,26 +2161,25 @@ void MachineInstr::changeDebugValuesDefReg(Register 
Reg) { if (!getOperand(0).isReg()) return; - unsigned DefReg = getOperand(0).getReg(); + Register DefReg = getOperand(0).getReg(); auto *MRI = getRegInfo(); for (auto &MO : MRI->use_operands(DefReg)) { auto *DI = MO.getParent(); if (!DI->isDebugValue()) continue; - if (DI->getOperand(0).isReg() && - DI->getOperand(0).getReg() == DefReg){ + if (DI->getDebugOperandForReg(DefReg)) { DbgValues.push_back(DI); } } // Propagate Reg to debug value instructions. for (auto *DBI : DbgValues) - DBI->getOperand(0).setReg(Reg); + DBI->getDebugOperandForReg(DefReg)->setReg(Reg); } using MMOList = SmallVector<const MachineMemOperand *, 2>; -static unsigned getSpillSlotSize(MMOList &Accesses, +static unsigned getSpillSlotSize(const MMOList &Accesses, const MachineFrameInfo &MFI) { unsigned Size = 0; for (auto A : Accesses) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp index 94865b0e9031..50456e489ea1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -136,14 +136,14 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, BuildMI(MF, getDebugLoc(FirstMI, LastMI), TII->get(TargetOpcode::BUNDLE)); Bundle.prepend(MIB); - SmallVector<unsigned, 32> LocalDefs; - SmallSet<unsigned, 32> LocalDefSet; - SmallSet<unsigned, 8> DeadDefSet; - SmallSet<unsigned, 16> KilledDefSet; - SmallVector<unsigned, 8> ExternUses; - SmallSet<unsigned, 8> ExternUseSet; - SmallSet<unsigned, 8> KilledUseSet; - SmallSet<unsigned, 8> UndefUseSet; + SmallVector<Register, 32> LocalDefs; + SmallSet<Register, 32> LocalDefSet; + SmallSet<Register, 8> DeadDefSet; + SmallSet<Register, 16> KilledDefSet; + SmallVector<Register, 8> ExternUses; + SmallSet<Register, 8> ExternUseSet; + SmallSet<Register, 8> KilledUseSet; + SmallSet<Register, 8> UndefUseSet; SmallVector<MachineOperand*, 4> Defs; for (auto MII = FirstMI; MII != LastMI; ++MII) { for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { @@ -207,9 +207,9 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, Defs.clear(); } - SmallSet<unsigned, 32> Added; + SmallSet<Register, 32> Added; for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { - unsigned Reg = LocalDefs[i]; + Register Reg = LocalDefs[i]; if (Added.insert(Reg).second) { // If it's not live beyond end of the bundle, mark it dead. 
bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg); @@ -219,7 +219,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, } for (unsigned i = 0, e = ExternUses.size(); i != e; ++i) { - unsigned Reg = ExternUses[i]; + Register Reg = ExternUses[i]; bool isKill = KilledUseSet.count(Reg); bool isUndef = UndefUseSet.count(Reg); MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) | @@ -279,7 +279,7 @@ bool llvm::finalizeBundles(MachineFunction &MF) { } VirtRegInfo llvm::AnalyzeVirtRegInBundle( - MachineInstr &MI, unsigned Reg, + MachineInstr &MI, Register Reg, SmallVectorImpl<std::pair<MachineInstr *, unsigned>> *Ops) { VirtRegInfo RI = {false, false, false}; for (MIBundleOperands O(MI); O.isValid(); ++O) { @@ -308,13 +308,12 @@ VirtRegInfo llvm::AnalyzeVirtRegInBundle( return RI; } -PhysRegInfo llvm::AnalyzePhysRegInBundle(const MachineInstr &MI, unsigned Reg, +PhysRegInfo llvm::AnalyzePhysRegInBundle(const MachineInstr &MI, Register Reg, const TargetRegisterInfo *TRI) { bool AllDefsDead = true; PhysRegInfo PRI = {false, false, false, false, false, false, false, false}; - assert(Register::isPhysicalRegister(Reg) && - "analyzePhysReg not given a physical register!"); + assert(Reg.isPhysical() && "analyzePhysReg not given a physical register!"); for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { const MachineOperand &MO = *O; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp index 462d4d3b3726..5e8a916b3b3b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp @@ -635,6 +635,12 @@ void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) { MachineBasicBlock *MBB = MI->getParent(); Preheader->splice(Preheader->getFirstTerminator(), MBB, MI); + // Since we are moving the instruction out of its basic block, we do not + // retain its debug location. Doing so would degrade the debugging + // experience and adversely affect the accuracy of profiling information. + assert(!MI->isDebugInstr() && "Should not hoist debug inst"); + MI->setDebugLoc(DebugLoc()); + // Add register to livein list to all the BBs in the current loop since a // loop invariant must be kept live throughout the whole loop. This is // important to ensure later passes do not scavenge the def register. @@ -731,8 +737,7 @@ void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { continue; Scopes.push_back(Node); - const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); - unsigned NumChildren = Children.size(); + unsigned NumChildren = Node->getNumChildren(); // Don't hoist things out of a large switch statement. This often causes // code to be hoisted that wasn't going to be executed, and increases @@ -741,13 +746,14 @@ void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { NumChildren = 0; OpenChildren[Node] = NumChildren; - // Add children in reverse order as then the next popped worklist node is - // the first child of this node. This means we ultimately traverse the - // DOM tree in exactly the same order as if we'd recursed. - for (int i = (int)NumChildren-1; i >= 0; --i) { - MachineDomTreeNode *Child = Children[i]; - ParentMap[Child] = Node; - WorkList.push_back(Child); + if (NumChildren) { + // Add children in reverse order as then the next popped worklist node is + // the first child of this node. This means we ultimately traverse the + // DOM tree in exactly the same order as if we'd recursed. 
+ for (MachineDomTreeNode *Child : reverse(Node->children())) { + ParentMap[Child] = Node; + WorkList.push_back(Child); + } } } @@ -829,7 +835,15 @@ void MachineLICMBase::SinkIntoLoop() { } if (!CanSink || !B || B == Preheader) continue; + + LLVM_DEBUG(dbgs() << "Sinking to " << printMBBReference(*B) << " from " + << printMBBReference(*I->getParent()) << ": " << *I); B->splice(B->getFirstNonPHI(), Preheader, I); + + // The instruction is is moved from its basic block, so do not retain the + // debug information. + assert(!I->isDebugInstr() && "Should not sink debug inst"); + I->setDebugLoc(DebugLoc()); } } @@ -1367,6 +1381,11 @@ MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) { UpdateRegPressure(NewMIs[1]); // Otherwise we successfully unfolded a load that we can hoist. + + // Update the call site info. + if (MI->shouldUpdateCallSiteInfo()) + MF.eraseCallSiteInfo(MI); + MI->eraseFromParent(); return NewMIs[0]; } @@ -1519,6 +1538,7 @@ bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { // Since we are moving the instruction out of its basic block, we do not // retain its debug location. Doing so would degrade the debugging // experience and adversely affect the accuracy of profiling information. + assert(!MI->isDebugInstr() && "Should not hoist debug inst"); MI->setDebugLoc(DebugLoc()); // Update register pressure for BBs from header to this block. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp index cf30e28449cd..2295e1ca6d4e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp @@ -42,8 +42,7 @@ MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction, else MF.insert(std::next(Loop->getIterator()), NewBB); - // FIXME: Add DenseMapInfo trait for Register so we can use it as a key. - DenseMap<unsigned, Register> Remaps; + DenseMap<Register, Register> Remaps; auto InsertPt = NewBB->end(); for (MachineInstr &MI : *Loop) { MachineInstr *NewMI = MF.CloneMachineInstr(&MI); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp index 0094a923e039..f866c7ca53c6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -76,25 +76,11 @@ class MMIAddrLabelMap { /// we get notified if a block is deleted or RAUWd. std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks; - /// This is a per-function list of symbols whose corresponding BasicBlock got - /// deleted. These symbols need to be emitted at some point in the file, so - /// AsmPrinter emits them after the function body. 
- DenseMap<AssertingVH<Function>, std::vector<MCSymbol*>> - DeletedAddrLabelsNeedingEmission; - public: MMIAddrLabelMap(MCContext &context) : Context(context) {} - ~MMIAddrLabelMap() { - assert(DeletedAddrLabelsNeedingEmission.empty() && - "Some labels for deleted blocks never got emitted"); - } - ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(BasicBlock *BB); - void takeDeletedSymbolsForFunction(Function *F, - std::vector<MCSymbol*> &Result); - void UpdateForDeletedBlock(BasicBlock *BB); void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New); }; @@ -119,33 +105,10 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { Entry.Index = BBCallbacks.size() - 1; Entry.Fn = BB->getParent(); MCSymbol *Sym = Context.createTempSymbol(!BB->hasAddressTaken()); - if (Context.getObjectFileInfo()->getTargetTriple().isOSBinFormatXCOFF()) { - MCSymbol *FnEntryPointSym = - Context.lookupSymbol("." + Entry.Fn->getName()); - assert(FnEntryPointSym && "The function entry pointer symbol should have" - " already been initialized."); - MCSectionXCOFF *Csect = - cast<MCSymbolXCOFF>(FnEntryPointSym)->getContainingCsect(); - cast<MCSymbolXCOFF>(Sym)->setContainingCsect(Csect); - } Entry.Symbols.push_back(Sym); return Entry.Symbols; } -/// If we have any deleted symbols for F, return them. -void MMIAddrLabelMap:: -takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) { - DenseMap<AssertingVH<Function>, std::vector<MCSymbol*>>::iterator I = - DeletedAddrLabelsNeedingEmission.find(F); - - // If there are no entries for the function, just return. - if (I == DeletedAddrLabelsNeedingEmission.end()) return; - - // Otherwise, take the list. - std::swap(Result, I->second); - DeletedAddrLabelsNeedingEmission.erase(I); -} - void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) { // If the block got deleted, there is no need for the symbol. If the symbol // was already emitted, we can just forget about it, otherwise we need to @@ -158,16 +121,8 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) { assert((BB->getParent() == nullptr || BB->getParent() == Entry.Fn) && "Block/parent mismatch"); - for (MCSymbol *Sym : Entry.Symbols) { - if (Sym->isDefined()) - return; - - // If the block is not yet defined, we need to emit it at the end of the - // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list - // for the containing Function. Since the block is being deleted, its - // parent may already be removed, we have to get the function from 'Entry'. - DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym); - } + assert(llvm::all_of(Entry.Symbols, [](MCSymbol *Sym) { + return Sym->isDefined(); })); } void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) { @@ -252,15 +207,6 @@ MachineModuleInfo::getAddrLabelSymbolToEmit(const BasicBlock *BB) { return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB)); } -void MachineModuleInfo:: -takeDeletedSymbolsForFunction(const Function *F, - std::vector<MCSymbol*> &Result) { - // If no blocks have had their addresses taken, we're done. - if (!AddrLabelSymbols) return; - return AddrLabelSymbols-> - takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result); -} - /// \name Exception Handling /// \{ @@ -279,8 +225,7 @@ MachineModuleInfo::getMachineFunction(const Function &F) const { return I != MachineFunctions.end() ? 
I->second.get() : nullptr; } -MachineFunction & -MachineModuleInfo::getOrCreateMachineFunction(const Function &F) { +MachineFunction &MachineModuleInfo::getOrCreateMachineFunction(Function &F) { // Shortcut for the common case where a sequence of MachineFunctionPasses // all query for the same Function. if (LastRequest == &F) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp index 7b8f01100929..2b4fd654e46c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" @@ -24,6 +25,7 @@ #include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" #include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/MC/MCDwarf.h" #include "llvm/Target/TargetIntrinsicInfo.h" @@ -668,7 +670,7 @@ static void printCFI(raw_ostream &OS, const MCCFIInstruction &CFI, size_t e = CFI.getValues().size() - 1; for (size_t i = 0; i < e; ++i) OS << format("0x%02x", uint8_t(CFI.getValues()[i])) << ", "; - OS << format("0x%02x", uint8_t(CFI.getValues()[e])) << ", "; + OS << format("0x%02x", uint8_t(CFI.getValues()[e])); } break; } @@ -969,8 +971,7 @@ bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C, return false; return isDereferenceableAndAlignedPointer( - BasePtr, Align::None(), APInt(DL.getPointerSizeInBits(), Offset + Size), - DL); + BasePtr, Align(1), APInt(DL.getPointerSizeInBits(), Offset + Size), DL); } /// getConstantPool - Return a MachinePointerInfo record that refers to the @@ -1004,17 +1005,16 @@ MachinePointerInfo MachinePointerInfo::getUnknownStack(MachineFunction &MF) { } MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, - uint64_t s, uint64_t a, + uint64_t s, Align a, const AAMDNodes &AAInfo, const MDNode *Ranges, SyncScope::ID SSID, AtomicOrdering Ordering, AtomicOrdering FailureOrdering) - : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1), - AAInfo(AAInfo), Ranges(Ranges) { + : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlign(a), AAInfo(AAInfo), + Ranges(Ranges) { assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue *>() || isa<PointerType>(PtrInfo.V.get<const Value *>()->getType())) && "invalid pointer value"); - assert(getBaseAlignment() == a && a != 0 && "Alignment is not a power of 2!"); assert((isLoad() || isStore()) && "Not a load/store!"); AtomicInfo.SSID = static_cast<unsigned>(SSID); @@ -1032,7 +1032,7 @@ void MachineMemOperand::Profile(FoldingSetNodeID &ID) const { ID.AddInteger(Size); ID.AddPointer(getOpaqueValue()); ID.AddInteger(getFlags()); - ID.AddInteger(getBaseAlignment()); + ID.AddInteger(getBaseAlign().value()); } void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { @@ -1041,9 +1041,9 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { assert(MMO->getFlags() == getFlags() && "Flags mismatch!"); assert(MMO->getSize() == getSize() && "Size mismatch!"); - if (MMO->getBaseAlignment() >= getBaseAlignment()) { + if (MMO->getBaseAlign() >= getBaseAlign()) { // Update the alignment value. 
- BaseAlignLog2 = Log2_32(MMO->getBaseAlignment()) + 1; + BaseAlign = MMO->getBaseAlign(); // Also update the base and offset, because the new alignment may // not be applicable with the old ones. PtrInfo = MMO->PtrInfo; @@ -1052,8 +1052,12 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { /// getAlignment - Return the minimum known alignment in bytes of the /// actual memory reference. -uint64_t MachineMemOperand::getAlignment() const { - return MinAlign(getBaseAlignment(), getOffset()); +uint64_t MachineMemOperand::getAlignment() const { return getAlign().value(); } + +/// getAlign - Return the minimum known alignment in bytes of the +/// actual memory reference. +Align MachineMemOperand::getAlign() const { + return commonAlignment(getBaseAlign(), getOffset()); } void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, @@ -1148,8 +1152,8 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, } } MachineOperand::printOperandOffset(OS, getOffset()); - if (getBaseAlignment() != getSize()) - OS << ", align " << getBaseAlignment(); + if (getBaseAlign() != getSize()) + OS << ", align " << getBaseAlign().value(); auto AAInfo = getAAInfo(); if (AAInfo.TBAA) { OS << ", !tbaa "; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp index d656953f9115..dcb8e4073ea3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -24,7 +24,7 @@ using namespace llvm; DiagnosticInfoMIROptimization::MachineArgument::MachineArgument( StringRef MKey, const MachineInstr &MI) : Argument() { - Key = MKey; + Key = std::string(MKey); raw_string_ostream OS(Val); MI.print(OS, /*IsStandalone=*/true, /*SkipOpers=*/false, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp index 3a9104bda0d1..f9d099e02995 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp @@ -56,6 +56,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineOutliner.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/MachineFunction.h" @@ -69,9 +70,9 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Mangler.h" #include "llvm/InitializePasses.h" -#include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/SuffixTree.h" #include "llvm/Support/raw_ostream.h" #include <functional> #include <tuple> @@ -96,514 +97,15 @@ static cl::opt<bool> EnableLinkOnceODROutlining( cl::desc("Enable the machine outliner on linkonceodr functions"), cl::init(false)); -namespace { - -/// Represents an undefined index in the suffix tree. -const unsigned EmptyIdx = -1; - -/// A node in a suffix tree which represents a substring or suffix. -/// -/// Each node has either no children or at least two children, with the root -/// being a exception in the empty tree. -/// -/// Children are represented as a map between unsigned integers and nodes. If -/// a node N has a child M on unsigned integer k, then the mapping represented -/// by N is a proper prefix of the mapping represented by M. 
Note that this, -/// although similar to a trie is somewhat different: each node stores a full -/// substring of the full mapping rather than a single character state. -/// -/// Each internal node contains a pointer to the internal node representing -/// the same string, but with the first character chopped off. This is stored -/// in \p Link. Each leaf node stores the start index of its respective -/// suffix in \p SuffixIdx. -struct SuffixTreeNode { - - /// The children of this node. - /// - /// A child existing on an unsigned integer implies that from the mapping - /// represented by the current node, there is a way to reach another - /// mapping by tacking that character on the end of the current string. - DenseMap<unsigned, SuffixTreeNode *> Children; - - /// The start index of this node's substring in the main string. - unsigned StartIdx = EmptyIdx; - - /// The end index of this node's substring in the main string. - /// - /// Every leaf node must have its \p EndIdx incremented at the end of every - /// step in the construction algorithm. To avoid having to update O(N) - /// nodes individually at the end of every step, the end index is stored - /// as a pointer. - unsigned *EndIdx = nullptr; - - /// For leaves, the start index of the suffix represented by this node. - /// - /// For all other nodes, this is ignored. - unsigned SuffixIdx = EmptyIdx; - - /// For internal nodes, a pointer to the internal node representing - /// the same sequence with the first character chopped off. - /// - /// This acts as a shortcut in Ukkonen's algorithm. One of the things that - /// Ukkonen's algorithm does to achieve linear-time construction is - /// keep track of which node the next insert should be at. This makes each - /// insert O(1), and there are a total of O(N) inserts. The suffix link - /// helps with inserting children of internal nodes. - /// - /// Say we add a child to an internal node with associated mapping S. The - /// next insertion must be at the node representing S - its first character. - /// This is given by the way that we iteratively build the tree in Ukkonen's - /// algorithm. The main idea is to look at the suffixes of each prefix in the - /// string, starting with the longest suffix of the prefix, and ending with - /// the shortest. Therefore, if we keep pointers between such nodes, we can - /// move to the next insertion point in O(1) time. If we don't, then we'd - /// have to query from the root, which takes O(N) time. This would make the - /// construction algorithm O(N^2) rather than O(N). - SuffixTreeNode *Link = nullptr; - - /// The length of the string formed by concatenating the edge labels from the - /// root to this node. - unsigned ConcatLen = 0; - - /// Returns true if this node is a leaf. - bool isLeaf() const { return SuffixIdx != EmptyIdx; } - - /// Returns true if this node is the root of its owning \p SuffixTree. - bool isRoot() const { return StartIdx == EmptyIdx; } - - /// Return the number of elements in the substring associated with this node. - size_t size() const { - - // Is it the root? If so, it's the empty string so return 0. - if (isRoot()) - return 0; - - assert(*EndIdx != EmptyIdx && "EndIdx is undefined!"); - - // Size = the number of elements in the string. - // For example, [0 1 2 3] has length 4, not 3. 3-0 = 3, so we have 3-0+1. 
- return *EndIdx - StartIdx + 1; - } - - SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link) - : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link) {} - - SuffixTreeNode() {} -}; - -/// A data structure for fast substring queries. -/// -/// Suffix trees represent the suffixes of their input strings in their leaves. -/// A suffix tree is a type of compressed trie structure where each node -/// represents an entire substring rather than a single character. Each leaf -/// of the tree is a suffix. -/// -/// A suffix tree can be seen as a type of state machine where each state is a -/// substring of the full string. The tree is structured so that, for a string -/// of length N, there are exactly N leaves in the tree. This structure allows -/// us to quickly find repeated substrings of the input string. -/// -/// In this implementation, a "string" is a vector of unsigned integers. -/// These integers may result from hashing some data type. A suffix tree can -/// contain 1 or many strings, which can then be queried as one large string. -/// -/// The suffix tree is implemented using Ukkonen's algorithm for linear-time -/// suffix tree construction. Ukkonen's algorithm is explained in more detail -/// in the paper by Esko Ukkonen "On-line construction of suffix trees. The -/// paper is available at -/// -/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf -class SuffixTree { -public: - /// Each element is an integer representing an instruction in the module. - ArrayRef<unsigned> Str; - - /// A repeated substring in the tree. - struct RepeatedSubstring { - /// The length of the string. - unsigned Length; - - /// The start indices of each occurrence. - std::vector<unsigned> StartIndices; - }; - -private: - /// Maintains each node in the tree. - SpecificBumpPtrAllocator<SuffixTreeNode> NodeAllocator; - - /// The root of the suffix tree. - /// - /// The root represents the empty string. It is maintained by the - /// \p NodeAllocator like every other node in the tree. - SuffixTreeNode *Root = nullptr; - - /// Maintains the end indices of the internal nodes in the tree. - /// - /// Each internal node is guaranteed to never have its end index change - /// during the construction algorithm; however, leaves must be updated at - /// every step. Therefore, we need to store leaf end indices by reference - /// to avoid updating O(N) leaves at every step of construction. Thus, - /// every internal node must be allocated its own end index. - BumpPtrAllocator InternalEndIdxAllocator; - - /// The end index of each leaf in the tree. - unsigned LeafEndIdx = -1; - - /// Helper struct which keeps track of the next insertion point in - /// Ukkonen's algorithm. - struct ActiveState { - /// The next node to insert at. - SuffixTreeNode *Node = nullptr; - - /// The index of the first character in the substring currently being added. - unsigned Idx = EmptyIdx; - - /// The length of the substring we have to add at the current step. - unsigned Len = 0; - }; - - /// The point the next insertion will take place at in the - /// construction algorithm. - ActiveState Active; - - /// Allocate a leaf node and add it to the tree. - /// - /// \param Parent The parent of this node. - /// \param StartIdx The start index of this node's associated string. - /// \param Edge The label on the edge leaving \p Parent to this node. - /// - /// \returns A pointer to the allocated leaf node. 
- SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, unsigned StartIdx, - unsigned Edge) { - - assert(StartIdx <= LeafEndIdx && "String can't start after it ends!"); - - SuffixTreeNode *N = new (NodeAllocator.Allocate()) - SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr); - Parent.Children[Edge] = N; - - return N; - } - - /// Allocate an internal node and add it to the tree. - /// - /// \param Parent The parent of this node. Only null when allocating the root. - /// \param StartIdx The start index of this node's associated string. - /// \param EndIdx The end index of this node's associated string. - /// \param Edge The label on the edge leaving \p Parent to this node. - /// - /// \returns A pointer to the allocated internal node. - SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, unsigned StartIdx, - unsigned EndIdx, unsigned Edge) { - - assert(StartIdx <= EndIdx && "String can't start after it ends!"); - assert(!(!Parent && StartIdx != EmptyIdx) && - "Non-root internal nodes must have parents!"); - - unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx); - SuffixTreeNode *N = - new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx, E, Root); - if (Parent) - Parent->Children[Edge] = N; - - return N; - } - - /// Set the suffix indices of the leaves to the start indices of their - /// respective suffixes. - void setSuffixIndices() { - // List of nodes we need to visit along with the current length of the - // string. - std::vector<std::pair<SuffixTreeNode *, unsigned>> ToVisit; - - // Current node being visited. - SuffixTreeNode *CurrNode = Root; - - // Sum of the lengths of the nodes down the path to the current one. - unsigned CurrNodeLen = 0; - ToVisit.push_back({CurrNode, CurrNodeLen}); - while (!ToVisit.empty()) { - std::tie(CurrNode, CurrNodeLen) = ToVisit.back(); - ToVisit.pop_back(); - CurrNode->ConcatLen = CurrNodeLen; - for (auto &ChildPair : CurrNode->Children) { - assert(ChildPair.second && "Node had a null child!"); - ToVisit.push_back( - {ChildPair.second, CurrNodeLen + ChildPair.second->size()}); - } - - // No children, so we are at the end of the string. - if (CurrNode->Children.size() == 0 && !CurrNode->isRoot()) - CurrNode->SuffixIdx = Str.size() - CurrNodeLen; - } - } - - /// Construct the suffix tree for the prefix of the input ending at - /// \p EndIdx. - /// - /// Used to construct the full suffix tree iteratively. At the end of each - /// step, the constructed suffix tree is either a valid suffix tree, or a - /// suffix tree with implicit suffixes. At the end of the final step, the - /// suffix tree is a valid tree. - /// - /// \param EndIdx The end index of the current prefix in the main string. - /// \param SuffixesToAdd The number of suffixes that must be added - /// to complete the suffix tree at the current phase. - /// - /// \returns The number of suffixes that have not been added at the end of - /// this step. - unsigned extend(unsigned EndIdx, unsigned SuffixesToAdd) { - SuffixTreeNode *NeedsLink = nullptr; - - while (SuffixesToAdd > 0) { - - // Are we waiting to add anything other than just the last character? - if (Active.Len == 0) { - // If not, then say the active index is the end index. - Active.Idx = EndIdx; - } - - assert(Active.Idx <= EndIdx && "Start index can't be after end index!"); - - // The first character in the current substring we're looking at. - unsigned FirstChar = Str[Active.Idx]; - - // Have we inserted anything starting with FirstChar at the current node? 
- if (Active.Node->Children.count(FirstChar) == 0) { - // If not, then we can just insert a leaf and move too the next step. - insertLeaf(*Active.Node, EndIdx, FirstChar); - - // The active node is an internal node, and we visited it, so it must - // need a link if it doesn't have one. - if (NeedsLink) { - NeedsLink->Link = Active.Node; - NeedsLink = nullptr; - } - } else { - // There's a match with FirstChar, so look for the point in the tree to - // insert a new node. - SuffixTreeNode *NextNode = Active.Node->Children[FirstChar]; - - unsigned SubstringLen = NextNode->size(); - - // Is the current suffix we're trying to insert longer than the size of - // the child we want to move to? - if (Active.Len >= SubstringLen) { - // If yes, then consume the characters we've seen and move to the next - // node. - Active.Idx += SubstringLen; - Active.Len -= SubstringLen; - Active.Node = NextNode; - continue; - } - - // Otherwise, the suffix we're trying to insert must be contained in the - // next node we want to move to. - unsigned LastChar = Str[EndIdx]; - - // Is the string we're trying to insert a substring of the next node? - if (Str[NextNode->StartIdx + Active.Len] == LastChar) { - // If yes, then we're done for this step. Remember our insertion point - // and move to the next end index. At this point, we have an implicit - // suffix tree. - if (NeedsLink && !Active.Node->isRoot()) { - NeedsLink->Link = Active.Node; - NeedsLink = nullptr; - } +/// Number of times to re-run the outliner. This is not the total number of runs +/// as the outliner will run at least one time. The default value is set to 0, +/// meaning the outliner will run one time and rerun zero times after that. +static cl::opt<unsigned> OutlinerReruns( + "machine-outliner-reruns", cl::init(0), cl::Hidden, + cl::desc( + "Number of times to rerun the outliner after the initial outline")); - Active.Len++; - break; - } - - // The string we're trying to insert isn't a substring of the next node, - // but matches up to a point. Split the node. - // - // For example, say we ended our search at a node n and we're trying to - // insert ABD. Then we'll create a new node s for AB, reduce n to just - // representing C, and insert a new leaf node l to represent d. This - // allows us to ensure that if n was a leaf, it remains a leaf. - // - // | ABC ---split---> | AB - // n s - // C / \ D - // n l - - // The node s from the diagram - SuffixTreeNode *SplitNode = - insertInternalNode(Active.Node, NextNode->StartIdx, - NextNode->StartIdx + Active.Len - 1, FirstChar); - - // Insert the new node representing the new substring into the tree as - // a child of the split node. This is the node l from the diagram. - insertLeaf(*SplitNode, EndIdx, LastChar); - - // Make the old node a child of the split node and update its start - // index. This is the node n from the diagram. - NextNode->StartIdx += Active.Len; - SplitNode->Children[Str[NextNode->StartIdx]] = NextNode; - - // SplitNode is an internal node, update the suffix link. - if (NeedsLink) - NeedsLink->Link = SplitNode; - - NeedsLink = SplitNode; - } - - // We've added something new to the tree, so there's one less suffix to - // add. - SuffixesToAdd--; - - if (Active.Node->isRoot()) { - if (Active.Len > 0) { - Active.Len--; - Active.Idx = EndIdx - SuffixesToAdd + 1; - } - } else { - // Start the next phase at the next smallest suffix. - Active.Node = Active.Node->Link; - } - } - - return SuffixesToAdd; - } - -public: - /// Construct a suffix tree from a sequence of unsigned integers. 
- /// - /// \param Str The string to construct the suffix tree for. - SuffixTree(const std::vector<unsigned> &Str) : Str(Str) { - Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0); - Active.Node = Root; - - // Keep track of the number of suffixes we have to add of the current - // prefix. - unsigned SuffixesToAdd = 0; - - // Construct the suffix tree iteratively on each prefix of the string. - // PfxEndIdx is the end index of the current prefix. - // End is one past the last element in the string. - for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; - PfxEndIdx++) { - SuffixesToAdd++; - LeafEndIdx = PfxEndIdx; // Extend each of the leaves. - SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd); - } - - // Set the suffix indices of each leaf. - assert(Root && "Root node can't be nullptr!"); - setSuffixIndices(); - } - - /// Iterator for finding all repeated substrings in the suffix tree. - struct RepeatedSubstringIterator { - private: - /// The current node we're visiting. - SuffixTreeNode *N = nullptr; - - /// The repeated substring associated with this node. - RepeatedSubstring RS; - - /// The nodes left to visit. - std::vector<SuffixTreeNode *> ToVisit; - - /// The minimum length of a repeated substring to find. - /// Since we're outlining, we want at least two instructions in the range. - /// FIXME: This may not be true for targets like X86 which support many - /// instruction lengths. - const unsigned MinLength = 2; - - /// Move the iterator to the next repeated substring. - void advance() { - // Clear the current state. If we're at the end of the range, then this - // is the state we want to be in. - RS = RepeatedSubstring(); - N = nullptr; - - // Each leaf node represents a repeat of a string. - std::vector<SuffixTreeNode *> LeafChildren; - - // Continue visiting nodes until we find one which repeats more than once. - while (!ToVisit.empty()) { - SuffixTreeNode *Curr = ToVisit.back(); - ToVisit.pop_back(); - LeafChildren.clear(); - - // Keep track of the length of the string associated with the node. If - // it's too short, we'll quit. - unsigned Length = Curr->ConcatLen; - - // Iterate over each child, saving internal nodes for visiting, and - // leaf nodes in LeafChildren. Internal nodes represent individual - // strings, which may repeat. - for (auto &ChildPair : Curr->Children) { - // Save all of this node's children for processing. - if (!ChildPair.second->isLeaf()) - ToVisit.push_back(ChildPair.second); - - // It's not an internal node, so it must be a leaf. If we have a - // long enough string, then save the leaf children. - else if (Length >= MinLength) - LeafChildren.push_back(ChildPair.second); - } - - // The root never represents a repeated substring. If we're looking at - // that, then skip it. - if (Curr->isRoot()) - continue; - - // Do we have any repeated substrings? - if (LeafChildren.size() >= 2) { - // Yes. Update the state to reflect this, and then bail out. - N = Curr; - RS.Length = Length; - for (SuffixTreeNode *Leaf : LeafChildren) - RS.StartIndices.push_back(Leaf->SuffixIdx); - break; - } - } - - // At this point, either NewRS is an empty RepeatedSubstring, or it was - // set in the above loop. Similarly, N is either nullptr, or the node - // associated with NewRS. - } - - public: - /// Return the current repeated substring. 
- RepeatedSubstring &operator*() { return RS; } - - RepeatedSubstringIterator &operator++() { - advance(); - return *this; - } - - RepeatedSubstringIterator operator++(int I) { - RepeatedSubstringIterator It(*this); - advance(); - return It; - } - - bool operator==(const RepeatedSubstringIterator &Other) { - return N == Other.N; - } - bool operator!=(const RepeatedSubstringIterator &Other) { - return !(*this == Other); - } - - RepeatedSubstringIterator(SuffixTreeNode *N) : N(N) { - // Do we have a non-null node? - if (N) { - // Yes. At the first step, we need to visit all of N's children. - // Note: This means that we visit N last. - ToVisit.push_back(N); - advance(); - } - } - }; - - typedef RepeatedSubstringIterator iterator; - iterator begin() { return iterator(Root); } - iterator end() { return iterator(nullptr); } -}; +namespace { /// Maps \p MachineInstrs to unsigned integers and stores the mappings. struct InstructionMapper { @@ -841,6 +343,9 @@ struct MachineOutliner : public ModulePass { /// linkonceodr linkage. bool OutlineFromLinkOnceODRs = false; + /// The current repeat number of machine outlining. + unsigned OutlineRepeatedNum = 0; + /// Set to true if the outliner should run on all functions in the module /// considered safe for outlining. /// Set to true by default for compatibility with llc's -run-pass option. @@ -899,7 +404,7 @@ struct MachineOutliner : public ModulePass { InstructionMapper &Mapper, unsigned Name); - /// Calls 'doOutline()'. + /// Calls 'doOutline()' 1 + OutlinerReruns times. bool runOnModule(Module &M) override; /// Construct a suffix tree on the instructions in \p M and outline repeated @@ -1098,7 +603,10 @@ MachineFunction *MachineOutliner::createOutlinedFunction( // Create the function name. This should be unique. // FIXME: We should have a better naming scheme. This should be stable, // regardless of changes to the outliner's cost model/traversal order. - std::string FunctionName = ("OUTLINED_FUNCTION_" + Twine(Name)).str(); + std::string FunctionName = "OUTLINED_FUNCTION_"; + if (OutlineRepeatedNum > 0) + FunctionName += std::to_string(OutlineRepeatedNum + 1) + "_"; + FunctionName += std::to_string(Name); // Create the function using an IR-level function. LLVMContext &C = M.getContext(); @@ -1110,9 +618,6 @@ MachineFunction *MachineOutliner::createOutlinedFunction( F->setLinkage(GlobalValue::InternalLinkage); F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - // FIXME: Set nounwind, so we don't generate eh_frame? Haven't verified it's - // necessary. - // Set optsize/minsize, so we don't insert padding between outlined // functions. F->addFnAttr(Attribute::OptimizeForSize); @@ -1127,6 +632,12 @@ MachineFunction *MachineOutliner::createOutlinedFunction( if (ParentFn.hasFnAttribute("target-features")) F->addFnAttr(ParentFn.getFnAttribute("target-features")); + // Set nounwind, so we don't generate eh_frame. + if (llvm::all_of(OF.Candidates, [](const outliner::Candidate &C) { + return C.getMF()->getFunction().hasFnAttribute(Attribute::NoUnwind); + })) + F->addFnAttr(Attribute::NoUnwind); + BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F); IRBuilder<> Builder(EntryBB); Builder.CreateRetVoid(); @@ -1140,9 +651,17 @@ MachineFunction *MachineOutliner::createOutlinedFunction( // Insert the new function into the module. 
MF.insert(MF.begin(), &MBB); + MachineFunction *OriginalMF = FirstCand.front()->getMF(); + const std::vector<MCCFIInstruction> &Instrs = + OriginalMF->getFrameInstructions(); for (auto I = FirstCand.front(), E = std::next(FirstCand.back()); I != E; ++I) { MachineInstr *NewMI = MF.CloneMachineInstr(&*I); + if (I->isCFIInstruction()) { + unsigned CFIIndex = NewMI->getOperand(0).getCFIIndex(); + MCCFIInstruction CFI = Instrs[CFIIndex]; + (void)MF.addFrameInst(CFI); + } NewMI->dropMemRefs(MF); // Don't keep debug information for outlined instructions. @@ -1150,12 +669,35 @@ MachineFunction *MachineOutliner::createOutlinedFunction( MBB.insert(MBB.end(), NewMI); } - TII.buildOutlinedFrame(MBB, MF, OF); - - // Outlined functions shouldn't preserve liveness. - MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness); + // Set normal properties for a late MachineFunction. + MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA); + MF.getProperties().set(MachineFunctionProperties::Property::NoPHIs); + MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); + MF.getProperties().set(MachineFunctionProperties::Property::TracksLiveness); MF.getRegInfo().freezeReservedRegs(MF); + // Compute live-in set for outlined fn + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + LivePhysRegs LiveIns(TRI); + for (auto &Cand : OF.Candidates) { + // Figure out live-ins at the first instruction. + MachineBasicBlock &OutlineBB = *Cand.front()->getParent(); + LivePhysRegs CandLiveIns(TRI); + CandLiveIns.addLiveOuts(OutlineBB); + for (const MachineInstr &MI : + reverse(make_range(Cand.front(), OutlineBB.end()))) + CandLiveIns.stepBackward(MI); + + // The live-in set for the outlined function is the union of the live-ins + // from all the outlining points. + for (MCPhysReg Reg : make_range(CandLiveIns.begin(), CandLiveIns.end())) + LiveIns.addReg(Reg); + } + addLiveIns(MBB, LiveIns); + + TII.buildOutlinedFrame(MBB, MF, OF); + // If there's a DISubprogram associated with this outlined function, then // emit debug info for the outlined function. if (DISubprogram *SP = getSubprogramOrNull(OF)) { @@ -1245,31 +787,54 @@ bool MachineOutliner::outline(Module &M, // make sure that the ranges we yank things out of aren't wrong. if (MBB.getParent()->getProperties().hasProperty( MachineFunctionProperties::Property::TracksLiveness)) { - // Helper lambda for adding implicit def operands to the call + // The following code is to add implicit def operands to the call // instruction. It also updates call site information for moved // code. - auto CopyDefsAndUpdateCalls = [&CallInst](MachineInstr &MI) { - for (MachineOperand &MOP : MI.operands()) { - // Skip over anything that isn't a register. - if (!MOP.isReg()) - continue; - - // If it's a def, add it to the call instruction. - if (MOP.isDef()) - CallInst->addOperand(MachineOperand::CreateReg( - MOP.getReg(), true, /* isDef = true */ - true /* isImp = true */)); - } - if (MI.isCall()) - MI.getMF()->eraseCallSiteInfo(&MI); - }; + SmallSet<Register, 2> UseRegs, DefRegs; // Copy over the defs in the outlined range. // First inst in outlined range <-- Anything that's defined in this // ... .. range has to be added as an // implicit Last inst in outlined range <-- def to the call // instruction. Also remove call site information for outlined block - // of code. - std::for_each(CallInst, std::next(EndIt), CopyDefsAndUpdateCalls); + // of code. 
The exposed uses need to be copied in the outlined range. + for (MachineBasicBlock::reverse_iterator + Iter = EndIt.getReverse(), + Last = std::next(CallInst.getReverse()); + Iter != Last; Iter++) { + MachineInstr *MI = &*Iter; + for (MachineOperand &MOP : MI->operands()) { + // Skip over anything that isn't a register. + if (!MOP.isReg()) + continue; + + if (MOP.isDef()) { + // Introduce DefRegs set to skip the redundant register. + DefRegs.insert(MOP.getReg()); + if (UseRegs.count(MOP.getReg())) + // Since the regiester is modeled as defined, + // it is not necessary to be put in use register set. + UseRegs.erase(MOP.getReg()); + } else if (!MOP.isUndef()) { + // Any register which is not undefined should + // be put in the use register set. + UseRegs.insert(MOP.getReg()); + } + } + if (MI->isCandidateForCallSiteEntry()) + MI->getMF()->eraseCallSiteInfo(MI); + } + + for (const Register &I : DefRegs) + // If it's a def, add it to the call instruction. + CallInst->addOperand( + MachineOperand::CreateReg(I, true, /* isDef = true */ + true /* isImp = true */)); + + for (const Register &I : UseRegs) + // If it's a exposed use, add it to the call instruction. + CallInst->addOperand( + MachineOperand::CreateReg(I, false, /* isDef = false */ + true /* isImp = true */)); } // Erase from the point after where the call was inserted up to, and @@ -1289,7 +854,6 @@ bool MachineOutliner::outline(Module &M, } LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";); - return OutlinedSomething; } @@ -1377,7 +941,7 @@ void MachineOutliner::emitInstrCountChangedRemark( if (!MF) continue; - std::string Fname = F.getName(); + std::string Fname = std::string(F.getName()); unsigned FnCountAfter = MF->getInstructionCount(); unsigned FnCountBefore = 0; @@ -1424,8 +988,22 @@ bool MachineOutliner::runOnModule(Module &M) { // Number to append to the current outlined function. unsigned OutlinedFunctionNum = 0; + OutlineRepeatedNum = 0; if (!doOutline(M, OutlinedFunctionNum)) return false; + + for (unsigned I = 0; I < OutlinerReruns; ++I) { + OutlinedFunctionNum = 0; + OutlineRepeatedNum++; + if (!doOutline(M, OutlinedFunctionNum)) { + LLVM_DEBUG({ + dbgs() << "Did not outline on iteration " << I + 2 << " out of " + << OutlinerReruns + 1 << "\n"; + }); + break; + } + } + return true; } @@ -1482,5 +1060,11 @@ bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) { if (ShouldEmitSizeRemarks && OutlinedSomething) emitInstrCountChangedRemark(M, MMI, FunctionToInstrCount); + LLVM_DEBUG({ + if (!OutlinedSomething) + dbgs() << "Stopped outlining at iteration " << OutlineRepeatedNum + << " because no changes were found.\n"; + }); + return OutlinedSomething; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp index ef22caa877c9..ef4b02ca9e3e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -217,6 +217,7 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) { MF = &mf; MLI = &getAnalysis<MachineLoopInfo>(); MDT = &getAnalysis<MachineDominatorTree>(); + ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); TII = MF->getSubtarget().getInstrInfo(); RegClassInfo.runOnMachineFunction(*MF); @@ -248,6 +249,12 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) { setPragmaPipelineOptions(L); if (!canPipelineLoop(L)) { LLVM_DEBUG(dbgs() << "\n!!! 
Can not pipeline loop.\n"); + ORE->emit([&]() { + return MachineOptimizationRemarkMissed(DEBUG_TYPE, "canPipelineLoop", + L.getStartLoc(), L.getHeader()) + << "Failed to pipeline loop"; + }); + return Changed; } @@ -259,6 +266,9 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) { } void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) { + // Reset the pragma for the next loop in iteration. + disabledByPragma = false; + MachineBasicBlock *LBLK = L.getTopBlock(); if (LBLK == nullptr) @@ -306,11 +316,24 @@ void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) { /// restricted to loops with a single basic block. Make sure that the /// branch in the loop can be analyzed. bool MachinePipeliner::canPipelineLoop(MachineLoop &L) { - if (L.getNumBlocks() != 1) + if (L.getNumBlocks() != 1) { + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop", + L.getStartLoc(), L.getHeader()) + << "Not a single basic block: " + << ore::NV("NumBlocks", L.getNumBlocks()); + }); return false; + } - if (disabledByPragma) + if (disabledByPragma) { + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop", + L.getStartLoc(), L.getHeader()) + << "Disabled by Pragma."; + }); return false; + } // Check if the branch can't be understood because we can't do pipelining // if that's the case. @@ -318,25 +341,37 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) { LI.FBB = nullptr; LI.BrCond.clear(); if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) { - LLVM_DEBUG( - dbgs() << "Unable to analyzeBranch, can NOT pipeline current Loop\n"); + LLVM_DEBUG(dbgs() << "Unable to analyzeBranch, can NOT pipeline Loop\n"); NumFailBranch++; + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop", + L.getStartLoc(), L.getHeader()) + << "The branch can't be understood"; + }); return false; } LI.LoopInductionVar = nullptr; LI.LoopCompare = nullptr; if (!TII->analyzeLoopForPipelining(L.getTopBlock())) { - LLVM_DEBUG( - dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n"); + LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n"); NumFailLoop++; + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop", + L.getStartLoc(), L.getHeader()) + << "The loop structure is not supported"; + }); return false; } if (!L.getLoopPreheader()) { - LLVM_DEBUG( - dbgs() << "Preheader not found, can NOT pipeline current Loop\n"); + LLVM_DEBUG(dbgs() << "Preheader not found, can NOT pipeline Loop\n"); NumFailPreheader++; + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop", + L.getStartLoc(), L.getHeader()) + << "No loop preheader found"; + }); return false; } @@ -454,10 +489,13 @@ void SwingSchedulerDAG::schedule() { // Can't schedule a loop without a valid MII. 
if (MII == 0) { - LLVM_DEBUG( - dbgs() - << "0 is not a valid Minimal Initiation Interval, can NOT schedule\n"); + LLVM_DEBUG(dbgs() << "Invalid Minimal Initiation Interval: 0\n"); NumFailZeroMII++; + Pass.ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis( + DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader()) + << "Invalid Minimal Initiation Interval: 0"; + }); return; } @@ -466,6 +504,14 @@ void SwingSchedulerDAG::schedule() { LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii << ", we don't pipleline large loops\n"); NumFailLargeMaxMII++; + Pass.ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis( + DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader()) + << "Minimal Initiation Interval too large: " + << ore::NV("MII", (int)MII) << " > " + << ore::NV("SwpMaxMii", SwpMaxMii) << "." + << "Refer to -pipeliner-max-mii."; + }); return; } @@ -508,15 +554,24 @@ void SwingSchedulerDAG::schedule() { if (!Scheduled){ LLVM_DEBUG(dbgs() << "No schedule found, return\n"); NumFailNoSchedule++; + Pass.ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis( + DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader()) + << "Unable to find schedule"; + }); return; } unsigned numStages = Schedule.getMaxStageCount(); // No need to generate pipeline if there are no overlapped iterations. if (numStages == 0) { - LLVM_DEBUG( - dbgs() << "No overlapped iterations, no need to generate pipeline\n"); + LLVM_DEBUG(dbgs() << "No overlapped iterations, skip.\n"); NumFailZeroStage++; + Pass.ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis( + DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader()) + << "No need to pipeline - no overlapped iterations in schedule."; + }); return; } // Check that the maximum stage count is less than user-defined limit. @@ -524,9 +579,23 @@ void SwingSchedulerDAG::schedule() { LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages << " : too many stages, abort\n"); NumFailLargeMaxStage++; + Pass.ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis( + DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader()) + << "Too many stages in schedule: " + << ore::NV("numStages", (int)numStages) << " > " + << ore::NV("SwpMaxStages", SwpMaxStages) + << ". Refer to -pipeliner-max-stages."; + }); return; } + Pass.ORE->emit([&]() { + return MachineOptimizationRemark(DEBUG_TYPE, "schedule", Loop.getStartLoc(), + Loop.getHeader()) + << "Pipelined succesfully!"; + }); + // Generate the schedule as a ModuloSchedule. DenseMap<MachineInstr *, int> Cycles, Stages; std::vector<MachineInstr *> OrderedInsts; @@ -693,9 +762,13 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { // offset, then mark the dependence as loop carried potentially. 
const MachineOperand *BaseOp1, *BaseOp2; int64_t Offset1, Offset2; - if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) && - TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) { + bool Offset1IsScalable, Offset2IsScalable; + if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, + Offset1IsScalable, TRI) && + TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, + Offset2IsScalable, TRI)) { if (BaseOp1->isIdenticalTo(*BaseOp2) && + Offset1IsScalable == Offset2IsScalable && (int)Offset1 < (int)Offset2) { assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) && "What happened to the chain edge?"); @@ -802,7 +875,7 @@ void SwingSchedulerDAG::updatePhiDependences() { if (!MI->isPHI()) { SDep Dep(SU, SDep::Data, Reg); Dep.setLatency(0); - ST.adjustSchedDependency(SU, &I, Dep); + ST.adjustSchedDependency(SU, 0, &I, MI->getOperandNo(MOI), Dep); I.addPred(Dep); } else { HasPhiUse = Reg; @@ -905,7 +978,7 @@ namespace { struct FuncUnitSorter { const InstrItineraryData *InstrItins; const MCSubtargetInfo *STI; - DenseMap<unsigned, unsigned> Resources; + DenseMap<InstrStage::FuncUnits, unsigned> Resources; FuncUnitSorter(const TargetSubtargetInfo &TSI) : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {} @@ -913,14 +986,15 @@ struct FuncUnitSorter { // Compute the number of functional unit alternatives needed // at each stage, and take the minimum value. We prioritize the // instructions by the least number of choices first. - unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const { + unsigned minFuncUnits(const MachineInstr *Inst, + InstrStage::FuncUnits &F) const { unsigned SchedClass = Inst->getDesc().getSchedClass(); unsigned min = UINT_MAX; if (InstrItins && !InstrItins->isEmpty()) { for (const InstrStage &IS : make_range(InstrItins->beginStage(SchedClass), InstrItins->endStage(SchedClass))) { - unsigned funcUnits = IS.getUnits(); + InstrStage::FuncUnits funcUnits = IS.getUnits(); unsigned numAlternatives = countPopulation(funcUnits); if (numAlternatives < min) { min = numAlternatives; @@ -966,7 +1040,7 @@ struct FuncUnitSorter { for (const InstrStage &IS : make_range(InstrItins->beginStage(SchedClass), InstrItins->endStage(SchedClass))) { - unsigned FuncUnits = IS.getUnits(); + InstrStage::FuncUnits FuncUnits = IS.getUnits(); if (countPopulation(FuncUnits) == 1) Resources[FuncUnits]++; } @@ -994,7 +1068,7 @@ struct FuncUnitSorter { /// Return true if IS1 has less priority than IS2. bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const { - unsigned F1 = 0, F2 = 0; + InstrStage::FuncUnits F1 = 0, F2 = 0; unsigned MFUs1 = minFuncUnits(IS1, F1); unsigned MFUs2 = minFuncUnits(IS2, F2); if (MFUs1 == MFUs2) @@ -1072,7 +1146,7 @@ unsigned SwingSchedulerDAG::calculateResMII() { } } int Resmii = Resources.size(); - LLVM_DEBUG(dbgs() << "Retrun Res MII:" << Resmii << "\n"); + LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n"); // Delete the memory for each of the DFAs that were created earlier. for (ResourceManager *RI : Resources) { ResourceManager *D = RI; @@ -2044,9 +2118,16 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { LLVM_DEBUG(dbgs() << "Schedule Found? 
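This hunk is part of a wider API change visible throughout the patch: getMemOperandWithOffset grows an out-parameter saying whether the offset is scalable, i.e. a multiple of vscale as with SVE-style accesses. A sketch of what a caller that reasons about byte distances now has to do, assuming the signature shown here (the wrapper function is hypothetical):

#include "llvm/CodeGen/TargetInstrInfo.h"

using namespace llvm;

static bool getFixedByteOffset(const TargetInstrInfo &TII,
                               const TargetRegisterInfo *TRI,
                               const MachineInstr &MI, int64_t &Offset) {
  const MachineOperand *BaseOp;
  bool OffsetIsScalable;
  if (!TII.getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
    return false;
  // A scalable offset is unknown at compile time, so code that compares raw
  // byte offsets must conservatively give up on it, as computeDelta() does
  // below.
  return !OffsetIsScalable;
}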
" << scheduleFound << " (II=" << II << ")\n"); - if (scheduleFound) + if (scheduleFound) { Schedule.finalizeSchedule(this); - else + Pass.ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis( + DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader()) + << "Schedule found with Initiation Interval: " << ore::NV("II", II) + << ", MaxStageCount: " + << ore::NV("MaxStageCount", Schedule.getMaxStageCount()); + }); + } else Schedule.reset(); return scheduleFound && Schedule.getMaxStageCount() > 0; @@ -2058,7 +2139,12 @@ bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const MachineOperand *BaseOp; int64_t Offset; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) + bool OffsetIsScalable; + if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI)) + return false; + + // FIXME: This algorithm assumes instructions have fixed-size offsets. + if (OffsetIsScalable) return false; if (!BaseOp->isReg()) @@ -2236,11 +2322,17 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep, const MachineOperand *BaseOpS, *BaseOpD; int64_t OffsetS, OffsetD; + bool OffsetSIsScalable, OffsetDIsScalable; const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) || - !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, TRI)) + if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, OffsetSIsScalable, + TRI) || + !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, OffsetDIsScalable, + TRI)) return true; + assert(!OffsetSIsScalable && !OffsetDIsScalable && + "Expected offsets to be byte offsets"); + if (!BaseOpS->isIdenticalTo(*BaseOpD)) return true; @@ -2352,7 +2444,7 @@ int SMSchedule::earliestCycleInChain(const SDep &Dep) { continue; EarlyCycle = std::min(EarlyCycle, it->second); for (const auto &PI : PrevSU->Preds) - if (PI.getKind() == SDep::Order || Dep.getKind() == SDep::Output) + if (PI.getKind() == SDep::Order || PI.getKind() == SDep::Output) Worklist.push_back(PI); Visited.insert(PrevSU); } @@ -2375,7 +2467,7 @@ int SMSchedule::latestCycleInChain(const SDep &Dep) { continue; LateCycle = std::max(LateCycle, it->second); for (const auto &SI : SuccSU->Succs) - if (SI.getKind() == SDep::Order || Dep.getKind() == SDep::Output) + if (SI.getKind() == SDep::Order || SI.getKind() == SDep::Output) Worklist.push_back(SI); Visited.insert(SuccSU); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp index b88d4ea462ef..4c733738840a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -55,18 +55,18 @@ MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF) /// setRegClass - Set the register class of the specified virtual register. 
/// void -MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) { +MachineRegisterInfo::setRegClass(Register Reg, const TargetRegisterClass *RC) { assert(RC && RC->isAllocatable() && "Invalid RC for virtual register"); VRegInfo[Reg].first = RC; } -void MachineRegisterInfo::setRegBank(unsigned Reg, +void MachineRegisterInfo::setRegBank(Register Reg, const RegisterBank &RegBank) { VRegInfo[Reg].first = &RegBank; } static const TargetRegisterClass * -constrainRegClass(MachineRegisterInfo &MRI, unsigned Reg, +constrainRegClass(MachineRegisterInfo &MRI, Register Reg, const TargetRegisterClass *OldRC, const TargetRegisterClass *RC, unsigned MinNumRegs) { if (OldRC == RC) @@ -82,15 +82,15 @@ constrainRegClass(MachineRegisterInfo &MRI, unsigned Reg, } const TargetRegisterClass * -MachineRegisterInfo::constrainRegClass(unsigned Reg, +MachineRegisterInfo::constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs) { return ::constrainRegClass(*this, Reg, getRegClass(Reg), RC, MinNumRegs); } bool -MachineRegisterInfo::constrainRegAttrs(unsigned Reg, - unsigned ConstrainingReg, +MachineRegisterInfo::constrainRegAttrs(Register Reg, + Register ConstrainingReg, unsigned MinNumRegs) { const LLT RegTy = getType(Reg); const LLT ConstrainingRegTy = getType(ConstrainingReg); @@ -119,7 +119,7 @@ MachineRegisterInfo::constrainRegAttrs(unsigned Reg, } bool -MachineRegisterInfo::recomputeRegClass(unsigned Reg) { +MachineRegisterInfo::recomputeRegClass(Register Reg) { const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); const TargetRegisterClass *OldRC = getRegClass(Reg); const TargetRegisterClass *NewRC = @@ -143,8 +143,8 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg) { return true; } -unsigned MachineRegisterInfo::createIncompleteVirtualRegister(StringRef Name) { - unsigned Reg = Register::index2VirtReg(getNumVirtRegs()); +Register MachineRegisterInfo::createIncompleteVirtualRegister(StringRef Name) { + Register Reg = Register::index2VirtReg(getNumVirtRegs()); VRegInfo.grow(Reg); RegAllocHints.grow(Reg); insertVRegByName(Name, Reg); @@ -162,7 +162,7 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass, "Virtual register RegClass must be allocatable."); // New virtual register number. - unsigned Reg = createIncompleteVirtualRegister(Name); + Register Reg = createIncompleteVirtualRegister(Name); VRegInfo[Reg].first = RegClass; if (TheDelegate) TheDelegate->MRI_NoteNewVirtualRegister(Reg); @@ -171,7 +171,7 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass, Register MachineRegisterInfo::cloneVirtualRegister(Register VReg, StringRef Name) { - unsigned Reg = createIncompleteVirtualRegister(Name); + Register Reg = createIncompleteVirtualRegister(Name); VRegInfo[Reg].first = VRegInfo[VReg].first; setType(Reg, getType(VReg)); if (TheDelegate) @@ -179,7 +179,7 @@ Register MachineRegisterInfo::cloneVirtualRegister(Register VReg, return Reg; } -void MachineRegisterInfo::setType(unsigned VReg, LLT Ty) { +void MachineRegisterInfo::setType(Register VReg, LLT Ty) { VRegToType.grow(VReg); VRegToType[VReg] = Ty; } @@ -187,7 +187,7 @@ void MachineRegisterInfo::setType(unsigned VReg, LLT Ty) { Register MachineRegisterInfo::createGenericVirtualRegister(LLT Ty, StringRef Name) { // New virtual register number. - unsigned Reg = createIncompleteVirtualRegister(Name); + Register Reg = createIncompleteVirtualRegister(Name); // FIXME: Should we use a dummy register class? 
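The rest of this file is a mechanical migration from raw unsigned register numbers to the typed Register wrapper. A small standalone sketch of why the substitution is drop-in, assuming llvm/CodeGen/Register.h as of this import (the function is illustrative only):

#include "llvm/CodeGen/Register.h"
#include <cassert>

using namespace llvm;

static void registerBasics() {
  Register None;                            // default-constructed: no register
  assert(!None && "converts to 0, so old '== 0' checks keep working");

  Register V = Register::index2VirtReg(0);  // first virtual register
  assert(V.isVirtual() && !V.isPhysical()); // named predicates replace
  assert(Register::virtReg2Index(V) == 0);  // manual bit twiddling
}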
VRegInfo[Reg].first = static_cast<RegisterBank *>(nullptr); setType(Reg, Ty); @@ -202,7 +202,7 @@ void MachineRegisterInfo::clearVirtRegTypes() { VRegToType.clear(); } void MachineRegisterInfo::clearVirtRegs() { #ifndef NDEBUG for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) { - unsigned Reg = Register::index2VirtReg(i); + Register Reg = Register::index2VirtReg(i); if (!VRegInfo[Reg].second) continue; verifyUseList(Reg); @@ -214,7 +214,7 @@ void MachineRegisterInfo::clearVirtRegs() { I.second = 0; } -void MachineRegisterInfo::verifyUseList(unsigned Reg) const { +void MachineRegisterInfo::verifyUseList(Register Reg) const { #ifndef NDEBUG bool Valid = true; for (MachineOperand &M : reg_operands(Reg)) { @@ -377,7 +377,7 @@ void MachineRegisterInfo::moveOperands(MachineOperand *Dst, /// except that it also changes any definitions of the register as well. /// If ToReg is a physical register we apply the sub register to obtain the /// final/proper physical register. -void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { +void MachineRegisterInfo::replaceRegWith(Register FromReg, Register ToReg) { assert(FromReg != ToReg && "Cannot replace a reg with itself"); const TargetRegisterInfo *TRI = getTargetRegisterInfo(); @@ -397,7 +397,7 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { /// getVRegDef - Return the machine instr that defines the specified virtual /// register or null if none is found. This assumes that the code is in SSA /// form, so there should only be one definition. -MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { +MachineInstr *MachineRegisterInfo::getVRegDef(Register Reg) const { // Since we are in SSA form, we can use the first definition. def_instr_iterator I = def_instr_begin(Reg); assert((I.atEnd() || std::next(I) == def_instr_end()) && @@ -408,7 +408,7 @@ MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { /// getUniqueVRegDef - Return the unique machine instr that defines the /// specified virtual register or null if none is found. If there are /// multiple definitions or no definition, return null. -MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const { +MachineInstr *MachineRegisterInfo::getUniqueVRegDef(Register Reg) const { if (def_empty(Reg)) return nullptr; def_instr_iterator I = def_instr_begin(Reg); if (std::next(I) != def_instr_end()) @@ -416,14 +416,14 @@ MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const { return &*I; } -bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const { +bool MachineRegisterInfo::hasOneNonDBGUse(Register RegNo) const { use_nodbg_iterator UI = use_nodbg_begin(RegNo); if (UI == use_nodbg_end()) return false; return ++UI == use_nodbg_end(); } -bool MachineRegisterInfo::hasOneNonDBGUser(unsigned RegNo) const { +bool MachineRegisterInfo::hasOneNonDBGUser(Register RegNo) const { use_instr_nodbg_iterator UI = use_instr_nodbg_begin(RegNo); if (UI == use_instr_nodbg_end()) return false; @@ -434,34 +434,34 @@ bool MachineRegisterInfo::hasOneNonDBGUser(unsigned RegNo) const { /// clear the kill flag from the MachineOperand. This function is used by /// optimization passes which extend register lifetimes and need only /// preserve conservative kill flag information. 
-void MachineRegisterInfo::clearKillFlags(unsigned Reg) const { +void MachineRegisterInfo::clearKillFlags(Register Reg) const { for (MachineOperand &MO : use_operands(Reg)) MO.setIsKill(false); } -bool MachineRegisterInfo::isLiveIn(unsigned Reg) const { +bool MachineRegisterInfo::isLiveIn(Register Reg) const { for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I) - if (I->first == Reg || I->second == Reg) + if ((Register)I->first == Reg || I->second == Reg) return true; return false; } /// getLiveInPhysReg - If VReg is a live-in virtual register, return the /// corresponding live-in physical register. -unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const { +MCRegister MachineRegisterInfo::getLiveInPhysReg(Register VReg) const { for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I) if (I->second == VReg) return I->first; - return 0; + return MCRegister(); } /// getLiveInVirtReg - If PReg is a live-in physical register, return the /// corresponding live-in physical register. -unsigned MachineRegisterInfo::getLiveInVirtReg(unsigned PReg) const { +Register MachineRegisterInfo::getLiveInVirtReg(MCRegister PReg) const { for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I) if (I->first == PReg) return I->second; - return 0; + return Register(); } /// EmitLiveInCopies - Emit copies to initialize livein virtual registers @@ -496,7 +496,7 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, } } -LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const { +LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(Register Reg) const { // Lane masks are only defined for vregs. assert(Register::isVirtualRegister(Reg)); const TargetRegisterClass &TRC = *getRegClass(Reg); @@ -504,7 +504,7 @@ LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void MachineRegisterInfo::dumpUses(unsigned Reg) const { +LLVM_DUMP_METHOD void MachineRegisterInfo::dumpUses(Register Reg) const { for (MachineInstr &I : use_instructions(Reg)) I.dump(); } @@ -516,7 +516,7 @@ void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) { "Invalid ReservedRegs vector from target"); } -bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg) const { +bool MachineRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const { assert(Register::isPhysicalRegister(PhysReg)); const TargetRegisterInfo *TRI = getTargetRegisterInfo(); @@ -533,7 +533,7 @@ bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg) const { } bool -MachineRegisterInfo::isCallerPreservedOrConstPhysReg(unsigned PhysReg) const { +MachineRegisterInfo::isCallerPreservedOrConstPhysReg(MCRegister PhysReg) const { const TargetRegisterInfo *TRI = getTargetRegisterInfo(); return isConstantPhysReg(PhysReg) || TRI->isCallerPreservedPhysReg(PhysReg, *MF); @@ -542,7 +542,7 @@ MachineRegisterInfo::isCallerPreservedOrConstPhysReg(unsigned PhysReg) const { /// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the /// specified register as undefined which causes the DBG_VALUE to be /// deleted during LiveDebugVariables analysis. -void MachineRegisterInfo::markUsesInDebugValueAsUndef(unsigned Reg) const { +void MachineRegisterInfo::markUsesInDebugValueAsUndef(Register Reg) const { // Mark any DBG_VALUE that uses Reg as undef (but don't delete it.) 
MachineRegisterInfo::use_instr_iterator nextI; for (use_instr_iterator I = use_instr_begin(Reg), E = use_instr_end(); @@ -550,7 +550,7 @@ void MachineRegisterInfo::markUsesInDebugValueAsUndef(unsigned Reg) const { nextI = std::next(I); // I is invalidated by the setReg MachineInstr *UseMI = &*I; if (UseMI->isDebugValue()) - UseMI->getOperand(0).setReg(0U); + UseMI->getDebugOperandForReg(Reg)->setReg(0U); } } @@ -583,7 +583,7 @@ static bool isNoReturnDef(const MachineOperand &MO) { !Called->hasFnAttribute(Attribute::NoUnwind)); } -bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg, +bool MachineRegisterInfo::isPhysRegModified(MCRegister PhysReg, bool SkipNoReturnDef) const { if (UsedPhysRegMask.test(PhysReg)) return true; @@ -598,7 +598,7 @@ bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg, return false; } -bool MachineRegisterInfo::isPhysRegUsed(unsigned PhysReg) const { +bool MachineRegisterInfo::isPhysRegUsed(MCRegister PhysReg) const { if (UsedPhysRegMask.test(PhysReg)) return true; const TargetRegisterInfo *TRI = getTargetRegisterInfo(); @@ -610,7 +610,7 @@ bool MachineRegisterInfo::isPhysRegUsed(unsigned PhysReg) const { return false; } -void MachineRegisterInfo::disableCalleeSavedRegister(unsigned Reg) { +void MachineRegisterInfo::disableCalleeSavedRegister(MCRegister Reg) { const TargetRegisterInfo *TRI = getTargetRegisterInfo(); assert(Reg && (Reg < TRI->getNumRegs()) && diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp index 258a5f9e0482..b12557d6d326 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp @@ -34,7 +34,7 @@ using namespace llvm; #define DEBUG_TYPE "machine-ssaupdater" -using AvailableValsTy = DenseMap<MachineBasicBlock *, unsigned>; +using AvailableValsTy = DenseMap<MachineBasicBlock *, Register>; static AvailableValsTy &getAvailableVals(void *AV) { return *static_cast<AvailableValsTy*>(AV); @@ -51,7 +51,7 @@ MachineSSAUpdater::~MachineSSAUpdater() { /// Initialize - Reset this object to get ready for a new set of SSA /// updates. ProtoValue is the value used to name PHI nodes. -void MachineSSAUpdater::Initialize(unsigned V) { +void MachineSSAUpdater::Initialize(Register V) { if (!AV) AV = new AvailableValsTy(); else @@ -69,25 +69,25 @@ bool MachineSSAUpdater::HasValueForBlock(MachineBasicBlock *BB) const { /// AddAvailableValue - Indicate that a rewritten value is available in the /// specified block with the specified value. -void MachineSSAUpdater::AddAvailableValue(MachineBasicBlock *BB, unsigned V) { +void MachineSSAUpdater::AddAvailableValue(MachineBasicBlock *BB, Register V) { getAvailableVals(AV)[BB] = V; } /// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is /// live at the end of the specified block. 
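markUsesInDebugValueAsUndef above switches from getOperand(0) to getDebugOperandForReg(Reg): once a DBG_VALUE can refer to a register somewhere other than operand 0, the operand has to be looked up rather than assumed to be first. A compact sketch of the same loop using an early-increment range in place of the manual nextI bookkeeping (the helper is hypothetical; llvm::make_early_inc_range is the standard ADT utility):

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

static void undefDebugUses(MachineRegisterInfo &MRI, Register Reg) {
  // setReg() unlinks the operand from Reg's use list, so the iterator must
  // advance before the current instruction is touched; the early-inc range
  // does exactly that.
  for (MachineInstr &UseMI : make_early_inc_range(MRI.use_instructions(Reg)))
    if (UseMI.isDebugValue())
      UseMI.getDebugOperandForReg(Reg)->setReg(0U); // now reads as $noreg
}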
-unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) { +Register MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) { return GetValueAtEndOfBlockInternal(BB); } static -unsigned LookForIdenticalPHI(MachineBasicBlock *BB, - SmallVectorImpl<std::pair<MachineBasicBlock *, unsigned>> &PredValues) { +Register LookForIdenticalPHI(MachineBasicBlock *BB, + SmallVectorImpl<std::pair<MachineBasicBlock *, Register>> &PredValues) { if (BB->empty()) - return 0; + return Register(); MachineBasicBlock::iterator I = BB->begin(); if (!I->isPHI()) - return 0; + return Register(); AvailableValsTy AVals; for (unsigned i = 0, e = PredValues.size(); i != e; ++i) @@ -106,7 +106,7 @@ unsigned LookForIdenticalPHI(MachineBasicBlock *BB, return I->getOperand(0).getReg(); ++I; } - return 0; + return Register(); } /// InsertNewDef - Insert an empty PHI or IMPLICIT_DEF instruction which define @@ -140,7 +140,7 @@ MachineInstrBuilder InsertNewDef(unsigned Opcode, /// their respective blocks. However, the use of X happens in the *middle* of /// a block. Because of this, we need to insert a new PHI node in SomeBB to /// merge the appropriate values, and this value isn't live out of the block. -unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { +Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { // If there is no definition of the renamed variable in this block, just use // GetValueAtEndOfBlock to do our work. if (!HasValueForBlock(BB)) @@ -157,14 +157,14 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { // Otherwise, we have the hard case. Get the live-in values for each // predecessor. - SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> PredValues; - unsigned SingularValue = 0; + SmallVector<std::pair<MachineBasicBlock*, Register>, 8> PredValues; + Register SingularValue; bool isFirstPred = true; for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), E = BB->pred_end(); PI != E; ++PI) { MachineBasicBlock *PredBB = *PI; - unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB); + Register PredVal = GetValueAtEndOfBlockInternal(PredBB); PredValues.push_back(std::make_pair(PredBB, PredVal)); // Compute SingularValue. @@ -172,15 +172,15 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { SingularValue = PredVal; isFirstPred = false; } else if (PredVal != SingularValue) - SingularValue = 0; + SingularValue = Register(); } // Otherwise, if all the merged values are the same, just use it. - if (SingularValue != 0) + if (SingularValue) return SingularValue; // If an identical PHI is already in BB, just reuse it. - unsigned DupPHI = LookForIdenticalPHI(BB, PredValues); + Register DupPHI = LookForIdenticalPHI(BB, PredValues); if (DupPHI) return DupPHI; @@ -204,7 +204,7 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); LLVM_DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); - return InsertedPHI->getOperand(0).getReg(); + return InsertedPHI.getReg(0); } static @@ -222,7 +222,7 @@ MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI, /// which use their value in the corresponding predecessor. 
void MachineSSAUpdater::RewriteUse(MachineOperand &U) { MachineInstr *UseMI = U.getParent(); - unsigned NewVR = 0; + Register NewVR; if (UseMI->isPHI()) { MachineBasicBlock *SourceBB = findCorrespondingPred(UseMI, &U); NewVR = GetValueAtEndOfBlockInternal(SourceBB); @@ -241,7 +241,7 @@ template<> class SSAUpdaterTraits<MachineSSAUpdater> { public: using BlkT = MachineBasicBlock; - using ValT = unsigned; + using ValT = Register; using PhiT = MachineInstr; using BlkSucc_iterator = MachineBasicBlock::succ_iterator; @@ -288,7 +288,7 @@ public: /// GetUndefVal - Create an IMPLICIT_DEF instruction with a new register. /// Add it into the specified block and return the register. - static unsigned GetUndefVal(MachineBasicBlock *BB, + static Register GetUndefVal(MachineBasicBlock *BB, MachineSSAUpdater *Updater) { // Insert an implicit_def to represent an undef value. MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, @@ -300,7 +300,7 @@ public: /// CreateEmptyPHI - Create a PHI instruction that defines a new register. /// Add it into the specified block and return the register. - static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds, + static Register CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds, MachineSSAUpdater *Updater) { MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin(); MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, @@ -311,7 +311,7 @@ public: /// AddPHIOperand - Add the specified value as an operand of the PHI for /// the specified predecessor block. - static void AddPHIOperand(MachineInstr *PHI, unsigned Val, + static void AddPHIOperand(MachineInstr *PHI, Register Val, MachineBasicBlock *Pred) { MachineInstrBuilder(*Pred->getParent(), PHI).addReg(Val).addMBB(Pred); } @@ -325,13 +325,13 @@ public: /// ValueIsPHI - Check if the instruction that defines the specified register /// is a PHI instruction. - static MachineInstr *ValueIsPHI(unsigned Val, MachineSSAUpdater *Updater) { + static MachineInstr *ValueIsPHI(Register Val, MachineSSAUpdater *Updater) { return InstrIsPHI(Updater->MRI->getVRegDef(Val)); } /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source /// operands, i.e., it was just added. - static MachineInstr *ValueIsNewPHI(unsigned Val, MachineSSAUpdater *Updater) { + static MachineInstr *ValueIsNewPHI(Register Val, MachineSSAUpdater *Updater) { MachineInstr *PHI = ValueIsPHI(Val, Updater); if (PHI && PHI->getNumOperands() <= 1) return PHI; @@ -340,7 +340,7 @@ public: /// GetPHIValue - For the specified PHI instruction, return the register /// that it defines. - static unsigned GetPHIValue(MachineInstr *PHI) { + static Register GetPHIValue(MachineInstr *PHI) { return PHI->getOperand(0).getReg(); } }; @@ -351,9 +351,9 @@ public: /// for the specified BB and if so, return it. If not, construct SSA form by /// first calculating the required placement of PHIs and then inserting new /// PHIs where needed. 
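For orientation, this is how a client drives MachineSSAUpdater under the Register-based interface above; the scenario and helper are made up, but the calls are the ones in this file:

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"

using namespace llvm;

static void rewriteAcrossBlocks(MachineFunction &MF, Register OldReg,
                                MachineBasicBlock *BBA, Register NewRegA,
                                MachineBasicBlock *BBB, Register NewRegB) {
  MachineSSAUpdater SSAUpdate(MF);
  SSAUpdate.Initialize(OldReg); // new PHIs reuse OldReg's class/bank and type
  SSAUpdate.AddAvailableValue(BBA, NewRegA);
  SSAUpdate.AddAvailableValue(BBB, NewRegB);
  // RewriteUse() picks GetValueInMiddleOfBlock/GetValueAtEndOfBlock as
  // appropriate and inserts PHIs (or reuses identical ones) on demand.
  for (MachineOperand &MO :
       make_early_inc_range(MF.getRegInfo().use_operands(OldReg)))
    SSAUpdate.RewriteUse(MO);
}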
-unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){ +Register MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){ AvailableValsTy &AvailableVals = getAvailableVals(AV); - if (unsigned V = AvailableVals[BB]) + if (Register V = AvailableVals[BB]) return V; SSAUpdaterImpl<MachineSSAUpdater> Impl(this, &AvailableVals, InsertedPHIs); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp index e42701b9c6ca..cf75d531deb2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp @@ -1471,41 +1471,48 @@ namespace { class BaseMemOpClusterMutation : public ScheduleDAGMutation { struct MemOpInfo { SUnit *SU; - const MachineOperand *BaseOp; + SmallVector<const MachineOperand *, 4> BaseOps; int64_t Offset; - - MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs) - : SU(su), BaseOp(Op), Offset(ofs) {} - - bool operator<(const MemOpInfo &RHS) const { - if (BaseOp->getType() != RHS.BaseOp->getType()) - return BaseOp->getType() < RHS.BaseOp->getType(); - - if (BaseOp->isReg()) - return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) < - std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset, - RHS.SU->NodeNum); - if (BaseOp->isFI()) { - const MachineFunction &MF = - *BaseOp->getParent()->getParent()->getParent(); + unsigned Width; + + MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps, + int64_t Offset, unsigned Width) + : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset), + Width(Width) {} + + static bool Compare(const MachineOperand *const &A, + const MachineOperand *const &B) { + if (A->getType() != B->getType()) + return A->getType() < B->getType(); + if (A->isReg()) + return A->getReg() < B->getReg(); + if (A->isFI()) { + const MachineFunction &MF = *A->getParent()->getParent()->getParent(); const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; - // Can't use tuple comparison here since we might need to use a - // different order when the stack grows down. - if (BaseOp->getIndex() != RHS.BaseOp->getIndex()) - return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex() - : BaseOp->getIndex() < RHS.BaseOp->getIndex(); - - if (Offset != RHS.Offset) - return Offset < RHS.Offset; - - return SU->NodeNum < RHS.SU->NodeNum; + return StackGrowsDown ? A->getIndex() > B->getIndex() + : A->getIndex() < B->getIndex(); } llvm_unreachable("MemOpClusterMutation only supports register or frame " "index bases."); } + + bool operator<(const MemOpInfo &RHS) const { + // FIXME: Don't compare everything twice. Maybe use C++20 three way + // comparison instead when it's available. 
+ if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(), + RHS.BaseOps.begin(), RHS.BaseOps.end(), + Compare)) + return true; + if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(), + BaseOps.begin(), BaseOps.end(), Compare)) + return false; + if (Offset != RHS.Offset) + return Offset < RHS.Offset; + return SU->NodeNum < RHS.SU->NodeNum; + } }; const TargetInstrInfo *TII; @@ -1560,41 +1567,78 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) { SmallVector<MemOpInfo, 32> MemOpRecords; for (SUnit *SU : MemOps) { - const MachineOperand *BaseOp; + const MachineInstr &MI = *SU->getInstr(); + SmallVector<const MachineOperand *, 4> BaseOps; int64_t Offset; - if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI)) - MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset)); + bool OffsetIsScalable; + unsigned Width; + if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, + OffsetIsScalable, Width, TRI)) { + MemOpRecords.push_back(MemOpInfo(SU, BaseOps, Offset, Width)); + + LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: " + << Offset << ", OffsetIsScalable: " << OffsetIsScalable + << ", Width: " << Width << "\n"); + } +#ifndef NDEBUG + for (auto *Op : BaseOps) + assert(Op); +#endif } if (MemOpRecords.size() < 2) return; llvm::sort(MemOpRecords); + + // At this point, `MemOpRecords` array must hold atleast two mem ops. Try to + // cluster mem ops collected within `MemOpRecords` array. unsigned ClusterLength = 1; + unsigned CurrentClusterBytes = MemOpRecords[0].Width; for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) { - SUnit *SUa = MemOpRecords[Idx].SU; - SUnit *SUb = MemOpRecords[Idx+1].SU; + // Decision to cluster mem ops is taken based on target dependent logic + auto MemOpa = MemOpRecords[Idx]; + auto MemOpb = MemOpRecords[Idx + 1]; + ++ClusterLength; + CurrentClusterBytes += MemOpb.Width; + if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength, + CurrentClusterBytes)) { + // Current mem ops pair could not be clustered, reset cluster length, and + // go to next pair + ClusterLength = 1; + CurrentClusterBytes = MemOpb.Width; + continue; + } + + SUnit *SUa = MemOpa.SU; + SUnit *SUb = MemOpb.SU; if (SUa->NodeNum > SUb->NodeNum) std::swap(SUa, SUb); - if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp, - *MemOpRecords[Idx + 1].BaseOp, - ClusterLength) && - DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { - LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU(" - << SUb->NodeNum << ")\n"); - // Copy successor edges from SUa to SUb. Interleaving computation - // dependent on SUa can prevent load combining due to register reuse. - // Predecessor edges do not need to be copied from SUb to SUa since nearby - // loads should have effectively the same inputs. - for (const SDep &Succ : SUa->Succs) { - if (Succ.getSUnit() == SUb) - continue; - LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum - << ")\n"); - DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial)); - } - ++ClusterLength; - } else + + // FIXME: Is this check really required? + if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { ClusterLength = 1; + CurrentClusterBytes = MemOpb.Width; + continue; + } + + LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU(" + << SUb->NodeNum << ")\n"); + + // Copy successor edges from SUa to SUb. 
Interleaving computation + // dependent on SUa can prevent load combining due to register reuse. + // Predecessor edges do not need to be copied from SUb to SUa since + // nearby loads should have effectively the same inputs. + for (const SDep &Succ : SUa->Succs) { + if (Succ.getSUnit() == SUb) + continue; + LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum + << ")\n"); + DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial)); + } + + LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength + << ", Curr cluster bytes: " << CurrentClusterBytes + << "\n"); } } @@ -1609,7 +1653,7 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) { unsigned ChainPredID = DAG->SUnits.size(); for (const SDep &Pred : SU.Preds) { - if (Pred.isCtrl()) { + if (Pred.isCtrl() && !Pred.isArtificial()) { ChainPredID = Pred.getSUnit()->NodeNum; break; } @@ -2389,16 +2433,14 @@ SUnit *SchedBoundary::pickOnlyChoice() { if (CheckPending) releasePending(); - if (CurrMOps > 0) { - // Defer any ready instrs that now have a hazard. - for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { - if (checkHazard(*I)) { - Pending.push(*I); - I = Available.remove(I); - continue; - } - ++I; + // Defer any ready instrs that now have a hazard. + for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { + if (checkHazard(*I)) { + Pending.push(*I); + I = Available.remove(I); + continue; } + ++I; } for (unsigned i = 0; Available.empty(); ++i) { // FIXME: Re-enable assert once PR20057 is resolved. @@ -2720,6 +2762,9 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) { SchedModel = DAG->getSchedModel(); TRI = DAG->TRI; + if (RegionPolicy.ComputeDFSResult) + DAG->computeDFSResult(); + Rem.init(DAG, SchedModel); Top.init(DAG, SchedModel, &Rem); Bot.init(DAG, SchedModel, &Rem); @@ -3684,7 +3729,7 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const ScheduleDAG *G) { - return G->MF.getName(); + return std::string(G->MF.getName()); } static bool renderGraphFromBottomUp() { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp index 239b6fd6fd68..5f958bbc31b7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp @@ -91,7 +91,7 @@ namespace { MachineDominatorTree *DT; // Machine dominator tree MachinePostDominatorTree *PDT; // Machine post dominator tree MachineLoopInfo *LI; - const MachineBlockFrequencyInfo *MBFI; + MachineBlockFrequencyInfo *MBFI; const MachineBranchProbabilityInfo *MBPI; AliasAnalysis *AA; @@ -279,7 +279,7 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg, // // %bb.2: // %p = PHI %y, %bb.0, %def, %bb.1 - if (llvm::all_of(MRI->use_nodbg_operands(Reg), [&](MachineOperand &MO) { + if (all_of(MRI->use_nodbg_operands(Reg), [&](MachineOperand &MO) { MachineInstr *UseInst = MO.getParent(); unsigned OpNo = UseInst->getOperandNo(&MO); MachineBasicBlock *UseBlock = UseInst->getParent(); @@ -347,6 +347,11 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { << printMBBReference(*Pair.first) << " -- " << printMBBReference(*NewSucc) << " -- " << printMBBReference(*Pair.second) << '\n'); + if (MBFI) { + auto NewSuccFreq = MBFI->getBlockFreq(Pair.first) * + MBPI->getEdgeProbability(Pair.first, NewSucc); + MBFI->setBlockFreq(NewSucc, NewSuccFreq.getFrequency()); + } 
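The rewritten clustering loop above now carries two caps instead of one: cluster length and accumulated width in bytes, both reset whenever the target declines a pair. A self-contained model of that accounting (the 4-op/16-byte policy is a stand-in, not any real target's shouldClusterMemOps):

#include <cstdio>
#include <vector>

struct MemOp { int Node; unsigned Width; };

// Stand-in for TII->shouldClusterMemOps: allow at most 4 ops / 16 bytes.
static bool shouldCluster(unsigned Length, unsigned Bytes) {
  return Length <= 4 && Bytes <= 16;
}

int main() {
  std::vector<MemOp> Ops = {{0, 8}, {1, 8}, {2, 8}, {3, 4}};
  unsigned Length = 1, Bytes = Ops[0].Width;
  for (size_t I = 0; I + 1 < Ops.size(); ++I) {
    ++Length;
    Bytes += Ops[I + 1].Width;
    if (!shouldCluster(Length, Bytes)) { // rejected: start a new cluster here
      Length = 1;
      Bytes = Ops[I + 1].Width;
      continue;
    }
    std::printf("cluster SU(%d) - SU(%d): len=%u bytes=%u\n", Ops[I].Node,
                Ops[I + 1].Node, Length, Bytes);
  }
  return 0;
}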
MadeChange = true; ++NumSplit; } else @@ -427,7 +432,7 @@ void MachineSinking::ProcessDbgInst(MachineInstr &MI) { MI.getDebugLoc()->getInlinedAt()); bool SeenBefore = SeenDbgVars.count(Var) != 0; - MachineOperand &MO = MI.getOperand(0); + MachineOperand &MO = MI.getDebugOperand(0); if (MO.isReg() && MO.getReg().isVirtual()) SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore)); @@ -618,14 +623,13 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB, // if () {} else {} // use x // - const std::vector<MachineDomTreeNode *> &Children = - DT->getNode(MBB)->getChildren(); - for (const auto &DTChild : Children) + for (MachineDomTreeNode *DTChild : DT->getNode(MBB)->children()) { // DomTree children of MBB that have MBB as immediate dominator are added. if (DTChild->getIDom()->getBlock() == MI.getParent() && // Skip MBBs already added to the AllSuccs vector above. !MBB->isSuccessor(DTChild->getBlock())) AllSuccs.push_back(DTChild->getBlock()); + } // Sort Successors according to their loop depth or block frequency info. llvm::stable_sort( @@ -729,6 +733,13 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, if (SuccToSinkTo && SuccToSinkTo->isEHPad()) return nullptr; + // It ought to be okay to sink instructions into an INLINEASM_BR target, but + // only if we make sure that MI occurs _before_ an INLINEASM_BR instruction in + // the source block (which this code does not yet do). So for now, forbid + // doing so. + if (SuccToSinkTo && SuccToSinkTo->isInlineAsmBrIndirectTarget()) + return nullptr; + return SuccToSinkTo; } @@ -760,7 +771,8 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI, const MachineOperand *BaseOp; int64_t Offset; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) + bool OffsetIsScalable; + if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI)) return false; if (!BaseOp->isReg()) @@ -790,7 +802,7 @@ static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI) { // Copy DBG_VALUE operand and set the original to undef. We then check to // see whether this is something that can be copy-forwarded. If it isn't, // continue around the loop. - MachineOperand DbgMO = DbgMI.getOperand(0); + MachineOperand &DbgMO = DbgMI.getDebugOperand(0); const MachineOperand *SrcMO = nullptr, *DstMO = nullptr; auto CopyOperands = TII.isCopyInstr(SinkInst); @@ -824,8 +836,8 @@ static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI) { if (PostRA && DbgMO.getReg() != DstMO->getReg()) return false; - DbgMI.getOperand(0).setReg(SrcMO->getReg()); - DbgMI.getOperand(0).setSubReg(SrcMO->getSubReg()); + DbgMO.setReg(SrcMO->getReg()); + DbgMO.setSubReg(SrcMO->getSubReg()); return true; } @@ -860,7 +872,7 @@ static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, SuccToSinkTo.insert(InsertPos, NewDbgMI); if (!attemptDebugCopyProp(MI, *DbgMI)) - DbgMI->getOperand(0).setReg(0); + DbgMI->setDebugValueUndef(); } } @@ -994,7 +1006,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, // This DBG_VALUE would re-order assignments. If we can't copy-propagate // it, it can't be recovered. Set it undef. 
if (!attemptDebugCopyProp(MI, *DbgMI)) - DbgMI->getOperand(0).setReg(0); + DbgMI->setDebugValueUndef(); } else { DbgUsersToSink.push_back(DbgMI); } @@ -1043,7 +1055,7 @@ void MachineSinking::SalvageUnsunkDebugUsersOfCopy( if (User.getParent() == MI.getParent()) continue; - assert(User.getOperand(0).isReg() && + assert(User.getDebugOperand(0).isReg() && "DBG_VALUE user of vreg, but non reg operand?"); DbgDefUsers.push_back(&User); } @@ -1052,8 +1064,8 @@ void MachineSinking::SalvageUnsunkDebugUsersOfCopy( // Point the users of this copy that are no longer dominated, at the source // of the copy. for (auto *User : DbgDefUsers) { - User->getOperand(0).setReg(MI.getOperand(1).getReg()); - User->getOperand(0).setSubReg(MI.getOperand(1).getSubReg()); + User->getDebugOperand(0).setReg(MI.getOperand(1).getReg()); + User->getDebugOperand(0).setSubReg(MI.getOperand(1).getSubReg()); } } @@ -1299,7 +1311,7 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, // We must sink this DBG_VALUE if its operand is sunk. To avoid searching // for DBG_VALUEs later, record them when they're encountered. if (MI->isDebugValue()) { - auto &MO = MI->getOperand(0); + auto &MO = MI->getDebugOperand(0); if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) { // Bail if we can already tell the sink would be rejected, rather // than needlessly accumulating lots of DBG_VALUEs. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp index aff67f9cfd55..584d43b42004 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineSizeOpts.h" +#include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -23,6 +24,7 @@ extern cl::opt<bool> ForcePGSO; extern cl::opt<int> PgsoCutoffInstrProf; extern cl::opt<int> PgsoCutoffSampleProf; +namespace { namespace machine_size_opts_detail { /// Like ProfileSummaryInfo::isColdBlock but for MachineBasicBlock. @@ -33,6 +35,13 @@ bool isColdBlock(const MachineBasicBlock *MBB, return Count && PSI->isColdCount(*Count); } +bool isColdBlock(BlockFrequency BlockFreq, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency()); + return Count && PSI->isColdCount(*Count); +} + /// Like ProfileSummaryInfo::isHotBlockNthPercentile but for MachineBasicBlock. 
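The isColdBlock overload added above takes a raw BlockFrequency so that callers holding only a synthetic frequency, for instance one computed for a block that does not exist yet as in the MachineSink edge-split hunk earlier, can ask the same profile questions. A sketch of such a caller, assuming the overloads in this file (the wrapper name is invented):

#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"

using namespace llvm;

static bool wouldBeCold(BlockFrequency Freq, ProfileSummaryInfo *PSI,
                        const MachineBlockFrequencyInfo *MBFI) {
  // Map the frequency back to an estimated profile count, then apply the
  // module summary's cold threshold, mirroring isColdBlock() above.
  auto Count = MBFI->getProfileCountFromFreq(Freq.getFrequency());
  return Count && PSI->isColdCount(*Count);
}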
static bool isHotBlockNthPercentile(int PercentileCutoff, const MachineBasicBlock *MBB, @@ -42,6 +51,30 @@ static bool isHotBlockNthPercentile(int PercentileCutoff, return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count); } +static bool isHotBlockNthPercentile(int PercentileCutoff, + BlockFrequency BlockFreq, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency()); + return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count); +} + +static bool isColdBlockNthPercentile(int PercentileCutoff, + const MachineBasicBlock *MBB, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + auto Count = MBFI->getBlockProfileCount(MBB); + return Count && PSI->isColdCountNthPercentile(PercentileCutoff, *Count); +} + +static bool isColdBlockNthPercentile(int PercentileCutoff, + BlockFrequency BlockFreq, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency()); + return Count && PSI->isColdCountNthPercentile(PercentileCutoff, *Count); +} + /// Like ProfileSummaryInfo::isFunctionColdInCallGraph but for /// MachineFunction. bool isFunctionColdInCallGraph( @@ -73,9 +106,21 @@ bool isFunctionHotInCallGraphNthPercentile( return true; return false; } + +bool isFunctionColdInCallGraphNthPercentile( + int PercentileCutoff, const MachineFunction *MF, ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo &MBFI) { + if (auto FunctionCount = MF->getFunction().getEntryCount()) + if (!PSI->isColdCountNthPercentile(PercentileCutoff, + FunctionCount.getCount())) + return false; + for (const auto &MBB : *MF) + if (!isColdBlockNthPercentile(PercentileCutoff, &MBB, PSI, &MBFI)) + return false; + return true; +} } // namespace machine_size_opts_detail -namespace { struct MachineBasicBlockBFIAdapter { static bool isFunctionColdInCallGraph(const MachineFunction *MF, ProfileSummaryInfo *PSI, @@ -90,11 +135,22 @@ struct MachineBasicBlockBFIAdapter { return machine_size_opts_detail::isFunctionHotInCallGraphNthPercentile( CutOff, MF, PSI, MBFI); } + static bool isFunctionColdInCallGraphNthPercentile( + int CutOff, const MachineFunction *MF, ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo &MBFI) { + return machine_size_opts_detail::isFunctionColdInCallGraphNthPercentile( + CutOff, MF, PSI, MBFI); + } static bool isColdBlock(const MachineBasicBlock *MBB, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *MBFI) { return machine_size_opts_detail::isColdBlock(MBB, PSI, MBFI); } + static bool isColdBlock(BlockFrequency BlockFreq, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + return machine_size_opts_detail::isColdBlock(BlockFreq, PSI, MBFI); + } static bool isHotBlockNthPercentile(int CutOff, const MachineBasicBlock *MBB, ProfileSummaryInfo *PSI, @@ -102,6 +158,25 @@ struct MachineBasicBlockBFIAdapter { return machine_size_opts_detail::isHotBlockNthPercentile( CutOff, MBB, PSI, MBFI); } + static bool isHotBlockNthPercentile(int CutOff, + BlockFrequency BlockFreq, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + return machine_size_opts_detail::isHotBlockNthPercentile( + CutOff, BlockFreq, PSI, MBFI); + } + static bool isColdBlockNthPercentile(int CutOff, const MachineBasicBlock *MBB, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + return machine_size_opts_detail::isColdBlockNthPercentile(CutOff, MBB, PSI, + MBFI); + } + static bool 
isColdBlockNthPercentile(int CutOff, BlockFrequency BlockFreq, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + return machine_size_opts_detail::isColdBlockNthPercentile(CutOff, BlockFreq, + PSI, MBFI); + } }; } // end anonymous namespace @@ -117,6 +192,19 @@ bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *MBFI, PGSOQueryType QueryType) { + assert(MBB); return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>( MBB, PSI, MBFI, QueryType); } + +bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB, + ProfileSummaryInfo *PSI, + MBFIWrapper *MBFIW, + PGSOQueryType QueryType) { + assert(MBB); + if (!PSI || !MBFIW) + return false; + BlockFrequency BlockFreq = MBFIW->getBlockFreq(MBB); + return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>( + BlockFreq, PSI, &MBFIW->getMBFI(), QueryType); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp new file mode 100644 index 000000000000..a1cb12f91275 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp @@ -0,0 +1,111 @@ +//===- MachineStripDebug.cpp - Strip debug info ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file This removes debug info from everything. It can be used to ensure +/// tests can be debugified without affecting the output MIR. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/Debugify.h" + +#define DEBUG_TYPE "mir-strip-debug" + +using namespace llvm; + +namespace { +cl::opt<bool> + OnlyDebugifiedDefault("mir-strip-debugify-only", + cl::desc("Should mir-strip-debug only strip debug " + "info from debugified modules by default"), + cl::init(true)); + +struct StripDebugMachineModule : public ModulePass { + bool runOnModule(Module &M) override { + if (OnlyDebugified) { + NamedMDNode *DebugifyMD = M.getNamedMetadata("llvm.debugify"); + if (!DebugifyMD) { + LLVM_DEBUG(dbgs() << "Not stripping debug info" + " (debugify metadata not found)?\n"); + return false; + } + } + + MachineModuleInfo &MMI = + getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); + + bool Changed = false; + for (Function &F : M.functions()) { + MachineFunction *MaybeMF = MMI.getMachineFunction(F); + if (!MaybeMF) + continue; + MachineFunction &MF = *MaybeMF; + for (MachineBasicBlock &MBB : MF) { + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E;) { + if (I->isDebugInstr()) { + // FIXME: We should remove all of them. However, AArch64 emits an + // invalid `DBG_VALUE $lr` with only one operand instead of + // the usual three and has a test that depends on it's + // preservation. Preserve it for now. 
+ if (I->getNumOperands() > 1) { + LLVM_DEBUG(dbgs() << "Removing debug instruction " << *I); + I = MBB.erase(I); + Changed |= true; + continue; + } + } + if (I->getDebugLoc()) { + LLVM_DEBUG(dbgs() << "Removing location " << *I); + I->setDebugLoc(DebugLoc()); + Changed |= true; + ++I; + continue; + } + LLVM_DEBUG(dbgs() << "Keeping " << *I); + ++I; + } + } + } + + Changed |= stripDebugifyMetadata(M); + + return Changed; + } + + StripDebugMachineModule() : StripDebugMachineModule(OnlyDebugifiedDefault) {} + StripDebugMachineModule(bool OnlyDebugified) + : ModulePass(ID), OnlyDebugified(OnlyDebugified) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineModuleInfoWrapperPass>(); + AU.addPreserved<MachineModuleInfoWrapperPass>(); + AU.setPreservesCFG(); + } + + static char ID; // Pass identification. + +protected: + bool OnlyDebugified; +}; +char StripDebugMachineModule::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS_BEGIN(StripDebugMachineModule, DEBUG_TYPE, + "Machine Strip Debug Module", false, false) +INITIALIZE_PASS_END(StripDebugMachineModule, DEBUG_TYPE, + "Machine Strip Debug Module", false, false) + +ModulePass *llvm::createStripDebugMachineModulePass(bool OnlyDebugified) { + return new StripDebugMachineModule(OnlyDebugified); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp index 6c0402df8489..c1a2c4e0bc6e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp @@ -16,16 +16,15 @@ // Register live intervals: Registers must be defined only once, and must be // defined before use. // -// The machine code verifier is enabled from LLVMTargetMachine.cpp with the -// command-line option -verify-machineinstrs, or by defining the environment -// variable LLVM_VERIFY_MACHINEINSTRS to the name of a file that will receive -// the verifier errors. +// The machine code verifier is enabled with the command-line option +// -verify-machineinstrs. //===----------------------------------------------------------------------===// #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallPtrSet.h" @@ -35,8 +34,8 @@ #include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervalCalc.h" #include "llvm/CodeGen/LiveIntervals.h" -#include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -157,25 +156,6 @@ namespace { BBInfo() = default; - // Add register to vregsPassed if it belongs there. Return true if - // anything changed. - bool addPassed(unsigned Reg) { - if (!Register::isVirtualRegister(Reg)) - return false; - if (regsKilled.count(Reg) || regsLiveOut.count(Reg)) - return false; - return vregsPassed.insert(Reg).second; - } - - // Same for a full set. - bool addPassed(const RegSet &RS) { - bool changed = false; - for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I) - if (addPassed(*I)) - changed = true; - return changed; - } - // Add register to vregsRequired if it belongs there. Return true if // anything changed. 
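The MachineStripDebug pass added above is a ModulePass, so it can walk every MachineFunction held by MachineModuleInfo in one go. A hypothetical driver snippet, assuming createStripDebugMachineModulePass is declared in llvm/CodeGen/Passes.h alongside the other factory functions (not confirmed by this diff):

#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/LegacyPassManager.h"

using namespace llvm;

static void addStripDebug(legacy::PassManagerBase &PM) {
  // Pass false to strip unconditionally; the default (true) only strips
  // modules carrying debugify's "llvm.debugify" metadata.
  PM.add(createStripDebugMachineModulePass(/*OnlyDebugified=*/false));
}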
bool addRequired(unsigned Reg) { @@ -188,20 +168,18 @@ namespace { // Same for a full set. bool addRequired(const RegSet &RS) { - bool changed = false; - for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I) - if (addRequired(*I)) - changed = true; - return changed; + bool Changed = false; + for (unsigned Reg : RS) + Changed |= addRequired(Reg); + return Changed; } // Same for a full map. bool addRequired(const RegMap &RM) { - bool changed = false; - for (RegMap::const_iterator I = RM.begin(), E = RM.end(); I != E; ++I) - if (addRequired(I->first)) - changed = true; - return changed; + bool Changed = false; + for (const auto &I : RM) + Changed |= addRequired(I.first); + return Changed; } // Live-out registers are either in regsLiveOut or vregsPassed. @@ -236,7 +214,6 @@ namespace { void verifyPreISelGenericInstruction(const MachineInstr *MI); void visitMachineInstrBefore(const MachineInstr *MI); void visitMachineOperand(const MachineOperand *MO, unsigned MONum); - void visitMachineInstrAfter(const MachineInstr *MI); void visitMachineBundleAfter(const MachineInstr *MI); void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB); void visitMachineFunctionAfter(); @@ -376,13 +353,11 @@ unsigned MachineVerifier::verify(MachineFunction &MF) { if (isFunctionFailedISel) return foundErrors; - isFunctionRegBankSelected = - !isFunctionFailedISel && - MF.getProperties().hasProperty( - MachineFunctionProperties::Property::RegBankSelected); - isFunctionSelected = !isFunctionFailedISel && - MF.getProperties().hasProperty( - MachineFunctionProperties::Property::Selected); + isFunctionRegBankSelected = MF.getProperties().hasProperty( + MachineFunctionProperties::Property::RegBankSelected); + isFunctionSelected = MF.getProperties().hasProperty( + MachineFunctionProperties::Property::Selected); + LiveVars = nullptr; LiveInts = nullptr; LiveStks = nullptr; @@ -401,43 +376,40 @@ unsigned MachineVerifier::verify(MachineFunction &MF) { verifyProperties(MF); visitMachineFunctionBefore(); - for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end(); - MFI!=MFE; ++MFI) { - visitMachineBasicBlockBefore(&*MFI); + for (const MachineBasicBlock &MBB : MF) { + visitMachineBasicBlockBefore(&MBB); // Keep track of the current bundle header. const MachineInstr *CurBundle = nullptr; // Do we expect the next instruction to be part of the same bundle? bool InBundle = false; - for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(), - MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) { - if (MBBI->getParent() != &*MFI) { - report("Bad instruction parent pointer", &*MFI); - errs() << "Instruction: " << *MBBI; + for (const MachineInstr &MI : MBB.instrs()) { + if (MI.getParent() != &MBB) { + report("Bad instruction parent pointer", &MBB); + errs() << "Instruction: " << MI; continue; } // Check for consistent bundle flags. - if (InBundle && !MBBI->isBundledWithPred()) + if (InBundle && !MI.isBundledWithPred()) report("Missing BundledPred flag, " "BundledSucc was set on predecessor", - &*MBBI); - if (!InBundle && MBBI->isBundledWithPred()) + &MI); + if (!InBundle && MI.isBundledWithPred()) report("BundledPred flag is set, " "but BundledSucc not set on predecessor", - &*MBBI); + &MI); // Is this a bundle header? 
- if (!MBBI->isInsideBundle()) { + if (!MI.isInsideBundle()) { if (CurBundle) visitMachineBundleAfter(CurBundle); - CurBundle = &*MBBI; + CurBundle = &MI; visitMachineBundleBefore(CurBundle); } else if (!CurBundle) - report("No bundle header", &*MBBI); - visitMachineInstrBefore(&*MBBI); - for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { - const MachineInstr &MI = *MBBI; + report("No bundle header", &MI); + visitMachineInstrBefore(&MI); + for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { const MachineOperand &Op = MI.getOperand(I); if (Op.getParent() != &MI) { // Make sure to use correct addOperand / RemoveOperand / ChangeTo @@ -448,16 +420,14 @@ unsigned MachineVerifier::verify(MachineFunction &MF) { visitMachineOperand(&Op, I); } - visitMachineInstrAfter(&*MBBI); - // Was this the last bundled instruction? - InBundle = MBBI->isBundledWithSucc(); + InBundle = MI.isBundledWithSucc(); } if (CurBundle) visitMachineBundleAfter(CurBundle); if (InBundle) - report("BundledSucc flag set on last instruction in block", &MFI->back()); - visitMachineBasicBlockAfter(&*MFI); + report("BundledSucc flag set on last instruction in block", &MBB.back()); + visitMachineBasicBlockAfter(&MBB); } visitMachineFunctionAfter(); @@ -568,9 +538,8 @@ void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { BBInfo &MInfo = MBBInfoMap[MBB]; if (!MInfo.reachable) { MInfo.reachable = true; - for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(), - SuE = MBB->succ_end(); SuI != SuE; ++SuI) - markReachable(*SuI); + for (const MachineBasicBlock *Succ : MBB->successors()) + markReachable(Succ); } } @@ -604,16 +573,6 @@ void MachineVerifier::visitMachineFunctionBefore() { verifyStackFrame(); } -// Does iterator point to a and b as the first two elements? -static bool matchPair(MachineBasicBlock::const_succ_iterator i, - const MachineBasicBlock *a, const MachineBasicBlock *b) { - if (*i == a) - return *++i == b; - if (*i == b) - return *++i == a; - return false; -} - void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { FirstTerminator = nullptr; @@ -633,29 +592,27 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } // Count the number of landing pad successors. - SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs; - for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); I != E; ++I) { - if ((*I)->isEHPad()) - LandingPadSuccs.insert(*I); - if (!FunctionBlocks.count(*I)) + SmallPtrSet<const MachineBasicBlock*, 4> LandingPadSuccs; + for (const auto *succ : MBB->successors()) { + if (succ->isEHPad()) + LandingPadSuccs.insert(succ); + if (!FunctionBlocks.count(succ)) report("MBB has successor that isn't part of the function.", MBB); - if (!MBBInfoMap[*I].Preds.count(MBB)) { + if (!MBBInfoMap[succ].Preds.count(MBB)) { report("Inconsistent CFG", MBB); errs() << "MBB is not in the predecessor list of the successor " - << printMBBReference(*(*I)) << ".\n"; + << printMBBReference(*succ) << ".\n"; } } // Check the predecessor list. 
- for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(), - E = MBB->pred_end(); I != E; ++I) { - if (!FunctionBlocks.count(*I)) + for (const MachineBasicBlock *Pred : MBB->predecessors()) { + if (!FunctionBlocks.count(Pred)) report("MBB has predecessor that isn't part of the function.", MBB); - if (!MBBInfoMap[*I].Succs.count(MBB)) { + if (!MBBInfoMap[Pred].Succs.count(MBB)) { report("Inconsistent CFG", MBB); errs() << "MBB is not in the successor list of the predecessor " - << printMBBReference(*(*I)) << ".\n"; + << printMBBReference(*Pred) << ".\n"; } } @@ -669,32 +626,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { !isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) report("MBB has more than one landing pad successor", MBB); - // Call AnalyzeBranch. If it succeeds, there several more conditions to check. + // Call analyzeBranch. If it succeeds, there several more conditions to check. MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; if (!TII->analyzeBranch(*const_cast<MachineBasicBlock *>(MBB), TBB, FBB, Cond)) { - // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's + // Ok, analyzeBranch thinks it knows what's going on with this block. Let's // check whether its answers match up with reality. if (!TBB && !FBB) { // Block falls through to its successor. - MachineFunction::const_iterator MBBI = MBB->getIterator(); - ++MBBI; - if (MBBI == MF->end()) { - // It's possible that the block legitimately ends with a noreturn - // call or an unreachable, in which case it won't actually fall - // out the bottom of the function. - } else if (MBB->succ_size() == LandingPadSuccs.size()) { - // It's possible that the block legitimately ends with a noreturn - // call or an unreachable, in which case it won't actually fall - // out of the block. - } else if (MBB->succ_size() != 1+LandingPadSuccs.size()) { - report("MBB exits via unconditional fall-through but doesn't have " - "exactly one CFG successor!", MBB); - } else if (!MBB->isSuccessor(&*MBBI)) { - report("MBB exits via unconditional fall-through but its successor " - "differs from its CFG successor!", MBB); - } if (!MBB->empty() && MBB->back().isBarrier() && !TII->isPredicated(MBB->back())) { report("MBB exits via unconditional fall-through but ends with a " @@ -706,17 +646,6 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } } else if (TBB && !FBB && Cond.empty()) { // Block unconditionally branches somewhere. - // If the block has exactly one successor, that happens to be a - // landingpad, accept it as valid control flow. - if (MBB->succ_size() != 1+LandingPadSuccs.size() && - (MBB->succ_size() != 1 || LandingPadSuccs.size() != 1 || - *MBB->succ_begin() != *LandingPadSuccs.begin())) { - report("MBB exits via unconditional branch but doesn't have " - "exactly one CFG successor!", MBB); - } else if (!MBB->isSuccessor(TBB)) { - report("MBB exits via unconditional branch but the CFG " - "successor doesn't match the actual successor!", MBB); - } if (MBB->empty()) { report("MBB exits via unconditional branch but doesn't contain " "any instructions!", MBB); @@ -729,25 +658,6 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } } else if (TBB && !FBB && !Cond.empty()) { // Block conditionally branches somewhere, otherwise falls through. 
- MachineFunction::const_iterator MBBI = MBB->getIterator(); - ++MBBI; - if (MBBI == MF->end()) { - report("MBB conditionally falls through out of function!", MBB); - } else if (MBB->succ_size() == 1) { - // A conditional branch with only one successor is weird, but allowed. - if (&*MBBI != TBB) - report("MBB exits via conditional branch/fall-through but only has " - "one CFG successor!", MBB); - else if (TBB != *MBB->succ_begin()) - report("MBB exits via conditional branch/fall-through but the CFG " - "successor don't match the actual successor!", MBB); - } else if (MBB->succ_size() != 2) { - report("MBB exits via conditional branch/fall-through but doesn't have " - "exactly two CFG successors!", MBB); - } else if (!matchPair(MBB->succ_begin(), TBB, &*MBBI)) { - report("MBB exits via conditional branch/fall-through but the CFG " - "successors don't match the actual successors!", MBB); - } if (MBB->empty()) { report("MBB exits via conditional branch/fall-through but doesn't " "contain any instructions!", MBB); @@ -761,21 +671,6 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } else if (TBB && FBB) { // Block conditionally branches somewhere, otherwise branches // somewhere else. - if (MBB->succ_size() == 1) { - // A conditional branch with only one successor is weird, but allowed. - if (FBB != TBB) - report("MBB exits via conditional branch/branch through but only has " - "one CFG successor!", MBB); - else if (TBB != *MBB->succ_begin()) - report("MBB exits via conditional branch/branch through but the CFG " - "successor don't match the actual successor!", MBB); - } else if (MBB->succ_size() != 2) { - report("MBB exits via conditional branch/branch but doesn't have " - "exactly two CFG successors!", MBB); - } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) { - report("MBB exits via conditional branch/branch but the CFG " - "successors don't match the actual successors!", MBB); - } if (MBB->empty()) { report("MBB exits via conditional branch/branch but doesn't " "contain any instructions!", MBB); @@ -791,7 +686,54 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { "condition!", MBB); } } else { - report("AnalyzeBranch returned invalid data!", MBB); + report("analyzeBranch returned invalid data!", MBB); + } + + // Now check that the successors match up with the answers reported by + // analyzeBranch. + if (TBB && !MBB->isSuccessor(TBB)) + report("MBB exits via jump or conditional branch, but its target isn't a " + "CFG successor!", + MBB); + if (FBB && !MBB->isSuccessor(FBB)) + report("MBB exits via conditional branch, but its target isn't a CFG " + "successor!", + MBB); + + // There might be a fallthrough to the next block if there's either no + // unconditional true branch, or if there's a condition, and one of the + // branches is missing. + bool Fallthrough = !TBB || (!Cond.empty() && !FBB); + + // A conditional fallthrough must be an actual CFG successor, not + // unreachable. (Conversely, an unconditional fallthrough might not really + // be a successor, because the block might end in unreachable.) 
+ if (!Cond.empty() && !FBB) { + MachineFunction::const_iterator MBBI = std::next(MBB->getIterator()); + if (MBBI == MF->end()) { + report("MBB conditionally falls through out of function!", MBB); + } else if (!MBB->isSuccessor(&*MBBI)) + report("MBB exits via conditional branch/fall-through but the CFG " + "successors don't match the actual successors!", + MBB); + } + + // Verify that there aren't any extra un-accounted-for successors. + for (const MachineBasicBlock *SuccMBB : MBB->successors()) { + // If this successor is one of the branch targets, it's okay. + if (SuccMBB == TBB || SuccMBB == FBB) + continue; + // If we might have a fallthrough, and the successor is the fallthrough + // block, that's also ok. + if (Fallthrough && SuccMBB == MBB->getNextNode()) + continue; + // Also accept successors which are for exception-handling or might be + // inlineasm_br targets. + if (SuccMBB->isEHPad() || SuccMBB->isInlineAsmBrIndirectTarget()) + continue; + report("MBB has unexpected successors which are not branch targets, " + "fallthrough, EHPads, or inlineasm_br targets.", + MBB); } } @@ -839,7 +781,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) { if (MI->isTerminator() && !TII->isPredicated(*MI)) { if (!FirstTerminator) FirstTerminator = MI; - } else if (FirstTerminator && !MI->isDebugEntryValue()) { + } else if (FirstTerminator) { report("Non-terminator instruction after the first terminator", MI); errs() << "First terminator was:\t" << *FirstTerminator; } @@ -920,6 +862,23 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { const MCInstrDesc &MCID = MI->getDesc(); unsigned NumOps = MI->getNumOperands(); + // Branches must reference a basic block if they are not indirect + if (MI->isBranch() && !MI->isIndirectBranch()) { + bool HasMBB = false; + for (const MachineOperand &Op : MI->operands()) { + if (Op.isMBB()) { + HasMBB = true; + break; + } + } + + if (!HasMBB) { + report("Branch instruction is missing a basic block operand or " + "isIndirectBranch property", + MI); + } + } + // Check types. SmallVector<LLT, 4> Types; for (unsigned I = 0, E = std::min(MCID.getNumOperands(), NumOps); @@ -972,9 +931,6 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { switch (MI->getOpcode()) { case TargetOpcode::G_CONSTANT: case TargetOpcode::G_FCONSTANT: { - if (MI->getNumOperands() < MCID.getNumOperands()) - break; - LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); if (DstTy.isVector()) report("Instruction cannot use a vector result type", MI); @@ -1062,6 +1018,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (SrcTy.getSizeInBits() != DstTy.getSizeInBits()) report("bitcast sizes must match", MI); + + if (SrcTy == DstTy) + report("bitcast must change the type", MI); + break; } case TargetOpcode::G_INTTOPTR: @@ -1115,6 +1075,22 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { // TODO: Is the offset allowed to be a scalar with a vector? 
break; } + case TargetOpcode::G_PTRMASK: { + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); + LLT MaskTy = MRI->getType(MI->getOperand(2).getReg()); + if (!DstTy.isValid() || !SrcTy.isValid() || !MaskTy.isValid()) + break; + + if (!DstTy.getScalarType().isPointer()) + report("ptrmask result type must be a pointer", MI); + + if (!MaskTy.getScalarType().isScalar()) + report("ptrmask mask type must be an integer", MI); + + verifyVectorElementMatch(DstTy, MaskTy, MI); + break; + } case TargetOpcode::G_SEXT: case TargetOpcode::G_ZEXT: case TargetOpcode::G_ANYEXT: @@ -1485,13 +1461,18 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { if (MI->isInlineAsm()) verifyInlineAsm(MI); + // A fully-formed DBG_VALUE must have a location. Ignore partially formed + // DBG_VALUEs: these are convenient to use in tests, but should never get + // generated. + if (MI->isDebugValue() && MI->getNumOperands() == 4) + if (!MI->getDebugLoc()) + report("Missing DebugLoc for debug instruction", MI); + // Check the MachineMemOperands for basic consistency. - for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), - E = MI->memoperands_end(); - I != E; ++I) { - if ((*I)->isLoad() && !MI->mayLoad()) + for (MachineMemOperand *Op : MI->memoperands()) { + if (Op->isLoad() && !MI->mayLoad()) report("Missing mayLoad flag", MI); - if ((*I)->isStore() && !MI->mayStore()) + if (Op->isStore() && !MI->mayStore()) report("Missing mayStore flag", MI); } @@ -1552,26 +1533,27 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { } break; } - case TargetOpcode::STATEPOINT: - if (!MI->getOperand(StatepointOpers::IDPos).isImm() || - !MI->getOperand(StatepointOpers::NBytesPos).isImm() || - !MI->getOperand(StatepointOpers::NCallArgsPos).isImm()) + case TargetOpcode::STATEPOINT: { + StatepointOpers SO(MI); + if (!MI->getOperand(SO.getIDPos()).isImm() || + !MI->getOperand(SO.getNBytesPos()).isImm() || + !MI->getOperand(SO.getNCallArgsPos()).isImm()) { report("meta operands to STATEPOINT not constant!", MI); - break; + break; + } auto VerifyStackMapConstant = [&](unsigned Offset) { - if (!MI->getOperand(Offset).isImm() || - MI->getOperand(Offset).getImm() != StackMaps::ConstantOp || - !MI->getOperand(Offset + 1).isImm()) + if (!MI->getOperand(Offset - 1).isImm() || + MI->getOperand(Offset - 1).getImm() != StackMaps::ConstantOp || + !MI->getOperand(Offset).isImm()) report("stack map constant to STATEPOINT not well formed!", MI); }; - const unsigned VarStart = StatepointOpers(MI).getVarIdx(); - VerifyStackMapConstant(VarStart + StatepointOpers::CCOffset); - VerifyStackMapConstant(VarStart + StatepointOpers::FlagsOffset); - VerifyStackMapConstant(VarStart + StatepointOpers::NumDeoptOperandsOffset); + VerifyStackMapConstant(SO.getCCIdx()); + VerifyStackMapConstant(SO.getFlagsIdx()); + VerifyStackMapConstant(SO.getNumDeoptArgsIdx()); // TODO: verify we have properly encoded deopt arguments - break; + } break; } } @@ -1599,7 +1581,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { bool IsOptional = MI->isVariadic() && MONum == MCID.getNumOperands() - 1; if (!IsOptional) { if (MO->isReg()) { - if (MO->isDef() && !MCOI.isOptionalDef()) + if (MO->isDef() && !MCOI.isOptionalDef() && !MCID.variadicOpsAreDefs()) report("Explicit operand marked as def", MO, MONum); if (MO->isImplicit()) report("Explicit operand marked as implicit", MO, MONum); @@ -1668,10 +1650,17 @@ MachineVerifier::visitMachineOperand(const 
MachineOperand *MO, unsigned MONum) { } } - // Verify two-address constraints after leaving SSA form. + // Verify two-address constraints after the twoaddressinstruction pass. + // Both twoaddressinstruction pass and phi-node-elimination pass call + // MRI->leaveSSA() to set MF as NoSSA, we should do the verification after + // twoaddressinstruction pass not after phi-node-elimination pass. So we + // shouldn't use the NoSSA as the condition, we should based on + // TiedOpsRewritten property to verify two-address constraints, this + // property will be set in twoaddressinstruction pass. unsigned DefIdx; - if (!MRI->isSSA() && MO->isUse() && - MI->isRegTiedToDefOperand(MONum, &DefIdx) && + if (MF->getProperties().hasProperty( + MachineFunctionProperties::Property::TiedOpsRewritten) && + MO->isUse() && MI->isRegTiedToDefOperand(MONum, &DefIdx) && Reg != MI->getOperand(DefIdx).getReg()) report("Two-address instruction operands must be identical", MO, MONum); @@ -1709,6 +1698,15 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (!RC) { // This is a generic virtual register. + // Do not allow undef uses for generic virtual registers. This ensures + // getVRegDef can never fail and return null on a generic register. + // + // FIXME: This restriction should probably be broadened to all SSA + // MIR. However, DetectDeadLanes/ProcessImplicitDefs technically still + // run on the SSA function just before phi elimination. + if (MO->isUndef()) + report("Generic virtual register use cannot be undef", MO, MONum); + // If we're post-Select, we can't have gvregs anymore. if (isFunctionSelected) { report("Generic virtual register invalid in a Selected function", @@ -2088,8 +2086,6 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { } } -void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {} - // This function gets called after visiting all instructions in a bundle. The // argument points to the bundle header. // Normal stand-alone instructions are also considered 'bundles', and this @@ -2101,10 +2097,10 @@ void MachineVerifier::visitMachineBundleAfter(const MachineInstr *MI) { // Kill any masked registers. while (!regMasks.empty()) { const uint32_t *Mask = regMasks.pop_back_val(); - for (RegSet::iterator I = regsLive.begin(), E = regsLive.end(); I != E; ++I) - if (Register::isPhysicalRegister(*I) && - MachineOperand::clobbersPhysReg(Mask, *I)) - regsDead.push_back(*I); + for (unsigned Reg : regsLive) + if (Register::isPhysicalRegister(Reg) && + MachineOperand::clobbersPhysReg(Mask, Reg)) + regsDead.push_back(Reg); } set_subtract(regsLive, regsDead); regsDead.clear(); set_union(regsLive, regsDefined); regsDefined.clear(); @@ -2126,40 +2122,171 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) { } } +namespace { +// This implements a set of registers that serves as a filter: can filter other +// sets by passing through elements not in the filter and blocking those that +// are. Any filter implicitly includes the full set of physical registers upon +// creation, thus filtering them all out. The filter itself as a set only grows, +// and needs to be as efficient as possible. +struct VRegFilter { + // Add elements to the filter itself. \pre Input set \p FromRegSet must have + // no duplicates. Both virtual and physical registers are fine. 
+ template <typename RegSetT> void add(const RegSetT &FromRegSet) { + SmallVector<unsigned, 0> VRegsBuffer; + filterAndAdd(FromRegSet, VRegsBuffer); + } + // Filter \p FromRegSet through the filter and append passed elements into \p + // ToVRegs. All elements appended are then added to the filter itself. + // \returns true if anything changed. + template <typename RegSetT> + bool filterAndAdd(const RegSetT &FromRegSet, + SmallVectorImpl<unsigned> &ToVRegs) { + unsigned SparseUniverse = Sparse.size(); + unsigned NewSparseUniverse = SparseUniverse; + unsigned NewDenseSize = Dense.size(); + size_t Begin = ToVRegs.size(); + for (unsigned Reg : FromRegSet) { + if (!Register::isVirtualRegister(Reg)) + continue; + unsigned Index = Register::virtReg2Index(Reg); + if (Index < SparseUniverseMax) { + if (Index < SparseUniverse && Sparse.test(Index)) + continue; + NewSparseUniverse = std::max(NewSparseUniverse, Index + 1); + } else { + if (Dense.count(Reg)) + continue; + ++NewDenseSize; + } + ToVRegs.push_back(Reg); + } + size_t End = ToVRegs.size(); + if (Begin == End) + return false; + // Reserving space in sets once performs better than doing so continuously + // and pays easily for double look-ups (even in Dense with SparseUniverseMax + // tuned all the way down) and double iteration (the second one is over a + // SmallVector, which is a lot cheaper compared to DenseSet or BitVector). + Sparse.resize(NewSparseUniverse); + Dense.reserve(NewDenseSize); + for (unsigned I = Begin; I < End; ++I) { + unsigned Reg = ToVRegs[I]; + unsigned Index = Register::virtReg2Index(Reg); + if (Index < SparseUniverseMax) + Sparse.set(Index); + else + Dense.insert(Reg); + } + return true; + } + +private: + static constexpr unsigned SparseUniverseMax = 10 * 1024 * 8; + // VRegs indexed within SparseUniverseMax are tracked by Sparse, those beyound + // are tracked by Dense. The only purpose of the threashold and the Dense set + // is to have a reasonably growing memory usage in pathological cases (large + // number of very sparse VRegFilter instances live at the same time). In + // practice even in the worst-by-execution time cases having all elements + // tracked by Sparse (very large SparseUniverseMax scenario) tends to be more + // space efficient than if tracked by Dense. The threashold is set to keep the + // worst-case memory usage within 2x of figures determined empirically for + // "all Dense" scenario in such worst-by-execution-time cases. + BitVector Sparse; + DenseSet<unsigned> Dense; +}; + +// Implements both a transfer function and a (binary, in-place) join operator +// for a dataflow over register sets with set union join and filtering transfer +// (out_b = in_b \ filter_b). filter_b is expected to be set-up ahead of time. +// Maintains out_b as its state, allowing for O(n) iteration over it at any +// time, where n is the size of the set (as opposed to O(U) where U is the +// universe). filter_b implicitly contains all physical registers at all times. +class FilteringVRegSet { + VRegFilter Filter; + SmallVector<unsigned, 0> VRegs; + +public: + // Set-up the filter_b. \pre Input register set \p RS must have no duplicates. + // Both virtual and physical registers are fine. + template <typename RegSetT> void addToFilter(const RegSetT &RS) { + Filter.add(RS); + } + // Passes \p RS through the filter_b (transfer function) and adds what's left + // to itself (out_b). 
+ template <typename RegSetT> bool add(const RegSetT &RS) { + // Double-duty the Filter: to maintain VRegs a set (and the join operation + // a set union) just add everything being added here to the Filter as well. + return Filter.filterAndAdd(RS, VRegs); + } + using const_iterator = decltype(VRegs)::const_iterator; + const_iterator begin() const { return VRegs.begin(); } + const_iterator end() const { return VRegs.end(); } + size_t size() const { return VRegs.size(); } +}; +} // namespace + // Calculate the largest possible vregsPassed sets. These are the registers that // can pass through an MBB live, but may not be live every time. It is assumed // that all vregsPassed sets are empty before the call. void MachineVerifier::calcRegsPassed() { + // This is a forward dataflow, doing it in RPO. A standard map serves as a + // priority (sorting by RPO number) queue, deduplicating worklist, and an RPO + // number to MBB mapping all at once. + std::map<unsigned, const MachineBasicBlock *> RPOWorklist; + DenseMap<const MachineBasicBlock *, unsigned> RPONumbers; + if (MF->empty()) { + // ReversePostOrderTraversal doesn't handle empty functions. + return; + } + std::vector<FilteringVRegSet> VRegsPassedSets(MF->size()); + for (const MachineBasicBlock *MBB : + ReversePostOrderTraversal<const MachineFunction *>(MF)) { + // Careful with the evaluation order, fetch next number before allocating. + unsigned Number = RPONumbers.size(); + RPONumbers[MBB] = Number; + // Set-up the transfer functions for all blocks. + const BBInfo &MInfo = MBBInfoMap[MBB]; + VRegsPassedSets[Number].addToFilter(MInfo.regsKilled); + VRegsPassedSets[Number].addToFilter(MInfo.regsLiveOut); + } // First push live-out regs to successors' vregsPassed. Remember the MBBs that // have any vregsPassed. - SmallPtrSet<const MachineBasicBlock*, 8> todo; - for (const auto &MBB : *MF) { - BBInfo &MInfo = MBBInfoMap[&MBB]; + for (const MachineBasicBlock &MBB : *MF) { + const BBInfo &MInfo = MBBInfoMap[&MBB]; if (!MInfo.reachable) continue; - for (MachineBasicBlock::const_succ_iterator SuI = MBB.succ_begin(), - SuE = MBB.succ_end(); SuI != SuE; ++SuI) { - BBInfo &SInfo = MBBInfoMap[*SuI]; - if (SInfo.addPassed(MInfo.regsLiveOut)) - todo.insert(*SuI); - } - } - - // Iteratively push vregsPassed to successors. This will converge to the same - // final state regardless of DenseSet iteration order. - while (!todo.empty()) { - const MachineBasicBlock *MBB = *todo.begin(); - todo.erase(MBB); - BBInfo &MInfo = MBBInfoMap[MBB]; - for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(), - SuE = MBB->succ_end(); SuI != SuE; ++SuI) { - if (*SuI == MBB) + for (const MachineBasicBlock *Succ : MBB.successors()) { + unsigned SuccNumber = RPONumbers[Succ]; + FilteringVRegSet &SuccSet = VRegsPassedSets[SuccNumber]; + if (SuccSet.add(MInfo.regsLiveOut)) + RPOWorklist.emplace(SuccNumber, Succ); + } + } + + // Iteratively push vregsPassed to successors. + while (!RPOWorklist.empty()) { + auto Next = RPOWorklist.begin(); + const MachineBasicBlock *MBB = Next->second; + RPOWorklist.erase(Next); + FilteringVRegSet &MSet = VRegsPassedSets[RPONumbers[MBB]]; + for (const MachineBasicBlock *Succ : MBB->successors()) { + if (Succ == MBB) continue; - BBInfo &SInfo = MBBInfoMap[*SuI]; - if (SInfo.addPassed(MInfo.vregsPassed)) - todo.insert(*SuI); + unsigned SuccNumber = RPONumbers[Succ]; + FilteringVRegSet &SuccSet = VRegsPassedSets[SuccNumber]; + if (SuccSet.add(MSet)) + RPOWorklist.emplace(SuccNumber, Succ); } } + // Copy the results back to BBInfos. 
+ for (const MachineBasicBlock &MBB : *MF) { + BBInfo &MInfo = MBBInfoMap[&MBB]; + if (!MInfo.reachable) + continue; + const FilteringVRegSet &MSet = VRegsPassedSets[RPONumbers[&MBB]]; + MInfo.vregsPassed.reserve(MSet.size()); + MInfo.vregsPassed.insert(MSet.begin(), MSet.end()); + } } // Calculate the set of virtual registers that must be passed through each basic @@ -2170,11 +2297,10 @@ void MachineVerifier::calcRegsRequired() { SmallPtrSet<const MachineBasicBlock*, 8> todo; for (const auto &MBB : *MF) { BBInfo &MInfo = MBBInfoMap[&MBB]; - for (MachineBasicBlock::const_pred_iterator PrI = MBB.pred_begin(), - PrE = MBB.pred_end(); PrI != PrE; ++PrI) { - BBInfo &PInfo = MBBInfoMap[*PrI]; + for (const MachineBasicBlock *Pred : MBB.predecessors()) { + BBInfo &PInfo = MBBInfoMap[Pred]; if (PInfo.addRequired(MInfo.vregsLiveIn)) - todo.insert(*PrI); + todo.insert(Pred); } } @@ -2184,13 +2310,12 @@ void MachineVerifier::calcRegsRequired() { const MachineBasicBlock *MBB = *todo.begin(); todo.erase(MBB); BBInfo &MInfo = MBBInfoMap[MBB]; - for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(), - PrE = MBB->pred_end(); PrI != PrE; ++PrI) { - if (*PrI == MBB) + for (const MachineBasicBlock *Pred : MBB->predecessors()) { + if (Pred == MBB) continue; - BBInfo &SInfo = MBBInfoMap[*PrI]; + BBInfo &SInfo = MBBInfoMap[Pred]; if (SInfo.addRequired(MInfo.vregsRequired)) - todo.insert(*PrI); + todo.insert(Pred); } } } @@ -2274,23 +2399,19 @@ void MachineVerifier::visitMachineFunctionAfter() { // Check for killed virtual registers that should be live out. for (const auto &MBB : *MF) { BBInfo &MInfo = MBBInfoMap[&MBB]; - for (RegSet::iterator - I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E; - ++I) - if (MInfo.regsKilled.count(*I)) { + for (unsigned VReg : MInfo.vregsRequired) + if (MInfo.regsKilled.count(VReg)) { report("Virtual register killed in block, but needed live out.", &MBB); - errs() << "Virtual register " << printReg(*I) + errs() << "Virtual register " << printReg(VReg) << " is used after the block.\n"; } } if (!MF->empty()) { BBInfo &MInfo = MBBInfoMap[&MF->front()]; - for (RegSet::iterator - I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E; - ++I) { + for (unsigned VReg : MInfo.vregsRequired) { report("Virtual register defs don't dominate all uses.", MF); - report_context_vreg(*I); + report_context_vreg(VReg); } } @@ -2652,9 +2773,8 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, VNI->def == LiveInts->getMBBStartIdx(&*MFI); // Check that VNI is live-out of all predecessors. - for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), - PE = MFI->pred_end(); PI != PE; ++PI) { - SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); + for (const MachineBasicBlock *Pred : MFI->predecessors()) { + SlotIndex PEnd = LiveInts->getMBBEndIdx(Pred); const VNInfo *PVNI = LR.getVNInfoBefore(PEnd); // All predecessors must have a live-out value. However for a phi @@ -2662,9 +2782,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // only one of the subregisters (not necessarily the current one) needs to // be defined. 
if (!PVNI && (LaneMask.none() || !IsPHI)) { - if (LiveRangeCalc::isJointlyDominated(*PI, Undefs, *Indexes)) + if (LiveRangeCalc::isJointlyDominated(Pred, Undefs, *Indexes)) continue; - report("Register not marked live out of predecessor", *PI); + report("Register not marked live out of predecessor", Pred); report_context(LR, Reg, LaneMask); report_context(*VNI); errs() << " live into " << printMBBReference(*MFI) << '@' @@ -2675,10 +2795,10 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // Only PHI-defs can take different predecessor values. if (!IsPHI && PVNI != VNI) { - report("Different value live out of predecessor", *PI); + report("Different value live out of predecessor", Pred); report_context(LR, Reg, LaneMask); errs() << "Valno #" << PVNI->id << " live out of " - << printMBBReference(*(*PI)) << '@' << PEnd << "\nValno #" + << printMBBReference(*Pred) << '@' << PEnd << "\nValno #" << VNI->id << " live into " << printMBBReference(*MFI) << '@' << LiveInts->getMBBStartIdx(&*MFI) << '\n'; } @@ -2734,10 +2854,9 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { report_context(LI); for (unsigned comp = 0; comp != NumComp; ++comp) { errs() << comp << ": valnos"; - for (LiveInterval::const_vni_iterator I = LI.vni_begin(), - E = LI.vni_end(); I!=E; ++I) - if (comp == ConEQ.getEqClass(*I)) - errs() << ' ' << (*I)->id; + for (const VNInfo *I : LI.valnos) + if (comp == ConEQ.getEqClass(I)) + errs() << ' ' << I->id; errs() << '\n'; } } @@ -2824,15 +2943,14 @@ void MachineVerifier::verifyStackFrame() { // Make sure the exit state of any predecessor is consistent with the entry // state. - for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(), - E = MBB->pred_end(); I != E; ++I) { - if (Reachable.count(*I) && - (SPState[(*I)->getNumber()].ExitValue != BBState.EntryValue || - SPState[(*I)->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) { + for (const MachineBasicBlock *Pred : MBB->predecessors()) { + if (Reachable.count(Pred) && + (SPState[Pred->getNumber()].ExitValue != BBState.EntryValue || + SPState[Pred->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) { report("The exit stack state of a predecessor is inconsistent.", MBB); - errs() << "Predecessor " << printMBBReference(*(*I)) - << " has exit state (" << SPState[(*I)->getNumber()].ExitValue - << ", " << SPState[(*I)->getNumber()].ExitIsSetup << "), while " + errs() << "Predecessor " << printMBBReference(*Pred) + << " has exit state (" << SPState[Pred->getNumber()].ExitValue + << ", " << SPState[Pred->getNumber()].ExitIsSetup << "), while " << printMBBReference(*MBB) << " has entry state (" << BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n"; } @@ -2840,15 +2958,14 @@ void MachineVerifier::verifyStackFrame() { // Make sure the entry state of any successor is consistent with the exit // state. 
- for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); I != E; ++I) { - if (Reachable.count(*I) && - (SPState[(*I)->getNumber()].EntryValue != BBState.ExitValue || - SPState[(*I)->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) { + for (const MachineBasicBlock *Succ : MBB->successors()) { + if (Reachable.count(Succ) && + (SPState[Succ->getNumber()].EntryValue != BBState.ExitValue || + SPState[Succ->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) { report("The entry stack state of a successor is inconsistent.", MBB); - errs() << "Successor " << printMBBReference(*(*I)) - << " has entry state (" << SPState[(*I)->getNumber()].EntryValue - << ", " << SPState[(*I)->getNumber()].EntryIsSetup << "), while " + errs() << "Successor " << printMBBReference(*Succ) + << " has entry state (" << SPState[Succ->getNumber()].EntryValue + << ", " << SPState[Succ->getNumber()].EntryIsSetup << "), while " << printMBBReference(*MBB) << " has exit state (" << BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n"; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp index 163e52d9199d..d85b1b7988ce 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -8,6 +8,7 @@ #include "llvm/CodeGen/ModuloSchedule.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopUtils.h" @@ -420,7 +421,7 @@ void ModuloScheduleExpander::generateExistingPhis( unsigned NewReg = 0; unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled; // In the epilog, we may need to look back one stage to get the correct - // Phi name because the epilog and prolog blocks execute the same stage. + // Phi name, because the epilog and prolog blocks execute the same stage. // The correct name is from the previous block only when the Phi has // been completely scheduled prior to the epilog, and Phi value is not // needed in multiple stages. @@ -913,7 +914,12 @@ bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const MachineOperand *BaseOp; int64_t Offset; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) + bool OffsetIsScalable; + if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI)) + return false; + + // FIXME: This algorithm assumes instructions have fixed-size offsets. + if (OffsetIsScalable) return false; if (!BaseOp->isReg()) @@ -1435,11 +1441,15 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) { // immediately prior to pruning. auto RC = MRI.getRegClass(Reg); Register R = MRI.createVirtualRegister(RC); - BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R) - .addReg(IllegalPhiDefault.getValue()) - .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect. - .addReg(LoopReg) - .addMBB(BB); // Block choice is arbitrary and has no effect. + MachineInstr *IllegalPhi = + BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R) + .addReg(IllegalPhiDefault.getValue()) + .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect. + .addReg(LoopReg) + .addMBB(BB); // Block choice is arbitrary and has no effect. 
+ // Illegal phi should belong to the producer stage so that it can be + // filtered correctly during peeling. + S.setStage(IllegalPhi, LoopProducerStage); return R; } @@ -1620,18 +1630,21 @@ void PeelingModuloScheduleExpander::moveStageBetweenBlocks( MachineInstr *MI = &*I++; if (MI->isPHI()) { // This is an illegal PHI. If we move any instructions using an illegal - // PHI, we need to create a legal Phi - Register PhiR = MI->getOperand(0).getReg(); - auto RC = MRI.getRegClass(PhiR); - Register NR = MRI.createVirtualRegister(RC); - MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(), DebugLoc(), - TII->get(TargetOpcode::PHI), NR) - .addReg(PhiR) - .addMBB(SourceBB); - BlockMIs[{DestBB, CanonicalMIs[MI]}] = NI; - CanonicalMIs[NI] = CanonicalMIs[MI]; - Remaps[PhiR] = NR; - continue; + // PHI, we need to create a legal Phi. + if (getStage(MI) != Stage) { + // The legal Phi is not necessary if the illegal phi's stage + // is being moved. + Register PhiR = MI->getOperand(0).getReg(); + auto RC = MRI.getRegClass(PhiR); + Register NR = MRI.createVirtualRegister(RC); + MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(), + DebugLoc(), TII->get(TargetOpcode::PHI), NR) + .addReg(PhiR) + .addMBB(SourceBB); + BlockMIs[{DestBB, CanonicalMIs[MI]}] = NI; + CanonicalMIs[NI] = CanonicalMIs[MI]; + Remaps[PhiR] = NR; + } } if (getStage(MI) != Stage) continue; @@ -1649,8 +1662,8 @@ void PeelingModuloScheduleExpander::moveStageBetweenBlocks( // we don't need the phi anymore. if (getStage(Def) == Stage) { Register PhiReg = MI.getOperand(0).getReg(); - MRI.replaceRegWith(MI.getOperand(0).getReg(), - Def->getOperand(0).getReg()); + assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg()) != -1); + MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); MI.getOperand(0).setReg(PhiReg); PhiToDelete.push_back(&MI); } @@ -1698,16 +1711,17 @@ PeelingModuloScheduleExpander::getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi) { unsigned distance = PhiNodeLoopIteration[Phi]; MachineInstr *CanonicalUse = CanonicalPhi; + Register CanonicalUseReg = CanonicalUse->getOperand(0).getReg(); for (unsigned I = 0; I < distance; ++I) { assert(CanonicalUse->isPHI()); assert(CanonicalUse->getNumOperands() == 5); unsigned LoopRegIdx = 3, InitRegIdx = 1; if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent()) std::swap(LoopRegIdx, InitRegIdx); - CanonicalUse = - MRI.getVRegDef(CanonicalUse->getOperand(LoopRegIdx).getReg()); + CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg(); + CanonicalUse = MRI.getVRegDef(CanonicalUseReg); } - return CanonicalUse->getOperand(0).getReg(); + return CanonicalUseReg; } void PeelingModuloScheduleExpander::peelPrologAndEpilogs() { @@ -1933,7 +1947,7 @@ void PeelingModuloScheduleExpander::fixupBranches() { SmallVector<MachineOperand, 4> Cond; TII->removeBranch(*Prolog); Optional<bool> StaticallyGreater = - Info->createTripCountGreaterCondition(TC, *Prolog, Cond); + LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond); if (!StaticallyGreater.hasValue()) { LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n"); // Dynamically branch based on Cond. 
@@ -1961,10 +1975,10 @@ void PeelingModuloScheduleExpander::fixupBranches() { } if (!KernelDisposed) { - Info->adjustTripCount(-(Schedule.getNumStages() - 1)); - Info->setPreheader(Prologs.back()); + LoopInfo->adjustTripCount(-(Schedule.getNumStages() - 1)); + LoopInfo->setPreheader(Prologs.back()); } else { - Info->disposed(); + LoopInfo->disposed(); } } @@ -1977,8 +1991,8 @@ void PeelingModuloScheduleExpander::expand() { BB = Schedule.getLoop()->getTopBlock(); Preheader = Schedule.getLoop()->getLoopPreheader(); LLVM_DEBUG(Schedule.dump()); - Info = TII->analyzeLoopForPipelining(BB); - assert(Info); + LoopInfo = TII->analyzeLoopForPipelining(BB); + assert(LoopInfo); rewriteKernel(); peelPrologAndEpilogs(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp index 4dd4c4b1084e..311b87fa9e3b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp @@ -96,7 +96,8 @@ namespace { /// Split critical edges where necessary for good coalescer performance. bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, - MachineLoopInfo *MLI); + MachineLoopInfo *MLI, + std::vector<SparseBitVector<>> *LiveInSets); // These functions are temporary abstractions around LiveVariables and // LiveIntervals, so they can go away when LiveVariables does. @@ -151,16 +152,45 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; - // This pass takes the function out of SSA form. - MRI->leaveSSA(); - // Split critical edges to help the coalescer. if (!DisableEdgeSplitting && (LV || LIS)) { + // A set of live-in regs for each MBB which is used to update LV + // efficiently also with large functions. + std::vector<SparseBitVector<>> LiveInSets; + if (LV) { + LiveInSets.resize(MF.size()); + for (unsigned Index = 0, e = MRI->getNumVirtRegs(); Index != e; ++Index) { + // Set the bit for this register for each MBB where it is + // live-through or live-in (killed). + unsigned VirtReg = Register::index2VirtReg(Index); + MachineInstr *DefMI = MRI->getVRegDef(VirtReg); + if (!DefMI) + continue; + LiveVariables::VarInfo &VI = LV->getVarInfo(VirtReg); + SparseBitVector<>::iterator AliveBlockItr = VI.AliveBlocks.begin(); + SparseBitVector<>::iterator EndItr = VI.AliveBlocks.end(); + while (AliveBlockItr != EndItr) { + unsigned BlockNum = *(AliveBlockItr++); + LiveInSets[BlockNum].set(Index); + } + // The register is live into an MBB in which it is killed but not + // defined. See comment for VarInfo in LiveVariables.h. + MachineBasicBlock *DefMBB = DefMI->getParent(); + if (VI.Kills.size() > 1 || + (!VI.Kills.empty() && VI.Kills.front()->getParent() != DefMBB)) + for (auto *MI : VI.Kills) + LiveInSets[MI->getParent()->getNumber()].set(Index); + } + } + MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); for (auto &MBB : MF) - Changed |= SplitPHIEdges(MF, MBB, MLI); + Changed |= SplitPHIEdges(MF, MBB, MLI, (LV ? &LiveInSets : nullptr)); } + // This pass takes the function out of SSA form. 
+ MRI->leaveSSA(); + // Populate VRegPHIUseCount analyzePHINodes(MF); @@ -561,7 +591,8 @@ void PHIElimination::analyzePHINodes(const MachineFunction& MF) { bool PHIElimination::SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, - MachineLoopInfo *MLI) { + MachineLoopInfo *MLI, + std::vector<SparseBitVector<>> *LiveInSets) { if (MBB.empty() || !MBB.front().isPHI() || MBB.isEHPad()) return false; // Quick exit for basic blocks without PHIs. @@ -628,7 +659,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, } if (!ShouldSplit && !SplitAllCriticalEdges) continue; - if (!PreMBB->SplitCriticalEdge(&MBB, *this)) { + if (!PreMBB->SplitCriticalEdge(&MBB, *this, LiveInSets)) { LLVM_DEBUG(dbgs() << "Failed to split critical edge.\n"); continue; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp index 3a2cdaf3bd3c..bae96eb84521 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp @@ -26,8 +26,9 @@ llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB, // Usually, we just want to insert the copy before the first terminator // instruction. However, for the edge going to a landing pad, we must insert - // the copy before the call/invoke instruction. - if (!SuccMBB->isEHPad()) + // the copy before the call/invoke instruction. Similarly for an INLINEASM_BR + // going to an indirect target. + if (!SuccMBB->isEHPad() && !SuccMBB->isInlineAsmBrIndirectTarget()) return MBB->getFirstTerminator(); // Discover any defs/uses in this basic block. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp index 7dbd830666fb..c19ed1f8f71d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp @@ -51,7 +51,7 @@ std::unique_ptr<Module> llvm::splitCodeGen( // Create ThreadPool in nested scope so that threads will be joined // on destruction. { - ThreadPool CodegenThreadPool(OSs.size()); + ThreadPool CodegenThreadPool(hardware_concurrency(OSs.size())); int ThreadCount = 0; SplitModule( diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp index a8466396f9b8..ca44b7a53982 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp @@ -58,14 +58,9 @@ bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) { if (MF.getFunction().hasFnAttribute("patchable-function-entry")) { MachineBasicBlock &FirstMBB = *MF.begin(); const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - if (FirstMBB.empty()) { - BuildMI(&FirstMBB, DebugLoc(), - TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER)); - } else { - MachineInstr &FirstMI = *FirstMBB.begin(); - BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(), - TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER)); - } + // The initial .loc covers PATCHABLE_FUNCTION_ENTER. 
+ BuildMI(FirstMBB, FirstMBB.begin(), DebugLoc(), + TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER)); return true; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp index c9c279cf0ddf..4a66863ea803 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -457,12 +457,12 @@ INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE, bool PeepholeOptimizer:: optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB, SmallPtrSetImpl<MachineInstr*> &LocalMIs) { - unsigned SrcReg, DstReg, SubIdx; + Register SrcReg, DstReg; + unsigned SubIdx; if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx)) return false; - if (Register::isPhysicalRegister(DstReg) || - Register::isPhysicalRegister(SrcReg)) + if (DstReg.isPhysical() || SrcReg.isPhysical()) return false; if (MRI->hasOneNonDBGUse(SrcReg)) @@ -607,15 +607,16 @@ optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB, bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) { // If this instruction is a comparison against zero and isn't comparing a // physical register, we can try to optimize it. - unsigned SrcReg, SrcReg2; + Register SrcReg, SrcReg2; int CmpMask, CmpValue; if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) || - Register::isPhysicalRegister(SrcReg) || - (SrcReg2 != 0 && Register::isPhysicalRegister(SrcReg2))) + SrcReg.isPhysical() || SrcReg2.isPhysical()) return false; // Attempt to optimize the comparison instruction. + LLVM_DEBUG(dbgs() << "Attempting to optimize compare: " << MI); if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) { + LLVM_DEBUG(dbgs() << " -> Successfully optimized compare!\n"); ++NumCmps; return true; } @@ -636,6 +637,7 @@ bool PeepholeOptimizer::optimizeSelect(MachineInstr &MI, return false; if (!TII->optimizeSelect(MI, LocalMIs)) return false; + LLVM_DEBUG(dbgs() << "Deleting select: " << MI); MI.eraseFromParent(); ++NumSelects; return true; @@ -663,8 +665,8 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg, // So far we do not have any motivating example for doing that. // Thus, instead of maintaining untested code, we will revisit that if // that changes at some point. - unsigned Reg = RegSubReg.Reg; - if (Register::isPhysicalRegister(Reg)) + Register Reg = RegSubReg.Reg; + if (Reg.isPhysical()) return false; const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); @@ -1300,6 +1302,7 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy( } // MI is now dead. + LLVM_DEBUG(dbgs() << "Deleting uncoalescable copy: " << MI); MI.eraseFromParent(); ++NumUncoalescableCopies; return true; @@ -1724,6 +1727,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { (foldRedundantCopy(*MI, CopySrcRegs, CopySrcMIs) || foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) { LocalMIs.erase(MI); + LLVM_DEBUG(dbgs() << "Deleting redundant copy: " << *MI << "\n"); MI->eraseFromParent(); Changed = true; continue; @@ -1776,7 +1780,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { LocalMIs.erase(MI); LocalMIs.erase(DefMI); LocalMIs.insert(FoldMI); - if (MI->isCall()) + // Update the call site info. 
+ if (MI->shouldUpdateCallSiteInfo()) MI->getMF()->moveCallSiteInfo(MI, FoldMI); MI->eraseFromParent(); DefMI->eraseFromParent(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp index d68959935cec..b85f00a61eac 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -17,11 +17,9 @@ // //===----------------------------------------------------------------------===// -#include "AggressiveAntiDepBreaker.h" -#include "AntiDepBreaker.h" -#include "CriticalAntiDepBreaker.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/AntiDepBreaker.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -220,11 +218,11 @@ SchedulePostRATDList::SchedulePostRATDList( assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE || MRI.tracksLiveness()) && "Live-ins must be accurate for anti-dependency breaking"); - AntiDepBreak = - ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ? - (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) : - ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) ? - (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : nullptr)); + AntiDepBreak = ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) + ? createAggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) + : ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) + ? createCriticalAntiDepBreaker(MF, RCI) + : nullptr)); } SchedulePostRATDList::~SchedulePostRATDList() { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index 1ff4e7cbd8fb..1be9544848ec 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -39,14 +39,14 @@ static bool lowerLoadRelative(Function &F) { for (auto I = F.use_begin(), E = F.use_end(); I != E;) { auto CI = dyn_cast<CallInst>(I->getUser()); ++I; - if (!CI || CI->getCalledValue() != &F) + if (!CI || CI->getCalledOperand() != &F) continue; IRBuilder<> B(CI); Value *OffsetPtr = B.CreateGEP(Int8Ty, CI->getArgOperand(0), CI->getArgOperand(1)); Value *OffsetPtrI32 = B.CreateBitCast(OffsetPtr, Int32PtrTy); - Value *OffsetI32 = B.CreateAlignedLoad(Int32Ty, OffsetPtrI32, 4); + Value *OffsetI32 = B.CreateAlignedLoad(Int32Ty, OffsetPtrI32, Align(4)); Value *ResultPtr = B.CreateGEP(Int8Ty, CI->getArgOperand(0), OffsetI32); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 3909b5717281..a489f493d5ee 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -185,7 +185,7 @@ static void stashEntryDbgValues(MachineBasicBlock &MBB, break; if (!MI.isDebugValue() || !MI.getDebugVariable()->isParameter()) continue; - if (MI.getOperand(0).isFI()) { + if (MI.getDebugOperand(0).isFI()) { // We can only emit valid locations for frame indices after the frame // setup, so do not stash away them. FrameIndexValues.push_back(&MI); @@ -237,7 +237,7 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) { stashEntryDbgValues(*SaveBlock, EntryDbgValues); // Handle CSR spilling and restoring, for targets that need it. 
- if (MF.getTarget().usesPhysRegsForPEI()) + if (MF.getTarget().usesPhysRegsForValues()) spillCalleeSavedRegs(MF); // Allow the target machine to make final modifications to the function @@ -259,6 +259,10 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) { for (auto &I : EntryDbgValues) I.first->insert(I.first->begin(), I.second.begin(), I.second.end()); + // Allow the target machine to make final modifications to the function + // before the frame layout is finalized. + TFI->processFunctionBeforeFrameIndicesReplaced(MF, RS); + // Replace all MO_FrameIndex operands with physical register references // and actual offsets. // @@ -434,14 +438,12 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F, unsigned Size = RegInfo->getSpillSize(*RC); if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { // Nope, just spill it anywhere convenient. - unsigned Align = RegInfo->getSpillAlignment(*RC); - unsigned StackAlign = TFI->getStackAlignment(); - + Align Alignment(RegInfo->getSpillAlignment(*RC)); // We may not be able to satisfy the desired alignment specification of // the TargetRegisterClass if the stack alignment is smaller. Use the // min. - Align = std::min(Align, StackAlign); - FrameIdx = MFI.CreateStackObject(Size, Align, true); + Alignment = std::min(Alignment, TFI->getStackAlign()); + FrameIdx = MFI.CreateStackObject(Size, Alignment, true); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { @@ -631,22 +633,21 @@ void PEI::spillCalleeSavedRegs(MachineFunction &MF) { } /// AdjustStackOffset - Helper function used to adjust the stack frame offset. -static inline void -AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, - bool StackGrowsDown, int64_t &Offset, - unsigned &MaxAlign, unsigned Skew) { +static inline void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, + bool StackGrowsDown, int64_t &Offset, + Align &MaxAlign, unsigned Skew) { // If the stack grows down, add the object size to find the lowest address. if (StackGrowsDown) Offset += MFI.getObjectSize(FrameIdx); - unsigned Align = MFI.getObjectAlignment(FrameIdx); + Align Alignment = MFI.getObjectAlign(FrameIdx); // If the alignment of this object is greater than that of the stack, then // increase the stack alignment to match. - MaxAlign = std::max(MaxAlign, Align); + MaxAlign = std::max(MaxAlign, Alignment); // Adjust to alignment boundary. - Offset = alignTo(Offset, Align, Skew); + Offset = alignTo(Offset, Alignment, Skew); if (StackGrowsDown) { LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset @@ -706,7 +707,7 @@ computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown, /// Assign frame object to an unused portion of the stack in the fixed stack /// object range. Return true if the allocation was successful. 
static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx, - bool StackGrowsDown, unsigned MaxAlign, + bool StackGrowsDown, Align MaxAlign, BitVector &StackBytesFree) { if (MFI.isVariableSizedObjectIndex(FrameIdx)) return false; @@ -718,7 +719,7 @@ static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx, return false; } - unsigned ObjAlign = MFI.getObjectAlignment(FrameIdx); + Align ObjAlign = MFI.getObjectAlign(FrameIdx); if (ObjAlign > MaxAlign) return false; @@ -765,11 +766,11 @@ static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx, /// AssignProtectedObjSet - Helper function to assign large stack objects (i.e., /// those required to be close to the Stack Protector) to stack offsets. -static void -AssignProtectedObjSet(const StackObjSet &UnassignedObjs, - SmallSet<int, 16> &ProtectedObjs, - MachineFrameInfo &MFI, bool StackGrowsDown, - int64_t &Offset, unsigned &MaxAlign, unsigned Skew) { +static void AssignProtectedObjSet(const StackObjSet &UnassignedObjs, + SmallSet<int, 16> &ProtectedObjs, + MachineFrameInfo &MFI, bool StackGrowsDown, + int64_t &Offset, Align &MaxAlign, + unsigned Skew) { for (StackObjSet::const_iterator I = UnassignedObjs.begin(), E = UnassignedObjs.end(); I != E; ++I) { @@ -807,7 +808,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) if (!MFI.isDeadObjectIndex(i) && MFI.getStackID(i) == TargetStackID::Default) - assert(MFI.getObjectAlignment(i) <= MFI.getMaxAlignment() && + assert(MFI.getObjectAlign(i) <= MFI.getMaxAlign() && "MaxAlignment is invalid"); #endif @@ -846,9 +847,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // address of the object. Offset += MFI.getObjectSize(i); - unsigned Align = MFI.getObjectAlignment(i); // Adjust to alignment boundary - Offset = alignTo(Offset, Align, Skew); + Offset = alignTo(Offset, MFI.getObjectAlign(i), Skew); LLVM_DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n"); MFI.setObjectOffset(i, -Offset); // Set the computed offset @@ -863,9 +863,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { if (MFI.isDeadObjectIndex(i)) continue; - unsigned Align = MFI.getObjectAlignment(i); // Adjust to alignment boundary - Offset = alignTo(Offset, Align, Skew); + Offset = alignTo(Offset, MFI.getObjectAlign(i), Skew); LLVM_DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n"); MFI.setObjectOffset(i, Offset); @@ -876,7 +875,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // FixedCSEnd is the stack offset to the end of the fixed and callee-save // stack area. int64_t FixedCSEnd = Offset; - unsigned MaxAlign = MFI.getMaxAlignment(); + Align MaxAlign = MFI.getMaxAlign(); // Make sure the special register scavenging spill slot is closest to the // incoming stack pointer if a frame pointer is required and is closer @@ -899,10 +898,10 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // frame index registers. Functions which don't want/need this optimization // will continue to use the existing code path. if (MFI.getUseLocalStackAllocationBlock()) { - unsigned Align = MFI.getLocalFrameMaxAlign().value(); + Align Alignment = MFI.getLocalFrameMaxAlign(); // Adjust to alignment boundary. 
- Offset = alignTo(Offset, Align, Skew); + Offset = alignTo(Offset, Alignment, Skew); LLVM_DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); @@ -917,7 +916,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // Allocate the local block Offset += MFI.getLocalFrameSize(); - MaxAlign = std::max(Align, MaxAlign); + MaxAlign = std::max(Alignment, MaxAlign); } // Retrieve the Exception Handler registration node. @@ -1068,12 +1067,12 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // ensure that the callee's frame or the alloca data is suitably aligned; // otherwise, for leaf functions, align to the TransientStackAlignment // value. - unsigned StackAlign; + Align StackAlign; if (MFI.adjustsStack() || MFI.hasVarSizedObjects() || (RegInfo->needsStackRealignment(MF) && MFI.getObjectIndexEnd() != 0)) - StackAlign = TFI.getStackAlignment(); + StackAlign = TFI.getStackAlign(); else - StackAlign = TFI.getTransientStackAlignment(); + StackAlign = TFI.getTransientStackAlign(); // If the frame pointer is eliminated, all frame offsets will be relative to // SP not FP. Align to MaxAlign so this works. @@ -1206,7 +1205,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, if (MI.isDebugValue()) { assert(i == 0 && "Frame indices can only appear as the first " "operand of a DBG_VALUE machine instruction"); - unsigned Reg; + Register Reg; unsigned FrameIdx = MI.getOperand(0).getIndex(); unsigned Size = MF.getFrameInfo().getObjectSize(FrameIdx); @@ -1235,10 +1234,10 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, bool WithStackValue = true; DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue); // Make the DBG_VALUE direct. - MI.getOperand(1).ChangeToRegister(0, false); + MI.getDebugOffset().ChangeToRegister(0, false); } DIExpr = DIExpression::prepend(DIExpr, PrependFlags, Offset); - MI.getOperand(3).setMetadata(DIExpr); + MI.getDebugExpressionOp().setMetadata(DIExpr); continue; } @@ -1251,7 +1250,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, assert((!MI.isDebugValue() || i == 0) && "Frame indicies can only appear as the first operand of a " "DBG_VALUE machine instruction"); - unsigned Reg; + Register Reg; MachineOperand &Offset = MI.getOperand(i + 1); int refOffset = TFI->getFrameIndexReferencePreferSP( MF, MI.getOperand(i).getIndex(), Reg, /*IgnoreSPUpdates*/ false); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index 3c1f9905afd0..5bd8b4b8e27f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/ReachingDefAnalysis.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -20,10 +21,27 @@ char ReachingDefAnalysis::ID = 0; INITIALIZE_PASS(ReachingDefAnalysis, DEBUG_TYPE, "ReachingDefAnalysis", false, true) -void ReachingDefAnalysis::enterBasicBlock( - const LoopTraversal::TraversedMBBInfo &TraversedMBB) { +static bool isValidReg(const MachineOperand &MO) { + return MO.isReg() && MO.getReg(); +} - MachineBasicBlock *MBB = TraversedMBB.MBB; +static bool isValidRegUse(const MachineOperand &MO) { + return isValidReg(MO) && MO.isUse(); +} + +static bool isValidRegUseOf(const MachineOperand &MO, int PhysReg) { + 
return isValidRegUse(MO) && MO.getReg() == PhysReg; +} + +static bool isValidRegDef(const MachineOperand &MO) { + return isValidReg(MO) && MO.isDef(); +} + +static bool isValidRegDefOf(const MachineOperand &MO, int PhysReg) { + return isValidRegDef(MO) && MO.getReg() == PhysReg; +} + +void ReachingDefAnalysis::enterBasicBlock(MachineBasicBlock *MBB) { unsigned MBBNumber = MBB->getNumber(); assert(MBBNumber < MBBReachingDefs.size() && "Unexpected basic block number."); @@ -44,8 +62,10 @@ void ReachingDefAnalysis::enterBasicBlock( // Treat function live-ins as if they were defined just before the first // instruction. Usually, function arguments are set up immediately // before the call. - LiveRegs[*Unit] = -1; - MBBReachingDefs[MBBNumber][*Unit].push_back(LiveRegs[*Unit]); + if (LiveRegs[*Unit] != -1) { + LiveRegs[*Unit] = -1; + MBBReachingDefs[MBBNumber][*Unit].push_back(-1); + } } } LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n"); @@ -62,23 +82,20 @@ void ReachingDefAnalysis::enterBasicBlock( if (Incoming.empty()) continue; - for (unsigned Unit = 0; Unit != NumRegUnits; ++Unit) { - // Use the most recent predecessor def for each register. + // Find the most recent reaching definition from a predecessor. + for (unsigned Unit = 0; Unit != NumRegUnits; ++Unit) LiveRegs[Unit] = std::max(LiveRegs[Unit], Incoming[Unit]); - if ((LiveRegs[Unit] != ReachingDefDefaultVal)) - MBBReachingDefs[MBBNumber][Unit].push_back(LiveRegs[Unit]); - } } - LLVM_DEBUG(dbgs() << printMBBReference(*MBB) - << (!TraversedMBB.IsDone ? ": incomplete\n" - : ": all preds known\n")); + // Insert the most recent reaching definition we found. + for (unsigned Unit = 0; Unit != NumRegUnits; ++Unit) + if (LiveRegs[Unit] != ReachingDefDefaultVal) + MBBReachingDefs[MBBNumber][Unit].push_back(LiveRegs[Unit]); } -void ReachingDefAnalysis::leaveBasicBlock( - const LoopTraversal::TraversedMBBInfo &TraversedMBB) { +void ReachingDefAnalysis::leaveBasicBlock(MachineBasicBlock *MBB) { assert(!LiveRegs.empty() && "Must enter basic block first."); - unsigned MBBNumber = TraversedMBB.MBB->getNumber(); + unsigned MBBNumber = MBB->getNumber(); assert(MBBNumber < MBBOutRegsInfos.size() && "Unexpected basic block number."); // Save register clearances at end of MBB - used by enterBasicBlock(). @@ -89,7 +106,8 @@ void ReachingDefAnalysis::leaveBasicBlock( // only cares about the clearance from the end of the block, so adjust // everything to be relative to the end of the basic block. for (int &OutLiveReg : MBBOutRegsInfos[MBBNumber]) - OutLiveReg -= CurInstr; + if (OutLiveReg != ReachingDefDefaultVal) + OutLiveReg -= CurInstr; LiveRegs.clear(); } @@ -99,79 +117,146 @@ void ReachingDefAnalysis::processDefs(MachineInstr *MI) { unsigned MBBNumber = MI->getParent()->getNumber(); assert(MBBNumber < MBBReachingDefs.size() && "Unexpected basic block number."); - const MCInstrDesc &MCID = MI->getDesc(); - for (unsigned i = 0, - e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); - i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.getReg()) - continue; - if (MO.isUse()) + + for (auto &MO : MI->operands()) { + if (!isValidRegDef(MO)) continue; for (MCRegUnitIterator Unit(MO.getReg(), TRI); Unit.isValid(); ++Unit) { // This instruction explicitly defines the current reg unit. - LLVM_DEBUG(dbgs() << printReg(MO.getReg(), TRI) << ":\t" << CurInstr + LLVM_DEBUG(dbgs() << printReg(*Unit, TRI) << ":\t" << CurInstr << '\t' << *MI); // How many instructions since this reg unit was last written? 
- LiveRegs[*Unit] = CurInstr; - MBBReachingDefs[MBBNumber][*Unit].push_back(CurInstr); + if (LiveRegs[*Unit] != CurInstr) { + LiveRegs[*Unit] = CurInstr; + MBBReachingDefs[MBBNumber][*Unit].push_back(CurInstr); + } } } InstIds[MI] = CurInstr; ++CurInstr; } +void ReachingDefAnalysis::reprocessBasicBlock(MachineBasicBlock *MBB) { + unsigned MBBNumber = MBB->getNumber(); + assert(MBBNumber < MBBReachingDefs.size() && + "Unexpected basic block number."); + + // Count number of non-debug instructions for end of block adjustment. + int NumInsts = 0; + for (const MachineInstr &MI : *MBB) + if (!MI.isDebugInstr()) + NumInsts++; + + // When reprocessing a block, the only thing we need to do is check whether + // there is now a more recent incoming reaching definition from a predecessor. + for (MachineBasicBlock *pred : MBB->predecessors()) { + assert(unsigned(pred->getNumber()) < MBBOutRegsInfos.size() && + "Should have pre-allocated MBBInfos for all MBBs"); + const LiveRegsDefInfo &Incoming = MBBOutRegsInfos[pred->getNumber()]; + // Incoming may be empty for dead predecessors. + if (Incoming.empty()) + continue; + + for (unsigned Unit = 0; Unit != NumRegUnits; ++Unit) { + int Def = Incoming[Unit]; + if (Def == ReachingDefDefaultVal) + continue; + + auto Start = MBBReachingDefs[MBBNumber][Unit].begin(); + if (Start != MBBReachingDefs[MBBNumber][Unit].end() && *Start < 0) { + if (*Start >= Def) + continue; + + // Update existing reaching def from predecessor to a more recent one. + *Start = Def; + } else { + // Insert new reaching def from predecessor. + MBBReachingDefs[MBBNumber][Unit].insert(Start, Def); + } + + // Update reaching def at end of of BB. Keep in mind that these are + // adjusted relative to the end of the basic block. + if (MBBOutRegsInfos[MBBNumber][Unit] < Def - NumInsts) + MBBOutRegsInfos[MBBNumber][Unit] = Def - NumInsts; + } + } +} + void ReachingDefAnalysis::processBasicBlock( const LoopTraversal::TraversedMBBInfo &TraversedMBB) { - enterBasicBlock(TraversedMBB); - for (MachineInstr &MI : *TraversedMBB.MBB) { + MachineBasicBlock *MBB = TraversedMBB.MBB; + LLVM_DEBUG(dbgs() << printMBBReference(*MBB) + << (!TraversedMBB.IsDone ? ": incomplete\n" + : ": all preds known\n")); + + if (!TraversedMBB.PrimaryPass) { + // Reprocess MBB that is part of a loop. + reprocessBasicBlock(MBB); + return; + } + + enterBasicBlock(MBB); + for (MachineInstr &MI : *MBB) { if (!MI.isDebugInstr()) processDefs(&MI); } - leaveBasicBlock(TraversedMBB); + leaveBasicBlock(MBB); } bool ReachingDefAnalysis::runOnMachineFunction(MachineFunction &mf) { MF = &mf; TRI = MF->getSubtarget().getRegisterInfo(); + LLVM_DEBUG(dbgs() << "********** REACHING DEFINITION ANALYSIS **********\n"); + init(); + traverse(); + return false; +} +void ReachingDefAnalysis::releaseMemory() { + // Clear the internal vectors. 
+ MBBOutRegsInfos.clear(); + MBBReachingDefs.clear(); + InstIds.clear(); LiveRegs.clear(); - NumRegUnits = TRI->getNumRegUnits(); - - MBBReachingDefs.resize(mf.getNumBlockIDs()); +} - LLVM_DEBUG(dbgs() << "********** REACHING DEFINITION ANALYSIS **********\n"); +void ReachingDefAnalysis::reset() { + releaseMemory(); + init(); + traverse(); +} +void ReachingDefAnalysis::init() { + NumRegUnits = TRI->getNumRegUnits(); + MBBReachingDefs.resize(MF->getNumBlockIDs()); // Initialize the MBBOutRegsInfos - MBBOutRegsInfos.resize(mf.getNumBlockIDs()); + MBBOutRegsInfos.resize(MF->getNumBlockIDs()); + LoopTraversal Traversal; + TraversedMBBOrder = Traversal.traverse(*MF); +} +void ReachingDefAnalysis::traverse() { // Traverse the basic blocks. - LoopTraversal Traversal; - LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(mf); - for (LoopTraversal::TraversedMBBInfo TraversedMBB : TraversedMBBOrder) { + for (LoopTraversal::TraversedMBBInfo TraversedMBB : TraversedMBBOrder) processBasicBlock(TraversedMBB); - } - - // Sorting all reaching defs found for a ceartin reg unit in a given BB. +#ifndef NDEBUG + // Make sure reaching defs are sorted and unique. for (MBBDefsInfo &MBBDefs : MBBReachingDefs) { - for (MBBRegUnitDefs &RegUnitDefs : MBBDefs) - llvm::sort(RegUnitDefs); + for (MBBRegUnitDefs &RegUnitDefs : MBBDefs) { + int LastDef = ReachingDefDefaultVal; + for (int Def : RegUnitDefs) { + assert(Def > LastDef && "Defs must be sorted and unique"); + LastDef = Def; + } + } } - - return false; -} - -void ReachingDefAnalysis::releaseMemory() { - // Clear the internal vectors. - MBBOutRegsInfos.clear(); - MBBReachingDefs.clear(); - InstIds.clear(); +#endif } -int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) { +int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) const { assert(InstIds.count(MI) && "Unexpected machine instuction."); - int InstId = InstIds[MI]; + int InstId = InstIds.lookup(MI); int DefRes = ReachingDefDefaultVal; unsigned MBBNumber = MI->getParent()->getNumber(); assert(MBBNumber < MBBReachingDefs.size() && @@ -188,12 +273,13 @@ int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) { return LatestDef; } -MachineInstr* ReachingDefAnalysis::getReachingMIDef(MachineInstr *MI, int PhysReg) { +MachineInstr* ReachingDefAnalysis::getReachingLocalMIDef(MachineInstr *MI, + int PhysReg) const { return getInstFromId(MI->getParent(), getReachingDef(MI, PhysReg)); } bool ReachingDefAnalysis::hasSameReachingDef(MachineInstr *A, MachineInstr *B, - int PhysReg) { + int PhysReg) const { MachineBasicBlock *ParentA = A->getParent(); MachineBasicBlock *ParentB = B->getParent(); if (ParentA != ParentB) @@ -203,7 +289,7 @@ bool ReachingDefAnalysis::hasSameReachingDef(MachineInstr *A, MachineInstr *B, } MachineInstr *ReachingDefAnalysis::getInstFromId(MachineBasicBlock *MBB, - int InstId) { + int InstId) const { assert(static_cast<size_t>(MBB->getNumber()) < MBBReachingDefs.size() && "Unexpected basic block number."); assert(InstId < static_cast<int>(MBB->size()) && @@ -213,45 +299,156 @@ MachineInstr *ReachingDefAnalysis::getInstFromId(MachineBasicBlock *MBB, return nullptr; for (auto &MI : *MBB) { - if (InstIds.count(&MI) && InstIds[&MI] == InstId) + auto F = InstIds.find(&MI); + if (F != InstIds.end() && F->second == InstId) return &MI; } + return nullptr; } -int ReachingDefAnalysis::getClearance(MachineInstr *MI, MCPhysReg PhysReg) { +int +ReachingDefAnalysis::getClearance(MachineInstr *MI, MCPhysReg PhysReg) const { 
assert(InstIds.count(MI) && "Unexpected machine instuction."); - return InstIds[MI] - getReachingDef(MI, PhysReg); + return InstIds.lookup(MI) - getReachingDef(MI, PhysReg); +} + +bool +ReachingDefAnalysis::hasLocalDefBefore(MachineInstr *MI, int PhysReg) const { + return getReachingDef(MI, PhysReg) >= 0; } void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def, int PhysReg, - SmallVectorImpl<MachineInstr*> &Uses) { + InstSet &Uses) const { MachineBasicBlock *MBB = Def->getParent(); MachineBasicBlock::iterator MI = MachineBasicBlock::iterator(Def); while (++MI != MBB->end()) { + if (MI->isDebugInstr()) + continue; + // If/when we find a new reaching def, we know that there's no more uses // of 'Def'. - if (getReachingMIDef(&*MI, PhysReg) != Def) + if (getReachingLocalMIDef(&*MI, PhysReg) != Def) return; for (auto &MO : MI->operands()) { - if (!MO.isReg() || !MO.isUse() || MO.getReg() != PhysReg) + if (!isValidRegUseOf(MO, PhysReg)) continue; - Uses.push_back(&*MI); + Uses.insert(&*MI); if (MO.isKill()) return; } } } -unsigned ReachingDefAnalysis::getNumUses(MachineInstr *Def, int PhysReg) { - SmallVector<MachineInstr*, 4> Uses; - getReachingLocalUses(Def, PhysReg, Uses); - return Uses.size(); +bool +ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB, int PhysReg, + InstSet &Uses) const { + for (auto &MI : *MBB) { + if (MI.isDebugInstr()) + continue; + for (auto &MO : MI.operands()) { + if (!isValidRegUseOf(MO, PhysReg)) + continue; + if (getReachingDef(&MI, PhysReg) >= 0) + return false; + Uses.insert(&MI); + } + } + return isReachingDefLiveOut(&MBB->back(), PhysReg); +} + +void +ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, int PhysReg, + InstSet &Uses) const { + MachineBasicBlock *MBB = MI->getParent(); + + // Collect the uses that each def touches within the block. + getReachingLocalUses(MI, PhysReg, Uses); + + // Handle live-out values. + if (auto *LiveOut = getLocalLiveOutMIDef(MI->getParent(), PhysReg)) { + if (LiveOut != MI) + return; + + SmallVector<MachineBasicBlock*, 4> ToVisit; + ToVisit.insert(ToVisit.begin(), MBB->successors().begin(), + MBB->successors().end()); + SmallPtrSet<MachineBasicBlock*, 4>Visited; + while (!ToVisit.empty()) { + MachineBasicBlock *MBB = ToVisit.back(); + ToVisit.pop_back(); + if (Visited.count(MBB) || !MBB->isLiveIn(PhysReg)) + continue; + if (getLiveInUses(MBB, PhysReg, Uses)) + ToVisit.insert(ToVisit.end(), MBB->successors().begin(), + MBB->successors().end()); + Visited.insert(MBB); + } + } +} + +void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, int PhysReg, + InstSet &Defs) const { + SmallPtrSet<MachineBasicBlock*, 2> VisitedBBs; + getLiveOuts(MBB, PhysReg, Defs, VisitedBBs); +} + +void +ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, int PhysReg, + InstSet &Defs, BlockSet &VisitedBBs) const { + if (VisitedBBs.count(MBB)) + return; + + VisitedBBs.insert(MBB); + LivePhysRegs LiveRegs(*TRI); + LiveRegs.addLiveOuts(*MBB); + if (!LiveRegs.contains(PhysReg)) + return; + + if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg)) + Defs.insert(Def); + else + for (auto *Pred : MBB->predecessors()) + getLiveOuts(Pred, PhysReg, Defs, VisitedBBs); +} + +MachineInstr *ReachingDefAnalysis::getUniqueReachingMIDef(MachineInstr *MI, + int PhysReg) const { + // If there's a local def before MI, return it. 
+ MachineInstr *LocalDef = getReachingLocalMIDef(MI, PhysReg); + if (LocalDef && InstIds.lookup(LocalDef) < InstIds.lookup(MI)) + return LocalDef; + + SmallPtrSet<MachineBasicBlock*, 4> VisitedBBs; + SmallPtrSet<MachineInstr*, 2> Incoming; + for (auto *Pred : MI->getParent()->predecessors()) + getLiveOuts(Pred, PhysReg, Incoming, VisitedBBs); + + // If we have a local def and an incoming instruction, then there's not a + // unique instruction def. + if (!Incoming.empty() && LocalDef) + return nullptr; + else if (Incoming.size() == 1) + return *Incoming.begin(); + else + return LocalDef; } -bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) { +MachineInstr *ReachingDefAnalysis::getMIOperand(MachineInstr *MI, + unsigned Idx) const { + assert(MI->getOperand(Idx).isReg() && "Expected register operand"); + return getUniqueReachingMIDef(MI, MI->getOperand(Idx).getReg()); +} + +MachineInstr *ReachingDefAnalysis::getMIOperand(MachineInstr *MI, + MachineOperand &MO) const { + assert(MO.isReg() && "Expected register operand"); + return getUniqueReachingMIDef(MI, MO.getReg()); +} + +bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) const { MachineBasicBlock *MBB = MI->getParent(); LivePhysRegs LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); @@ -265,12 +462,25 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) { for (auto Last = MBB->rbegin(), End = MBB->rend(); Last != End; ++Last) { LiveRegs.stepBackward(*Last); if (LiveRegs.contains(PhysReg)) - return InstIds[&*Last] > InstIds[MI]; + return InstIds.lookup(&*Last) > InstIds.lookup(MI); } return false; } -bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, int PhysReg) { +bool ReachingDefAnalysis::isRegDefinedAfter(MachineInstr *MI, + int PhysReg) const { + MachineBasicBlock *MBB = MI->getParent(); + if (getReachingDef(MI, PhysReg) != getReachingDef(&MBB->back(), PhysReg)) + return true; + + if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg)) + return Def == getReachingLocalMIDef(MI, PhysReg); + + return false; +} + +bool +ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, int PhysReg) const { MachineBasicBlock *MBB = MI->getParent(); LivePhysRegs LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); @@ -284,14 +494,14 @@ bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, int PhysReg) { // Finally check that the last instruction doesn't redefine the register. for (auto &MO : Last->operands()) - if (MO.isReg() && MO.isDef() && MO.getReg() == PhysReg) + if (isValidRegDefOf(MO, PhysReg)) return false; return true; } MachineInstr* ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB, - int PhysReg) { + int PhysReg) const { LivePhysRegs LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); if (!LiveRegs.contains(PhysReg)) @@ -300,33 +510,168 @@ MachineInstr* ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB, MachineInstr *Last = &MBB->back(); int Def = getReachingDef(Last, PhysReg); for (auto &MO : Last->operands()) - if (MO.isReg() && MO.isDef() && MO.getReg() == PhysReg) + if (isValidRegDefOf(MO, PhysReg)) return Last; return Def < 0 ? 
nullptr : getInstFromId(MBB, Def); } -MachineInstr *ReachingDefAnalysis::getInstWithUseBefore(MachineInstr *MI, - int PhysReg) { - auto I = MachineBasicBlock::reverse_iterator(MI); - auto E = MI->getParent()->rend(); - I++; +static bool mayHaveSideEffects(MachineInstr &MI) { + return MI.mayLoadOrStore() || MI.mayRaiseFPException() || + MI.hasUnmodeledSideEffects() || MI.isTerminator() || + MI.isCall() || MI.isBarrier() || MI.isBranch() || MI.isReturn(); +} + +// Can we safely move 'From' to just before 'To'? To satisfy this, 'From' must +// not define a register that is used by any instructions, after and including, +// 'To'. These instructions also must not redefine any of Froms operands. +template<typename Iterator> +bool ReachingDefAnalysis::isSafeToMove(MachineInstr *From, + MachineInstr *To) const { + if (From->getParent() != To->getParent()) + return false; + + SmallSet<int, 2> Defs; + // First check that From would compute the same value if moved. + for (auto &MO : From->operands()) { + if (!isValidReg(MO)) + continue; + if (MO.isDef()) + Defs.insert(MO.getReg()); + else if (!hasSameReachingDef(From, To, MO.getReg())) + return false; + } - for ( ; I != E; I++) + // Now walk checking that the rest of the instructions will compute the same + // value and that we're not overwriting anything. Don't move the instruction + // past any memory, control-flow or other ambiguous instructions. + for (auto I = ++Iterator(From), E = Iterator(To); I != E; ++I) { + if (mayHaveSideEffects(*I)) + return false; for (auto &MO : I->operands()) - if (MO.isReg() && MO.isUse() && MO.getReg() == PhysReg) - return &*I; + if (MO.isReg() && MO.getReg() && Defs.count(MO.getReg())) + return false; + } + return true; +} - return nullptr; +bool ReachingDefAnalysis::isSafeToMoveForwards(MachineInstr *From, + MachineInstr *To) const { + return isSafeToMove<MachineBasicBlock::reverse_iterator>(From, To); +} + +bool ReachingDefAnalysis::isSafeToMoveBackwards(MachineInstr *From, + MachineInstr *To) const { + return isSafeToMove<MachineBasicBlock::iterator>(From, To); +} + +bool ReachingDefAnalysis::isSafeToRemove(MachineInstr *MI, + InstSet &ToRemove) const { + SmallPtrSet<MachineInstr*, 1> Ignore; + SmallPtrSet<MachineInstr*, 2> Visited; + return isSafeToRemove(MI, Visited, ToRemove, Ignore); +} + +bool +ReachingDefAnalysis::isSafeToRemove(MachineInstr *MI, InstSet &ToRemove, + InstSet &Ignore) const { + SmallPtrSet<MachineInstr*, 2> Visited; + return isSafeToRemove(MI, Visited, ToRemove, Ignore); +} + +bool +ReachingDefAnalysis::isSafeToRemove(MachineInstr *MI, InstSet &Visited, + InstSet &ToRemove, InstSet &Ignore) const { + if (Visited.count(MI) || Ignore.count(MI)) + return true; + else if (mayHaveSideEffects(*MI)) { + // Unless told to ignore the instruction, don't remove anything which has + // side effects. 
+ return false; + } + + Visited.insert(MI); + for (auto &MO : MI->operands()) { + if (!isValidRegDef(MO)) + continue; + + SmallPtrSet<MachineInstr*, 4> Uses; + getGlobalUses(MI, MO.getReg(), Uses); + + for (auto I : Uses) { + if (Ignore.count(I) || ToRemove.count(I)) + continue; + if (!isSafeToRemove(I, Visited, ToRemove, Ignore)) + return false; + } + } + ToRemove.insert(MI); + return true; +} + +void ReachingDefAnalysis::collectKilledOperands(MachineInstr *MI, + InstSet &Dead) const { + Dead.insert(MI); + auto IsDead = [this, &Dead](MachineInstr *Def, int PhysReg) { + unsigned LiveDefs = 0; + for (auto &MO : Def->operands()) { + if (!isValidRegDef(MO)) + continue; + if (!MO.isDead()) + ++LiveDefs; + } + + if (LiveDefs > 1) + return false; + + SmallPtrSet<MachineInstr*, 4> Uses; + getGlobalUses(Def, PhysReg, Uses); + for (auto *Use : Uses) + if (!Dead.count(Use)) + return false; + return true; + }; + + for (auto &MO : MI->operands()) { + if (!isValidRegUse(MO)) + continue; + if (MachineInstr *Def = getMIOperand(MI, MO)) + if (IsDead(Def, MO.getReg())) + collectKilledOperands(Def, Dead); + } } -void ReachingDefAnalysis::getAllInstWithUseBefore(MachineInstr *MI, - int PhysReg, SmallVectorImpl<MachineInstr*> &Uses) { - MachineInstr *Use = nullptr; - MachineInstr *Pos = MI; +bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, + int PhysReg) const { + SmallPtrSet<MachineInstr*, 1> Ignore; + return isSafeToDefRegAt(MI, PhysReg, Ignore); +} - while ((Use = getInstWithUseBefore(Pos, PhysReg))) { - Uses.push_back(Use); - Pos = Use; +bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, int PhysReg, + InstSet &Ignore) const { + // Check for any uses of the register after MI. + if (isRegUsedAfter(MI, PhysReg)) { + if (auto *Def = getReachingLocalMIDef(MI, PhysReg)) { + SmallPtrSet<MachineInstr*, 2> Uses; + getReachingLocalUses(Def, PhysReg, Uses); + for (auto *Use : Uses) + if (!Ignore.count(Use)) + return false; + } else + return false; } + + MachineBasicBlock *MBB = MI->getParent(); + // Check for any defs after MI. 
+ if (isRegDefinedAfter(MI, PhysReg)) { + auto I = MachineBasicBlock::iterator(MI); + for (auto E = MBB->end(); I != E; ++I) { + if (Ignore.count(&*I)) + continue; + for (auto &MO : I->operands()) + if (isValidRegDefOf(MO, PhysReg)) + return false; + } + } + return true; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp index 156daaa03bb5..d22826853672 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "RegAllocBase.h" -#include "Spiller.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveInterval.h" @@ -21,6 +20,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Spiller.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Pass.h" @@ -107,7 +107,7 @@ void RegAllocBase::allocatePhysRegs() { << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg)) << ':' << *VirtReg << " w=" << VirtReg->weight << '\n'); - using VirtRegVec = SmallVector<unsigned, 4>; + using VirtRegVec = SmallVector<Register, 4>; VirtRegVec SplitVRegs; unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h index 6a7cc5ba4308..8e931eaae99a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h @@ -101,8 +101,8 @@ protected: // Each call must guarantee forward progess by returning an available PhysReg // or new set of split live virtual registers. It is up to the splitter to // converge quickly toward fully spilled live ranges. - virtual unsigned selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl<unsigned> &splitLVRs) = 0; + virtual Register selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<Register> &splitLVRs) = 0; // Use this group name for NamedRegionTimer. static const char TimerGroupName[]; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp index 46f6946f7003..5009bcc0a397 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -14,7 +14,6 @@ #include "AllocationOrder.h" #include "LiveDebugVariables.h" #include "RegAllocBase.h" -#include "Spiller.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervals.h" @@ -28,9 +27,10 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/Spiller.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" -#include "llvm/PassAnalysisSupport.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <cstdlib> @@ -100,8 +100,8 @@ public: return LI; } - unsigned selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl<unsigned> &SplitVRegs) override; + Register selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<Register> &SplitVRegs) override; /// Perform register allocation. 
bool runOnMachineFunction(MachineFunction &mf) override; @@ -114,8 +114,8 @@ public: // Helper for spilling all live virtual registers currently unified under preg // that interfere with the most recently queried lvr. Return true if spilling // was successful, and append any new spilled/split intervals to splitLVRs. - bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, - SmallVectorImpl<unsigned> &SplitVRegs); + bool spillInterferences(LiveInterval &VirtReg, Register PhysReg, + SmallVectorImpl<Register> &SplitVRegs); static char ID; }; @@ -201,8 +201,8 @@ void RABasic::releaseMemory() { // Spill or split all live virtual registers currently unified under PhysReg // that interfere with VirtReg. The newly spilled or split live intervals are // returned by appending them to SplitVRegs. -bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, - SmallVectorImpl<unsigned> &SplitVRegs) { +bool RABasic::spillInterferences(LiveInterval &VirtReg, Register PhysReg, + SmallVectorImpl<Register> &SplitVRegs) { // Record each interference and determine if all are spillable before mutating // either the union or live intervals. SmallVector<LiveInterval*, 8> Intfs; @@ -253,14 +253,14 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, // |vregs| * |machineregs|. And since the number of interference tests is // minimal, there is no value in caching them outside the scope of // selectOrSplit(). -unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl<unsigned> &SplitVRegs) { +Register RABasic::selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<Register> &SplitVRegs) { // Populate a list of physical register spill candidates. - SmallVector<unsigned, 8> PhysRegSpillCands; + SmallVector<Register, 8> PhysRegSpillCands; // Check for an available register in this class. AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); - while (unsigned PhysReg = Order.next()) { + while (Register PhysReg = Order.next()) { // Check for interference in PhysReg switch (Matrix->checkInterference(VirtReg, PhysReg)) { case LiveRegMatrix::IK_Free: @@ -279,7 +279,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, } // Try to spill another interfering reg with less spill weight. - for (SmallVectorImpl<unsigned>::iterator PhysRegI = PhysRegSpillCands.begin(), + for (SmallVectorImpl<Register>::iterator PhysRegI = PhysRegSpillCands.begin(), PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) { if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) continue; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp index 89b5bcebd61c..5396f9f3a143 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp @@ -106,13 +106,8 @@ namespace { /// that it is alive across blocks. BitVector MayLiveAcrossBlocks; - /// State of a physical register. - enum RegState { - /// A disabled register is not available for allocation, but an alias may - /// be in use. A register can only be moved out of the disabled state if - /// all aliases are disabled. - regDisabled, - + /// State of a register unit. + enum RegUnitState { /// A free register is not currently in use and can be allocated /// immediately without checking aliases. regFree, @@ -126,8 +121,8 @@ namespace { /// register. In that case, LiveVirtRegs contains the inverse mapping. 
}; - /// Maps each physical register to a RegState enum or a virtual register. - std::vector<unsigned> PhysRegState; + /// Maps each physical register to a RegUnitState enum or virtual register. + std::vector<unsigned> RegUnitStates; SmallVector<Register, 16> VirtDead; SmallVector<MachineInstr *, 32> Coalesced; @@ -189,6 +184,10 @@ namespace { bool isLastUseOfLocalReg(const MachineOperand &MO) const; void addKillFlag(const LiveReg &LRI); +#ifndef NDEBUG + bool verifyRegStateMapping(const LiveReg &LR) const; +#endif + void killVirtReg(LiveReg &LR); void killVirtReg(Register VirtReg); void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR); @@ -196,7 +195,7 @@ namespace { void usePhysReg(MachineOperand &MO); void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg, - RegState NewState); + unsigned NewState); unsigned calcSpillCost(MCPhysReg PhysReg) const; void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg); @@ -229,7 +228,7 @@ namespace { bool mayLiveOut(Register VirtReg); bool mayLiveIn(Register VirtReg); - void dumpState(); + void dumpState() const; }; } // end anonymous namespace @@ -240,7 +239,8 @@ INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false, false) void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) { - PhysRegState[PhysReg] = NewState; + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) + RegUnitStates[*UI] = NewState; } /// This allocates space for the specified virtual register to be held on the @@ -255,8 +255,8 @@ int RegAllocFast::getStackSpaceFor(Register VirtReg) { // Allocate a new stack object for this spill location... const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); unsigned Size = TRI->getSpillSize(RC); - unsigned Align = TRI->getSpillAlignment(RC); - int FrameIdx = MFI->CreateSpillStackObject(Size, Align); + Align Alignment = TRI->getSpillAlign(RC); + int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment); // Assign the slot. StackSlotForVirtReg[VirtReg] = FrameIdx; @@ -384,12 +384,23 @@ void RegAllocFast::addKillFlag(const LiveReg &LR) { } } +#ifndef NDEBUG +bool RegAllocFast::verifyRegStateMapping(const LiveReg &LR) const { + for (MCRegUnitIterator UI(LR.PhysReg, TRI); UI.isValid(); ++UI) { + if (RegUnitStates[*UI] != LR.VirtReg) + return false; + } + + return true; +} +#endif + /// Mark virtreg as no longer available. void RegAllocFast::killVirtReg(LiveReg &LR) { + assert(verifyRegStateMapping(LR) && "Broken RegState mapping"); addKillFlag(LR); - assert(PhysRegState[LR.PhysReg] == LR.VirtReg && - "Broken RegState mapping"); - setPhysRegState(LR.PhysReg, regFree); + MCPhysReg PhysReg = LR.PhysReg; + setPhysRegState(PhysReg, regFree); LR.PhysReg = 0; } @@ -416,7 +427,9 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, /// Do the actual work of spilling. 
void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) { - assert(PhysRegState[LR.PhysReg] == LR.VirtReg && "Broken RegState mapping"); + assert(verifyRegStateMapping(LR) && "Broken RegState mapping"); + + MCPhysReg PhysReg = LR.PhysReg; if (LR.Dirty) { // If this physreg is used by the instruction, we want to kill it on the @@ -424,7 +437,7 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) { bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI; LR.Dirty = false; - spill(MI, LR.VirtReg, LR.PhysReg, SpillKill); + spill(MI, LR.VirtReg, PhysReg, SpillKill); if (SpillKill) LR.LastUse = nullptr; // Don't kill register again @@ -460,53 +473,16 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) { assert(PhysReg.isPhysical() && "Bad usePhysReg operand"); markRegUsedInInstr(PhysReg); - switch (PhysRegState[PhysReg]) { - case regDisabled: - break; - case regReserved: - PhysRegState[PhysReg] = regFree; - LLVM_FALLTHROUGH; - case regFree: - MO.setIsKill(); - return; - default: - // The physreg was allocated to a virtual register. That means the value we - // wanted has been clobbered. - llvm_unreachable("Instruction uses an allocated register"); - } - // Maybe a superregister is reserved? - for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { - MCPhysReg Alias = *AI; - switch (PhysRegState[Alias]) { - case regDisabled: - break; + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { + switch (RegUnitStates[*UI]) { case regReserved: - // Either PhysReg is a subregister of Alias and we mark the - // whole register as free, or PhysReg is the superregister of - // Alias and we mark all the aliases as disabled before freeing - // PhysReg. - // In the latter case, since PhysReg was disabled, this means that - // its value is defined only by physical sub-registers. This check - // is performed by the assert of the default case in this loop. - // Note: The value of the superregister may only be partial - // defined, that is why regDisabled is a valid state for aliases. - assert((TRI->isSuperRegister(PhysReg, Alias) || - TRI->isSuperRegister(Alias, PhysReg)) && - "Instruction is not using a subregister of a reserved register"); + RegUnitStates[*UI] = regFree; LLVM_FALLTHROUGH; case regFree: - if (TRI->isSuperRegister(PhysReg, Alias)) { - // Leave the superregister in the working set. - setPhysRegState(Alias, regFree); - MO.getParent()->addRegisterKilled(Alias, TRI, true); - return; - } - // Some other alias was in the working set - clear it. - setPhysRegState(Alias, regDisabled); break; default: - llvm_unreachable("Instruction uses an alias of an allocated register"); + llvm_unreachable("Unexpected reg unit state"); } } @@ -519,38 +495,20 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) { /// similar to defineVirtReg except the physreg is reserved instead of /// allocated. void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI, - MCPhysReg PhysReg, RegState NewState) { - markRegUsedInInstr(PhysReg); - switch (Register VirtReg = PhysRegState[PhysReg]) { - case regDisabled: - break; - default: - spillVirtReg(MI, VirtReg); - LLVM_FALLTHROUGH; - case regFree: - case regReserved: - setPhysRegState(PhysReg, NewState); - return; - } - - // This is a disabled register, disable all aliases. 
- setPhysRegState(PhysReg, NewState); - for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { - MCPhysReg Alias = *AI; - switch (Register VirtReg = PhysRegState[Alias]) { - case regDisabled: - break; + MCPhysReg PhysReg, unsigned NewState) { + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { + switch (unsigned VirtReg = RegUnitStates[*UI]) { default: spillVirtReg(MI, VirtReg); - LLVM_FALLTHROUGH; + break; case regFree: case regReserved: - setPhysRegState(Alias, regDisabled); - if (TRI->isSuperRegister(PhysReg, Alias)) - return; break; } } + + markRegUsedInInstr(PhysReg); + setPhysRegState(PhysReg, NewState); } /// Return the cost of spilling clearing out PhysReg and aliases so it is free @@ -563,46 +521,24 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { << " is already used in instr.\n"); return spillImpossible; } - switch (Register VirtReg = PhysRegState[PhysReg]) { - case regDisabled: - break; - case regFree: - return 0; - case regReserved: - LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding " - << printReg(PhysReg, TRI) << " is reserved already.\n"); - return spillImpossible; - default: { - LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); - assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && - "Missing VirtReg entry"); - return LRI->Dirty ? spillDirty : spillClean; - } - } - // This is a disabled register, add up cost of aliases. - LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is disabled.\n"); - unsigned Cost = 0; - for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { - MCPhysReg Alias = *AI; - switch (Register VirtReg = PhysRegState[Alias]) { - case regDisabled: - break; + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { + switch (unsigned VirtReg = RegUnitStates[*UI]) { case regFree: - ++Cost; break; case regReserved: + LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding " + << printReg(PhysReg, TRI) << " is reserved already.\n"); return spillImpossible; default: { LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && "Missing VirtReg entry"); - Cost += LRI->Dirty ? spillDirty : spillClean; - break; + return LRI->Dirty ? spillDirty : spillClean; } } } - return Cost; + return 0; } /// This method updates local state so that we know that PhysReg is the @@ -909,9 +845,17 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, if (!Reg || !Reg.isPhysical()) continue; markRegUsedInInstr(Reg); - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - if (ThroughRegs.count(PhysRegState[*AI])) - definePhysReg(MI, *AI, regFree); + + for (MCRegUnitIterator UI(Reg, TRI); UI.isValid(); ++UI) { + if (!ThroughRegs.count(RegUnitStates[*UI])) + continue; + + // Need to spill any aliasing registers. 
+ for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) { + for (MCSuperRegIterator SI(*RI, TRI, true); SI.isValid(); ++SI) { + definePhysReg(MI, *SI, regFree); + } + } } } @@ -975,37 +919,40 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, } #ifndef NDEBUG -void RegAllocFast::dumpState() { - for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) { - if (PhysRegState[Reg] == regDisabled) continue; - dbgs() << " " << printReg(Reg, TRI); - switch(PhysRegState[Reg]) { + +void RegAllocFast::dumpState() const { + for (unsigned Unit = 1, UnitE = TRI->getNumRegUnits(); Unit != UnitE; + ++Unit) { + switch (unsigned VirtReg = RegUnitStates[Unit]) { case regFree: break; case regReserved: - dbgs() << "*"; + dbgs() << " " << printRegUnit(Unit, TRI) << "[P]"; break; default: { - dbgs() << '=' << printReg(PhysRegState[Reg]); - LiveRegMap::iterator LRI = findLiveVirtReg(PhysRegState[Reg]); - assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && - "Missing VirtReg entry"); - if (LRI->Dirty) - dbgs() << "*"; - assert(LRI->PhysReg == Reg && "Bad inverse map"); + dbgs() << ' ' << printRegUnit(Unit, TRI) << '=' << printReg(VirtReg); + LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); + assert(I != LiveVirtRegs.end() && "have LiveVirtRegs entry"); + if (I->Dirty) + dbgs() << "[D]"; + assert(TRI->hasRegUnit(I->PhysReg, Unit) && "inverse mapping present"); break; } } } dbgs() << '\n'; // Check that LiveVirtRegs is the inverse. - for (LiveRegMap::iterator i = LiveVirtRegs.begin(), - e = LiveVirtRegs.end(); i != e; ++i) { - if (!i->PhysReg) - continue; - assert(i->VirtReg.isVirtual() && "Bad map key"); - assert(Register::isPhysicalRegister(i->PhysReg) && "Bad map value"); - assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map"); + for (const LiveReg &LR : LiveVirtRegs) { + Register VirtReg = LR.VirtReg; + assert(VirtReg.isVirtual() && "Bad map key"); + MCPhysReg PhysReg = LR.PhysReg; + if (PhysReg != 0) { + assert(Register::isPhysicalRegister(PhysReg) && + "mapped to physreg"); + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { + assert(RegUnitStates[*UI] == VirtReg && "inverse map valid"); + } + } } } #endif @@ -1209,7 +1156,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { } void RegAllocFast::handleDebugValue(MachineInstr &MI) { - MachineOperand &MO = MI.getOperand(0); + MachineOperand &MO = MI.getDebugOperand(0); // Ignore DBG_VALUEs that aren't based on virtual registers. These are // mostly constants and frame indices. 
@@ -1247,7 +1194,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { this->MBB = &MBB; LLVM_DEBUG(dbgs() << "\nAllocating " << MBB); - PhysRegState.assign(TRI->getNumRegs(), regDisabled); + RegUnitStates.assign(TRI->getNumRegUnits(), regFree); assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); MachineBasicBlock::iterator MII = MBB.begin(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp index 27de7fe45887..41cf00261265 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -16,7 +16,6 @@ #include "LiveDebugVariables.h" #include "RegAllocBase.h" #include "SpillPlacement.h" -#include "Spiller.h" #include "SplitKit.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" @@ -53,6 +52,7 @@ #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/Spiller.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -124,12 +124,6 @@ static cl::opt<bool> EnableDeferredSpilling( "variable because of other evicted variables."), cl::init(false)); -static cl::opt<unsigned> - HugeSizeForSplit("huge-size-for-split", cl::Hidden, - cl::desc("A threshold of live range size which may cause " - "high compile time cost in global splitting."), - cl::init(5000)); - // FIXME: Find a good default for this flag and remove the flag. static cl::opt<unsigned> CSRFirstTimeCost("regalloc-csr-first-time-cost", @@ -423,7 +417,7 @@ public: Spiller &spiller() override { return *SpillerInstance; } void enqueue(LiveInterval *LI) override; LiveInterval *dequeue() override; - unsigned selectOrSplit(LiveInterval&, SmallVectorImpl<unsigned>&) override; + Register selectOrSplit(LiveInterval&, SmallVectorImpl<Register>&) override; void aboutToRemoveInterval(LiveInterval &) override; /// Perform register allocation. 
@@ -437,7 +431,7 @@ public: static char ID; private: - unsigned selectOrSplitImpl(LiveInterval &, SmallVectorImpl<unsigned> &, + Register selectOrSplitImpl(LiveInterval &, SmallVectorImpl<Register> &, SmallVirtRegSet &, unsigned = 0); bool LRE_CanEraseVirtReg(unsigned) override; @@ -462,31 +456,30 @@ private: bool calcCompactRegion(GlobalSplitCandidate&); void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>); void calcGapWeights(unsigned, SmallVectorImpl<float>&); - unsigned canReassign(LiveInterval &VirtReg, unsigned PrevReg); + Register canReassign(LiveInterval &VirtReg, Register PrevReg); bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); - bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&, + bool canEvictInterference(LiveInterval&, Register, bool, EvictionCost&, const SmallVirtRegSet&); - bool canEvictInterferenceInRange(LiveInterval &VirtReg, unsigned PhysReg, + bool canEvictInterferenceInRange(LiveInterval &VirtReg, Register oPhysReg, SlotIndex Start, SlotIndex End, EvictionCost &MaxCost); unsigned getCheapestEvicteeWeight(const AllocationOrder &Order, LiveInterval &VirtReg, SlotIndex Start, SlotIndex End, float *BestEvictWeight); - void evictInterference(LiveInterval&, unsigned, - SmallVectorImpl<unsigned>&); + void evictInterference(LiveInterval&, Register, + SmallVectorImpl<Register>&); bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters); - unsigned tryAssign(LiveInterval&, AllocationOrder&, - SmallVectorImpl<unsigned>&, + Register tryAssign(LiveInterval&, AllocationOrder&, + SmallVectorImpl<Register>&, const SmallVirtRegSet&); unsigned tryEvict(LiveInterval&, AllocationOrder&, - SmallVectorImpl<unsigned>&, unsigned, + SmallVectorImpl<Register>&, unsigned, const SmallVirtRegSet&); unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl<unsigned>&); - unsigned isSplitBenefitWorthCost(LiveInterval &VirtReg); + SmallVectorImpl<Register>&); /// Calculate cost of region splitting. unsigned calculateRegionSplitCost(LiveInterval &VirtReg, AllocationOrder &Order, @@ -496,26 +489,26 @@ private: /// Perform region splitting. unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, bool HasCompact, - SmallVectorImpl<unsigned> &NewVRegs); + SmallVectorImpl<Register> &NewVRegs); /// Check other options before using a callee-saved register for the first /// time. 
unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, - unsigned PhysReg, unsigned &CostPerUseLimit, - SmallVectorImpl<unsigned> &NewVRegs); + Register PhysReg, unsigned &CostPerUseLimit, + SmallVectorImpl<Register> &NewVRegs); void initializeCSRCost(); unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl<unsigned>&); + SmallVectorImpl<Register>&); unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl<unsigned>&); + SmallVectorImpl<Register>&); unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl<unsigned>&); + SmallVectorImpl<Register>&); unsigned trySplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl<unsigned>&, + SmallVectorImpl<Register>&, const SmallVirtRegSet&); unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &, - SmallVectorImpl<unsigned> &, + SmallVectorImpl<Register> &, SmallVirtRegSet &, unsigned); - bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &, + bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<Register> &, SmallVirtRegSet &, unsigned); void tryHintRecoloring(LiveInterval &); void tryHintsRecoloring(); @@ -525,12 +518,12 @@ private: /// The frequency of the copy. BlockFrequency Freq; /// The virtual register or physical register. - unsigned Reg; + Register Reg; /// Its currently assigned register. /// In case of a physical register Reg == PhysReg. - unsigned PhysReg; + MCRegister PhysReg; - HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg) + HintInfo(BlockFrequency Freq, Register Reg, MCRegister PhysReg) : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {} }; using HintsInfo = SmallVector<HintInfo, 4>; @@ -538,7 +531,7 @@ private: BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned); void collectHintInfo(unsigned, HintsInfo &); - bool isUnusedCalleeSavedReg(unsigned PhysReg) const; + bool isUnusedCalleeSavedReg(MCRegister PhysReg) const; /// Compute and report the number of spills and reloads for a loop. void reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads, @@ -759,12 +752,12 @@ LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { //===----------------------------------------------------------------------===// /// tryAssign - Try to assign VirtReg to an available register. -unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, +Register RAGreedy::tryAssign(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<unsigned> &NewVRegs, + SmallVectorImpl<Register> &NewVRegs, const SmallVirtRegSet &FixedRegisters) { Order.rewind(); - unsigned PhysReg; + Register PhysReg; while ((PhysReg = Order.next())) if (!Matrix->checkInterference(VirtReg, PhysReg)) break; @@ -775,7 +768,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, // If we missed a simple hint, try to cheaply evict interference from the // preferred register. - if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg)) + if (Register Hint = MRI->getSimpleHint(VirtReg.reg)) if (Order.isHint(Hint)) { LLVM_DEBUG(dbgs() << "missed hint " << printReg(Hint, TRI) << '\n'); EvictionCost MaxCost; @@ -798,7 +791,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost " << Cost << '\n'); - unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters); + Register CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters); return CheapReg ? 
CheapReg : PhysReg; } @@ -806,9 +799,9 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, // Interference eviction //===----------------------------------------------------------------------===// -unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) { +Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) { AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); - unsigned PhysReg; + Register PhysReg; while ((PhysReg = Order.next())) { if (PhysReg == PrevReg) continue; @@ -869,7 +862,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, /// @param MaxCost Only look for cheaper candidates and update with new cost /// when returning true. /// @returns True when interference can be evicted cheaper than MaxCost. -bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, +bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg, bool IsHint, EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) { // It is only possible to evict virtual register interference. @@ -967,7 +960,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, /// when returning true. /// \return True when interference can be evicted cheaper than MaxCost. bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg, - unsigned PhysReg, SlotIndex Start, + Register PhysReg, SlotIndex Start, SlotIndex End, EvictionCost &MaxCost) { EvictionCost Cost; @@ -1045,8 +1038,8 @@ unsigned RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order, /// evictInterference - Evict any interferring registers that prevent VirtReg /// from being assigned to Physreg. This assumes that canEvictInterference /// returned true. -void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, - SmallVectorImpl<unsigned> &NewVRegs) { +void RAGreedy::evictInterference(LiveInterval &VirtReg, Register PhysReg, + SmallVectorImpl<Register> &NewVRegs) { // Make sure that VirtReg has a cascade number, and assign that cascade // number to every evicted register. These live ranges than then only be // evicted by a newer cascade, preventing infinite loops. @@ -1091,9 +1084,9 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, /// Returns true if the given \p PhysReg is a callee saved register and has not /// been used for allocation yet. -bool RAGreedy::isUnusedCalleeSavedReg(unsigned PhysReg) const { - unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg); - if (CSR == 0) +bool RAGreedy::isUnusedCalleeSavedReg(MCRegister PhysReg) const { + MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg); + if (!CSR) return false; return !Matrix->isPhysRegUsed(PhysReg); @@ -1105,7 +1098,7 @@ bool RAGreedy::isUnusedCalleeSavedReg(unsigned PhysReg) const { /// @return Physreg to assign VirtReg, or 0. unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<unsigned> &NewVRegs, + SmallVectorImpl<Register> &NewVRegs, unsigned CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) { NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription, @@ -1142,7 +1135,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, } Order.rewind(); - while (unsigned PhysReg = Order.next(OrderLimit)) { + while (MCRegister PhysReg = Order.next(OrderLimit)) { if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) continue; // The first use of a callee-saved register in a function has cost 1. 
@@ -1815,20 +1808,9 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, MF->verify(this, "After splitting live range around region"); } -// Global split has high compile time cost especially for large live range. -// Return false for the case here where the potential benefit will never -// worth the cost. -unsigned RAGreedy::isSplitBenefitWorthCost(LiveInterval &VirtReg) { - MachineInstr *MI = MRI->getUniqueVRegDef(VirtReg.reg); - if (MI && TII->isTriviallyReMaterializable(*MI, AA) && - VirtReg.size() > HugeSizeForSplit) - return false; - return true; -} - unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<unsigned> &NewVRegs) { - if (!isSplitBenefitWorthCost(VirtReg)) + SmallVectorImpl<Register> &NewVRegs) { + if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg)) return 0; unsigned NumCands = 0; BlockFrequency SpillCost = calcSpillCost(); @@ -1971,7 +1953,7 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, bool HasCompact, - SmallVectorImpl<unsigned> &NewVRegs) { + SmallVectorImpl<Register> &NewVRegs) { SmallVector<unsigned, 8> UsedCands; // Prepare split editor. LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); @@ -2017,9 +1999,9 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, /// creates a lot of local live ranges, that will be split by tryLocalSplit if /// they don't allocate. unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<unsigned> &NewVRegs) { + SmallVectorImpl<Register> &NewVRegs) { assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); - unsigned Reg = VirtReg.reg; + Register Reg = VirtReg.reg; bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); SE->reset(LREdit, SplitSpillMode); @@ -2084,7 +2066,7 @@ static unsigned getNumAllocatableRegsForConstraints( /// This is similar to spilling to a larger register class. unsigned RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<unsigned> &NewVRegs) { + SmallVectorImpl<Register> &NewVRegs) { const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); // There is no point to this if there are no larger sub-classes. if (!RegClassInfo.isProperSubClass(CurRC)) @@ -2227,7 +2209,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, /// basic block. /// unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<unsigned> &NewVRegs) { + SmallVectorImpl<Register> &NewVRegs) { // TODO: the function currently only handles a single UseBlock; it should be // possible to generalize. if (SA->getUseBlocks().size() != 1) @@ -2458,7 +2440,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// assignable. /// @return Physreg when VirtReg may be assigned and/or new NewVRegs. unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<unsigned>&NewVRegs, + SmallVectorImpl<Register> &NewVRegs, const SmallVirtRegSet &FixedRegisters) { // Ranges must be Split2 or less. 
if (getStage(VirtReg) >= RS_Spill) @@ -2469,7 +2451,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName, TimerGroupDescription, TimePassesIsEnabled); SA->analyze(&VirtReg); - unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs); + Register PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs); if (PhysReg || !NewVRegs.empty()) return PhysReg; return tryInstructionSplit(VirtReg, Order, NewVRegs); @@ -2487,7 +2469,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, if (SA->didRepairRange()) { // VirtReg has changed, so all cached queries are invalid. Matrix->invalidateVirtRegs(); - if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) + if (Register PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) return PhysReg; } @@ -2602,7 +2584,7 @@ RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, /// exists. unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<unsigned> &NewVRegs, + SmallVectorImpl<Register> &NewVRegs, SmallVirtRegSet &FixedRegisters, unsigned Depth) { LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n'); @@ -2623,15 +2605,15 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, SmallLISet RecoloringCandidates; // Record the original mapping virtual register to physical register in case // the recoloring fails. - DenseMap<unsigned, unsigned> VirtRegToPhysReg; + DenseMap<Register, Register> VirtRegToPhysReg; // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in // this recoloring "session". assert(!FixedRegisters.count(VirtReg.reg)); FixedRegisters.insert(VirtReg.reg); - SmallVector<unsigned, 4> CurrentNewVRegs; + SmallVector<Register, 4> CurrentNewVRegs; Order.rewind(); - while (unsigned PhysReg = Order.next()) { + while (Register PhysReg = Order.next()) { LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to " << printReg(PhysReg, TRI) << '\n'); RecoloringCandidates.clear(); @@ -2662,7 +2644,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, for (SmallLISet::iterator It = RecoloringCandidates.begin(), EndIt = RecoloringCandidates.end(); It != EndIt; ++It) { - unsigned ItVirtReg = (*It)->reg; + Register ItVirtReg = (*It)->reg; enqueue(RecoloringQueue, *It); assert(VRM->hasPhys(ItVirtReg) && "Interferences are supposed to be with allocated variables"); @@ -2685,7 +2667,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs, FixedRegisters, Depth)) { // Push the queued vregs into the main queue. - for (unsigned NewVReg : CurrentNewVRegs) + for (Register NewVReg : CurrentNewVRegs) NewVRegs.push_back(NewVReg); // Do not mess up with the global assignment process. // I.e., VirtReg must be unassigned. @@ -2704,7 +2686,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, // don't add it to NewVRegs because its physical register will be restored // below. Other vregs in CurrentNewVRegs are created by calling // selectOrSplit and should be added into NewVRegs. 
- for (SmallVectorImpl<unsigned>::iterator Next = CurrentNewVRegs.begin(), + for (SmallVectorImpl<Register>::iterator Next = CurrentNewVRegs.begin(), End = CurrentNewVRegs.end(); Next != End; ++Next) { if (RecoloringCandidates.count(&LIS->getInterval(*Next))) @@ -2715,10 +2697,10 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, for (SmallLISet::iterator It = RecoloringCandidates.begin(), EndIt = RecoloringCandidates.end(); It != EndIt; ++It) { - unsigned ItVirtReg = (*It)->reg; + Register ItVirtReg = (*It)->reg; if (VRM->hasPhys(ItVirtReg)) Matrix->unassign(**It); - unsigned ItPhysReg = VirtRegToPhysReg[ItVirtReg]; + Register ItPhysReg = VirtRegToPhysReg[ItVirtReg]; Matrix->assign(**It, ItPhysReg); } } @@ -2736,14 +2718,14 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, /// \return true if all virtual registers in RecoloringQueue were successfully /// recolored, false otherwise. bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, - SmallVectorImpl<unsigned> &NewVRegs, + SmallVectorImpl<Register> &NewVRegs, SmallVirtRegSet &FixedRegisters, unsigned Depth) { while (!RecoloringQueue.empty()) { LiveInterval *LI = dequeue(RecoloringQueue); LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n'); - unsigned PhysReg; - PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1); + Register PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, + Depth + 1); // When splitting happens, the live-range may actually be empty. // In that case, this is okay to continue the recoloring even // if we did not find an alternative color for it. Indeed, @@ -2770,12 +2752,12 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, // Main Entry Point //===----------------------------------------------------------------------===// -unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl<unsigned> &NewVRegs) { +Register RAGreedy::selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<Register> &NewVRegs) { CutOffInfo = CO_None; LLVMContext &Ctx = MF->getFunction().getContext(); SmallVirtRegSet FixedRegisters; - unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); + Register Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); if (Reg == ~0U && (CutOffInfo != CO_None)) { uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf); if (CutOffEncountered == CO_Depth) @@ -2802,9 +2784,9 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, /// to use the CSR; otherwise return 0. unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, - unsigned PhysReg, + Register PhysReg, unsigned &CostPerUseLimit, - SmallVectorImpl<unsigned> &NewVRegs) { + SmallVectorImpl<Register> &NewVRegs) { if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) { // We choose spill over using the CSR for the first time if the spill cost // is lower than CSRCost. @@ -3031,8 +3013,8 @@ void RAGreedy::tryHintsRecoloring() { } } -unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, - SmallVectorImpl<unsigned> &NewVRegs, +Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, + SmallVectorImpl<Register> &NewVRegs, SmallVirtRegSet &FixedRegisters, unsigned Depth) { unsigned CostPerUseLimit = ~0u; @@ -3046,7 +3028,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // register. 
if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) { - unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, + Register CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, CostPerUseLimit, NewVRegs); if (CSRReg || !NewVRegs.empty()) // Return now if we decide to use a CSR or create new vregs due to @@ -3064,10 +3046,10 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // queue. The RS_Split ranges already failed to do this, and they should not // get a second chance until they have been split. if (Stage != RS_Split) - if (unsigned PhysReg = + if (Register PhysReg = tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit, FixedRegisters)) { - unsigned Hint = MRI->getSimpleHint(VirtReg.reg); + Register Hint = MRI->getSimpleHint(VirtReg.reg); // If VirtReg has a hint and that hint is broken record this // virtual register as a recoloring candidate for broken hint. // Indeed, since we evicted a variable in its neighborhood it is @@ -3096,9 +3078,9 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, if (Stage < RS_Spill) { // Try splitting VirtReg or interferences. unsigned NewVRegSizeBefore = NewVRegs.size(); - unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters); + Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters); if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) { - // If VirtReg got split, the eviction info is no longre relevant. + // If VirtReg got split, the eviction info is no longer relevant. LastEvicted.clearEvicteeInfo(VirtReg.reg); return PhysReg; } @@ -3165,7 +3147,6 @@ void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads, } const MachineFrameInfo &MFI = MF->getFrameInfo(); - const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); int FI; for (MachineBasicBlock *MBB : L->getBlocks()) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp index 3c4a46b12f99..7590dbf1b977 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -30,7 +30,6 @@ #include "llvm/CodeGen/RegAllocPBQP.h" #include "RegisterCoalescer.h" -#include "Spiller.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" @@ -58,6 +57,7 @@ #include "llvm/CodeGen/PBQPRAConstraint.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/Spiller.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" @@ -166,7 +166,7 @@ private: void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller); /// Spill the given VReg. - void spillVReg(unsigned VReg, SmallVectorImpl<unsigned> &NewIntervals, + void spillVReg(Register VReg, SmallVectorImpl<Register> &NewIntervals, MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM, Spiller &VRegSpiller); @@ -637,7 +637,7 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, // Check for vregs that have no allowed registers. These should be // pre-spilled and the new vregs added to the worklist. 
if (VRegAllowed.empty()) { - SmallVector<unsigned, 8> NewVRegs; + SmallVector<Register, 8> NewVRegs; spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller); Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end()); continue; @@ -673,8 +673,8 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, } } -void RegAllocPBQP::spillVReg(unsigned VReg, - SmallVectorImpl<unsigned> &NewIntervals, +void RegAllocPBQP::spillVReg(Register VReg, + SmallVectorImpl<Register> &NewIntervals, MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM, Spiller &VRegSpiller) { VRegsToAlloc.erase(VReg); @@ -730,7 +730,7 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G, } else { // Spill VReg. If this introduces new intervals we'll need another round // of allocation. - SmallVector<unsigned, 8> NewVRegs; + SmallVector<Register, 8> NewVRegs; spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller); AnotherRoundNeeded |= !NewVRegs.empty(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp index 0205e6193741..0c3e8a89c920 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp @@ -26,7 +26,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterUsageInfo.h" #include "llvm/IR/Module.h" -#include "llvm/PassAnalysisSupport.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -118,8 +118,8 @@ bool RegUsageInfoPropagation::runOnMachineFunction(MachineFunction &MF) { continue; LLVM_DEBUG( dbgs() - << "Call Instruction Before Register Usage Info Propagation : \n"); - LLVM_DEBUG(dbgs() << MI << "\n"); + << "Call Instruction Before Register Usage Info Propagation : \n" + << MI << "\n"); auto UpdateRegMask = [&](const Function &F) { const ArrayRef<uint32_t> RegMask = PRUI->getRegUsageInfo(F); @@ -140,8 +140,9 @@ bool RegUsageInfoPropagation::runOnMachineFunction(MachineFunction &MF) { } LLVM_DEBUG( - dbgs() << "Call Instruction After Register Usage Info Propagation : " - << MI << '\n'); + dbgs() + << "Call Instruction After Register Usage Info Propagation : \n" + << MI << '\n'); } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp index a3f75d82d0ec..17160a9f42cd 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -571,7 +571,7 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { } void RegisterCoalescer::eliminateDeadDefs() { - SmallVector<unsigned, 8> NewRegs; + SmallVector<Register, 8> NewRegs; LiveRangeEdit(nullptr, NewRegs, *MF, *LIS, nullptr, this).eliminateDeadDefs(DeadDefs); } @@ -675,6 +675,12 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, S.removeSegment(*SS, true); continue; } + // The subrange may have ended before FillerStart. If so, extend it. 
+ if (!S.getVNInfoAt(FillerStart)) { + SlotIndex BBStart = + LIS->getMBBStartIdx(LIS->getMBBFromIndex(FillerStart)); + S.extendInBlock(BBStart, FillerStart); + } VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx); S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo)); VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot()); @@ -1058,7 +1064,9 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, return false; MachineBasicBlock &MBB = *CopyMI.getParent(); - if (MBB.isEHPad()) + // If this block is the target of an invoke/inlineasm_br, moving the copy into + // the predecessor is tricker, and we don't handle it. + if (MBB.isEHPad() || MBB.isInlineAsmBrIndirectTarget()) return false; if (MBB.pred_size() != 2) @@ -1439,6 +1447,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI); LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx); bool UpdatedSubRanges = false; + SlotIndex DefIndex = + CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber()); + VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator(); for (LiveInterval::SubRange &SR : DstInt.subranges()) { if ((SR.LaneMask & DstMask).none()) { LLVM_DEBUG(dbgs() @@ -1449,6 +1460,14 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, SR.removeValNo(RmValNo); UpdatedSubRanges = true; } + } else { + // We know that this lane is defined by this instruction, + // but at this point it may be empty because it is not used by + // anything. This happens when updateRegDefUses adds the missing + // lanes. Assign that lane a dead def so that the interferences + // are properly modeled. + if (SR.empty()) + SR.createDeadDef(DefIndex, Alloc); } } if (UpdatedSubRanges) @@ -2412,7 +2431,7 @@ public: /// Add foreign virtual registers to ShrinkRegs if their live range ended at /// the erased instrs. void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, - SmallVectorImpl<unsigned> &ShrinkRegs, + SmallVectorImpl<Register> &ShrinkRegs, LiveInterval *LI = nullptr); /// Remove liverange defs at places where implicit defs will be removed. @@ -2885,7 +2904,8 @@ bool JoinVals::resolveConflicts(JoinVals &Other) { if (V.Resolution != CR_Unresolved) continue; LLVM_DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i << '@' - << LR.getValNumInfo(i)->def << '\n'); + << LR.getValNumInfo(i)->def + << ' ' << PrintLaneMask(LaneMask) << '\n'); if (SubRangeJoin) return false; @@ -3153,7 +3173,7 @@ void JoinVals::removeImplicitDefs() { } void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, - SmallVectorImpl<unsigned> &ShrinkRegs, + SmallVectorImpl<Register> &ShrinkRegs, LiveInterval *LI) { for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { // Get the def location before markUnused() below invalidates it. @@ -3421,7 +3441,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { // Erase COPY and IMPLICIT_DEF instructions. This may cause some external // registers to require trimming. - SmallVector<unsigned, 8> ShrinkRegs; + SmallVector<Register, 8> ShrinkRegs; LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS); RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); while (!ShrinkRegs.empty()) @@ -3470,7 +3490,7 @@ void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF) // vreg => DbgValueLoc map. 
auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) { for (auto *X : ToInsert) - DbgVRegToValues[X->getOperand(0).getReg()].push_back({Slot, X}); + DbgVRegToValues[X->getDebugOperand(0).getReg()].push_back({Slot, X}); ToInsert.clear(); }; @@ -3482,8 +3502,8 @@ void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF) SlotIndex CurrentSlot = Slots.getMBBStartIdx(&MBB); for (auto &MI : MBB) { - if (MI.isDebugValue() && MI.getOperand(0).isReg() && - MI.getOperand(0).getReg().isVirtual()) { + if (MI.isDebugValue() && MI.getDebugOperand(0).isReg() && + MI.getDebugOperand(0).getReg().isVirtual()) { ToInsert.push_back(&MI); } else if (!MI.isDebugInstr()) { CurrentSlot = Slots.getInstructionIndex(MI); @@ -3582,10 +3602,10 @@ void RegisterCoalescer::checkMergingChangesDbgValuesImpl(unsigned Reg, // "Other" is live and there is a DBG_VALUE of Reg: test if we should // set it undef. if (DbgValueSetIt->first >= SegmentIt->start && - DbgValueSetIt->second->getOperand(0).getReg() != 0 && + DbgValueSetIt->second->getDebugOperand(0).getReg() != 0 && ShouldUndef(DbgValueSetIt->first)) { // Mark undef, erase record of this DBG_VALUE to avoid revisiting. - DbgValueSetIt->second->getOperand(0).setReg(0); + DbgValueSetIt->second->setDebugValueUndef(); continue; } ++DbgValueSetIt; @@ -3853,6 +3873,23 @@ void RegisterCoalescer::releaseMemory() { } bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { + LLVM_DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" + << "********** Function: " << fn.getName() << '\n'); + + // Variables changed between a setjmp and a longjump can have undefined value + // after the longjmp. This behaviour can be observed if such a variable is + // spilled, so longjmp won't restore the value in the spill slot. + // RegisterCoalescer should not run in functions with a setjmp to avoid + // merging such undefined variables with predictable ones. + // + // TODO: Could specifically disable coalescing registers live across setjmp + // calls + if (fn.exposesReturnsTwice()) { + LLVM_DEBUG( + dbgs() << "* Skipped as it exposes funcions that returns twice.\n"); + return false; + } + MF = &fn; MRI = &fn.getRegInfo(); const TargetSubtargetInfo &STI = fn.getSubtarget(); @@ -3871,9 +3908,6 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { // splitting optimization. JoinSplitEdges = EnableJoinSplits; - LLVM_DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" - << "********** Function: " << MF->getName() << '\n'); - if (VerifyCoalescing) MF->verify(this, "Before register coalescing"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp index bf192d1c530d..ecbc4ed63ef6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp @@ -858,7 +858,7 @@ void RegPressureTracker::recedeSkipDebugValues() { static_cast<RegionPressure&>(P).openTop(CurrPos); // Find the previous instruction. - CurrPos = skipDebugInstructionsBackward(std::prev(CurrPos), MBB->begin()); + CurrPos = prev_nodbg(CurrPos, MBB->begin()); SlotIndex SlotIdx; if (RequireIntervals && !CurrPos->isDebugInstr()) @@ -940,7 +940,7 @@ void RegPressureTracker::advance(const RegisterOperands &RegOpers) { bumpDeadDefs(RegOpers.DeadDefs); // Find the next instruction. 
- CurrPos = skipDebugInstructionsForward(std::next(CurrPos), MBB->end()); + CurrPos = next_nodbg(CurrPos, MBB->end()); } void RegPressureTracker::advance() { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp index a5bea1463468..41b6de1441d7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -466,7 +466,7 @@ RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj, const MachineFunction &MF = *Before->getMF(); const MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned NeedSize = TRI->getSpillSize(RC); - unsigned NeedAlign = TRI->getSpillAlignment(RC); + Align NeedAlign = TRI->getSpillAlign(RC); unsigned SI = Scavenged.size(), Diff = std::numeric_limits<unsigned>::max(); int FIB = MFI.getObjectIndexBegin(), FIE = MFI.getObjectIndexEnd(); @@ -478,7 +478,7 @@ RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj, if (FI < FIB || FI >= FIE) continue; unsigned S = MFI.getObjectSize(FI); - unsigned A = MFI.getObjectAlignment(FI); + Align A = MFI.getObjectAlign(FI); if (NeedSize > S || NeedAlign > A) continue; // Avoid wasting slots with large size and/or large alignment. Pick one @@ -487,7 +487,7 @@ RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj, // larger register is reserved before a slot for a smaller one. When // trying to spill a smaller register, the large slot would be found // first, thus making it impossible to spill the larger register later. - unsigned D = (S-NeedSize) + (A-NeedAlign); + unsigned D = (S - NeedSize) + (A.value() - NeedAlign.value()); if (D < Diff) { SI = I; Diff = D; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp index 8aa488e63913..55478c232dd7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp @@ -14,10 +14,10 @@ // //===----------------------------------------------------------------------===// -#include "SafeStackColoring.h" #include "SafeStackLayout.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -27,13 +27,13 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/StackLifetime.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" @@ -96,6 +96,10 @@ static cl::opt<bool> SafeStackUsePointerAddress("safestack-use-pointer-address", cl::init(false), cl::Hidden); +// Disabled by default due to PR32143. 
+static cl::opt<bool> ClColoring("safe-stack-coloring", + cl::desc("enable safe stack coloring"), + cl::Hidden, cl::init(false)); namespace { @@ -200,7 +204,7 @@ class SafeStack { bool IsAccessSafe(Value *Addr, uint64_t Size, const Value *AllocaPtr, uint64_t AllocaSize); - bool ShouldInlinePointerAddress(CallSite &CS); + bool ShouldInlinePointerAddress(CallInst &CI); void TryInlinePointerAddress(); public: @@ -322,7 +326,7 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) { case Instruction::Call: case Instruction::Invoke: { - ImmutableCallSite CS(I); + const CallBase &CS = *cast<CallBase>(I); if (I->isLifetimeStartOrEnd()) continue; @@ -344,8 +348,8 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) { // FIXME: a more precise solution would require an interprocedural // analysis here, which would look at all uses of an argument inside // the function being called. - ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); - for (ImmutableCallSite::arg_iterator A = B; A != E; ++A) + auto B = CS.arg_begin(), E = CS.arg_end(); + for (auto A = B; A != E; ++A) if (A->get() == V) if (!(CS.doesNotCapture(A - B) && (CS.doesNotAccessMemory(A - B) || CS.doesNotAccessMemory()))) { @@ -493,9 +497,18 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( DIBuilder DIB(*F.getParent()); - StackColoring SSC(F, StaticAllocas); - SSC.run(); - SSC.removeAllMarkers(); + StackLifetime SSC(F, StaticAllocas, StackLifetime::LivenessType::May); + static const StackLifetime::LiveRange NoColoringRange(1, true); + if (ClColoring) + SSC.run(); + + for (auto *I : SSC.getMarkers()) { + auto *Op = dyn_cast<Instruction>(I->getOperand(1)); + const_cast<IntrinsicInst *>(I)->eraseFromParent(); + // Remove the operand bitcast, too, if it has no more uses left. + if (Op && Op->use_empty()) + Op->eraseFromParent(); + } // Unsafe stack always grows down. StackLayout SSL(StackAlignment); @@ -529,7 +542,8 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( unsigned Align = std::max((unsigned)DL.getPrefTypeAlignment(Ty), AI->getAlignment()); - SSL.addObject(AI, Size, Align, SSC.getLiveRange(AI)); + SSL.addObject(AI, Size, Align, + ClColoring ? SSC.getLiveRange(AI) : NoColoringRange); } SSL.computeLayout(); @@ -576,8 +590,8 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( Arg->getName() + ".unsafe-byval"); // Replace alloc with the new location. - replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB, - DIExpression::ApplyOffset, -Offset); + replaceDbgDeclare(Arg, BasePointer, DIB, DIExpression::ApplyOffset, + -Offset); Arg->replaceAllUsesWith(NewArg); IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode()); IRB.CreateMemCpy(Off, Align, Arg, Arg->getParamAlign(), Size); @@ -588,8 +602,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( IRB.SetInsertPoint(AI); unsigned Offset = SSL.getObjectOffset(AI); - replaceDbgDeclareForAlloca(AI, BasePointer, DIB, DIExpression::ApplyOffset, - -Offset); + replaceDbgDeclare(AI, BasePointer, DIB, DIExpression::ApplyOffset, -Offset); replaceDbgValueForAlloca(AI, BasePointer, DIB, -Offset); // Replace uses of the alloca with the new location. 
@@ -676,7 +689,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( if (AI->hasName() && isa<Instruction>(NewAI)) NewAI->takeName(AI); - replaceDbgDeclareForAlloca(AI, NewAI, DIB, DIExpression::ApplyOffset, 0); + replaceDbgDeclare(AI, NewAI, DIB, DIExpression::ApplyOffset, 0); AI->replaceAllUsesWith(NewAI); AI->eraseFromParent(); } @@ -706,33 +719,34 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( } } -bool SafeStack::ShouldInlinePointerAddress(CallSite &CS) { - Function *Callee = CS.getCalledFunction(); - if (CS.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee)) +bool SafeStack::ShouldInlinePointerAddress(CallInst &CI) { + Function *Callee = CI.getCalledFunction(); + if (CI.hasFnAttr(Attribute::AlwaysInline) && + isInlineViable(*Callee).isSuccess()) return true; if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) || - CS.isNoInline()) + CI.isNoInline()) return false; return true; } void SafeStack::TryInlinePointerAddress() { - if (!isa<CallInst>(UnsafeStackPtr)) + auto *CI = dyn_cast<CallInst>(UnsafeStackPtr); + if (!CI) return; if(F.hasOptNone()) return; - CallSite CS(UnsafeStackPtr); - Function *Callee = CS.getCalledFunction(); + Function *Callee = CI->getCalledFunction(); if (!Callee || Callee->isDeclaration()) return; - if (!ShouldInlinePointerAddress(CS)) + if (!ShouldInlinePointerAddress(*CI)) return; InlineFunctionInfo IFI; - InlineFunction(CS, IFI); + InlineFunction(*CI, IFI); } bool SafeStack::run() { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackColoring.cpp deleted file mode 100644 index 04a5c4b6d892..000000000000 --- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackColoring.cpp +++ /dev/null @@ -1,310 +0,0 @@ -//===- SafeStackColoring.cpp - SafeStack frame coloring -------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "SafeStackColoring.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Config/llvm-config.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/User.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include <cassert> -#include <tuple> -#include <utility> - -using namespace llvm; -using namespace llvm::safestack; - -#define DEBUG_TYPE "safestackcoloring" - -// Disabled by default due to PR32143. 
-static cl::opt<bool> ClColoring("safe-stack-coloring", - cl::desc("enable safe stack coloring"), - cl::Hidden, cl::init(false)); - -const StackColoring::LiveRange &StackColoring::getLiveRange(AllocaInst *AI) { - const auto IT = AllocaNumbering.find(AI); - assert(IT != AllocaNumbering.end()); - return LiveRanges[IT->second]; -} - -bool StackColoring::readMarker(Instruction *I, bool *IsStart) { - if (!I->isLifetimeStartOrEnd()) - return false; - - auto *II = cast<IntrinsicInst>(I); - *IsStart = II->getIntrinsicID() == Intrinsic::lifetime_start; - return true; -} - -void StackColoring::removeAllMarkers() { - for (auto *I : Markers) { - auto *Op = dyn_cast<Instruction>(I->getOperand(1)); - I->eraseFromParent(); - // Remove the operand bitcast, too, if it has no more uses left. - if (Op && Op->use_empty()) - Op->eraseFromParent(); - } -} - -void StackColoring::collectMarkers() { - InterestingAllocas.resize(NumAllocas); - DenseMap<BasicBlock *, SmallDenseMap<Instruction *, Marker>> BBMarkerSet; - - // Compute the set of start/end markers per basic block. - for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) { - AllocaInst *AI = Allocas[AllocaNo]; - SmallVector<Instruction *, 8> WorkList; - WorkList.push_back(AI); - while (!WorkList.empty()) { - Instruction *I = WorkList.pop_back_val(); - for (User *U : I->users()) { - if (auto *BI = dyn_cast<BitCastInst>(U)) { - WorkList.push_back(BI); - continue; - } - auto *UI = dyn_cast<Instruction>(U); - if (!UI) - continue; - bool IsStart; - if (!readMarker(UI, &IsStart)) - continue; - if (IsStart) - InterestingAllocas.set(AllocaNo); - BBMarkerSet[UI->getParent()][UI] = {AllocaNo, IsStart}; - Markers.push_back(UI); - } - } - } - - // Compute instruction numbering. Only the following instructions are - // considered: - // * Basic block entries - // * Lifetime markers - // For each basic block, compute - // * the list of markers in the instruction order - // * the sets of allocas whose lifetime starts or ends in this BB - LLVM_DEBUG(dbgs() << "Instructions:\n"); - unsigned InstNo = 0; - for (BasicBlock *BB : depth_first(&F)) { - LLVM_DEBUG(dbgs() << " " << InstNo << ": BB " << BB->getName() << "\n"); - unsigned BBStart = InstNo++; - - BlockLifetimeInfo &BlockInfo = BlockLiveness[BB]; - BlockInfo.Begin.resize(NumAllocas); - BlockInfo.End.resize(NumAllocas); - BlockInfo.LiveIn.resize(NumAllocas); - BlockInfo.LiveOut.resize(NumAllocas); - - auto &BlockMarkerSet = BBMarkerSet[BB]; - if (BlockMarkerSet.empty()) { - unsigned BBEnd = InstNo; - BlockInstRange[BB] = std::make_pair(BBStart, BBEnd); - continue; - } - - auto ProcessMarker = [&](Instruction *I, const Marker &M) { - LLVM_DEBUG(dbgs() << " " << InstNo << ": " - << (M.IsStart ? "start " : "end ") << M.AllocaNo - << ", " << *I << "\n"); - - BBMarkers[BB].push_back({InstNo, M}); - - InstructionNumbering[I] = InstNo++; - - if (M.IsStart) { - if (BlockInfo.End.test(M.AllocaNo)) - BlockInfo.End.reset(M.AllocaNo); - BlockInfo.Begin.set(M.AllocaNo); - } else { - if (BlockInfo.Begin.test(M.AllocaNo)) - BlockInfo.Begin.reset(M.AllocaNo); - BlockInfo.End.set(M.AllocaNo); - } - }; - - if (BlockMarkerSet.size() == 1) { - ProcessMarker(BlockMarkerSet.begin()->getFirst(), - BlockMarkerSet.begin()->getSecond()); - } else { - // Scan the BB to determine the marker order. 
- for (Instruction &I : *BB) { - auto It = BlockMarkerSet.find(&I); - if (It == BlockMarkerSet.end()) - continue; - ProcessMarker(&I, It->getSecond()); - } - } - - unsigned BBEnd = InstNo; - BlockInstRange[BB] = std::make_pair(BBStart, BBEnd); - } - NumInst = InstNo; -} - -void StackColoring::calculateLocalLiveness() { - bool changed = true; - while (changed) { - changed = false; - - for (BasicBlock *BB : depth_first(&F)) { - BlockLifetimeInfo &BlockInfo = BlockLiveness[BB]; - - // Compute LiveIn by unioning together the LiveOut sets of all preds. - BitVector LocalLiveIn; - for (auto *PredBB : predecessors(BB)) { - LivenessMap::const_iterator I = BlockLiveness.find(PredBB); - // If a predecessor is unreachable, ignore it. - if (I == BlockLiveness.end()) - continue; - LocalLiveIn |= I->second.LiveOut; - } - - // Compute LiveOut by subtracting out lifetimes that end in this - // block, then adding in lifetimes that begin in this block. If - // we have both BEGIN and END markers in the same basic block - // then we know that the BEGIN marker comes after the END, - // because we already handle the case where the BEGIN comes - // before the END when collecting the markers (and building the - // BEGIN/END vectors). - BitVector LocalLiveOut = LocalLiveIn; - LocalLiveOut.reset(BlockInfo.End); - LocalLiveOut |= BlockInfo.Begin; - - // Update block LiveIn set, noting whether it has changed. - if (LocalLiveIn.test(BlockInfo.LiveIn)) { - changed = true; - BlockInfo.LiveIn |= LocalLiveIn; - } - - // Update block LiveOut set, noting whether it has changed. - if (LocalLiveOut.test(BlockInfo.LiveOut)) { - changed = true; - BlockInfo.LiveOut |= LocalLiveOut; - } - } - } // while changed. -} - -void StackColoring::calculateLiveIntervals() { - for (auto IT : BlockLiveness) { - BasicBlock *BB = IT.getFirst(); - BlockLifetimeInfo &BlockInfo = IT.getSecond(); - unsigned BBStart, BBEnd; - std::tie(BBStart, BBEnd) = BlockInstRange[BB]; - - BitVector Started, Ended; - Started.resize(NumAllocas); - Ended.resize(NumAllocas); - SmallVector<unsigned, 8> Start; - Start.resize(NumAllocas); - - // LiveIn ranges start at the first instruction. 
- for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) { - if (BlockInfo.LiveIn.test(AllocaNo)) { - Started.set(AllocaNo); - Start[AllocaNo] = BBStart; - } - } - - for (auto &It : BBMarkers[BB]) { - unsigned InstNo = It.first; - bool IsStart = It.second.IsStart; - unsigned AllocaNo = It.second.AllocaNo; - - if (IsStart) { - assert(!Started.test(AllocaNo) || Start[AllocaNo] == BBStart); - if (!Started.test(AllocaNo)) { - Started.set(AllocaNo); - Ended.reset(AllocaNo); - Start[AllocaNo] = InstNo; - } - } else { - assert(!Ended.test(AllocaNo)); - if (Started.test(AllocaNo)) { - LiveRanges[AllocaNo].AddRange(Start[AllocaNo], InstNo); - Started.reset(AllocaNo); - } - Ended.set(AllocaNo); - } - } - - for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) - if (Started.test(AllocaNo)) - LiveRanges[AllocaNo].AddRange(Start[AllocaNo], BBEnd); - } -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void StackColoring::dumpAllocas() { - dbgs() << "Allocas:\n"; - for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) - dbgs() << " " << AllocaNo << ": " << *Allocas[AllocaNo] << "\n"; -} - -LLVM_DUMP_METHOD void StackColoring::dumpBlockLiveness() { - dbgs() << "Block liveness:\n"; - for (auto IT : BlockLiveness) { - BasicBlock *BB = IT.getFirst(); - BlockLifetimeInfo &BlockInfo = BlockLiveness[BB]; - auto BlockRange = BlockInstRange[BB]; - dbgs() << " BB [" << BlockRange.first << ", " << BlockRange.second - << "): begin " << BlockInfo.Begin << ", end " << BlockInfo.End - << ", livein " << BlockInfo.LiveIn << ", liveout " - << BlockInfo.LiveOut << "\n"; - } -} - -LLVM_DUMP_METHOD void StackColoring::dumpLiveRanges() { - dbgs() << "Alloca liveness:\n"; - for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) { - LiveRange &Range = LiveRanges[AllocaNo]; - dbgs() << " " << AllocaNo << ": " << Range << "\n"; - } -} -#endif - -void StackColoring::run() { - LLVM_DEBUG(dumpAllocas()); - - for (unsigned I = 0; I < NumAllocas; ++I) - AllocaNumbering[Allocas[I]] = I; - LiveRanges.resize(NumAllocas); - - collectMarkers(); - - if (!ClColoring) { - for (auto &R : LiveRanges) { - R.SetMaximum(1); - R.AddRange(0, 1); - } - return; - } - - for (auto &R : LiveRanges) - R.SetMaximum(NumInst); - for (unsigned I = 0; I < NumAllocas; ++I) - if (!InterestingAllocas.test(I)) - LiveRanges[I] = getFullLiveRange(); - - calculateLocalLiveness(); - LLVM_DEBUG(dumpBlockLiveness()); - calculateLiveIntervals(); - LLVM_DEBUG(dumpLiveRanges()); -} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackColoring.h b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackColoring.h deleted file mode 100644 index b696b1b6baed..000000000000 --- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackColoring.h +++ /dev/null @@ -1,165 +0,0 @@ -//===- SafeStackColoring.h - SafeStack frame coloring ----------*- C++ -*--===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H -#define LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/raw_ostream.h" -#include <cassert> -#include <utility> - -namespace llvm { - -class BasicBlock; -class Function; -class Instruction; - -namespace safestack { - -/// Compute live ranges of allocas. -/// Live ranges are represented as sets of "interesting" instructions, which are -/// defined as instructions that may start or end an alloca's lifetime. These -/// are: -/// * lifetime.start and lifetime.end intrinsics -/// * first instruction of any basic block -/// Interesting instructions are numbered in the depth-first walk of the CFG, -/// and in the program order inside each basic block. -class StackColoring { - /// A class representing liveness information for a single basic block. - /// Each bit in the BitVector represents the liveness property - /// for a different stack slot. - struct BlockLifetimeInfo { - /// Which slots BEGINs in each basic block. - BitVector Begin; - - /// Which slots ENDs in each basic block. - BitVector End; - - /// Which slots are marked as LIVE_IN, coming into each basic block. - BitVector LiveIn; - - /// Which slots are marked as LIVE_OUT, coming out of each basic block. - BitVector LiveOut; - }; - -public: - /// This class represents a set of interesting instructions where an alloca is - /// live. - struct LiveRange { - BitVector bv; - - void SetMaximum(int size) { bv.resize(size); } - void AddRange(unsigned start, unsigned end) { bv.set(start, end); } - - bool Overlaps(const LiveRange &Other) const { - return bv.anyCommon(Other.bv); - } - - void Join(const LiveRange &Other) { bv |= Other.bv; } - }; - -private: - Function &F; - - /// Maps active slots (per bit) for each basic block. - using LivenessMap = DenseMap<BasicBlock *, BlockLifetimeInfo>; - LivenessMap BlockLiveness; - - /// Number of interesting instructions. - int NumInst = -1; - - /// Numeric ids for interesting instructions. - DenseMap<Instruction *, unsigned> InstructionNumbering; - - /// A range [Start, End) of instruction ids for each basic block. - /// Instructions inside each BB have monotonic and consecutive ids. - DenseMap<const BasicBlock *, std::pair<unsigned, unsigned>> BlockInstRange; - - ArrayRef<AllocaInst *> Allocas; - unsigned NumAllocas; - DenseMap<AllocaInst *, unsigned> AllocaNumbering; - - /// LiveRange for allocas. - SmallVector<LiveRange, 8> LiveRanges; - - /// The set of allocas that have at least one lifetime.start. All other - /// allocas get LiveRange that corresponds to the entire function. - BitVector InterestingAllocas; - SmallVector<Instruction *, 8> Markers; - - struct Marker { - unsigned AllocaNo; - bool IsStart; - }; - - /// List of {InstNo, {AllocaNo, IsStart}} for each BB, ordered by InstNo. 
- DenseMap<BasicBlock *, SmallVector<std::pair<unsigned, Marker>, 4>> BBMarkers; - - void dumpAllocas(); - void dumpBlockLiveness(); - void dumpLiveRanges(); - - bool readMarker(Instruction *I, bool *IsStart); - void collectMarkers(); - void calculateLocalLiveness(); - void calculateLiveIntervals(); - -public: - StackColoring(Function &F, ArrayRef<AllocaInst *> Allocas) - : F(F), Allocas(Allocas), NumAllocas(Allocas.size()) {} - - void run(); - void removeAllMarkers(); - - /// Returns a set of "interesting" instructions where the given alloca is - /// live. Not all instructions in a function are interesting: we pick a set - /// that is large enough for LiveRange::Overlaps to be correct. - const LiveRange &getLiveRange(AllocaInst *AI); - - /// Returns a live range that represents an alloca that is live throughout the - /// entire function. - LiveRange getFullLiveRange() { - assert(NumInst >= 0); - LiveRange R; - R.SetMaximum(NumInst); - R.AddRange(0, NumInst); - return R; - } -}; - -static inline raw_ostream &operator<<(raw_ostream &OS, const BitVector &V) { - OS << "{"; - int idx = V.find_first(); - bool first = true; - while (idx >= 0) { - if (!first) { - OS << ", "; - } - first = false; - OS << idx; - idx = V.find_next(idx); - } - OS << "}"; - return OS; -} - -static inline raw_ostream &operator<<(raw_ostream &OS, - const StackColoring::LiveRange &R) { - return OS << R.bv; -} - -} // end namespace safestack - -} // end namespace llvm - -#endif // LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp index 09964866e4d3..c823454f825c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "SafeStackLayout.h" -#include "SafeStackColoring.h" +#include "llvm/Analysis/StackLifetime.h" #include "llvm/IR/Value.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" @@ -39,7 +39,7 @@ LLVM_DUMP_METHOD void StackLayout::print(raw_ostream &OS) { } void StackLayout::addObject(const Value *V, unsigned Size, unsigned Alignment, - const StackColoring::LiveRange &Range) { + const StackLifetime::LiveRange &Range) { StackObjects.push_back({V, Size, Alignment, Range}); ObjectAlignments[V] = Alignment; MaxAlignment = std::max(MaxAlignment, Alignment); @@ -76,7 +76,7 @@ void StackLayout::layoutObject(StackObject &Obj) { LLVM_DEBUG(dbgs() << " Does not intersect, skip.\n"); continue; } - if (Obj.Range.Overlaps(R.Range)) { + if (Obj.Range.overlaps(R.Range)) { // Find the next appropriate location. Start = AdjustStackOffset(R.End, Obj.Size, Obj.Alignment); End = Start + Obj.Size; @@ -96,7 +96,7 @@ void StackLayout::layoutObject(StackObject &Obj) { if (Start > LastRegionEnd) { LLVM_DEBUG(dbgs() << " Creating gap region: " << LastRegionEnd << " .. " << Start << "\n"); - Regions.emplace_back(LastRegionEnd, Start, StackColoring::LiveRange()); + Regions.emplace_back(LastRegionEnd, Start, StackLifetime::LiveRange(0)); LastRegionEnd = Start; } LLVM_DEBUG(dbgs() << " Creating new region: " << LastRegionEnd << " .. " @@ -125,7 +125,7 @@ void StackLayout::layoutObject(StackObject &Obj) { // Update live ranges for all affected regions. 
for (StackRegion &R : Regions) { if (Start < R.End && End > R.Start) - R.Range.Join(Obj.Range); + R.Range.join(Obj.Range); if (End <= R.End) break; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h index 349d9a8b595c..f0db1b42aa00 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h @@ -9,9 +9,9 @@ #ifndef LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H #define LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H -#include "SafeStackColoring.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/StackLifetime.h" namespace llvm { @@ -27,10 +27,10 @@ class StackLayout { struct StackRegion { unsigned Start; unsigned End; - StackColoring::LiveRange Range; + StackLifetime::LiveRange Range; StackRegion(unsigned Start, unsigned End, - const StackColoring::LiveRange &Range) + const StackLifetime::LiveRange &Range) : Start(Start), End(End), Range(Range) {} }; @@ -40,7 +40,7 @@ class StackLayout { struct StackObject { const Value *Handle; unsigned Size, Alignment; - StackColoring::LiveRange Range; + StackLifetime::LiveRange Range; }; SmallVector<StackObject, 8> StackObjects; @@ -56,7 +56,7 @@ public: /// Add an object to the stack frame. Value pointer is opaque and used as a /// handle to retrieve the object's offset in the frame later. void addObject(const Value *V, unsigned Size, unsigned Alignment, - const StackColoring::LiveRange &Range); + const StackLifetime::LiveRange &Range); /// Run the layout computation for all previously added objects. void computeLayout(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp index ee72de67d875..c93b29617438 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp @@ -43,6 +43,7 @@ namespace { class ScalarizeMaskedMemIntrin : public FunctionPass { const TargetTransformInfo *TTI = nullptr; + const DataLayout *DL = nullptr; public: static char ID; // Pass identification, replacement for typeid @@ -82,7 +83,7 @@ static bool isConstantIntVector(Value *Mask) { if (!C) return false; - unsigned NumElts = Mask->getType()->getVectorNumElements(); + unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements(); for (unsigned i = 0; i != NumElts; ++i) { Constant *CElt = C->getAggregateElement(i); if (!CElt || !isa<ConstantInt>(CElt)) @@ -130,8 +131,8 @@ static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) { Value *Mask = CI->getArgOperand(2); Value *Src0 = CI->getArgOperand(3); - unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); - VectorType *VecType = cast<VectorType>(CI->getType()); + const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue(); + VectorType *VecType = cast<FixedVectorType>(CI->getType()); Type *EltTy = VecType->getElementType(); @@ -151,12 +152,13 @@ static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) { } // Adjust alignment for the scalar instruction. 
- AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8); + const Align AdjustedAlignVal = + commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8); // Bitcast %addr from i8* to EltTy* Type *NewPtrType = EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace()); Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); - unsigned VectorWidth = VecType->getNumElements(); + unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements(); // The result vector Value *VResult = Src0; @@ -166,7 +168,7 @@ static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) { if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) continue; Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); - LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AlignVal); + LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal); VResult = Builder.CreateInsertElement(VResult, Load, Idx); } CI->replaceAllUsesWith(VResult); @@ -210,7 +212,7 @@ static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) { Builder.SetInsertPoint(InsertPt); Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); - LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AlignVal); + LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal); Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx); // Create "else" block, fill it in the next iteration @@ -268,8 +270,8 @@ static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) { Value *Alignment = CI->getArgOperand(2); Value *Mask = CI->getArgOperand(3); - unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); - VectorType *VecType = cast<VectorType>(Src->getType()); + const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue(); + auto *VecType = cast<VectorType>(Src->getType()); Type *EltTy = VecType->getElementType(); @@ -287,12 +289,13 @@ static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) { } // Adjust alignment for the scalar instruction. 
- AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8); + const Align AdjustedAlignVal = + commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8); // Bitcast %addr from i8* to EltTy* Type *NewPtrType = EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace()); Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); - unsigned VectorWidth = VecType->getNumElements(); + unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements(); if (isConstantIntVector(Mask)) { for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { @@ -300,7 +303,7 @@ static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) { continue; Value *OneElt = Builder.CreateExtractElement(Src, Idx); Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); - Builder.CreateAlignedStore(OneElt, Gep, AlignVal); + Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal); } CI->eraseFromParent(); return; @@ -342,7 +345,7 @@ static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) { Value *OneElt = Builder.CreateExtractElement(Src, Idx); Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx); - Builder.CreateAlignedStore(OneElt, Gep, AlignVal); + Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal); // Create "else" block, fill it in the next iteration BasicBlock *NewIfBlock = @@ -393,14 +396,14 @@ static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) { Value *Mask = CI->getArgOperand(2); Value *Src0 = CI->getArgOperand(3); - VectorType *VecType = cast<VectorType>(CI->getType()); + auto *VecType = cast<FixedVectorType>(CI->getType()); Type *EltTy = VecType->getElementType(); IRBuilder<> Builder(CI->getContext()); Instruction *InsertPt = CI; BasicBlock *IfBlock = CI->getParent(); Builder.SetInsertPoint(InsertPt); - unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); + MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue(); Builder.SetCurrentDebugLocation(CI->getDebugLoc()); @@ -517,11 +520,12 @@ static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) { Value *Alignment = CI->getArgOperand(2); Value *Mask = CI->getArgOperand(3); - assert(isa<VectorType>(Src->getType()) && - "Unexpected data type in masked scatter intrinsic"); - assert(isa<VectorType>(Ptrs->getType()) && - isa<PointerType>(Ptrs->getType()->getVectorElementType()) && - "Vector of pointers is expected in masked scatter intrinsic"); + auto *SrcFVTy = cast<FixedVectorType>(Src->getType()); + + assert( + isa<VectorType>(Ptrs->getType()) && + isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) && + "Vector of pointers is expected in masked scatter intrinsic"); IRBuilder<> Builder(CI->getContext()); Instruction *InsertPt = CI; @@ -529,8 +533,8 @@ static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) { Builder.SetInsertPoint(InsertPt); Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); - unsigned VectorWidth = Src->getType()->getVectorNumElements(); + MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue(); + unsigned VectorWidth = SrcFVTy->getNumElements(); // Shorten the way if the mask is a vector of constants. 
if (isConstantIntVector(Mask)) { @@ -601,7 +605,7 @@ static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) { Value *Mask = CI->getArgOperand(1); Value *PassThru = CI->getArgOperand(2); - VectorType *VecType = cast<VectorType>(CI->getType()); + auto *VecType = cast<FixedVectorType>(CI->getType()); Type *EltTy = VecType->getElementType(); @@ -624,8 +628,8 @@ static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) { if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) continue; Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex); - LoadInst *Load = - Builder.CreateAlignedLoad(EltTy, NewPtr, 1, "Load" + Twine(Idx)); + LoadInst *Load = Builder.CreateAlignedLoad(EltTy, NewPtr, Align(1), + "Load" + Twine(Idx)); VResult = Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx)); ++MemIndex; @@ -670,7 +674,7 @@ static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) { "cond.load"); Builder.SetInsertPoint(InsertPt); - LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, 1); + LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, Align(1)); Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx); // Move the pointer if there are more blocks to come. @@ -714,7 +718,7 @@ static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) { Value *Ptr = CI->getArgOperand(1); Value *Mask = CI->getArgOperand(2); - VectorType *VecType = cast<VectorType>(Src->getType()); + auto *VecType = cast<FixedVectorType>(Src->getType()); IRBuilder<> Builder(CI->getContext()); Instruction *InsertPt = CI; @@ -723,7 +727,7 @@ static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) { Builder.SetInsertPoint(InsertPt); Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - Type *EltTy = VecType->getVectorElementType(); + Type *EltTy = VecType->getElementType(); unsigned VectorWidth = VecType->getNumElements(); @@ -736,7 +740,7 @@ static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) { Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex); - Builder.CreateAlignedStore(OneElt, NewPtr, 1); + Builder.CreateAlignedStore(OneElt, NewPtr, Align(1)); ++MemIndex; } CI->eraseFromParent(); @@ -777,7 +781,7 @@ static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) { Builder.SetInsertPoint(InsertPt); Value *OneElt = Builder.CreateExtractElement(Src, Idx); - Builder.CreateAlignedStore(OneElt, Ptr, 1); + Builder.CreateAlignedStore(OneElt, Ptr, Align(1)); // Move the pointer if there are more blocks to come. 
Value *NewPtr; @@ -811,6 +815,7 @@ bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) { bool EverMadeChange = false; TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + DL = &F.getParent()->getDataLayout(); bool MadeChange = true; while (MadeChange) { @@ -849,39 +854,46 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); if (II) { - unsigned Alignment; switch (II->getIntrinsicID()) { default: break; - case Intrinsic::masked_load: { + case Intrinsic::masked_load: // Scalarize unsupported vector masked load - Alignment = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); - if (TTI->isLegalMaskedLoad(CI->getType(), MaybeAlign(Alignment))) + if (TTI->isLegalMaskedLoad( + CI->getType(), + cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue())) return false; scalarizeMaskedLoad(CI, ModifiedDT); return true; - } - case Intrinsic::masked_store: { - Alignment = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); - if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(), - MaybeAlign(Alignment))) + case Intrinsic::masked_store: + if (TTI->isLegalMaskedStore( + CI->getArgOperand(0)->getType(), + cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue())) return false; scalarizeMaskedStore(CI, ModifiedDT); return true; - } - case Intrinsic::masked_gather: - Alignment = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); - if (TTI->isLegalMaskedGather(CI->getType(), MaybeAlign(Alignment))) + case Intrinsic::masked_gather: { + unsigned AlignmentInt = + cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); + Type *LoadTy = CI->getType(); + Align Alignment = + DL->getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), LoadTy); + if (TTI->isLegalMaskedGather(LoadTy, Alignment)) return false; scalarizeMaskedGather(CI, ModifiedDT); return true; - case Intrinsic::masked_scatter: - Alignment = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); - if (TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType(), - MaybeAlign(Alignment))) + } + case Intrinsic::masked_scatter: { + unsigned AlignmentInt = + cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); + Type *StoreTy = CI->getArgOperand(0)->getType(); + Align Alignment = + DL->getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), StoreTy); + if (TTI->isLegalMaskedScatter(StoreTy, Alignment)) return false; scalarizeMaskedScatter(CI, ModifiedDT); return true; + } case Intrinsic::masked_expandload: if (TTI->isLegalMaskedExpandLoad(CI->getType())) return false; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp index dc3a11670a16..60f8eec1b9bc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -713,6 +713,14 @@ bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) { return false; } +void ScheduleDAGTopologicalSort::AddSUnitWithoutPredecessors(const SUnit *SU) { + assert(SU->NodeNum == Index2Node.size() && "Node cannot be added at the end"); + assert(SU->NumPreds == 0 && "Can only add SU's with no predecessors"); + Node2Index.push_back(Index2Node.size()); + Index2Node.push_back(SU->NodeNum); + Visited.resize(Node2Index.size()); +} + bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU, const SUnit *TargetSU) { FixOrder(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp 
index d11406cc330f..10da2d421797 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseSet.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LivePhysRegs.h" @@ -269,13 +270,13 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { if (!ImplicitPseudoDef && !ImplicitPseudoUse) { Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse, UseOp)); - ST.adjustSchedDependency(SU, UseSU, Dep); + ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep); } else { Dep.setLatency(0); // FIXME: We could always let target to adjustSchedDependency(), and // remove this condition, but that currently asserts in Hexagon BE. if (SU->getInstr()->isBundle() || (RegUse && RegUse->isBundle())) - ST.adjustSchedDependency(SU, UseSU, Dep); + ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep); } UseSU->addPred(Dep); @@ -294,6 +295,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { if (MRI.isConstantPhysReg(Reg)) return; + const TargetSubtargetInfo &ST = MF.getSubtarget(); + // Optionally add output and anti dependencies. For anti // dependencies we use a latency of 0 because for a multi-issue // target we want to allow the defining instruction to issue @@ -311,14 +314,12 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { if (DefSU != SU && (Kind != SDep::Output || !MO.isDead() || !DefSU->getInstr()->registerDefIsDead(*Alias))) { - if (Kind == SDep::Anti) - DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias)); - else { - SDep Dep(SU, Kind, /*Reg=*/*Alias); + SDep Dep(SU, Kind, /*Reg=*/*Alias); + if (Kind != SDep::Anti) Dep.setLatency( SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); - DefSU->addPred(Dep); - } + ST.adjustSchedDependency(SU, OperIdx, DefSU, I->OpIdx, Dep); + DefSU->addPred(Dep); } } } @@ -444,7 +445,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { SDep Dep(SU, SDep::Data, Reg); Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use, I->OperandIndex)); - ST.adjustSchedDependency(SU, UseSU, Dep); + ST.adjustSchedDependency(SU, OperIdx, UseSU, I->OperandIndex, Dep); UseSU->addPred(Dep); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp index 8d04711f07c6..a113c30f851b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -28,7 +28,7 @@ namespace llvm { DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const ScheduleDAG *G) { - return G->MF.getName(); + return std::string(G->MF.getName()); } static bool renderGraphFromBottomUp() { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp index a9fda56f2dac..6e05de888cc0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -92,10 +92,11 @@ LLVM_DUMP_METHOD void ScoreboardHazardRecognizer::Scoreboard::dump() const { last--; for (unsigned i = 0; i <= last; i++) { - unsigned FUs = (*this)[i]; + 
InstrStage::FuncUnits FUs = (*this)[i]; dbgs() << "\t"; - for (int j = 31; j >= 0; j--) - dbgs() << ((FUs & (1 << j)) ? '1' : '0'); + for (int j = std::numeric_limits<InstrStage::FuncUnits>::digits - 1; + j >= 0; j--) + dbgs() << ((FUs & (1ULL << j)) ? '1' : '0'); dbgs() << '\n'; } } @@ -142,7 +143,7 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { break; } - unsigned freeUnits = IS->getUnits(); + InstrStage::FuncUnits freeUnits = IS->getUnits(); switch (IS->getReservationKind()) { case InstrStage::Required: // Required FUs conflict with both reserved and required ones @@ -193,7 +194,7 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { assert(((cycle + i) < RequiredScoreboard.getDepth()) && "Scoreboard depth exceeded!"); - unsigned freeUnits = IS->getUnits(); + InstrStage::FuncUnits freeUnits = IS->getUnits(); switch (IS->getReservationKind()) { case InstrStage::Required: // Required FUs conflict with both reserved and required ones @@ -206,7 +207,7 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { } // reduce to a single unit - unsigned freeUnit = 0; + InstrStage::FuncUnits freeUnit = 0; do { freeUnit = freeUnits; freeUnits = freeUnit & (freeUnit - 1); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2476fd26f250..f14b3dba4f31 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -30,6 +30,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -124,17 +125,29 @@ static cl::opt<unsigned> StoreMergeDependenceLimit( cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check")); +static cl::opt<bool> EnableReduceLoadOpStoreWidth( + "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), + cl::desc("DAG cominber enable reducing the width of load/op/store " + "sequence")); + +static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore( + "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), + cl::desc("DAG cominber enable load/<replace bytes>/store with " + "a narrower store")); + namespace { class DAGCombiner { SelectionDAG &DAG; const TargetLowering &TLI; + const SelectionDAGTargetInfo *STI; CombineLevel Level; CodeGenOpt::Level OptLevel; bool LegalDAG = false; bool LegalOperations = false; bool LegalTypes = false; bool ForCodeSize; + bool DisableGenericCombines; /// Worklist of all of the nodes that need to be simplified. 
/// @@ -222,9 +235,11 @@ namespace { public: DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL) - : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), - OptLevel(OL), AA(AA) { + : DAG(D), TLI(D.getTargetLoweringInfo()), + STI(D.getSubtarget().getSelectionDAGInfo()), + Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) { ForCodeSize = DAG.shouldOptForSize(); + DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel); MaximumLegalStoreInBits = 0; // We use the minimum store size here, since that's all we can guarantee @@ -307,23 +322,34 @@ namespace { } bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) { - EVT VT = Op.getValueType(); - unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1; - APInt DemandedElts = APInt::getAllOnesValue(NumElts); - return SimplifyDemandedBits(Op, DemandedBits, DemandedElts); + TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); + KnownBits Known; + if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false)) + return false; + + // Revisit the node. + AddToWorklist(Op.getNode()); + + CommitTargetLoweringOpt(TLO); + return true; } /// Check the specified vector node value to see if it can be simplified or /// if things it uses can be simplified as it only uses some of the /// elements. If so, return true. bool SimplifyDemandedVectorElts(SDValue Op) { + // TODO: For now just pretend it cannot be simplified. + if (Op.getValueType().isScalableVector()) + return false; + unsigned NumElts = Op.getValueType().getVectorNumElements(); APInt DemandedElts = APInt::getAllOnesValue(NumElts); return SimplifyDemandedVectorElts(Op, DemandedElts); } bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, - const APInt &DemandedElts); + const APInt &DemandedElts, + bool AssumeSingleUse = false); bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, bool AssumeSingleUse = false); @@ -429,11 +455,13 @@ namespace { SDValue visitZERO_EXTEND(SDNode *N); SDValue visitANY_EXTEND(SDNode *N); SDValue visitAssertExt(SDNode *N); + SDValue visitAssertAlign(SDNode *N); SDValue visitSIGN_EXTEND_INREG(SDNode *N); SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N); SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N); SDValue visitTRUNCATE(SDNode *N); SDValue visitBITCAST(SDNode *N); + SDValue visitFREEZE(SDNode *N); SDValue visitBUILD_PAIR(SDNode *N); SDValue visitFADD(SDNode *N); SDValue visitFSUB(SDNode *N); @@ -522,9 +550,8 @@ namespace { SDValue rebuildSetCC(SDValue N); bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, - SDValue &CC) const; + SDValue &CC, bool MatchStrict = false) const; bool isOneUseSetCC(SDValue N) const; - bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y); SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); @@ -553,6 +580,10 @@ namespace { SDValue InnerPos, SDValue InnerNeg, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL); + SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg, + SDValue InnerPos, SDValue InnerNeg, + unsigned PosOpcode, unsigned NegOpcode, + const SDLoc &DL); SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); SDValue MatchLoadCombine(SDNode *N); SDValue MatchStoreCombine(StoreSDNode *N); @@ -562,6 +593,7 @@ namespace { SDValue TransformFPLoadStorePair(SDNode *N); SDValue convertBuildVecZextToZext(SDNode *N); SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); + SDValue reduceBuildVecTruncToBitCast(SDNode *N); SDValue reduceBuildVecToShuffle(SDNode 
*N); SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef<int> VectorMask, SDValue VecIn1, @@ -606,6 +638,19 @@ namespace { : MemNode(N), OffsetFromBase(Offset) {} }; + // Classify the origin of a stored value. + enum class StoreSource { Unknown, Constant, Extract, Load }; + StoreSource getStoreSource(SDValue StoreVal) { + if (isa<ConstantSDNode>(StoreVal) || isa<ConstantFPSDNode>(StoreVal)) + return StoreSource::Constant; + if (StoreVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT || + StoreVal.getOpcode() == ISD::EXTRACT_SUBVECTOR) + return StoreSource::Extract; + if (isa<LoadSDNode>(StoreVal)) + return StoreSource::Load; + return StoreSource::Unknown; + } + /// This is a helper function for visitMUL to check the profitability /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). /// MulNode is the original multiply, AddNode is (add x, c1), @@ -633,43 +678,66 @@ namespace { /// can be combined into narrow loads. bool BackwardsPropagateMask(SDNode *N); - /// Helper function for MergeConsecutiveStores which merges the - /// component store chains. + /// Helper function for mergeConsecutiveStores which merges the component + /// store chains. SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores); - /// This is a helper function for MergeConsecutiveStores. When the - /// source elements of the consecutive stores are all constants or - /// all extracted vector elements, try to merge them into one - /// larger store introducing bitcasts if necessary. \return True - /// if a merged store was created. - bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, + /// This is a helper function for mergeConsecutiveStores. When the source + /// elements of the consecutive stores are all constants or all extracted + /// vector elements, try to merge them into one larger store introducing + /// bitcasts if necessary. \return True if a merged store was created. + bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores, bool IsConstantSrc, bool UseVector, bool UseTrunc); - /// This is a helper function for MergeConsecutiveStores. Stores - /// that potentially may be merged with St are placed in - /// StoreNodes. RootNode is a chain predecessor to all store - /// candidates. + /// This is a helper function for mergeConsecutiveStores. Stores that + /// potentially may be merged with St are placed in StoreNodes. RootNode is + /// a chain predecessor to all store candidates. void getStoreMergeCandidates(StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes, SDNode *&Root); - /// Helper function for MergeConsecutiveStores. Checks if - /// candidate stores have indirect dependency through their - /// operands. RootNode is the predecessor to all stores calculated - /// by getStoreMergeCandidates and is used to prune the dependency check. - /// \return True if safe to merge. + /// Helper function for mergeConsecutiveStores. Checks if candidate stores + /// have indirect dependency through their operands. RootNode is the + /// predecessor to all stores calculated by getStoreMergeCandidates and is + /// used to prune the dependency check. \return True if safe to merge. bool checkMergeStoreCandidatesForDependencies( SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores, SDNode *RootNode); + /// This is a helper function for mergeConsecutiveStores. Given a list of + /// store candidates, find the first N that are consecutive in memory. 
+ /// Returns 0 if there are not at least 2 consecutive stores to try merging. + unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes, + int64_t ElementSizeBytes) const; + + /// This is a helper function for mergeConsecutiveStores. It is used for + /// store chains that are composed entirely of constant values. + bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes, + unsigned NumConsecutiveStores, + EVT MemVT, SDNode *Root, bool AllowVectors); + + /// This is a helper function for mergeConsecutiveStores. It is used for + /// store chains that are composed entirely of extracted vector elements. + /// When extracting multiple vector elements, try to store them in one + /// vector store rather than a sequence of scalar stores. + bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes, + unsigned NumConsecutiveStores, EVT MemVT, + SDNode *Root); + + /// This is a helper function for mergeConsecutiveStores. It is used for + /// store chains that are composed entirely of loaded values. + bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, + unsigned NumConsecutiveStores, EVT MemVT, + SDNode *Root, bool AllowVectors, + bool IsNonTemporalStore, bool IsNonTemporalLoad); + /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. - /// \return number of stores that were merged into a merged store (the - /// affected nodes are stored as a prefix in \p StoreNodes). - bool MergeConsecutiveStores(StoreSDNode *St); + /// \return true if stores were merged. + bool mergeConsecutiveStores(StoreSDNode *St); /// Try to transform a truncation where C is a constant: /// (trunc (and X, C)) -> (and (trunc X), (trunc C)) @@ -814,7 +882,7 @@ static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) { // the appropriate nodes based on the type of node we are checking. This // simplifies life a bit for the callers. bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, - SDValue &CC) const { + SDValue &CC, bool MatchStrict) const { if (N.getOpcode() == ISD::SETCC) { LHS = N.getOperand(0); RHS = N.getOperand(1); @@ -822,6 +890,15 @@ bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, return true; } + if (MatchStrict && + (N.getOpcode() == ISD::STRICT_FSETCC || + N.getOpcode() == ISD::STRICT_FSETCCS)) { + LHS = N.getOperand(1); + RHS = N.getOperand(2); + CC = N.getOperand(3); + return true; + } + if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2).getNode()) || !TLI.isConstFalseVal(N.getOperand(3).getNode())) @@ -958,14 +1035,11 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, if (N0.getOpcode() != Opc) return SDValue(); - // Don't reassociate reductions. 
- if (N0->getFlags().hasVectorReduction()) - return SDValue(); - - if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { - if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) { + if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { + if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) { // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2)) - if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2)) + if (SDValue OpNode = + DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1})) return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); return SDValue(); } @@ -985,9 +1059,6 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1, SDNodeFlags Flags) { assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative."); - // Don't reassociate reductions. - if (Flags.hasVectorReduction()) - return SDValue(); // Floating-point reassociation is not allowed without loose FP math. if (N0.getValueType().isFloatingPoint() || @@ -1036,6 +1107,12 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, void DAGCombiner:: CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { + // Replace the old value with the new one. + ++NodesCombined; + LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG); + dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); + dbgs() << '\n'); + // Replace all uses. If any nodes become isomorphic to other nodes and // are deleted, make sure to remove them from our worklist. WorklistRemover DeadNodes(*this); @@ -1054,21 +1131,17 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { /// Check the specified integer node value to see if it can be simplified or if /// things it uses can be simplified by bit propagation. If so, return true. bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, - const APInt &DemandedElts) { + const APInt &DemandedElts, + bool AssumeSingleUse) { TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); KnownBits Known; - if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO)) + if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0, + AssumeSingleUse)) return false; // Revisit the node. AddToWorklist(Op.getNode()); - // Replace the old value with the new one. - ++NodesCombined; - LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG); - dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); - dbgs() << '\n'); - CommitTargetLoweringOpt(TLO); return true; } @@ -1088,12 +1161,6 @@ bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, // Revisit the node. AddToWorklist(Op.getNode()); - // Replace the old value with the new one. - ++NodesCombined; - LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG); - dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); - dbgs() << '\n'); - CommitTargetLoweringOpt(TLO); return true; } @@ -1217,8 +1284,11 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { SDValue RV = DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1)); - // We are always replacing N0/N1's use in N and only need - // additional replacements if there are additional uses. + // We are always replacing N0/N1's use in N and only need additional + // replacements if there are additional uses. 
+ // Note: We are checking uses of the *nodes* (SDNode) rather than values + // (SDValue) here because the node may reference multiple values + // (for example, the chain value of a load node). Replace0 &= !N0->hasOneUse(); Replace1 &= (N0 != N1) && !N1->hasOneUse(); @@ -1568,6 +1638,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ANY_EXTEND: return visitANY_EXTEND(N); case ISD::AssertSext: case ISD::AssertZext: return visitAssertExt(N); + case ISD::AssertAlign: return visitAssertAlign(N); case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N); @@ -1617,6 +1688,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::LIFETIME_END: return visitLIFETIME_END(N); case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); + case ISD::FREEZE: return visitFREEZE(N); case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: case ISD::VECREDUCE_ADD: @@ -1635,7 +1707,9 @@ SDValue DAGCombiner::visit(SDNode *N) { } SDValue DAGCombiner::combine(SDNode *N) { - SDValue RV = visit(N); + SDValue RV; + if (!DisableGenericCombines) + RV = visit(N); // If nothing happened, try a target-specific DAG combine. if (!RV.getNode()) { @@ -2053,12 +2127,11 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) { // We need a constant operand for the add/sub, and the other operand is a // logical shift right: add (srl), C or sub C, (srl). - // TODO - support non-uniform vector amounts. bool IsAdd = N->getOpcode() == ISD::ADD; SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0); SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1); - ConstantSDNode *C = isConstOrConstSplat(ConstantOp); - if (!C || ShiftOp.getOpcode() != ISD::SRL) + if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) || + ShiftOp.getOpcode() != ISD::SRL) return SDValue(); // The shift must be of a 'not' value. @@ -2079,8 +2152,11 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) { SDLoc DL(N); auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL; SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt); - APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1; - return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT)); + if (SDValue NewC = + DAG.FoldConstantArithmetic(IsAdd ? 
ISD::ADD : ISD::SUB, DL, VT, + {ConstantOp, DAG.getConstant(1, DL, VT)})) + return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC); + return SDValue(); } /// Try to fold a node that behaves like an ADD (note that N isn't necessarily @@ -2116,8 +2192,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::ADD, DL, VT, N1, N0); // fold (add c1, c2) -> c1+c2 - return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(), - N1.getNode()); + return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}); } // fold (add x, 0) -> x @@ -2128,8 +2203,8 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { // fold ((A-c1)+c2) -> (A+(c2-c1)) if (N0.getOpcode() == ISD::SUB && isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) { - SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(), - N0.getOperand(1).getNode()); + SDValue Sub = + DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)}); assert(Sub && "Constant folding failed"); return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub); } @@ -2137,8 +2212,8 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { // fold ((c1-A)+c2) -> (c1+c2)-A if (N0.getOpcode() == ISD::SUB && isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) { - SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(), - N0.getOperand(0).getNode()); + SDValue Add = + DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)}); assert(Add && "Constant folding failed"); return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1)); } @@ -2159,13 +2234,14 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { } } - // Undo the add -> or combine to merge constant offsets from a frame index. + // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is + // equivalent to (add x, c0). if (N0.getOpcode() == ISD::OR && - isa<FrameIndexSDNode>(N0.getOperand(0)) && - isa<ConstantSDNode>(N0.getOperand(1)) && + isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) && DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) { - SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1)); - return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0); + if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, + {N1, N0.getOperand(1)})) + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0); } } @@ -2324,6 +2400,23 @@ SDValue DAGCombiner::visitADD(SDNode *N) { DAG.haveNoCommonBitsSet(N0, N1)) return DAG.getNode(ISD::OR, DL, VT, N0, N1); + // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)). 
+ if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) { + APInt C0 = N0->getConstantOperandAPInt(0); + APInt C1 = N1->getConstantOperandAPInt(0); + return DAG.getVScale(DL, VT, C0 + C1); + } + + // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2) + if ((N0.getOpcode() == ISD::ADD) && + (N0.getOperand(1).getOpcode() == ISD::VSCALE) && + (N1.getOpcode() == ISD::VSCALE)) { + auto VS0 = N0.getOperand(1)->getConstantOperandAPInt(0); + auto VS1 = N1->getConstantOperandAPInt(0); + auto VS = DAG.getVScale(DL, VT, VS0 + VS1); + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS); + } + return SDValue(); } @@ -2354,8 +2447,7 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) { if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(Opcode, DL, VT, N1, N0); // fold (add_sat c1, c2) -> c3 - return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(), - N1.getNode()); + return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}); } // fold (add_sat x, 0) -> x @@ -2975,12 +3067,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // FIXME: Refactor this and xor and other similar operations together. if (N0 == N1) return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); - if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && - DAG.isConstantIntBuildVectorOrConstantInt(N1)) { - // fold (sub c1, c2) -> c1-c2 - return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(), - N1.getNode()); - } + + // fold (sub c1, c2) -> c3 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1})) + return C; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -3047,8 +3137,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N0.getOpcode() == ISD::ADD && isConstantOrConstantVector(N1, /* NoOpaques */ true) && isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) { - SDValue NewC = DAG.FoldConstantArithmetic( - ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode()); + SDValue NewC = + DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1}); assert(NewC && "Constant folding failed"); return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC); } @@ -3058,8 +3148,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N11 = N1.getOperand(1); if (isConstantOrConstantVector(N0, /* NoOpaques */ true) && isConstantOrConstantVector(N11, /* NoOpaques */ true)) { - SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(), - N11.getNode()); + SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}); assert(NewC && "Constant folding failed"); return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0)); } @@ -3069,8 +3158,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N0.getOpcode() == ISD::SUB && isConstantOrConstantVector(N1, /* NoOpaques */ true) && isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) { - SDValue NewC = DAG.FoldConstantArithmetic( - ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode()); + SDValue NewC = + DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1}); assert(NewC && "Constant folding failed"); return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC); } @@ -3079,8 +3168,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N0.getOpcode() == ISD::SUB && isConstantOrConstantVector(N1, /* NoOpaques */ true) && isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) { - SDValue NewC = DAG.FoldConstantArithmetic( - ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode()); + SDValue NewC = + DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, 
{N0.getOperand(0), N1}); assert(NewC && "Constant folding failed"); return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1)); } @@ -3251,6 +3340,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } } + // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C)) + if (N1.getOpcode() == ISD::VSCALE) { + APInt IntVal = N1.getConstantOperandAPInt(0); + return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal)); + } + // Prefer an add for more folding potential and possibly better codegen: // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1) if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) { @@ -3301,12 +3396,9 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) { if (N0 == N1) return DAG.getConstant(0, DL, VT); - if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && - DAG.isConstantIntBuildVectorOrConstantInt(N1)) { - // fold (sub_sat c1, c2) -> c3 - return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(), - N1.getNode()); - } + // fold (sub_sat c1, c2) -> c3 + if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1})) + return C; // fold (sub_sat x, 0) -> x if (isNullConstant(N1)) @@ -3442,30 +3534,20 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); - bool N0IsConst = false; bool N1IsConst = false; bool N1IsOpaqueConst = false; - bool N0IsOpaqueConst = false; - APInt ConstValue0, ConstValue1; + APInt ConstValue1; + // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; - N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0); N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1); - assert((!N0IsConst || - ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) && - "Splat APInt should be element width"); assert((!N1IsConst || ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) && "Splat APInt should be element width"); } else { - N0IsConst = isa<ConstantSDNode>(N0); - if (N0IsConst) { - ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue(); - N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque(); - } N1IsConst = isa<ConstantSDNode>(N1); if (N1IsConst) { ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue(); @@ -3474,17 +3556,18 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } // fold (mul c1, c2) -> c1*c2 - if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst) - return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, - N0.getNode(), N1.getNode()); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1})) + return C; // canonicalize constant to RHS (vector doesn't have to splat) if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); + // fold (mul x, 0) -> 0 if (N1IsConst && ConstValue1.isNullValue()) return N1; + // fold (mul x, 1) -> x if (N1IsConst && ConstValue1.isOneValue()) return N0; @@ -3498,6 +3581,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); } + // fold (mul x, (1 << c)) -> x << c if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && DAG.isKnownToBeAPowerOfTwo(N1) && @@ -3508,6 +3592,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc); } + // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c if (N1IsConst && !N1IsOpaqueConst && 
(-ConstValue1).isPowerOf2()) { unsigned Log2Val = (-ConstValue1).logBase2(); @@ -3596,6 +3681,14 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1)); + // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)). + if (N0.getOpcode() == ISD::VSCALE) + if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) { + APInt C0 = N0.getConstantOperandAPInt(0); + APInt C1 = NC1->getAPIntValue(); + return DAG.getVScale(SDLoc(N), VT, C0 * C1); + } + // reassociate mul if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags())) return RMUL; @@ -3753,13 +3846,14 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { SDLoc DL(N); // fold (sdiv c1, c2) -> c1/c2 - ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque()) - return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1})) + return C; + // fold (sdiv X, -1) -> 0-X if (N1C && N1C->isAllOnesValue()) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); + // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0) if (N1C && N1C->getAPIntValue().isMinSignedValue()) return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), @@ -3897,12 +3991,10 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { SDLoc DL(N); // fold (udiv c1, c2) -> c1/c2 - ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N0C && N1C) - if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, - N0C, N1C)) - return Folded; + if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1})) + return C; + // fold (udiv X, -1) -> select(X == -1, 1, 0) if (N1C && N1C->getAPIntValue().isAllOnesValue()) return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), @@ -3995,11 +4087,10 @@ SDValue DAGCombiner::visitREM(SDNode *N) { SDLoc DL(N); // fold (rem c1, c2) -> c1%c2 - ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N0C && N1C) - if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C)) - return Folded; + if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1})) + return C; + // fold (urem X, -1) -> select(X == -1, 0, x) if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue()) return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), @@ -4095,7 +4186,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { // If the type twice as wide is legal, transform the mulhs to a wider multiply // plus a shift. - if (VT.isSimple() && !VT.isVector()) { + if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); @@ -4151,7 +4242,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { // If the type twice as wide is legal, transform the mulhu to a wider multiply // plus a shift. 
- if (VT.isSimple() && !VT.isVector()) { + if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); @@ -4324,6 +4415,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); + unsigned Opcode = N->getOpcode(); // fold vector ops if (VT.isVector()) @@ -4331,19 +4423,16 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { return FoldedVOp; // fold operation with constant operands. - ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); - ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); - if (N0C && N1C) - return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C); + if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1})) + return C; // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && - !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX. // Only do this if the current op isn't legal and the flipped is. - unsigned Opcode = N->getOpcode(); if (!TLI.isOperationLegal(Opcode, VT) && (N0.isUndef() || DAG.SignBitIsZero(N0)) && (N1.isUndef() || DAG.SignBitIsZero(N1))) { @@ -4832,11 +4921,16 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST, return false; // Ensure that this isn't going to produce an unsupported memory access. - if (ShAmt && - !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, - LDST->getAddressSpace(), ShAmt / 8, - LDST->getMemOperand()->getFlags())) - return false; + if (ShAmt) { + assert(ShAmt % 8 == 0 && "ShAmt is byte offset"); + const unsigned ByteShAmt = ShAmt / 8; + const Align LDSTAlign = LDST->getAlign(); + const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, + LDST->getAddressSpace(), NarrowAlign, + LDST->getMemOperand()->getFlags())) + return false; + } // It's not possible to generate a constant of extended or untyped type. 
EVT PtrType = LDST->getBasePtr().getValueType(); @@ -5181,17 +5275,19 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } // fold (and c1, c2) -> c1&c2 - ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N0C && N1C && !N1C->isOpaque()) - return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1})) + return C; + // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); + // fold (and x, -1) -> x if (isAllOnesConstant(N1)) return N0; + // if (and x, c) is known to be zero, return 0 unsigned BitWidth = VT.getScalarSizeInBits(); if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), @@ -5661,6 +5757,48 @@ static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) { return false; } +// Match this pattern: +// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff)) +// And rewrite this to: +// (rotr (bswap A), 16) +static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, + SelectionDAG &DAG, SDNode *N, SDValue N0, + SDValue N1, EVT VT, EVT ShiftAmountTy) { + assert(N->getOpcode() == ISD::OR && VT == MVT::i32 && + "MatchBSwapHWordOrAndAnd: expecting i32"); + if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) + return SDValue(); + if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND) + return SDValue(); + // TODO: this is too restrictive; lifting this restriction requires more tests + if (!N0->hasOneUse() || !N1->hasOneUse()) + return SDValue(); + ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1)); + ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1)); + if (!Mask0 || !Mask1) + return SDValue(); + if (Mask0->getAPIntValue() != 0xff00ff00 || + Mask1->getAPIntValue() != 0x00ff00ff) + return SDValue(); + SDValue Shift0 = N0.getOperand(0); + SDValue Shift1 = N1.getOperand(0); + if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL) + return SDValue(); + ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1)); + ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1)); + if (!ShiftAmt0 || !ShiftAmt1) + return SDValue(); + if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8) + return SDValue(); + if (Shift0.getOperand(0) != Shift1.getOperand(0)) + return SDValue(); + + SDLoc DL(N); + SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0)); + SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy); + return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt); +} + /// Match a 32-bit packed halfword bswap. That is /// ((x & 0x000000ff) << 8) | /// ((x & 0x0000ff00) >> 8) | @@ -5677,6 +5815,16 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)) return SDValue(); + if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT, + getShiftAmountTy(VT))) + return BSwap; + + // Try again with commuted operands. 
+ if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT, + getShiftAmountTy(VT))) + return BSwap; + + // Look for either // (or (bswaphpair), (bswaphpair)) // (or (or (bswaphpair), (and)), (and)) @@ -5882,17 +6030,19 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } // fold (or c1, c2) -> c1|c2 - ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - if (N0C && N1C && !N1C->isOpaque()) - return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1})) + return C; + // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); + // fold (or x, 0) -> x if (isNullConstant(N1)) return N0; + // fold (or x, -1) -> -1 if (isAllOnesConstant(N1)) return N1; @@ -5927,8 +6077,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { }; if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) { - if (SDValue COR = DAG.FoldConstantArithmetic( - ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) { + if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, + {N1, N0.getOperand(1)})) { SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1); AddToWorklist(IOR.getNode()); return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR); @@ -6027,6 +6177,7 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1)); // (add v v) -> (shl v 1) + // TODO: Should this be a general DAG canonicalization? if (OppShift.getOpcode() == ISD::SRL && OppShiftCst && ExtractFrom.getOpcode() == ISD::ADD && ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) && @@ -6199,8 +6350,12 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, // EltSize & Mask == NegC & Mask // // (because "x & Mask" is a truncation and distributes through subtraction). + // + // We also need to account for a potential truncation of NegOp1 if the amount + // has already been legalized to a shift amount type. APInt Width; - if (Pos == NegOp1) + if ((Pos == NegOp1) || + (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0))) Width = NegC->getAPIntValue(); // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. @@ -6253,19 +6408,91 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, return SDValue(); } +// A subroutine of MatchRotate used once we have found an OR of two opposite +// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces +// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the +// former being preferred if supported. InnerPos and InnerNeg are Pos and +// Neg with outer conversions stripped away. +// TODO: Merge with MatchRotatePosNeg. 
+SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, + SDValue Neg, SDValue InnerPos, + SDValue InnerNeg, unsigned PosOpcode, + unsigned NegOpcode, const SDLoc &DL) { + EVT VT = N0.getValueType(); + unsigned EltBits = VT.getScalarSizeInBits(); + + // fold (or (shl x0, (*ext y)), + // (srl x1, (*ext (sub 32, y)))) -> + // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y)) + // + // fold (or (shl x0, (*ext (sub 32, y))), + // (srl x1, (*ext y))) -> + // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y)) + if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) { + bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); + return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1, + HasPos ? Pos : Neg); + } + + // Matching the shift+xor cases, we can't easily use the xor'd shift amount + // so for now just use the PosOpcode case if its legal. + // TODO: When can we use the NegOpcode case? + if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) { + auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) { + if (Op.getOpcode() != BinOpc) + return false; + ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1)); + return Cst && (Cst->getAPIntValue() == Imm); + }; + + // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31))) + // -> (fshl x0, x1, y) + if (IsBinOpImm(N1, ISD::SRL, 1) && + IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) && + InnerPos == InnerNeg.getOperand(0) && + TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) { + return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos); + } + + // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y)) + // -> (fshr x0, x1, y) + if (IsBinOpImm(N0, ISD::SHL, 1) && + IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) && + InnerNeg == InnerPos.getOperand(0) && + TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) { + return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg); + } + + // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y)) + // -> (fshr x0, x1, y) + // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization? + if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) && + IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) && + InnerNeg == InnerPos.getOperand(0) && + TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) { + return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg); + } + } + + return SDValue(); +} + // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate -// a rot[lr]. +// a rot[lr]. This also matches funnel shift patterns, similar to rotation but +// with different shifted sources. SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // Must be a legal type. Expanded 'n promoted things won't work with rotates. EVT VT = LHS.getValueType(); if (!TLI.isTypeLegal(VT)) return SDValue(); - // The target must have at least one rotate flavor. + // The target must have at least one rotate/funnel flavor. bool HasROTL = hasOperation(ISD::ROTL, VT); bool HasROTR = hasOperation(ISD::ROTR, VT); - if (!HasROTL && !HasROTR) + bool HasFSHL = hasOperation(ISD::FSHL, VT); + bool HasFSHR = hasOperation(ISD::FSHR, VT); + if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR) return SDValue(); // Check for truncated rotate. @@ -6315,12 +6542,13 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // At this point we've matched or extracted a shift op on each side. 
- if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) - return SDValue(); // Not shifting the same value. - if (LHSShift.getOpcode() == RHSShift.getOpcode()) return SDValue(); // Shifts must disagree. + bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0); + if (!IsRotate && !(HasFSHL || HasFSHR)) + return SDValue(); // Requires funnel shift support. + // Canonicalize shl to left side in a shl/srl pair. if (RHSShift.getOpcode() == ISD::SHL) { std::swap(LHS, RHS); @@ -6336,13 +6564,21 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) + // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1) + // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2) + // iff C1+C2 == EltSizeInBits auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS, ConstantSDNode *RHS) { return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits; }; if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { - SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, - LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); + SDValue Res; + if (IsRotate && (HasROTL || HasROTR)) + Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, + HasROTL ? LHSShiftAmt : RHSShiftAmt); + else + Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg, + RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt); // If there is an AND of either shifted operand, apply it to the result. if (LHSMask.getNode() || RHSMask.getNode()) { @@ -6360,10 +6596,10 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits)); } - Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask); + Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask); } - return Rot; + return Res; } // If there is a mask here, and we have a variable shift, we can't be sure @@ -6386,13 +6622,29 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { RExtOp0 = RHSShiftAmt.getOperand(0); } - SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, - LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL); + if (IsRotate && (HasROTL || HasROTR)) { + SDValue TryL = + MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0, + RExtOp0, ISD::ROTL, ISD::ROTR, DL); + if (TryL) + return TryL; + + SDValue TryR = + MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0, + LExtOp0, ISD::ROTR, ISD::ROTL, DL); + if (TryR) + return TryR; + } + + SDValue TryL = + MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt, + LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL); if (TryL) return TryL; - SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, - RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL); + SDValue TryR = + MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt, + RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL); if (TryR) return TryR; @@ -6617,9 +6869,9 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT)) return SDValue(); - // Check if all the bytes of the combined value we are looking at are stored - // to the same base address. Collect bytes offsets from Base address into - // ByteOffsets. + // Check if all the bytes of the combined value we are looking at are stored + // to the same base address. Collect bytes offsets from Base address into + // ByteOffsets. 
SDValue CombinedValue; SmallVector<int64_t, 8> ByteOffsets(Width, INT64_MAX); int64_t FirstOffset = INT64_MAX; @@ -6637,17 +6889,16 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { SDValue Value = Trunc.getOperand(0); if (Value.getOpcode() == ISD::SRL || Value.getOpcode() == ISD::SRA) { - ConstantSDNode *ShiftOffset = - dyn_cast<ConstantSDNode>(Value.getOperand(1)); - // Trying to match the following pattern. The shift offset must be + auto *ShiftOffset = dyn_cast<ConstantSDNode>(Value.getOperand(1)); + // Trying to match the following pattern. The shift offset must be // a constant and a multiple of 8. It is the byte offset in "y". - // + // // x = srl y, offset - // i8 z = trunc x + // i8 z = trunc x // store z, ... if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8)) return SDValue(); - + Offset = ShiftOffset->getSExtValue()/8; Value = Value.getOperand(0); } @@ -6692,7 +6943,7 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { assert(FirstOffset != INT64_MAX && "First byte offset must be set"); assert(FirstStore && "First store must be set"); - // Check if the bytes of the combined value we are looking at match with + // Check if the bytes of the combined value we are looking at match with // either big or little endian value store. Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset); if (!IsBigEndian.hasValue()) @@ -7037,20 +7288,22 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDLoc DL(N); if (N0.isUndef() && N1.isUndef()) return DAG.getConstant(0, DL, VT); + // fold (xor x, undef) -> undef if (N0.isUndef()) return N0; if (N1.isUndef()) return N1; + // fold (xor c1, c2) -> c1^c2 - ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); - ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1})) + return C; + // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::XOR, DL, VT, N1, N0); + // fold (xor x, 0) -> x if (isNullConstant(N1)) return N0; @@ -7065,7 +7318,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // fold !(x cc y) -> (x !cc y) unsigned N0Opcode = N0.getOpcode(); SDValue LHS, RHS, CC; - if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { + if (TLI.isConstTrueVal(N1.getNode()) && + isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) { ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), LHS.getValueType()); if (!LegalOperations || @@ -7078,6 +7332,21 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { case ISD::SELECT_CC: return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2), N0.getOperand(3), NotCC); + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: { + if (N0.hasOneUse()) { + // FIXME Can we handle multiple uses? Could we token factor the chain + // results from the new/old setcc? + SDValue SetCC = DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC, + N0.getOperand(0), + N0Opcode == ISD::STRICT_FSETCCS); + CombineTo(N, SetCC); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1)); + recursivelyDeleteUnusedNodes(N0.getNode()); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + break; + } } } } @@ -7412,15 +7681,29 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { } // fold (rot x, c) -> (rot x, c % BitSize) - // TODO - support non-uniform vector amounts. 
- if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) { - if (Cst->getAPIntValue().uge(Bitsize)) { - uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize); - return DAG.getNode(N->getOpcode(), dl, VT, N0, - DAG.getConstant(RotAmt, dl, N1.getValueType())); - } + bool OutOfRange = false; + auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) { + OutOfRange |= C->getAPIntValue().uge(Bitsize); + return true; + }; + if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) { + EVT AmtVT = N1.getValueType(); + SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT); + if (SDValue Amt = + DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits})) + return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt); } + // rot i16 X, 8 --> bswap X + auto *RotAmtC = isConstOrConstSplat(N1); + if (RotAmtC && RotAmtC->getAPIntValue() == 8 && + VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT)) + return DAG.getNode(ISD::BSWAP, dl, VT, N0); + + // Simplify the operands using demanded-bits information. + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { @@ -7437,12 +7720,11 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { EVT ShiftVT = C1->getValueType(0); bool SameSide = (N->getOpcode() == NextOp); unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB; - if (SDValue CombinedShift = - DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) { + if (SDValue CombinedShift = DAG.FoldConstantArithmetic( + CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) { SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT); SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic( - ISD::SREM, dl, ShiftVT, CombinedShift.getNode(), - BitsizeC.getNode()); + ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC}); return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0), CombinedShiftNorm); } @@ -7478,8 +7760,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && TLI.getBooleanContents(N00.getOperand(0).getValueType()) == TargetLowering::ZeroOrNegativeOneBooleanContent) { - if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, - N01CV, N1CV)) + if (SDValue C = + DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1})) return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); } } @@ -7489,10 +7771,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { ConstantSDNode *N1C = isConstOrConstSplat(N1); // fold (shl c1, c2) -> c1<<c2 - // TODO - support non-uniform vector shift amounts. - ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); - if (N0C && N1C && !N1C->isOpaque()) - return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1})) + return C; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -7509,8 +7789,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1); } - // TODO - support non-uniform vector shift amounts. - if (N1C && SimplifyDemandedBits(SDValue(N, 0))) + if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) @@ -7698,9 +7977,90 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (SDValue NewSHL = visitShiftByConstant(N)) return NewSHL; + // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)). 
+ if (N0.getOpcode() == ISD::VSCALE) + if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) { + auto DL = SDLoc(N); + APInt C0 = N0.getConstantOperandAPInt(0); + APInt C1 = NC1->getAPIntValue(); + return DAG.getVScale(DL, VT, C0 << C1); + } + return SDValue(); } +// Transform a right shift of a multiply into a multiply-high. +// Examples: +// (srl (mul (zext i32:$a to i64), (zext i32:$a to i64)), 32) -> (mulhu $a, $b) +// (sra (mul (sext i32:$a to i64), (sext i32:$a to i64)), 32) -> (mulhs $a, $b) +static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG, + const TargetLowering &TLI) { + assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && + "SRL or SRA node is required here!"); + + // Check the shift amount. Proceed with the transformation if the shift + // amount is constant. + ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1)); + if (!ShiftAmtSrc) + return SDValue(); + + SDLoc DL(N); + + // The operation feeding into the shift must be a multiply. + SDValue ShiftOperand = N->getOperand(0); + if (ShiftOperand.getOpcode() != ISD::MUL) + return SDValue(); + + // Both operands must be equivalent extend nodes. + SDValue LeftOp = ShiftOperand.getOperand(0); + SDValue RightOp = ShiftOperand.getOperand(1); + bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND; + bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND; + + if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode()) + return SDValue(); + + EVT WideVT1 = LeftOp.getValueType(); + EVT WideVT2 = RightOp.getValueType(); + (void)WideVT2; + // Proceed with the transformation if the wide types match. + assert((WideVT1 == WideVT2) && + "Cannot have a multiply node with two different operand types."); + + EVT NarrowVT = LeftOp.getOperand(0).getValueType(); + // Check that the two extend nodes are the same type. + if (NarrowVT != RightOp.getOperand(0).getValueType()) + return SDValue(); + + // Only transform into mulh if mulh for the narrow type is cheaper than + // a multiply followed by a shift. This should also check if mulh is + // legal for NarrowVT on the target. + if (!TLI.isMulhCheaperThanMulShift(NarrowVT)) + return SDValue(); + + // Proceed with the transformation if the wide type is twice as large + // as the narrow type. + unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits(); + if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize) + return SDValue(); + + // Check the shift amount with the narrow type size. + // Proceed with the transformation if the shift amount is the width + // of the narrow type. + unsigned ShiftAmt = ShiftAmtSrc->getZExtValue(); + if (ShiftAmt != NarrowVTSize) + return SDValue(); + + // If the operation feeding into the MUL is a sign extend (sext), + // we use mulhs. Othewise, zero extends (zext) use mulhu. + unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU; + + SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), + RightOp.getOperand(0)); + return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1) + : DAG.getZExtOrTrunc(Result, DL, WideVT1)); +} + SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -7724,10 +8084,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { ConstantSDNode *N1C = isConstOrConstSplat(N1); // fold (sra c1, c2) -> (sra c1, c2) - // TODO - support non-uniform vector shift amounts. 
- ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); - if (N0C && N1C && !N1C->isOpaque()) - return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1})) + return C; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -7818,7 +8176,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper. // sra (add (shl X, N1C), AddC), N1C --> // sext (add (trunc X to (width - N1C)), AddC') - if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C && + if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C && N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) { if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) { @@ -7835,7 +8193,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // implementation and/or target-specific overrides (because // non-simple types likely require masking when legalized), but that // restriction may conflict with other transforms. - if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) { + if (TruncVT.isSimple() && isTypeLegal(TruncVT) && + TLI.isTruncateFree(VT, TruncVT)) { SDLoc DL(N); SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT); SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt). @@ -7878,8 +8237,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { } // Simplify, based on bits shifted out of the LHS. - // TODO - support non-uniform vector shift amounts. - if (N1C && SimplifyDemandedBits(SDValue(N, 0))) + if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // If the sign bit is known to be zero, switch this to a SRL. @@ -7890,6 +8248,11 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (SDValue NewSRA = visitShiftByConstant(N)) return NewSRA; + // Try to transform this shift into a multiply-high if + // it matches the appropriate pattern detected in combineShiftToMULH. + if (SDValue MULH = combineShiftToMULH(N, DAG, TLI)) + return MULH; + return SDValue(); } @@ -7910,10 +8273,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { ConstantSDNode *N1C = isConstOrConstSplat(N1); // fold (srl c1, c2) -> c1 >>u c2 - // TODO - support non-uniform vector shift amounts. - ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); - if (N0C && N1C && !N1C->isOpaque()) - return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1})) + return C; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -8077,8 +8438,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold operands of srl based on knowledge that the low bits are not // demanded. - // TODO - support non-uniform vector shift amounts. - if (N1C && SimplifyDemandedBits(SDValue(N, 0))) + if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); if (N1C && !N1C->isOpaque()) @@ -8118,6 +8478,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { } } + // Try to transform this shift into a multiply-high if + // it matches the appropriate pattern detected in combineShiftToMULH. + if (SDValue MULH = combineShiftToMULH(N, DAG, TLI)) + return MULH; + return SDValue(); } @@ -8167,6 +8532,45 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) { return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, SDLoc(N), ShAmtTy)); + + // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive. 
+ // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive. + // TODO - bigendian support once we have test coverage. + // TODO - can we merge this with CombineConseutiveLoads/MatchLoadCombine? + // TODO - permit LHS EXTLOAD if extensions are shifted out. + if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() && + !DAG.getDataLayout().isBigEndian()) { + auto *LHS = dyn_cast<LoadSDNode>(N0); + auto *RHS = dyn_cast<LoadSDNode>(N1); + if (LHS && RHS && LHS->isSimple() && RHS->isSimple() && + LHS->getAddressSpace() == RHS->getAddressSpace() && + (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) && + ISD::isNON_EXTLoad(LHS)) { + if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) { + SDLoc DL(RHS); + uint64_t PtrOff = + IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8); + Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff); + bool Fast = false; + if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + RHS->getAddressSpace(), NewAlign, + RHS->getMemOperand()->getFlags(), &Fast) && + Fast) { + SDValue NewPtr = + DAG.getMemBasePlusOffset(RHS->getBasePtr(), PtrOff, DL); + AddToWorklist(NewPtr.getNode()); + SDValue Load = DAG.getLoad( + VT, DL, RHS->getChain(), NewPtr, + RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign, + RHS->getMemOperand()->getFlags(), RHS->getAAInfo()); + // Replace the old load's chain with the new load's chain. + WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1)); + return Load; + } + } + } + } } // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2) @@ -8616,7 +9020,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // Create the actual or node if we can generate good code for it. if (!normalizeToSequence) { SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0); - return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2, Flags); } // Otherwise see if we can optimize to a better pattern. 
@@ -8832,6 +9236,8 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { SDValue N2Elt = N2.getOperand(i); if (N1Elt.isUndef() || N2Elt.isUndef()) continue; + if (N1Elt.getValueType() != N2Elt.getValueType()) + continue; const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue(); const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue(); @@ -9402,8 +9808,7 @@ SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) { SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad, N1.getOperand(1)); - APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - Mask = Mask.zext(VT.getSizeInBits()); + APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits()); SDLoc DL0(N0); SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift, DAG.getConstant(Mask, DL0, VT)); @@ -9709,8 +10114,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LN00->getChain(), LN00->getBasePtr(), LN00->getMemoryVT(), LN00->getMemOperand()); - APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - Mask = Mask.sext(VT.getSizeInBits()); + APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits()); SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, ExtLoad, DAG.getConstant(Mask, DL, VT)); ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND); @@ -9948,7 +10352,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) && TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) { SDValue Op = N0.getOperand(0); - Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); + Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT); AddToWorklist(Op.getNode()); SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT); // Transfer the debug info; the new node is equivalent to N0. @@ -9960,7 +10364,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) { SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT); AddToWorklist(Op.getNode()); - SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); + SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT); // We may safely transfer the debug info describing the truncate node over // to the equivalent and operation. DAG.transferDbgValues(N0, And); @@ -9978,8 +10382,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { !TLI.isZExtFree(N0.getValueType(), VT))) { SDValue X = N0.getOperand(0).getOperand(0); X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT); - APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - Mask = Mask.zext(VT.getSizeInBits()); + APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits()); SDLoc DL(N); return DAG.getNode(ISD::AND, DL, VT, X, DAG.getConstant(Mask, DL, VT)); @@ -10033,8 +10436,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { LN00->getChain(), LN00->getBasePtr(), LN00->getMemoryVT(), LN00->getMemOperand()); - APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - Mask = Mask.zext(VT.getSizeInBits()); + APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits()); SDLoc DL(N); SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, ExtLoad, DAG.getConstant(Mask, DL, VT)); @@ -10087,23 +10489,22 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // that the element size of the sext'd result matches the element size of // the compare operands. 
SDLoc DL(N); - SDValue VecOnes = DAG.getConstant(1, DL, VT); if (VT.getSizeInBits() == N00VT.getSizeInBits()) { - // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. + // zext(setcc) -> zext_in_reg(vsetcc) for vectors. SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0), N0.getOperand(1), N0.getOperand(2)); - return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes); + return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType()); } // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then - // truncate/sign extend. + // truncate/any extend followed by zext_in_reg. EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger(); SDValue VsetCC = DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1), N0.getOperand(2)); - return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT), - VecOnes); + return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL, + N0.getValueType()); } // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc @@ -10134,7 +10535,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDLoc DL(N); // Ensure that the shift amount is wide enough for the shifted value. - if (VT.getSizeInBits() >= 256) + if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits()) ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt); return DAG.getNode(N0.getOpcode(), DL, VT, @@ -10194,8 +10595,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDLoc DL(N); SDValue X = N0.getOperand(0).getOperand(0); X = DAG.getAnyExtOrTrunc(X, DL, VT); - APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - Mask = Mask.zext(VT.getSizeInBits()); + APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits()); return DAG.getNode(ISD::AND, DL, VT, X, DAG.getConstant(Mask, DL, VT)); } @@ -10355,6 +10755,45 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitAssertAlign(SDNode *N) { + SDLoc DL(N); + + Align AL = cast<AssertAlignSDNode>(N)->getAlign(); + SDValue N0 = N->getOperand(0); + + // Fold (assertalign (assertalign x, AL0), AL1) -> + // (assertalign x, max(AL0, AL1)) + if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0)) + return DAG.getAssertAlign(DL, N0.getOperand(0), + std::max(AL, AAN->getAlign())); + + // In rare cases, there are trivial arithmetic ops in source operands. Sink + // this assert down to source operands so that those arithmetic ops could be + // exposed to the DAG combining. 
+ switch (N0.getOpcode()) { + default: + break; + case ISD::ADD: + case ISD::SUB: { + unsigned AlignShift = Log2(AL); + SDValue LHS = N0.getOperand(0); + SDValue RHS = N0.getOperand(1); + unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros(); + unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros(); + if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) { + if (LHSAlignShift < AlignShift) + LHS = DAG.getAssertAlign(DL, LHS, AL); + if (RHSAlignShift < AlignShift) + RHS = DAG.getAssertAlign(DL, RHS, AL); + return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS); + } + break; + } + } + + return SDValue(); +} + /// If the result of a wider load is shifted to right of N bits and then /// truncated to a narrower type and where N is a multiple of number of bits of /// the narrower type, transform it to a narrower load from address + N / num of @@ -10435,9 +10874,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { } // At this point, we must have a load or else we can't do the transform. - if (!isa<LoadSDNode>(N0)) return SDValue(); - - auto *LN0 = cast<LoadSDNode>(N0); + auto *LN0 = dyn_cast<LoadSDNode>(N0); + if (!LN0) return SDValue(); // Because a SRL must be assumed to *need* to zero-extend the high bits // (as opposed to anyext the high bits), we can't combine the zextload @@ -10456,8 +10894,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDNode *Mask = *(SRL->use_begin()); if (Mask->getOpcode() == ISD::AND && isa<ConstantSDNode>(Mask->getOperand(1))) { - const APInt &ShiftMask = - cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue(); + const APInt& ShiftMask = Mask->getConstantOperandAPInt(1); if (ShiftMask.isMask()) { EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countTrailingOnes()); @@ -10487,7 +10924,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); // Reducing the width of a volatile load is illegal. For atomics, we may be - // able to reduce the width provided we never widen again. (see D66309) + // able to reduce the width provided we never widen again. (see D66309) if (!LN0->isSimple() || !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) return SDValue(); @@ -10568,26 +11005,27 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); - EVT EVT = cast<VTSDNode>(N1)->getVT(); + EVT ExtVT = cast<VTSDNode>(N1)->getVT(); unsigned VTBits = VT.getScalarSizeInBits(); - unsigned EVTBits = EVT.getScalarSizeInBits(); + unsigned ExtVTBits = ExtVT.getScalarSizeInBits(); + // sext_vector_inreg(undef) = 0 because the top bit will all be the same. if (N0.isUndef()) - return DAG.getUNDEF(VT); + return DAG.getConstant(0, SDLoc(N), VT); // fold (sext_in_reg c1) -> c1 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); // If the input is already sign extended, just drop the extension. 
- if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1) + if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1)) return N0; // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && - EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) - return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, - N0.getOperand(0), N1); + ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0), + N1); // fold (sext_in_reg (sext x)) -> (sext x) // fold (sext_in_reg (aext x)) -> (sext x) @@ -10596,8 +11034,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N00 = N0.getOperand(0); unsigned N00Bits = N00.getScalarValueSizeInBits(); - if ((N00Bits <= EVTBits || - (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) && + if ((N00Bits <= ExtVTBits || + (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00); } @@ -10606,7 +11044,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG || N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG || N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) && - N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) { + N0.getOperand(0).getScalarValueSizeInBits() == ExtVTBits) { if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)) return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, @@ -10617,14 +11055,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // iff we are extending the source sign bit. if (N0.getOpcode() == ISD::ZERO_EXTEND) { SDValue N00 = N0.getOperand(0); - if (N00.getScalarValueSizeInBits() == EVTBits && + if (N00.getScalarValueSizeInBits() == ExtVTBits && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); } // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. - if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1))) - return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType()); + if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1))) + return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT); // fold operands of sext_in_reg based on knowledge that the top bits are not // demanded. @@ -10641,11 +11079,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. if (N0.getOpcode() == ISD::SRL) { if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1))) - if (ShAmt->getAPIntValue().ule(VTBits - EVTBits)) { + if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) { // We can turn this into an SRA iff the input to the SRL is already sign // extended enough. unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); - if (((VTBits - EVTBits) - ShAmt->getZExtValue()) < InSignBits) + if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits) return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1)); } @@ -10657,14 +11095,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // extends that the target does support. 
if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && - EVT == cast<LoadSDNode>(N0)->getMemoryVT() && + ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() && ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() && N0.hasOneUse()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), EVT, + LN0->getBasePtr(), ExtVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); @@ -10674,13 +11112,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse() && - EVT == cast<LoadSDNode>(N0)->getMemoryVT() && + ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() && ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) && - TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), EVT, + LN0->getBasePtr(), ExtVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); @@ -10688,11 +11126,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { } // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16)) - if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) { + if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) { if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), N0.getOperand(1), false)) - return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, - BSwap, N1); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1); } return SDValue(); @@ -10702,8 +11139,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + // sext_vector_inreg(undef) = 0 because the top bit will all be the same. if (N0.isUndef()) - return DAG.getUNDEF(VT); + return DAG.getConstant(0, SDLoc(N), VT); if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) return Res; @@ -10718,8 +11156,9 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + // zext_vector_inreg(undef) = 0 because the top bits will be zero. if (N0.isUndef()) - return DAG.getUNDEF(VT); + return DAG.getConstant(0, SDLoc(N), VT); if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) return Res; @@ -10795,13 +11234,12 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue EltNo = N0->getOperand(1); if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); - EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); SDLoc DL(N); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy, DAG.getBitcast(NVT, N0.getOperand(0)), - DAG.getConstant(Index, DL, IndexTy)); + DAG.getVectorIdxConstant(Index, DL)); } } @@ -10839,7 +11277,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // Attempt to pre-truncate BUILD_VECTOR sources. 
if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations && - TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) { + TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) && + // Avoid creating illegal types if running after type legalizer. + (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) { SDLoc DL(N); EVT SVT = VT.getScalarType(); SmallVector<SDValue, 8> TruncOps; @@ -10968,10 +11408,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) { SDLoc SL(N); - EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1; return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc, - DAG.getConstant(Idx, SL, IdxVT)); + DAG.getVectorIdxConstant(Idx, SL)); } } @@ -11071,14 +11510,14 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { unsigned LD1Bytes = LD1VT.getStoreSize(); if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) { - unsigned Align = LD1->getAlignment(); - unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( + Align Alignment = LD1->getAlign(); + Align NewAlign = DAG.getDataLayout().getABITypeAlign( VT.getTypeForEVT(*DAG.getContext())); - if (NewAlign <= Align && + if (NewAlign <= Alignment && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), - LD1->getPointerInfo(), Align); + LD1->getPointerInfo(), Alignment); } return SDValue(); @@ -11396,6 +11835,20 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { return CombineConsecutiveLoads(N, VT); } +SDValue DAGCombiner::visitFREEZE(SDNode *N) { + SDValue N0 = N->getOperand(0); + + // (freeze (freeze x)) -> (freeze x) + if (N0.getOpcode() == ISD::FREEZE) + return N0; + + // If the input is a constant, return it. + if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) + return N0; + + return SDValue(); +} + /// We know that BV is a build_vector node with Constant, ConstantFP or Undef /// operands. DstEltVT indicates the destination element value type. SDValue DAGCombiner:: @@ -11526,7 +11979,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N)); + bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N)); // Floating-point multiply-add without intermediate rounding. bool HasFMA = @@ -11539,13 +11992,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { SDNodeFlags Flags = N->getFlags(); bool CanFuse = Options.UnsafeFPMath || isContractable(N); + bool CanReassociate = + Options.UnsafeFPMath || N->getFlags().hasAllowReassociation(); bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast || CanFuse || HasFMAD); // If the addition is not contractable, do not combine. if (!AllowFusionGlobally && !isContractable(N)) return SDValue(); - const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo(); if (STI && STI->generateFMAsInMachineCombiner(OptLevel)) return SDValue(); @@ -11580,6 +12034,30 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { N1.getOperand(0), N1.getOperand(1), N0, Flags); } + // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E) + // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E) + // This requires reassociation because it changes the order of operations. 
+ SDValue FMA, E; + if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode && + N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() && + N0.getOperand(2).hasOneUse()) { + FMA = N0; + E = N1; + } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() && + N1.getOperand(2).hasOneUse()) { + FMA = N1; + E = N0; + } + if (FMA && E) { + SDValue A = FMA.getOperand(0); + SDValue B = FMA.getOperand(1); + SDValue C = FMA.getOperand(2).getOperand(0); + SDValue D = FMA.getOperand(2).getOperand(1); + SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E, Flags); + return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE, Flags); + } + // Look through FP_EXTEND nodes to do more combining. // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) @@ -11613,33 +12091,6 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // More folding opportunities when target permits. if (Aggressive) { - // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) - if (CanFuse && - N0.getOpcode() == PreferredFusedOpcode && - N0.getOperand(2).getOpcode() == ISD::FMUL && - N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(PreferredFusedOpcode, SL, VT, - N0.getOperand(2).getOperand(0), - N0.getOperand(2).getOperand(1), - N1, Flags), Flags); - } - - // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) - if (CanFuse && - N1->getOpcode() == PreferredFusedOpcode && - N1.getOperand(2).getOpcode() == ISD::FMUL && - N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, - N1.getOperand(0), N1.getOperand(1), - DAG.getNode(PreferredFusedOpcode, SL, VT, - N1.getOperand(2).getOperand(0), - N1.getOperand(2).getOperand(1), - N0, Flags), Flags); - } - - // fold (fadd (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y, (fma (fpext u), (fpext v), z)) auto FoldFAddFMAFPExtFMul = [&] ( @@ -11743,7 +12194,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N)); + bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N)); // Floating-point multiply-add without intermediate rounding. bool HasFMA = @@ -11763,13 +12214,13 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (!AllowFusionGlobally && !isContractable(N)) return SDValue(); - const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo(); if (STI && STI->generateFMAsInMachineCombiner(OptLevel)) return SDValue(); // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros(); // Is the node an FMUL and contractable either due to global flags or // SDNodeFlags. 
@@ -11780,19 +12231,43 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { }; // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) - if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FNEG, SL, VT, N1), Flags); - } + auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) { + if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0), + XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z), + Flags); + } + return SDValue(); + }; // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) // Note: Commutes FSUB operands. - if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, - N1.getOperand(0)), - N1.getOperand(1), N0, Flags); + auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) { + if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)), + YZ.getOperand(1), X, Flags); + } + return SDValue(); + }; + + // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. + if (isContractableFMUL(N0) && isContractableFMUL(N1) && + (N0.getNode()->use_size() > N1.getNode()->use_size())) { + // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b)) + if (SDValue V = tryToFoldXSubYZ(N0, N1)) + return V; + // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d))) + if (SDValue V = tryToFoldXYSubZ(N0, N1)) + return V; + } else { + // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) + if (SDValue V = tryToFoldXYSubZ(N0, N1)) + return V; + // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) + if (SDValue V = tryToFoldXSubYZ(N0, N1)) + return V; } // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) @@ -11909,7 +12384,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // -> (fma (fneg y), z, (fma (fneg u), v, x)) if (CanFuse && N1.getOpcode() == PreferredFusedOpcode && isContractableFMUL(N1.getOperand(2)) && - N1->hasOneUse()) { + N1->hasOneUse() && NoSignedZero) { SDValue N20 = N1.getOperand(2).getOperand(0); SDValue N21 = N1.getOperand(2).getOperand(1); return DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -12062,7 +12537,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { // Floating-point multiply-add with intermediate rounding. This can result // in a less precise result due to the changed rounding order. bool HasFMAD = Options.UnsafeFPMath && - (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + (LegalOperations && TLI.isFMADLegal(DAG, N)); // No valid opcode, do not combine. 
if (!HasFMAD && !HasFMA) @@ -12139,6 +12614,9 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; const SDNodeFlags Flags = N->getFlags(); + if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) + return R; + // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) @@ -12162,18 +12640,16 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { return NewSel; // fold (fadd A, (fneg B)) -> (fsub A, B) - if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize) == 2) - return DAG.getNode( - ISD::FSUB, DL, VT, N0, - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); + if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) + if (SDValue NegN1 = TLI.getCheaperNegatedExpression( + N1, DAG, LegalOperations, ForCodeSize)) + return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1, Flags); // fold (fadd (fneg A), B) -> (fsub B, A) - if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize) == 2) - return DAG.getNode( - ISD::FSUB, DL, VT, N1, - TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), Flags); + if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) + if (SDValue NegN0 = TLI.getCheaperNegatedExpression( + N0, DAG, LegalOperations, ForCodeSize)) + return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0, Flags); auto isFMulNegTwo = [](SDValue FMul) { if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL) @@ -12318,6 +12794,9 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; const SDNodeFlags Flags = N->getFlags(); + if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) + return R; + // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) @@ -12352,8 +12831,9 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N0CFP && N0CFP->isZero()) { if (N0CFP->isNegative() || (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { - if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize)) - return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + if (SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize)) + return NegN1; if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); } @@ -12371,10 +12851,9 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // fold (fsub A, (fneg B)) -> (fadd A, B) - if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize)) - return DAG.getNode( - ISD::FADD, DL, VT, N0, - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); + if (SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize)) + return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1, Flags); // FSUB -> FMA combines: if (SDValue Fused = visitFSUBForFMACombine(N)) { @@ -12385,21 +12864,6 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { return SDValue(); } -/// Return true if both inputs are at least as cheap in negated form and at -/// least one input is strictly cheaper in negated form. -bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) { - if (char LHSNeg = - TLI.isNegatibleForFree(X, DAG, LegalOperations, ForCodeSize)) - if (char RHSNeg = - TLI.isNegatibleForFree(Y, DAG, LegalOperations, ForCodeSize)) - // Both negated operands are at least as cheap as their counterparts. 
- // Check to see if at least one is cheaper negated. - if (LHSNeg == 2 || RHSNeg == 2) - return true; - - return false; -} - SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -12410,6 +12874,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; const SDNodeFlags Flags = N->getFlags(); + if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) + return R; + // fold vector ops if (VT.isVector()) { // This just handles C1 * C2 for vectors. Other vector folds are below. @@ -12471,13 +12938,18 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { return DAG.getNode(ISD::FNEG, DL, VT, N0); // -N0 * -N1 --> N0 * N1 - if (isCheaperToUseNegatedFPOps(N0, N1)) { - SDValue NegN0 = - TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); - SDValue NegN1 = - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + TargetLowering::NegatibleCost CostN0 = + TargetLowering::NegatibleCost::Expensive; + TargetLowering::NegatibleCost CostN1 = + TargetLowering::NegatibleCost::Expensive; + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); + if (NegN0 && NegN1 && + (CostN0 == TargetLowering::NegatibleCost::Cheaper || + CostN1 == TargetLowering::NegatibleCost::Cheaper)) return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags); - } // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X)) // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X) @@ -12556,13 +13028,18 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { } // (-N0 * -N1) + N2 --> (N0 * N1) + N2 - if (isCheaperToUseNegatedFPOps(N0, N1)) { - SDValue NegN0 = - TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); - SDValue NegN1 = - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + TargetLowering::NegatibleCost CostN0 = + TargetLowering::NegatibleCost::Expensive; + TargetLowering::NegatibleCost CostN1 = + TargetLowering::NegatibleCost::Expensive; + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); + if (NegN0 && NegN1 && + (CostN0 == TargetLowering::NegatibleCost::Cheaper || + CostN1 == TargetLowering::NegatibleCost::Cheaper)) return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags); - } if (UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) @@ -12648,13 +13125,10 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z)) // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z)) - if (!TLI.isFNegFree(VT) && - TLI.isNegatibleForFree(SDValue(N, 0), DAG, LegalOperations, - ForCodeSize) == 2) - return DAG.getNode(ISD::FNEG, DL, VT, - TLI.getNegatedExpression(SDValue(N, 0), DAG, - LegalOperations, ForCodeSize), - Flags); + if (!TLI.isFNegFree(VT)) + if (SDValue Neg = TLI.getCheaperNegatedExpression( + SDValue(N, 0), DAG, LegalOperations, ForCodeSize)) + return DAG.getNode(ISD::FNEG, DL, VT, Neg, Flags); return SDValue(); } @@ -12671,7 +13145,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { // that only minsize should restrict this. 
bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath; const SDNodeFlags Flags = N->getFlags(); - if (!UnsafeMath && !Flags.hasAllowReciprocal()) + if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal())) return SDValue(); // Skip if current node is a reciprocal/fneg-reciprocal. @@ -12742,6 +13216,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; SDNodeFlags Flags = N->getFlags(); + if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) + return R; + // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) @@ -12801,37 +13278,62 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } else if (N1.getOpcode() == ISD::FMUL) { // Look through an FMUL. Even though this won't remove the FDIV directly, // it's still worthwhile to get rid of the FSQRT if possible. - SDValue SqrtOp; - SDValue OtherOp; + SDValue Sqrt, Y; if (N1.getOperand(0).getOpcode() == ISD::FSQRT) { - SqrtOp = N1.getOperand(0); - OtherOp = N1.getOperand(1); + Sqrt = N1.getOperand(0); + Y = N1.getOperand(1); } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) { - SqrtOp = N1.getOperand(1); - OtherOp = N1.getOperand(0); + Sqrt = N1.getOperand(1); + Y = N1.getOperand(0); } - if (SqrtOp.getNode()) { + if (Sqrt.getNode()) { + // If the other multiply operand is known positive, pull it into the + // sqrt. That will eliminate the division if we convert to an estimate: + // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z) + // TODO: Also fold the case where A == Z (fabs is missing). + if (Flags.hasAllowReassociation() && N1.hasOneUse() && + N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse() && + Y.getOpcode() == ISD::FABS && Y.hasOneUse()) { + SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, Y.getOperand(0), + Y.getOperand(0), Flags); + SDValue AAZ = + DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0), Flags); + if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags)) + return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt, Flags); + + // Estimate creation failed. Clean up speculatively created nodes. + recursivelyDeleteUnusedNodes(AAZ.getNode()); + } + // We found a FSQRT, so try to make this fold: - // x / (y * sqrt(z)) -> x * (rsqrt(z) / y) - if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) { - RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags); - AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); + // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y) + if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) { + SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y, Flags); + AddToWorklist(Div.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N0, Div, Flags); } } } // Fold into a reciprocal estimate and multiply instead of a real divide. 
- if (SDValue RV = BuildDivEstimate(N0, N1, Flags)) - return RV; + if (Options.NoInfsFPMath || Flags.hasNoInfs()) + if (SDValue RV = BuildDivEstimate(N0, N1, Flags)) + return RV; } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) - if (isCheaperToUseNegatedFPOps(N0, N1)) - return DAG.getNode( - ISD::FDIV, SDLoc(N), VT, - TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); + TargetLowering::NegatibleCost CostN0 = + TargetLowering::NegatibleCost::Expensive; + TargetLowering::NegatibleCost CostN1 = + TargetLowering::NegatibleCost::Expensive; + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); + if (NegN0 && NegN1 && + (CostN0 == TargetLowering::NegatibleCost::Cheaper || + CostN1 == TargetLowering::NegatibleCost::Cheaper)) + return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1, Flags); return SDValue(); } @@ -12842,6 +13344,10 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + SDNodeFlags Flags = N->getFlags(); + + if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) + return R; // fold (frem c1, c2) -> fmod(c1,c2) if (N0CFP && N1CFP) @@ -12855,8 +13361,12 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { SDValue DAGCombiner::visitFSQRT(SDNode *N) { SDNodeFlags Flags = N->getFlags(); - if (!DAG.getTarget().Options.UnsafeFPMath && - !Flags.hasApproximateFuncs()) + const TargetOptions &Options = DAG.getTarget().Options; + + // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as: + // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN + if ((!Options.UnsafeFPMath && !Flags.hasApproximateFuncs()) || + (!Options.NoInfsFPMath && !Flags.hasNoInfs())) return SDValue(); SDValue N0 = N->getOperand(0); @@ -13068,33 +13578,24 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { } // The next optimizations are desirable only if SELECT_CC can be lowered. 
- if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { - // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) - if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 && - !VT.isVector() && - (!LegalOperations || - TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { - SDLoc DL(N); - SDValue Ops[] = - { N0.getOperand(0), N0.getOperand(1), - DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), - N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); - } + // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0) + if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 && + !VT.isVector() && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { + SDLoc DL(N); + return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT), + DAG.getConstantFP(0.0, DL, VT)); + } - // fold (sint_to_fp (zext (setcc x, y, cc))) -> - // (select_cc x, y, 1.0, 0.0,, cc) - if (N0.getOpcode() == ISD::ZERO_EXTEND && - N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() && - (!LegalOperations || - TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { - SDLoc DL(N); - SDValue Ops[] = - { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), - DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), - N0.getOperand(0).getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); - } + // fold (sint_to_fp (zext (setcc x, y, cc))) -> + // (select (setcc x, y, cc), 1.0, 0.0) + if (N0.getOpcode() == ISD::ZERO_EXTEND && + N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { + SDLoc DL(N); + return DAG.getSelect(DL, VT, N0.getOperand(0), + DAG.getConstantFP(1.0, DL, VT), + DAG.getConstantFP(0.0, DL, VT)); } if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI)) @@ -13128,19 +13629,12 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); } - // The next optimizations are desirable only if SELECT_CC can be lowered. 
- if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { - // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) - if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && - (!LegalOperations || - TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { - SDLoc DL(N); - SDValue Ops[] = - { N0.getOperand(0), N0.getOperand(1), - DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), - N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); - } + // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0) + if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { + SDLoc DL(N); + return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT), + DAG.getConstantFP(0.0, DL, VT)); } if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI)) @@ -13385,12 +13879,14 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); - if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize)) - return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + if (SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize)) + return NegN0; - // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 FIXME: This is - // duplicated in isNegatibleForFree, but isNegatibleForFree doesn't know it - // was called from a context with a nsz flag if the input fsub does not. + // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 + // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't + // know it was called from a context with a nsz flag if the input fsub does + // not. if (N0.getOpcode() == ISD::FSUB && (DAG.getTarget().Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) { @@ -13546,8 +14042,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { } if (N1.hasOneUse()) { + // rebuildSetCC calls visitXor which may change the Chain when there is a + // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes. + HandleSDNode ChainHandle(Chain); if (SDValue NewN1 = rebuildSetCC(N1)) - return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2); + return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, + ChainHandle.getValue(), NewN1, N2); } return SDValue(); @@ -13599,8 +14099,8 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) { } } - // Transform br(xor(x, y)) -> br(x != y) - // Transform br(xor(xor(x,y), 1)) -> br (x == y) + // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne)) + // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq)) if (N.getOpcode() == ISD::XOR) { // Because we may call this on a speculatively constructed // SimplifiedSetCC Node, we need to simplify this node first. 
@@ -13624,16 +14124,17 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) { if (N.getOpcode() != ISD::XOR) return N; - SDNode *TheXor = N.getNode(); - - SDValue Op0 = TheXor->getOperand(0); - SDValue Op1 = TheXor->getOperand(1); + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { bool Equal = false; - if (isOneConstant(Op0) && Op0.hasOneUse() && - Op0.getOpcode() == ISD::XOR) { - TheXor = Op0.getNode(); + // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq)) + if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR && + Op0.getValueType() == MVT::i1) { + N = Op0; + Op0 = N->getOperand(0); + Op1 = N->getOperand(1); Equal = true; } @@ -13641,7 +14142,7 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) { if (LegalTypes) SetCCVT = getSetCCResultType(SetCCVT); // Replace the uses of XOR with SETCC - return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1, + return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, Equal ? ISD::SETEQ : ISD::SETNE); } } @@ -14001,118 +14502,142 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { return true; } -/// Try to combine a load/store with a add/sub of the base pointer node into a -/// post-indexed load/store. The transformation folded the add/subtract into the -/// new indexed load/store effectively and all of its uses are redirected to the -/// new load/store. -bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { - if (Level < AfterLegalizeDAG) +static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, + SDValue &BasePtr, SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG, + const TargetLowering &TLI) { + if (PtrUse == N || + (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB)) return false; - bool IsLoad = true; - bool IsMasked = false; - SDValue Ptr; - if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad, IsMasked, - Ptr, TLI)) + if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG)) return false; - if (Ptr.getNode()->hasOneUse()) + // Don't create a indexed load / store with zero offset. + if (isNullConstant(Offset)) return false; - for (SDNode *Op : Ptr.getNode()->uses()) { - if (Op == N || - (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)) - continue; + if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr)) + return false; - SDValue BasePtr; - SDValue Offset; - ISD::MemIndexedMode AM = ISD::UNINDEXED; - if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) { - // Don't create a indexed load / store with zero offset. - if (isNullConstant(Offset)) - continue; + SmallPtrSet<const SDNode *, 32> Visited; + for (SDNode *Use : BasePtr.getNode()->uses()) { + if (Use == Ptr.getNode()) + continue; - // Try turning it into a post-indexed load / store except when - // 1) All uses are load / store ops that use it as base ptr (and - // it may be folded as addressing mmode). - // 2) Op must be independent of N, i.e. Op is neither a predecessor - // nor a successor of N. Otherwise, if Op is folded that would - // create a cycle. + // No if there's a later user which could perform the index instead. 
+ if (isa<MemSDNode>(Use)) { + bool IsLoad = true; + bool IsMasked = false; + SDValue OtherPtr; + if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad, + IsMasked, OtherPtr, TLI)) { + SmallVector<const SDNode *, 2> Worklist; + Worklist.push_back(Use); + if (SDNode::hasPredecessorHelper(N, Visited, Worklist)) + return false; + } + } - if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr)) - continue; + // If all the uses are load / store addresses, then don't do the + // transformation. + if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) { + for (SDNode *UseUse : Use->uses()) + if (canFoldInAddressingMode(Use, UseUse, DAG, TLI)) + return false; + } + } + return true; +} - // Check for #1. - bool TryNext = false; - for (SDNode *Use : BasePtr.getNode()->uses()) { - if (Use == Ptr.getNode()) - continue; +static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, + bool &IsMasked, SDValue &Ptr, + SDValue &BasePtr, SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG, + const TargetLowering &TLI) { + if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad, + IsMasked, Ptr, TLI) || + Ptr.getNode()->hasOneUse()) + return nullptr; + + // Try turning it into a post-indexed load / store except when + // 1) All uses are load / store ops that use it as base ptr (and + // it may be folded as addressing mmode). + // 2) Op must be independent of N, i.e. Op is neither a predecessor + // nor a successor of N. Otherwise, if Op is folded that would + // create a cycle. + for (SDNode *Op : Ptr->uses()) { + // Check for #1. + if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI)) + continue; - // If all the uses are load / store addresses, then don't do the - // transformation. - if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) { - bool RealUse = false; - for (SDNode *UseUse : Use->uses()) { - if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) - RealUse = true; - } + // Check for #2. + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 8> Worklist; + // Ptr is predecessor to both N and Op. + Visited.insert(Ptr.getNode()); + Worklist.push_back(N); + Worklist.push_back(Op); + if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) && + !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) + return Op; + } + return nullptr; +} - if (!RealUse) { - TryNext = true; - break; - } - } - } +/// Try to combine a load/store with a add/sub of the base pointer node into a +/// post-indexed load/store. The transformation folded the add/subtract into the +/// new indexed load/store effectively and all of its uses are redirected to the +/// new load/store. +bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { + if (Level < AfterLegalizeDAG) + return false; - if (TryNext) - continue; + bool IsLoad = true; + bool IsMasked = false; + SDValue Ptr; + SDValue BasePtr; + SDValue Offset; + ISD::MemIndexedMode AM = ISD::UNINDEXED; + SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr, + Offset, AM, DAG, TLI); + if (!Op) + return false; - // Check for #2. - SmallPtrSet<const SDNode *, 32> Visited; - SmallVector<const SDNode *, 8> Worklist; - // Ptr is predecessor to both N and Op. - Visited.insert(Ptr.getNode()); - Worklist.push_back(N); - Worklist.push_back(Op); - if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) && - !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) { - SDValue Result; - if (!IsMasked) - Result = IsLoad ? 
DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, - Offset, AM) - : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), + SDValue Result; + if (!IsMasked) + Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, + Offset, AM) + : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), + BasePtr, Offset, AM); + else + Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), + BasePtr, Offset, AM) + : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM); - else - Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), - BasePtr, Offset, AM) - : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), - BasePtr, Offset, AM); - ++PostIndexedNodes; - ++NodesCombined; - LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); - dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); - dbgs() << '\n'); - WorklistRemover DeadNodes(*this); - if (IsLoad) { - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); - } else { - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1)); - } - - // Finally, since the node is now dead, remove it from the graph. - deleteAndRecombine(N); - - // Replace the uses of Use with uses of the updated base value. - DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), - Result.getValue(IsLoad ? 1 : 0)); - deleteAndRecombine(Op); - return true; - } - } + ++PostIndexedNodes; + ++NodesCombined; + LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); + dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); + dbgs() << '\n'); + WorklistRemover DeadNodes(*this); + if (IsLoad) { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); + } else { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1)); } - return false; + // Finally, since the node is now dead, remove it from the graph. + deleteAndRecombine(N); + + // Replace the uses of Use with uses of the updated base value. + DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), + Result.getValue(IsLoad ? 1 : 0)); + deleteAndRecombine(Op); + return true; } /// Return the base-pointer arithmetic from an indexed \p LD. @@ -14361,11 +14886,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Try to infer better alignment information than the load already has. if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) { - if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) { + if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) { + if (*Alignment > LD->getAlign() && + isAligned(*Alignment, LD->getSrcValueOffset())) { SDValue NewLoad = DAG.getExtLoad( LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr, - LD->getPointerInfo(), LD->getMemoryVT(), Align, + LD->getPointerInfo(), LD->getMemoryVT(), *Alignment, LD->getMemOperand()->getFlags(), LD->getAAInfo()); // NewLoad will always be N as we are only refining the alignment assert(NewLoad.getNode() == N); @@ -14562,11 +15088,11 @@ struct LoadedSlice { } /// Get the alignment of the load used for this slice. 
- unsigned getAlignment() const { - unsigned Alignment = Origin->getAlignment(); + Align getAlign() const { + Align Alignment = Origin->getAlign(); uint64_t Offset = getOffsetFromBase(); if (Offset != 0) - Alignment = MinAlign(Alignment, Alignment + Offset); + Alignment = commonAlignment(Alignment, Alignment.value() + Offset); return Alignment; } @@ -14662,8 +15188,8 @@ struct LoadedSlice { // Create the load for the slice. SDValue LastInst = DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr, - Origin->getPointerInfo().getWithOffset(Offset), - getAlignment(), Origin->getMemOperand()->getFlags()); + Origin->getPointerInfo().getWithOffset(Offset), getAlign(), + Origin->getMemOperand()->getFlags()); // If the final type is not the same as the loaded type, this means that // we have to pad with zero. Create a zero extend for that. EVT FinalType = Inst->getValueType(0); @@ -14704,10 +15230,10 @@ struct LoadedSlice { // Check if it will be merged with the load. // 1. Check the alignment constraint. - unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment( + Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign( ResVT.getTypeForEVT(*DAG->getContext())); - if (RequiredAlignment > getAlignment()) + if (RequiredAlignment > getAlign()) return false; // 2. Check that the load is a legal operation for that type. @@ -14793,14 +15319,14 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, continue; // Check if the target supplies paired loads for this type. - unsigned RequiredAlignment = 0; + Align RequiredAlignment; if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) { // move to the next pair, this type is hopeless. Second = nullptr; continue; } // Check if we meet the alignment requirement. - if (RequiredAlignment > First->getAlignment()) + if (First->getAlign() < RequiredAlignment) continue; // Check that both loads are next to each other in memory. @@ -14873,6 +15399,12 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { !LD->getValueType(0).isInteger()) return false; + // The algorithm to split up a load of a scalable vector into individual + // elements currently requires knowing the length of the loaded type, + // so will need adjusting to work on scalable vectors. + if (LD->getValueType(0).isScalableVector()) + return false; + // Keep track of already used bits to detect overlapping values. // In that case, we will just abort the transformation. APInt UsedBits(LD->getValueSizeInBits(0), 0); @@ -15117,7 +15649,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { // Y is known to provide just those bytes. If so, we try to replace the // load + replace + store sequence with a single (narrower) store, which makes // the load dead. 
- if (Opc == ISD::OR) { + if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) { std::pair<unsigned, unsigned> MaskedLoad; MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); if (MaskedLoad.first) @@ -15133,6 +15665,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { return NewST; } + if (!EnableReduceLoadOpStoreWidth) + return SDValue(); + if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || Value.getOperand(1).getOpcode() != ISD::Constant) return SDValue(); @@ -15186,9 +15721,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { if (DAG.getDataLayout().isBigEndian()) PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; - unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); + Align NewAlign = commonAlignment(LD->getAlign(), PtrOff); Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); - if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy)) + if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy)) return SDValue(); SDValue NewPtr = DAG.getMemBasePlusOffset(Ptr, PtrOff, SDLoc(LD)); @@ -15234,17 +15769,24 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { ST->getPointerInfo().getAddrSpace() != 0) return SDValue(); - EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + TypeSize VTSize = VT.getSizeInBits(); + + // We don't know the size of scalable types at compile time so we cannot + // create an integer of the equivalent size. + if (VTSize.isScalable()) + return SDValue(); + + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize()); if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || !TLI.isOperationLegal(ISD::STORE, IntVT) || !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) return SDValue(); - unsigned LDAlign = LD->getAlignment(); - unsigned STAlign = ST->getAlignment(); + Align LDAlign = LD->getAlign(); + Align STAlign = ST->getAlign(); Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); - unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy); + Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy); if (LDAlign < ABIAlign || STAlign < ABIAlign) return SDValue(); @@ -15361,7 +15903,7 @@ SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes, return DAG.getTokenFactor(StoreDL, Chains); } -bool DAGCombiner::MergeStoresOfConstantsOrVecElts( +bool DAGCombiner::mergeStoresOfConstantsOrVecElts( SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores, bool IsConstantSrc, bool UseVector, bool UseTrunc) { // Make sure we have something to merge. @@ -15535,14 +16077,12 @@ void DAGCombiner::getStoreMergeCandidates( if (BasePtr.getBase().isUndef()) return; - bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val); - bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT || - Val.getOpcode() == ISD::EXTRACT_SUBVECTOR); - bool IsLoadSrc = isa<LoadSDNode>(Val); + StoreSource StoreSrc = getStoreSource(Val); + assert(StoreSrc != StoreSource::Unknown && "Expected known source for store"); BaseIndexOffset LBasePtr; // Match on loadbaseptr if relevant. EVT LoadVT; - if (IsLoadSrc) { + if (StoreSrc == StoreSource::Load) { auto *Ld = cast<LoadSDNode>(Val); LBasePtr = BaseIndexOffset::match(Ld, DAG); LoadVT = Ld->getMemoryVT(); @@ -15570,7 +16110,7 @@ void DAGCombiner::getStoreMergeCandidates( // Allow merging constants of different types as integers. bool NoTypeMatch = (MemVT.isInteger()) ? 
!MemVT.bitsEq(Other->getMemoryVT()) : Other->getMemoryVT() != MemVT; - if (IsLoadSrc) { + if (StoreSrc == StoreSource::Load) { if (NoTypeMatch) return false; // The Load's Base Ptr must also match @@ -15594,13 +16134,13 @@ void DAGCombiner::getStoreMergeCandidates( } else return false; } - if (IsConstantSrc) { + if (StoreSrc == StoreSource::Constant) { if (NoTypeMatch) return false; if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC))) return false; } - if (IsExtractVecSrc) { + if (StoreSrc == StoreSource::Extract) { // Do not merge truncated stores here. if (Other->isTruncatingStore()) return false; @@ -15741,77 +16281,22 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( return true; } -bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { - if (OptLevel == CodeGenOpt::None || !EnableStoreMerging) - return false; - - EVT MemVT = St->getMemoryVT(); - int64_t ElementSizeBytes = MemVT.getStoreSize(); - unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; - - if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits) - return false; - - bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute( - Attribute::NoImplicitFloat); - - // This function cannot currently deal with non-byte-sized memory sizes. - if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits()) - return false; - - if (!MemVT.isSimple()) - return false; - - // Perform an early exit check. Do not bother looking at stored values that - // are not constants, loads, or extracted vector elements. - SDValue StoredVal = peekThroughBitcasts(St->getValue()); - bool IsLoadSrc = isa<LoadSDNode>(StoredVal); - bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) || - isa<ConstantFPSDNode>(StoredVal); - bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT || - StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR); - bool IsNonTemporalStore = St->isNonTemporal(); - bool IsNonTemporalLoad = - IsLoadSrc && cast<LoadSDNode>(StoredVal)->isNonTemporal(); - - if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc) - return false; - - SmallVector<MemOpLink, 8> StoreNodes; - SDNode *RootNode; - // Find potential store merge candidates by searching through chain sub-DAG - getStoreMergeCandidates(St, StoreNodes, RootNode); - - // Check if there is anything to merge. - if (StoreNodes.size() < 2) - return false; - - // Sort the memory operands according to their distance from the - // base pointer. - llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) { - return LHS.OffsetFromBase < RHS.OffsetFromBase; - }); - - // Store Merge attempts to merge the lowest stores. This generally - // works out as if successful, as the remaining stores are checked - // after the first collection of stores is merged. However, in the - // case that a non-mergeable store is found first, e.g., {p[-2], - // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent - // mergeable cases. To prevent this, we prune such stores from the - // front of StoreNodes here. - - bool RV = false; - while (StoreNodes.size() > 1) { +unsigned +DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes, + int64_t ElementSizeBytes) const { + while (true) { + // Find a store past the width of the first store. size_t StartIdx = 0; while ((StartIdx + 1 < StoreNodes.size()) && StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes != - StoreNodes[StartIdx + 1].OffsetFromBase) + StoreNodes[StartIdx + 1].OffsetFromBase) ++StartIdx; // Bail if we don't have enough candidates to merge. 
if (StartIdx + 1 >= StoreNodes.size()) - return RV; + return 0; + // Trim stores that overlapped with the first store. if (StartIdx) StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx); @@ -15827,302 +16312,345 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { break; NumConsecutiveStores = i + 1; } + if (NumConsecutiveStores > 1) + return NumConsecutiveStores; - if (NumConsecutiveStores < 2) { - StoreNodes.erase(StoreNodes.begin(), - StoreNodes.begin() + NumConsecutiveStores); - continue; - } - - // The node with the lowest store address. - LLVMContext &Context = *DAG.getContext(); - const DataLayout &DL = DAG.getDataLayout(); - - // Store the constants into memory as one consecutive store. - if (IsConstantSrc) { - while (NumConsecutiveStores >= 2) { - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); - unsigned LastLegalType = 1; - unsigned LastLegalVectorType = 1; - bool LastIntegerTrunc = false; - bool NonZero = false; - unsigned FirstZeroAfterNonZero = NumConsecutiveStores; - for (unsigned i = 0; i < NumConsecutiveStores; ++i) { - StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode); - SDValue StoredVal = ST->getValue(); - bool IsElementZero = false; - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) - IsElementZero = C->isNullValue(); - else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) - IsElementZero = C->getConstantFPValue()->isNullValue(); - if (IsElementZero) { - if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores) - FirstZeroAfterNonZero = i; - } - NonZero |= !IsElementZero; - - // Find a legal type for the constant store. - unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; - EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); - bool IsFast = false; + // There are no consecutive stores at the start of the list. + // Remove the first store and try again. + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1); + } +} - // Break early when size is too large to be legal. - if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) - break; +bool DAGCombiner::tryStoreMergeOfConstants( + SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores, + EVT MemVT, SDNode *RootNode, bool AllowVectors) { + LLVMContext &Context = *DAG.getContext(); + const DataLayout &DL = DAG.getDataLayout(); + int64_t ElementSizeBytes = MemVT.getStoreSize(); + unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; + bool MadeChange = false; + + // Store the constants into memory as one consecutive store. 
+ while (NumConsecutiveStores >= 2) { + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + unsigned LastLegalType = 1; + unsigned LastLegalVectorType = 1; + bool LastIntegerTrunc = false; + bool NonZero = false; + unsigned FirstZeroAfterNonZero = NumConsecutiveStores; + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode); + SDValue StoredVal = ST->getValue(); + bool IsElementZero = false; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) + IsElementZero = C->isNullValue(); + else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) + IsElementZero = C->getConstantFPValue()->isNullValue(); + if (IsElementZero) { + if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores) + FirstZeroAfterNonZero = i; + } + NonZero |= !IsElementZero; - if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstInChain->getMemOperand(), &IsFast) && - IsFast) { - LastIntegerTrunc = false; - LastLegalType = i + 1; - // Or check whether a truncstore is legal. - } else if (TLI.getTypeAction(Context, StoreTy) == - TargetLowering::TypePromoteInteger) { - EVT LegalizedStoredValTy = - TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); - if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstInChain->getMemOperand(), - &IsFast) && - IsFast) { - LastIntegerTrunc = true; - LastLegalType = i + 1; - } - } + // Find a legal type for the constant store. + unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; + EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); + bool IsFast = false; - // We only use vectors if the constant is known to be zero or the - // target allows it and the function is not marked with the - // noimplicitfloat attribute. - if ((!NonZero || - TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && - !NoVectors) { - // Find a legal type for the vector store. - unsigned Elts = (i + 1) * NumMemElts; - EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); - if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && - TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && - TLI.allowsMemoryAccess( - Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) && - IsFast) - LastLegalVectorType = i + 1; - } - } + // Break early when size is too large to be legal. + if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) + break; - bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; - unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; - - // Check if we found a legal integer type that creates a meaningful - // merge. - if (NumElem < 2) { - // We know that candidate stores are in order and of correct - // shape. While there is no mergeable sequence from the - // beginning one may start later in the sequence. The only - // reason a merge of size N could have failed where another of - // the same size would not have, is if the alignment has - // improved or we've dropped a non-zero value. Drop as many - // candidates as we can here. 
- unsigned NumSkip = 1; - while ( - (NumSkip < NumConsecutiveStores) && - (NumSkip < FirstZeroAfterNonZero) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) - NumSkip++; - - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); - NumConsecutiveStores -= NumSkip; - continue; + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFast) && + IsFast) { + LastIntegerTrunc = false; + LastLegalType = i + 1; + // Or check whether a truncstore is legal. + } else if (TLI.getTypeAction(Context, StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValTy = + TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); + if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFast) && + IsFast) { + LastIntegerTrunc = true; + LastLegalType = i + 1; } + } - // Check that we can merge these candidates without causing a cycle. - if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, - RootNode)) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); - NumConsecutiveStores -= NumElem; - continue; - } + // We only use vectors if the constant is known to be zero or the + // target allows it and the function is not marked with the + // noimplicitfloat attribute. + if ((!NonZero || + TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && + AllowVectors) { + // Find a legal type for the vector store. + unsigned Elts = (i + 1) * NumMemElts; + EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); + if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && + TLI.allowsMemoryAccess(Context, DL, Ty, + *FirstInChain->getMemOperand(), &IsFast) && + IsFast) + LastLegalVectorType = i + 1; + } + } - RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true, - UseVector, LastIntegerTrunc); + bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors; + unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; + + // Check if we found a legal integer type that creates a meaningful + // merge. + if (NumElem < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have, is if the alignment has + // improved or we've dropped a non-zero value. Drop as many + // candidates as we can here. + unsigned NumSkip = 1; + while ((NumSkip < NumConsecutiveStores) && + (NumSkip < FirstZeroAfterNonZero) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + NumConsecutiveStores -= NumSkip; + continue; + } - // Remove merged stores for next iteration. - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); - NumConsecutiveStores -= NumElem; - } + // Check that we can merge these candidates without causing a cycle. 
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, + RootNode)) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; continue; } - // When extracting multiple vector elements, try to store them - // in one vector store rather than a sequence of scalar stores. - if (IsExtractVecSrc) { - // Loop on Consecutive Stores on success. - while (NumConsecutiveStores >= 2) { - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); - unsigned NumStoresToMerge = 1; - for (unsigned i = 0; i < NumConsecutiveStores; ++i) { - // Find a legal type for the vector store. - unsigned Elts = (i + 1) * NumMemElts; - EVT Ty = - EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); - bool IsFast; - - // Break early when size is too large to be legal. - if (Ty.getSizeInBits() > MaximumLegalStoreInBits) - break; + MadeChange |= mergeStoresOfConstantsOrVecElts( + StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc); - if (TLI.isTypeLegal(Ty) && - TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && - TLI.allowsMemoryAccess(Context, DL, Ty, - *FirstInChain->getMemOperand(), &IsFast) && - IsFast) - NumStoresToMerge = i + 1; - } + // Remove merged stores for next iteration. + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; + } + return MadeChange; +} - // Check if we found a legal integer type creating a meaningful - // merge. - if (NumStoresToMerge < 2) { - // We know that candidate stores are in order and of correct - // shape. While there is no mergeable sequence from the - // beginning one may start later in the sequence. The only - // reason a merge of size N could have failed where another of - // the same size would not have, is if the alignment has - // improved. Drop as many candidates as we can here. - unsigned NumSkip = 1; - while ( - (NumSkip < NumConsecutiveStores) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) - NumSkip++; - - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); - NumConsecutiveStores -= NumSkip; - continue; - } +bool DAGCombiner::tryStoreMergeOfExtracts( + SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores, + EVT MemVT, SDNode *RootNode) { + LLVMContext &Context = *DAG.getContext(); + const DataLayout &DL = DAG.getDataLayout(); + unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; + bool MadeChange = false; + + // Loop on Consecutive Stores on success. + while (NumConsecutiveStores >= 2) { + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + unsigned NumStoresToMerge = 1; + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + // Find a legal type for the vector store. + unsigned Elts = (i + 1) * NumMemElts; + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); + bool IsFast = false; - // Check that we can merge these candidates without causing a cycle. - if (!checkMergeStoreCandidatesForDependencies( - StoreNodes, NumStoresToMerge, RootNode)) { - StoreNodes.erase(StoreNodes.begin(), - StoreNodes.begin() + NumStoresToMerge); - NumConsecutiveStores -= NumStoresToMerge; - continue; - } + // Break early when size is too large to be legal. 
+ if (Ty.getSizeInBits() > MaximumLegalStoreInBits) + break; - RV |= MergeStoresOfConstantsOrVecElts( - StoreNodes, MemVT, NumStoresToMerge, false, true, false); + if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && + TLI.allowsMemoryAccess(Context, DL, Ty, + *FirstInChain->getMemOperand(), &IsFast) && + IsFast) + NumStoresToMerge = i + 1; + } + + // Check if we found a legal integer type creating a meaningful + // merge. + if (NumStoresToMerge < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have, is if the alignment has + // improved. Drop as many candidates as we can here. + unsigned NumSkip = 1; + while ((NumSkip < NumConsecutiveStores) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + NumConsecutiveStores -= NumSkip; + continue; + } - StoreNodes.erase(StoreNodes.begin(), - StoreNodes.begin() + NumStoresToMerge); - NumConsecutiveStores -= NumStoresToMerge; - } + // Check that we can merge these candidates without causing a cycle. + if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge, + RootNode)) { + StoreNodes.erase(StoreNodes.begin(), + StoreNodes.begin() + NumStoresToMerge); + NumConsecutiveStores -= NumStoresToMerge; continue; } - // Below we handle the case of multiple consecutive stores that - // come from multiple consecutive loads. We merge them into a single - // wide load and a single wide store. + MadeChange |= mergeStoresOfConstantsOrVecElts( + StoreNodes, MemVT, NumStoresToMerge, false, true, false); - // Look for load nodes which are used by the stored values. - SmallVector<MemOpLink, 8> LoadNodes; + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge); + NumConsecutiveStores -= NumStoresToMerge; + } + return MadeChange; +} - // Find acceptable loads. Loads need to have the same chain (token factor), - // must not be zext, volatile, indexed, and they must be consecutive. - BaseIndexOffset LdBasePtr; +bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, + unsigned NumConsecutiveStores, EVT MemVT, + SDNode *RootNode, bool AllowVectors, + bool IsNonTemporalStore, + bool IsNonTemporalLoad) { + LLVMContext &Context = *DAG.getContext(); + const DataLayout &DL = DAG.getDataLayout(); + int64_t ElementSizeBytes = MemVT.getStoreSize(); + unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; + bool MadeChange = false; - for (unsigned i = 0; i < NumConsecutiveStores; ++i) { - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - SDValue Val = peekThroughBitcasts(St->getValue()); - LoadSDNode *Ld = cast<LoadSDNode>(Val); - - BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG); - // If this is not the first ptr that we check. - int64_t LdOffset = 0; - if (LdBasePtr.getBase().getNode()) { - // The base ptr must be the same. - if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset)) - break; - } else { - // Check that all other base pointers are the same as this one. - LdBasePtr = LdPtr; - } + int64_t StartAddress = StoreNodes[0].OffsetFromBase; - // We found a potential memory operand to merge. - LoadNodes.push_back(MemOpLink(Ld, LdOffset)); + // Look for load nodes which are used by the stored values. 
+ SmallVector<MemOpLink, 8> LoadNodes; + + // Find acceptable loads. Loads need to have the same chain (token factor), + // must not be zext, volatile, indexed, and they must be consecutive. + BaseIndexOffset LdBasePtr; + + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + SDValue Val = peekThroughBitcasts(St->getValue()); + LoadSDNode *Ld = cast<LoadSDNode>(Val); + + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG); + // If this is not the first ptr that we check. + int64_t LdOffset = 0; + if (LdBasePtr.getBase().getNode()) { + // The base ptr must be the same. + if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset)) + break; + } else { + // Check that all other base pointers are the same as this one. + LdBasePtr = LdPtr; } - while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) { + // We found a potential memory operand to merge. + LoadNodes.push_back(MemOpLink(Ld, LdOffset)); + } + + while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) { + Align RequiredAlignment; + bool NeedRotate = false; + if (LoadNodes.size() == 2) { // If we have load/store pair instructions and we only have two values, // don't bother merging. - unsigned RequiredAlignment; - if (LoadNodes.size() == 2 && - TLI.hasPairedLoad(MemVT, RequiredAlignment) && - StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) { + if (TLI.hasPairedLoad(MemVT, RequiredAlignment) && + StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) { StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2); LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2); break; } - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); - LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); - unsigned FirstLoadAlign = FirstLoad->getAlignment(); - - // Scan the memory operations on the chain and find the first - // non-consecutive load memory address. These variables hold the index in - // the store node array. - - unsigned LastConsecutiveLoad = 1; - - // This variable refers to the size and not index in the array. - unsigned LastLegalVectorType = 1; - unsigned LastLegalIntegerType = 1; - bool isDereferenceable = true; - bool DoIntegerTruncate = false; - StartAddress = LoadNodes[0].OffsetFromBase; - SDValue FirstChain = FirstLoad->getChain(); - for (unsigned i = 1; i < LoadNodes.size(); ++i) { - // All loads must share the same chain. - if (LoadNodes[i].MemNode->getChain() != FirstChain) - break; + // If the loads are reversed, see if we can rotate the halves into place. + int64_t Offset0 = LoadNodes[0].OffsetFromBase; + int64_t Offset1 = LoadNodes[1].OffsetFromBase; + EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2); + if (Offset0 - Offset1 == ElementSizeBytes && + (hasOperation(ISD::ROTL, PairVT) || + hasOperation(ISD::ROTR, PairVT))) { + std::swap(LoadNodes[0], LoadNodes[1]); + NeedRotate = true; + } + } + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); - int64_t CurrAddress = LoadNodes[i].OffsetFromBase; - if (CurrAddress - StartAddress != (ElementSizeBytes * i)) - break; - LastConsecutiveLoad = i; + // Scan the memory operations on the chain and find the first + // non-consecutive load memory address. 
These variables hold the index in + // the store node array. + + unsigned LastConsecutiveLoad = 1; + + // This variable refers to the size and not index in the array. + unsigned LastLegalVectorType = 1; + unsigned LastLegalIntegerType = 1; + bool isDereferenceable = true; + bool DoIntegerTruncate = false; + StartAddress = LoadNodes[0].OffsetFromBase; + SDValue LoadChain = FirstLoad->getChain(); + for (unsigned i = 1; i < LoadNodes.size(); ++i) { + // All loads must share the same chain. + if (LoadNodes[i].MemNode->getChain() != LoadChain) + break; - if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable()) - isDereferenceable = false; + int64_t CurrAddress = LoadNodes[i].OffsetFromBase; + if (CurrAddress - StartAddress != (ElementSizeBytes * i)) + break; + LastConsecutiveLoad = i; - // Find a legal type for the vector store. - unsigned Elts = (i + 1) * NumMemElts; - EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); + if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable()) + isDereferenceable = false; - // Break early when size is too large to be legal. - if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) - break; + // Find a legal type for the vector store. + unsigned Elts = (i + 1) * NumMemElts; + EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); - bool IsFastSt, IsFastLd; - if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstInChain->getMemOperand(), &IsFastSt) && - IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstLoad->getMemOperand(), &IsFastLd) && - IsFastLd) { - LastLegalVectorType = i + 1; - } + // Break early when size is too large to be legal. + if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) + break; + + bool IsFastSt = false; + bool IsFastLd = false; + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFastSt) && + IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstLoad->getMemOperand(), &IsFastLd) && + IsFastLd) { + LastLegalVectorType = i + 1; + } - // Find a legal type for the integer store. - unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; - StoreTy = EVT::getIntegerVT(Context, SizeInBits); - if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + // Find a legal type for the integer store. + unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; + StoreTy = EVT::getIntegerVT(Context, SizeInBits); + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFastSt) && + IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstLoad->getMemOperand(), &IsFastLd) && + IsFastLd) { + LastLegalIntegerType = i + 1; + DoIntegerTruncate = false; + // Or check whether a truncstore and extload is legal. 
+ } else if (TLI.getTypeAction(Context, StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy); + if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) && + TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) && + TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstInChain->getMemOperand(), &IsFastSt) && IsFastSt && @@ -16130,149 +16658,225 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { *FirstLoad->getMemOperand(), &IsFastLd) && IsFastLd) { LastLegalIntegerType = i + 1; - DoIntegerTruncate = false; - // Or check whether a truncstore and extload is legal. - } else if (TLI.getTypeAction(Context, StoreTy) == - TargetLowering::TypePromoteInteger) { - EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy); - if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && - TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, - StoreTy) && - TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, - StoreTy) && - TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstInChain->getMemOperand(), - &IsFastSt) && - IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstLoad->getMemOperand(), &IsFastLd) && - IsFastLd) { - LastLegalIntegerType = i + 1; - DoIntegerTruncate = true; - } + DoIntegerTruncate = true; } } + } - // Only use vector types if the vector type is larger than the integer - // type. If they are the same, use integers. - bool UseVectorTy = - LastLegalVectorType > LastLegalIntegerType && !NoVectors; - unsigned LastLegalType = - std::max(LastLegalVectorType, LastLegalIntegerType); - - // We add +1 here because the LastXXX variables refer to location while - // the NumElem refers to array/index size. - unsigned NumElem = - std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); - NumElem = std::min(LastLegalType, NumElem); - - if (NumElem < 2) { - // We know that candidate stores are in order and of correct - // shape. While there is no mergeable sequence from the - // beginning one may start later in the sequence. The only - // reason a merge of size N could have failed where another of - // the same size would not have is if the alignment or either - // the load or store has improved. Drop as many candidates as we - // can here. - unsigned NumSkip = 1; - while ((NumSkip < LoadNodes.size()) && - (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) - NumSkip++; - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); - LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip); - NumConsecutiveStores -= NumSkip; - continue; - } + // Only use vector types if the vector type is larger than the integer + // type. If they are the same, use integers. + bool UseVectorTy = + LastLegalVectorType > LastLegalIntegerType && AllowVectors; + unsigned LastLegalType = + std::max(LastLegalVectorType, LastLegalIntegerType); + + // We add +1 here because the LastXXX variables refer to location while + // the NumElem refers to array/index size. 
+ unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); + NumElem = std::min(LastLegalType, NumElem); + unsigned FirstLoadAlign = FirstLoad->getAlignment(); + + if (NumElem < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have is if the alignment or either + // the load or store has improved. Drop as many candidates as we + // can here. + unsigned NumSkip = 1; + while ((NumSkip < LoadNodes.size()) && + (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip); + NumConsecutiveStores -= NumSkip; + continue; + } - // Check that we can merge these candidates without causing a cycle. - if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, - RootNode)) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); - LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); - NumConsecutiveStores -= NumElem; - continue; - } + // Check that we can merge these candidates without causing a cycle. + if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, + RootNode)) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; + continue; + } - // Find if it is better to use vectors or integers to load and store - // to memory. - EVT JointMemOpVT; - if (UseVectorTy) { - // Find a legal type for the vector store. - unsigned Elts = NumElem * NumMemElts; - JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); - } else { - unsigned SizeInBits = NumElem * ElementSizeBytes * 8; - JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); + // Find if it is better to use vectors or integers to load and store + // to memory. + EVT JointMemOpVT; + if (UseVectorTy) { + // Find a legal type for the vector store. + unsigned Elts = NumElem * NumMemElts; + JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); + } else { + unsigned SizeInBits = NumElem * ElementSizeBytes * 8; + JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); + } + + SDLoc LoadDL(LoadNodes[0].MemNode); + SDLoc StoreDL(StoreNodes[0].MemNode); + + // The merged loads are required to have the same incoming chain, so + // using the first's chain is acceptable. + + SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); + AddToWorklist(NewStoreChain.getNode()); + + MachineMemOperand::Flags LdMMOFlags = + isDereferenceable ? MachineMemOperand::MODereferenceable + : MachineMemOperand::MONone; + if (IsNonTemporalLoad) + LdMMOFlags |= MachineMemOperand::MONonTemporal; + + MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore + ? 
MachineMemOperand::MONonTemporal + : MachineMemOperand::MONone; + + SDValue NewLoad, NewStore; + if (UseVectorTy || !DoIntegerTruncate) { + NewLoad = DAG.getLoad( + JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags); + SDValue StoreOp = NewLoad; + if (NeedRotate) { + unsigned LoadWidth = ElementSizeBytes * 8 * 2; + assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) && + "Unexpected type for rotate-able load pair"); + SDValue RotAmt = + DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL); + // Target can convert to the identical ROTR if it does not have ROTL. + StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt); } + NewStore = DAG.getStore( + NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags); + } else { // This must be the truncstore/extload case + EVT ExtendedTy = + TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT); + NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, + FirstLoad->getChain(), FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), JointMemOpVT, + FirstLoadAlign, LdMMOFlags); + NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad, + FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), JointMemOpVT, + FirstInChain->getAlignment(), + FirstInChain->getMemOperand()->getFlags()); + } + + // Transfer chain users from old loads to the new load. + for (unsigned i = 0; i < NumElem; ++i) { + LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), + SDValue(NewLoad.getNode(), 1)); + } + + // Replace all stores with the new store. Recursively remove corresponding + // values if they are no longer used. + for (unsigned i = 0; i < NumElem; ++i) { + SDValue Val = StoreNodes[i].MemNode->getOperand(1); + CombineTo(StoreNodes[i].MemNode, NewStore); + if (Val.getNode()->use_empty()) + recursivelyDeleteUnusedNodes(Val.getNode()); + } + + MadeChange = true; + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; + } + return MadeChange; +} + +bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) { + if (OptLevel == CodeGenOpt::None || !EnableStoreMerging) + return false; - SDLoc LoadDL(LoadNodes[0].MemNode); - SDLoc StoreDL(StoreNodes[0].MemNode); - - // The merged loads are required to have the same incoming chain, so - // using the first's chain is acceptable. - - SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); - AddToWorklist(NewStoreChain.getNode()); - - MachineMemOperand::Flags LdMMOFlags = - isDereferenceable ? MachineMemOperand::MODereferenceable - : MachineMemOperand::MONone; - if (IsNonTemporalLoad) - LdMMOFlags |= MachineMemOperand::MONonTemporal; - - MachineMemOperand::Flags StMMOFlags = - IsNonTemporalStore ? 
MachineMemOperand::MONonTemporal - : MachineMemOperand::MONone; - - SDValue NewLoad, NewStore; - if (UseVectorTy || !DoIntegerTruncate) { - NewLoad = - DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), - FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), - FirstLoadAlign, LdMMOFlags); - NewStore = DAG.getStore( - NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags); - } else { // This must be the truncstore/extload case - EVT ExtendedTy = - TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT); - NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, - FirstLoad->getChain(), FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), JointMemOpVT, - FirstLoadAlign, LdMMOFlags); - NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), - JointMemOpVT, FirstInChain->getAlignment(), - FirstInChain->getMemOperand()->getFlags()); - } + // TODO: Extend this function to merge stores of scalable vectors. + // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8> + // store since we know <vscale x 16 x i8> is exactly twice as large as + // <vscale x 8 x i8>). Until then, bail out for scalable vectors. + EVT MemVT = St->getMemoryVT(); + if (MemVT.isScalableVector()) + return false; + if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits) + return false; - // Transfer chain users from old loads to the new load. - for (unsigned i = 0; i < NumElem; ++i) { - LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), - SDValue(NewLoad.getNode(), 1)); - } + // This function cannot currently deal with non-byte-sized memory sizes. + int64_t ElementSizeBytes = MemVT.getStoreSize(); + if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits()) + return false; - // Replace the all stores with the new store. Recursively remove - // corresponding value if its no longer used. - for (unsigned i = 0; i < NumElem; ++i) { - SDValue Val = StoreNodes[i].MemNode->getOperand(1); - CombineTo(StoreNodes[i].MemNode, NewStore); - if (Val.getNode()->use_empty()) - recursivelyDeleteUnusedNodes(Val.getNode()); - } + // Do not bother looking at stored values that are not constants, loads, or + // extracted vector elements. + SDValue StoredVal = peekThroughBitcasts(St->getValue()); + const StoreSource StoreSrc = getStoreSource(StoredVal); + if (StoreSrc == StoreSource::Unknown) + return false; - RV = true; - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); - LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); - NumConsecutiveStores -= NumElem; + SmallVector<MemOpLink, 8> StoreNodes; + SDNode *RootNode; + // Find potential store merge candidates by searching through chain sub-DAG + getStoreMergeCandidates(St, StoreNodes, RootNode); + + // Check if there is anything to merge. + if (StoreNodes.size() < 2) + return false; + + // Sort the memory operands according to their distance from the + // base pointer. + llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) { + return LHS.OffsetFromBase < RHS.OffsetFromBase; + }); + + bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute( + Attribute::NoImplicitFloat); + bool IsNonTemporalStore = St->isNonTemporal(); + bool IsNonTemporalLoad = StoreSrc == StoreSource::Load && + cast<LoadSDNode>(StoredVal)->isNonTemporal(); + + // Store Merge attempts to merge the lowest stores. 
This generally + // works out as if successful, as the remaining stores are checked + // after the first collection of stores is merged. However, in the + // case that a non-mergeable store is found first, e.g., {p[-2], + // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent + // mergeable cases. To prevent this, we prune such stores from the + // front of StoreNodes here. + bool MadeChange = false; + while (StoreNodes.size() > 1) { + unsigned NumConsecutiveStores = + getConsecutiveStores(StoreNodes, ElementSizeBytes); + // There are no more stores in the list to examine. + if (NumConsecutiveStores == 0) + return MadeChange; + + // We have at least 2 consecutive stores. Try to merge them. + assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores"); + switch (StoreSrc) { + case StoreSource::Constant: + MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores, + MemVT, RootNode, AllowVectors); + break; + + case StoreSource::Extract: + MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores, + MemVT, RootNode); + break; + + case StoreSource::Load: + MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores, + MemVT, RootNode, AllowVectors, + IsNonTemporalStore, IsNonTemporalLoad); + break; + + default: + llvm_unreachable("Unhandled store source type"); } } - return RV; + return MadeChange; } SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) { @@ -16413,11 +17017,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Try to infer better alignment information than the store already has. if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) { - if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) { + if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) { + if (*Alignment > ST->getAlign() && + isAligned(*Alignment, ST->getSrcValueOffset())) { SDValue NewStore = DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), - ST->getMemoryVT(), Align, + ST->getMemoryVT(), *Alignment, ST->getMemOperand()->getFlags(), ST->getAAInfo()); // NewStore will always be N as we are only refining the alignment assert(NewStore.getNode() == N); @@ -16502,7 +17107,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() && - !ST1->getBasePtr().isUndef()) { + !ST1->getBasePtr().isUndef() && + // BaseIndexOffset and the code below requires knowing the size + // of a vector, so bail out if MemoryVT is scalable. + !ST1->getMemoryVT().isScalableVector()) { const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG); const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG); unsigned STBitSize = ST->getMemoryVT().getSizeInBits(); @@ -16537,7 +17145,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // There can be multiple store sequences on the same chain. // Keep trying to merge store sequences until we are unable to do so // or until we merge the last store on the chain. - bool Changed = MergeConsecutiveStores(ST); + bool Changed = mergeConsecutiveStores(ST); if (!Changed) break; // Return N as merge only uses CombineTo and no worklist clean // up is necessary. 
@@ -16813,6 +17421,10 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { EVT SubVecVT = SubVec.getValueType(); EVT VT = DestVec.getValueType(); unsigned NumSrcElts = SubVecVT.getVectorNumElements(); + // If the source only has a single vector element, the cost of creating adding + // it to a vector is likely to exceed the cost of a insert_vector_elt. + if (NumSrcElts == 1) + return SDValue(); unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits(); unsigned NumMaskVals = ExtendRatio * NumSrcElts; @@ -16858,12 +17470,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDLoc DL(N); EVT VT = InVec.getValueType(); - unsigned NumElts = VT.getVectorNumElements(); + auto *IndexC = dyn_cast<ConstantSDNode>(EltNo); // Insert into out-of-bounds element is undefined. - if (auto *IndexC = dyn_cast<ConstantSDNode>(EltNo)) - if (IndexC->getZExtValue() >= VT.getVectorNumElements()) - return DAG.getUNDEF(VT); + if (IndexC && VT.isFixedLengthVector() && + IndexC->getZExtValue() >= VT.getVectorNumElements()) + return DAG.getUNDEF(VT); // Remove redundant insertions: // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x @@ -16871,17 +17483,25 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1)) return InVec; - auto *IndexC = dyn_cast<ConstantSDNode>(EltNo); if (!IndexC) { // If this is variable insert to undef vector, it might be better to splat: // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... > if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) { - SmallVector<SDValue, 8> Ops(NumElts, InVal); - return DAG.getBuildVector(VT, DL, Ops); + if (VT.isScalableVector()) + return DAG.getSplatVector(VT, DL, InVal); + else { + SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal); + return DAG.getBuildVector(VT, DL, Ops); + } } return SDValue(); } + if (VT.isScalableVector()) + return SDValue(); + + unsigned NumElts = VT.getVectorNumElements(); + // We must know which element is being inserted for folds below here. unsigned Elt = IndexC->getZExtValue(); if (SDValue Shuf = combineInsertEltToShuffle(N, Elt)) @@ -16946,11 +17566,12 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, EVT ResultVT = EVE->getValueType(0); EVT VecEltVT = InVecVT.getVectorElementType(); - unsigned Align = OriginalLoad->getAlignment(); - unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( + Align Alignment = OriginalLoad->getAlign(); + Align NewAlign = DAG.getDataLayout().getABITypeAlign( VecEltVT.getTypeForEVT(*DAG.getContext())); - if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) + if (NewAlign > Alignment || + !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) return SDValue(); ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ? 
@@ -16958,7 +17579,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT)) return SDValue(); - Align = NewAlign; + Alignment = NewAlign; SDValue NewPtr = OriginalLoad->getBasePtr(); SDValue Offset; @@ -16998,13 +17619,13 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, : ISD::EXTLOAD; Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI, VecEltVT, - Align, OriginalLoad->getMemOperand()->getFlags(), + Alignment, OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo()); Chain = Load.getValue(1); } else { - Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, - MPI, Align, OriginalLoad->getMemOperand()->getFlags(), - OriginalLoad->getAAInfo()); + Load = DAG.getLoad( + VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment, + OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo()); Chain = Load.getValue(1); if (ResultVT.bitsLT(VecEltVT)) Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load); @@ -17080,6 +17701,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (scalar_to_vector val, 0) -> val if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) { + // Only 0'th element of SCALAR_TO_VECTOR is defined. + if (DAG.isKnownNeverZero(Index)) + return DAG.getUNDEF(ScalarVT); + // Check if the result type doesn't match the inserted element type. A // SCALAR_TO_VECTOR may truncate the inserted element and the // EXTRACT_VECTOR_ELT may widen the extracted vector. @@ -17093,15 +17718,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // extract_vector_elt of out-of-bounds element -> UNDEF auto *IndexC = dyn_cast<ConstantSDNode>(Index); - unsigned NumElts = VecVT.getVectorNumElements(); - if (IndexC && IndexC->getAPIntValue().uge(NumElts)) + if (IndexC && VecVT.isFixedLengthVector() && + IndexC->getAPIntValue().uge(VecVT.getVectorNumElements())) return DAG.getUNDEF(ScalarVT); // extract_vector_elt (build_vector x, y), 1 -> y - if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR && + if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) || + VecOp.getOpcode() == ISD::SPLAT_VECTOR) && TLI.isTypeLegal(VecVT) && (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) { - SDValue Elt = VecOp.getOperand(IndexC->getZExtValue()); + assert((VecOp.getOpcode() != ISD::BUILD_VECTOR || + VecVT.isFixedLengthVector()) && + "BUILD_VECTOR used for scalable vectors"); + unsigned IndexVal = + VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0; + SDValue Elt = VecOp.getOperand(IndexVal); EVT InEltVT = Elt.getValueType(); // Sometimes build_vector's scalar input types do not match result type. @@ -17112,6 +17743,15 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // converts. } + if (VecVT.isScalableVector()) + return SDValue(); + + // All the code from this point onwards assumes fixed width vectors, but it's + // possible that some of the combinations could be made to work for scalable + // vectors too. + unsigned NumElts = VecVT.getVectorNumElements(); + unsigned VecEltBitWidth = VecVT.getScalarSizeInBits(); + // TODO: These transforms should not require the 'hasOneUse' restriction, but // there are regressions on multiple targets without it. We can end up with a // mess of scalar and vector code if we reduce only part of the DAG to scalar. 
@@ -17135,7 +17775,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { "Extract element and scalar to vector can't change element type " "from FP to integer."); unsigned XBitWidth = X.getValueSizeInBits(); - unsigned VecEltBitWidth = VecVT.getScalarSizeInBits(); BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1; // An extract element return value type can be wider than its vector @@ -17193,9 +17832,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // FIXME: Should really be just isOperationLegalOrCustom. TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) || TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) { - EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec, - DAG.getConstant(OrigElt, DL, IndexTy)); + DAG.getVectorIdxConstant(OrigElt, DL)); } } @@ -17219,6 +17857,14 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { AddToWorklist(N); return SDValue(N, 0); } + APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth); + if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) { + // We simplified the vector operand of this extract element. If this + // extract is not dead, visit it again so it is folded properly. + if (N->getOpcode() != ISD::DELETED_NODE) + AddToWorklist(N); + return SDValue(N, 0); + } } // Everything under here is trying to match an extract of a loaded value. @@ -17304,6 +17950,30 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts; Index = DAG.getConstant(Elt, DL, Index.getValueType()); } + } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged && + VecVT.getVectorElementType() == ScalarVT && + (!LegalTypes || + TLI.isTypeLegal( + VecOp.getOperand(0).getValueType().getVectorElementType()))) { + // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0 + // -> extract_vector_elt a, 0 + // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1 + // -> extract_vector_elt a, 1 + // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2 + // -> extract_vector_elt b, 0 + // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3 + // -> extract_vector_elt b, 1 + SDLoc SL(N); + EVT ConcatVT = VecOp.getOperand(0).getValueType(); + unsigned ConcatNumElts = ConcatVT.getVectorNumElements(); + SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL, + Index.getValueType()); + + SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts); + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, + ConcatVT.getVectorElementType(), + ConcatOp, NewIdx); + return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt); } // Make sure we found a non-volatile load and the extractelement is @@ -17385,6 +18055,11 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { if (!ValidTypes) return SDValue(); + // If we already have a splat buildvector, then don't fold it if it means + // introducing zeros. 
+ if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true)) + return SDValue(); + bool isLE = DAG.getDataLayout().isLittleEndian(); unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); assert(ElemRatio > 1 && "Invalid element size ratio"); @@ -17431,12 +18106,89 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { return DAG.getBitcast(VT, BV); } +// Simplify (build_vec (trunc $1) +// (trunc (srl $1 half-width)) +// (trunc (srl $1 (2 * half-width))) …) +// to (bitcast $1) +SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector"); + + // Only for little endian + if (!DAG.getDataLayout().isLittleEndian()) + return SDValue(); + + SDLoc DL(N); + EVT VT = N->getValueType(0); + EVT OutScalarTy = VT.getScalarType(); + uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits(); + + // Only for power of two types to be sure that bitcast works well + if (!isPowerOf2_64(ScalarTypeBitsize)) + return SDValue(); + + unsigned NumInScalars = N->getNumOperands(); + + // Look through bitcasts + auto PeekThroughBitcast = [](SDValue Op) { + if (Op.getOpcode() == ISD::BITCAST) + return Op.getOperand(0); + return Op; + }; + + // The source value where all the parts are extracted. + SDValue Src; + for (unsigned i = 0; i != NumInScalars; ++i) { + SDValue In = PeekThroughBitcast(N->getOperand(i)); + // Ignore undef inputs. + if (In.isUndef()) continue; + + if (In.getOpcode() != ISD::TRUNCATE) + return SDValue(); + + In = PeekThroughBitcast(In.getOperand(0)); + + if (In.getOpcode() != ISD::SRL) { + // For now only build_vec without shuffling, handle shifts here in the + // future. + if (i != 0) + return SDValue(); + + Src = In; + } else { + // In is SRL + SDValue part = PeekThroughBitcast(In.getOperand(0)); + + if (!Src) { + Src = part; + } else if (Src != part) { + // Vector parts do not stem from the same variable + return SDValue(); + } + + SDValue ShiftAmtVal = In.getOperand(1); + if (!isa<ConstantSDNode>(ShiftAmtVal)) + return SDValue(); + + uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1); + + // The extracted value is not extracted at the right position + if (ShiftAmt != i * ScalarTypeBitsize) + return SDValue(); + } + } + + // Only cast if the size is the same + if (Src.getValueType().getSizeInBits() != VT.getSizeInBits()) + return SDValue(); + + return DAG.getBitcast(VT, Src); +} + SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef<int> VectorMask, SDValue VecIn1, SDValue VecIn2, unsigned LeftIdx, bool DidSplitVec) { - MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); - SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy); + SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL); EVT VT = N->getValueType(0); EVT InVT1 = VecIn1.getValueType(); @@ -17470,7 +18222,7 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, // If we only have one input vector, and it's twice the size of the // output, split it in two. VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, - DAG.getConstant(NumElems, DL, IdxTy)); + DAG.getVectorIdxConstant(NumElems, DL)); VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx); // Since we now have shorter input vectors, adjust the offset of the // second vector's start. 
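The new reduceBuildVecTruncToBitCast combine in the hunk above relies on a little-endian layout fact: a vector built from truncations of successively shifted copies of one wide value has exactly the in-memory layout of a bitcast of that value. A small host-level sanity check of that equivalence, assuming a little-endian machine (standalone C++, not part of the upstream change):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // On a little-endian host, lane i of the v2i32 view of an i64 is exactly
  // trunc(X >> (i * 32)) -- the (trunc (srl $1 i*half-width)) pattern the
  // combine matches before folding the whole build_vector to a bitcast.
  uint64_t X = 0x1122334455667788ULL;
  uint32_t FromShifts[2] = {static_cast<uint32_t>(X),
                            static_cast<uint32_t>(X >> 32)};
  uint32_t FromBitcast[2];
  std::memcpy(FromBitcast, &X, sizeof(X)); // the "bitcast"
  std::printf("lane0 %s, lane1 %s\n",
              FromShifts[0] == FromBitcast[0] ? "match" : "differ",
              FromShifts[1] == FromBitcast[1] ? "match" : "differ");
}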
@@ -17677,6 +18429,9 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { return SDValue(); SDValue ExtractedFromVec = Op.getOperand(0); + if (ExtractedFromVec.getValueType().isScalableVector()) + return SDValue(); + const APInt &ExtractIdx = Op.getConstantOperandAPInt(1); if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements())) return SDValue(); @@ -17711,7 +18466,6 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { unsigned NearestPow2 = 0; SDValue Vec = VecIn.back(); EVT InVT = Vec.getValueType(); - MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); SmallVector<unsigned, 8> IndexVec(NumElems, 0); for (unsigned i = 0; i < NumElems; i++) { @@ -17730,9 +18484,9 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { InVT.getVectorElementType(), SplitSize); if (TLI.isTypeLegal(SplitVT)) { SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec, - DAG.getConstant(SplitSize, DL, IdxTy)); + DAG.getVectorIdxConstant(SplitSize, DL)); SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec, - DAG.getConstant(0, DL, IdxTy)); + DAG.getVectorIdxConstant(0, DL)); VecIn.pop_back(); VecIn.push_back(VecIn1); VecIn.push_back(VecIn2); @@ -17964,6 +18718,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) return V; + if (SDValue V = reduceBuildVecTruncToBitCast(N)) + return V; + if (SDValue V = reduceBuildVecToShuffle(N)) return V; @@ -18058,6 +18815,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { // What vector are we extracting the subvector from and at what index? SDValue ExtVec = Op.getOperand(0); + int ExtIdx = Op.getConstantOperandVal(1); // We want the EVT of the original extraction to correctly scale the // extraction index. @@ -18070,10 +18828,6 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { continue; } - if (!isa<ConstantSDNode>(Op.getOperand(1))) - return SDValue(); - int ExtIdx = Op.getConstantOperandVal(1); - // Ensure that we are extracting a subvector from a vector the same // size as the result. if (ExtVT.getSizeInBits() != VT.getSizeInBits()) @@ -18107,6 +18861,69 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { DAG.getBitcast(VT, SV1), Mask, DAG); } +static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) { + unsigned CastOpcode = N->getOperand(0).getOpcode(); + switch (CastOpcode) { + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + // TODO: Allow more opcodes? + // case ISD::BITCAST: + // case ISD::TRUNCATE: + // case ISD::ZERO_EXTEND: + // case ISD::SIGN_EXTEND: + // case ISD::FP_EXTEND: + break; + default: + return SDValue(); + } + + EVT SrcVT = N->getOperand(0).getOperand(0).getValueType(); + if (!SrcVT.isVector()) + return SDValue(); + + // All operands of the concat must be the same kind of cast from the same + // source type. + SmallVector<SDValue, 4> SrcOps; + for (SDValue Op : N->ops()) { + if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() || + Op.getOperand(0).getValueType() != SrcVT) + return SDValue(); + SrcOps.push_back(Op.getOperand(0)); + } + + // The wider cast must be supported by the target. This is unusual because + // the operation support type parameter depends on the opcode. In addition, + // check the other type in the cast to make sure this is really legal. 
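  // Illustrative example (editorial, not part of the upstream change):
  // concatenating two (v4f32 sint_to_fp v4i32) operands into a v8f32 result
  // widens the source to ConcatSrcVT = v8i32, so the int-to-fp case below
  // asks whether SINT_TO_FP is legal or custom on v8i32 and whether v8f32
  // is a legal type; the fp-to-int case checks the mirrored pair instead.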
+ EVT VT = N->getValueType(0); + EVT SrcEltVT = SrcVT.getVectorElementType(); + unsigned NumElts = SrcVT.getVectorElementCount().Min * N->getNumOperands(); + EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + switch (CastOpcode) { + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) || + !TLI.isTypeLegal(VT)) + return SDValue(); + break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) || + !TLI.isTypeLegal(ConcatSrcVT)) + return SDValue(); + break; + default: + llvm_unreachable("Unexpected cast opcode"); + } + + // concat (cast X), (cast Y)... -> cast (concat X, Y...) + SDLoc DL(N); + SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps); + return DAG.getNode(CastOpcode, DL, VT, NewConcat); +} + SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // If we only have one input vector, we don't need to do any concatenation. if (N->getNumOperands() == 1) @@ -18234,6 +19051,9 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (SDValue V = combineConcatVectorOfExtracts(N, DAG)) return V; + if (SDValue V = combineConcatVectorOfCasts(N, DAG)) + return V; + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that @@ -18265,14 +19085,9 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return SDValue(); } - auto *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1)); - // The extract index must be constant. - if (!CS) - return SDValue(); - // Check that we are reading from the identity index. unsigned IdentityIndex = i * PartNumElem; - if (CS->getAPIntValue() != IdentityIndex) + if (Op.getConstantOperandAPInt(1) != IdentityIndex) return SDValue(); } @@ -18355,6 +19170,15 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1) return SDValue(); + // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be + // reduced to the unary fneg when it is visited, and we probably want to deal + // with fneg in a target-specific way. + if (BOpcode == ISD::FSUB) { + auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true); + if (C && C->getValueAPF().isNegZero()) + return SDValue(); + } + // The binop must be a vector type, so we can extract some fraction of it. EVT WideBVT = BinOp.getValueType(); if (!WideBVT.isVector()) @@ -18390,12 +19214,11 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { // bitcasted. 
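  // Worked example (editorial, not part of the upstream change): extracting
  // a v4i32 subvector at index 4 from a v8i32 binop gives
  // ConcatOpNum = 4 / 4 = 1 and ExtBOIdx = 1 * 4 = 4, i.e. the extract reads
  // exactly the second narrow half of each (possibly bitcasted) wide operand
  // in the computation below.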
unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements(); unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements(); - EVT ExtBOIdxVT = Extract->getOperand(1).getValueType(); if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) && BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) { // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N) SDLoc DL(Extract); - SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT); + SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL); SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, BinOp.getOperand(0), NewExtIndex); SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, @@ -18435,7 +19258,7 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC) // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN SDLoc DL(Extract); - SDValue IndexC = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT); + SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL); SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL) : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, BinOp.getOperand(0), IndexC); @@ -18467,6 +19290,26 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { // Allow targets to opt-out. EVT VT = Extract->getValueType(0); + + // We can only create byte sized loads. + if (!VT.isByteSized()) + return SDValue(); + + unsigned Index = ExtIdx->getZExtValue(); + unsigned NumElts = VT.getVectorNumElements(); + + // If the index is a multiple of the extract element count, we can offset the + // address by the store size multiplied by the subvector index. Otherwise if + // the scalar type is byte sized, we can just use the index multiplied by + // the element size in bytes as the offset. + unsigned Offset; + if (Index % NumElts == 0) + Offset = (Index / NumElts) * VT.getStoreSize(); + else if (VT.getScalarType().isByteSized()) + Offset = Index * VT.getScalarType().getStoreSize(); + else + return SDValue(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT)) return SDValue(); @@ -18474,8 +19317,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { // The narrow load will be offset from the base address of the old load if // we are extracting from something besides index 0 (little-endian). SDLoc DL(Extract); - SDValue BaseAddr = Ld->getOperand(1); - unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize(); + SDValue BaseAddr = Ld->getBasePtr(); // TODO: Use "BaseIndexOffset" to make this more effective. SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL); @@ -18490,6 +19332,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { EVT NVT = N->getValueType(0); SDValue V = N->getOperand(0); + uint64_t ExtIdx = N->getConstantOperandVal(1); // Extract from UNDEF is UNDEF. if (V.isUndef()) @@ -18501,9 +19344,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // Combine an extract of an extract into a single extract_subvector. 
// ext (ext X, C), 0 --> ext X, C - SDValue Index = N->getOperand(1); - if (isNullConstant(Index) && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && - V.hasOneUse() && isa<ConstantSDNode>(V.getOperand(1))) { + if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) { if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(), V.getConstantOperandVal(1)) && TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) { @@ -18514,21 +19355,20 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // Try to move vector bitcast after extract_subv by scaling extraction index: // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index') - if (isa<ConstantSDNode>(Index) && V.getOpcode() == ISD::BITCAST && + if (V.getOpcode() == ISD::BITCAST && V.getOperand(0).getValueType().isVector()) { SDValue SrcOp = V.getOperand(0); EVT SrcVT = SrcOp.getValueType(); - unsigned SrcNumElts = SrcVT.getVectorNumElements(); - unsigned DestNumElts = V.getValueType().getVectorNumElements(); + unsigned SrcNumElts = SrcVT.getVectorMinNumElements(); + unsigned DestNumElts = V.getValueType().getVectorMinNumElements(); if ((SrcNumElts % DestNumElts) == 0) { unsigned SrcDestRatio = SrcNumElts / DestNumElts; - unsigned NewExtNumElts = NVT.getVectorNumElements() * SrcDestRatio; + ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio; EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), - NewExtNumElts); + NewExtEC); if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) { - unsigned IndexValScaled = N->getConstantOperandVal(1) * SrcDestRatio; SDLoc DL(N); - SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL); + SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL); SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT, V.getOperand(0), NewIndex); return DAG.getBitcast(NVT, NewExtract); @@ -18536,34 +19376,43 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { } if ((DestNumElts % SrcNumElts) == 0) { unsigned DestSrcRatio = DestNumElts / SrcNumElts; - if ((NVT.getVectorNumElements() % DestSrcRatio) == 0) { - unsigned NewExtNumElts = NVT.getVectorNumElements() / DestSrcRatio; - EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), - SrcVT.getScalarType(), NewExtNumElts); - if ((N->getConstantOperandVal(1) % DestSrcRatio) == 0 && - TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) { - unsigned IndexValScaled = N->getConstantOperandVal(1) / DestSrcRatio; + if ((NVT.getVectorMinNumElements() % DestSrcRatio) == 0) { + ElementCount NewExtEC = NVT.getVectorElementCount() / DestSrcRatio; + EVT ScalarVT = SrcVT.getScalarType(); + if ((ExtIdx % DestSrcRatio) == 0) { SDLoc DL(N); - SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL); - SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT, - V.getOperand(0), NewIndex); - return DAG.getBitcast(NVT, NewExtract); + unsigned IndexValScaled = ExtIdx / DestSrcRatio; + EVT NewExtVT = + EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC); + if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) { + SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL); + SDValue NewExtract = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT, + V.getOperand(0), NewIndex); + return DAG.getBitcast(NVT, NewExtract); + } + if (NewExtEC == 1 && + TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) { + SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL); + SDValue NewExtract = + 
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, + V.getOperand(0), NewIndex); + return DAG.getBitcast(NVT, NewExtract); + } } } } } - if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index)) { + if (V.getOpcode() == ISD::CONCAT_VECTORS) { + unsigned ExtNumElts = NVT.getVectorMinNumElements(); EVT ConcatSrcVT = V.getOperand(0).getValueType(); assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() && "Concat and extract subvector do not change element type"); - - unsigned ExtIdx = N->getConstantOperandVal(1); - unsigned ExtNumElts = NVT.getVectorNumElements(); - assert(ExtIdx % ExtNumElts == 0 && + assert((ExtIdx % ExtNumElts) == 0 && "Extract index is not a multiple of the input vector length."); - unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorNumElements(); + unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements(); unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts; // If the concatenated source types match this extract, it's a direct @@ -18577,15 +19426,14 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // concat operand. Example: // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 --> // v2i8 extract_subvec v8i8 Y, 6 - if (ConcatSrcNumElts % ExtNumElts == 0) { + if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) { SDLoc DL(N); unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts; assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts && "Trying to extract from >1 concat operand?"); assert(NewExtIdx % ExtNumElts == 0 && "Extract index is not a multiple of the input vector length."); - MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); - SDValue NewIndexC = DAG.getConstant(NewExtIdx, DL, IdxTy); + SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(ConcatOpIdx), NewIndexC); } @@ -18595,37 +19443,33 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // If the input is a build vector. Try to make a smaller build vector. if (V.getOpcode() == ISD::BUILD_VECTOR) { - if (auto *IdxC = dyn_cast<ConstantSDNode>(Index)) { - EVT InVT = V.getValueType(); - unsigned ExtractSize = NVT.getSizeInBits(); - unsigned EltSize = InVT.getScalarSizeInBits(); - // Only do this if we won't split any elements. - if (ExtractSize % EltSize == 0) { - unsigned NumElems = ExtractSize / EltSize; - EVT EltVT = InVT.getVectorElementType(); - EVT ExtractVT = NumElems == 1 ? EltVT - : EVT::getVectorVT(*DAG.getContext(), - EltVT, NumElems); - if ((Level < AfterLegalizeDAG || - (NumElems == 1 || - TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) && - (!LegalTypes || TLI.isTypeLegal(ExtractVT))) { - unsigned IdxVal = IdxC->getZExtValue(); - IdxVal *= NVT.getScalarSizeInBits(); - IdxVal /= EltSize; - - if (NumElems == 1) { - SDValue Src = V->getOperand(IdxVal); - if (EltVT != Src.getValueType()) - Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src); - return DAG.getBitcast(NVT, Src); - } - - // Extract the pieces from the original build_vector. - SDValue BuildVec = DAG.getBuildVector( - ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems)); - return DAG.getBitcast(NVT, BuildVec); + EVT InVT = V.getValueType(); + unsigned ExtractSize = NVT.getSizeInBits(); + unsigned EltSize = InVT.getScalarSizeInBits(); + // Only do this if we won't split any elements. + if (ExtractSize % EltSize == 0) { + unsigned NumElems = ExtractSize / EltSize; + EVT EltVT = InVT.getVectorElementType(); + EVT ExtractVT = + NumElems == 1 ? 
EltVT + : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems); + if ((Level < AfterLegalizeDAG || + (NumElems == 1 || + TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) && + (!LegalTypes || TLI.isTypeLegal(ExtractVT))) { + unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize; + + if (NumElems == 1) { + SDValue Src = V->getOperand(IdxVal); + if (EltVT != Src.getValueType()) + Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src); + return DAG.getBitcast(NVT, Src); } + + // Extract the pieces from the original build_vector. + SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N), + V->ops().slice(IdxVal, NumElems)); + return DAG.getBitcast(NVT, BuildVec); } } } @@ -18637,23 +19481,19 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { if (!NVT.bitsEq(SmallVT)) return SDValue(); - // Only handle cases where both indexes are constants. - auto *ExtIdx = dyn_cast<ConstantSDNode>(Index); - auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2)); - if (InsIdx && ExtIdx) { - // Combine: - // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) - // Into: - // indices are equal or bit offsets are equal => V1 - // otherwise => (extract_subvec V1, ExtIdx) - if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() == - ExtIdx->getZExtValue() * NVT.getScalarSizeInBits()) - return DAG.getBitcast(NVT, V.getOperand(1)); - return DAG.getNode( - ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, - DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)), - Index); - } + // Combine: + // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) + // Into: + // indices are equal or bit offsets are equal => V1 + // otherwise => (extract_subvec V1, ExtIdx) + uint64_t InsIdx = V.getConstantOperandVal(2); + if (InsIdx * SmallVT.getScalarSizeInBits() == + ExtIdx * NVT.getScalarSizeInBits()) + return DAG.getBitcast(NVT, V.getOperand(1)); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, + DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)), + N->getOperand(1)); } if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG)) @@ -19042,6 +19882,57 @@ static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, NewMask); } +/// Combine shuffle of shuffle of the form: +/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X +static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, + SelectionDAG &DAG) { + if (!OuterShuf->getOperand(1).isUndef()) + return SDValue(); + auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0)); + if (!InnerShuf || !InnerShuf->getOperand(1).isUndef()) + return SDValue(); + + ArrayRef<int> OuterMask = OuterShuf->getMask(); + ArrayRef<int> InnerMask = InnerShuf->getMask(); + unsigned NumElts = OuterMask.size(); + assert(NumElts == InnerMask.size() && "Mask length mismatch"); + SmallVector<int, 32> CombinedMask(NumElts, -1); + int SplatIndex = -1; + for (unsigned i = 0; i != NumElts; ++i) { + // Undef lanes remain undef. + int OuterMaskElt = OuterMask[i]; + if (OuterMaskElt == -1) + continue; + + // Peek through the shuffle masks to get the underlying source element. + int InnerMaskElt = InnerMask[OuterMaskElt]; + if (InnerMaskElt == -1) + continue; + + // Initialize the splatted element. + if (SplatIndex == -1) + SplatIndex = InnerMaskElt; + + // Non-matching index - this is not a splat. 
+ if (SplatIndex != InnerMaskElt) + return SDValue(); + + CombinedMask[i] = InnerMaskElt; + } + assert((all_of(CombinedMask, [](int M) { return M == -1; }) || + getSplatIndex(CombinedMask) != -1) && + "Expected a splat mask"); + + // TODO: The transform may be a win even if the mask is not legal. + EVT VT = OuterShuf->getValueType(0); + assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types"); + if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT)) + return SDValue(); + + return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0), + InnerShuf->getOperand(1), CombinedMask); +} + /// If the shuffle mask is taking exactly one element from the first vector /// operand and passing through all other elements from the second vector /// operand, return the index of the mask element that is choosing an element @@ -19114,8 +20005,7 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, // element used. Therefore, our new insert element occurs at the shuffle's // mask index value, not the insert's index value. // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C' - SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf), - Op0.getOperand(2).getValueType()); + SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf)); return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(), Op1, Op0.getOperand(1), NewInsIndex); } @@ -19201,6 +20091,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (SDValue V = combineShuffleOfSplatVal(SVN, DAG)) return V; + if (SDValue V = formSplatFromShuffles(SVN, DAG)) + return V; + // If it is a splat, check if the argument vector is another splat or a // build_vector. if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { @@ -19212,7 +20105,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SDValue L = N0.getOperand(0), R = N0.getOperand(1); SDLoc DL(N); EVT EltVT = VT.getScalarType(); - SDValue Index = DAG.getIntPtrConstant(SplatIndex, DL); + SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL); SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index); SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index); SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, @@ -19332,16 +20225,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && N1.isUndef() && Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { - auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) { - if (Scale == 1) - return SmallVector<int, 8>(Mask.begin(), Mask.end()); - - SmallVector<int, 8> NewMask; - for (int M : Mask) - for (int s = 0; s != Scale; ++s) - NewMask.push_back(M < 0 ? -1 : Scale * M + s); - return NewMask; - }; SDValue BC0 = peekThroughOneUseBitcasts(N0); if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) { @@ -19361,10 +20244,10 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // Scale the shuffle masks to the smaller scalar type. ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0); - SmallVector<int, 8> InnerMask = - ScaleShuffleMask(InnerSVN->getMask(), InnerScale); - SmallVector<int, 8> OuterMask = - ScaleShuffleMask(SVN->getMask(), OuterScale); + SmallVector<int, 8> InnerMask; + SmallVector<int, 8> OuterMask; + narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask); + narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask); // Merge the shuffle masks. 
SmallVector<int, 8> NewMask; @@ -19525,7 +20408,9 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern // with a VECTOR_SHUFFLE and possible truncate. - if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + VT.isFixedLengthVector() && + InVal->getOperand(0).getValueType().isFixedLengthVector()) { SDValue InVec = InVal->getOperand(0); SDValue EltNo = InVal->getOperand(1); auto InVecT = InVec.getValueType(); @@ -19554,11 +20439,10 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { return LegalShuffle; // If not we must truncate the vector. if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) { - MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); - SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy); - EVT SubVT = - EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(), - VT.getVectorNumElements()); + SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N)); + EVT SubVT = EVT::getVectorVT(*DAG.getContext(), + InVecT.getVectorElementType(), + VT.getVectorNumElements()); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle, ZeroIdx); } @@ -19575,6 +20459,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); + uint64_t InsIdx = N->getConstantOperandVal(2); // If inserting an UNDEF, just return the original vector. if (N1.isUndef()) @@ -19635,11 +20520,6 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0, N1.getOperand(1), N2); - if (!isa<ConstantSDNode>(N2)) - return SDValue(); - - uint64_t InsIdx = cast<ConstantSDNode>(N2)->getZExtValue(); - // Push subvector bitcasts to the output, adjusting the index as we go. // insert_subvector(bitcast(v), bitcast(s), c1) // -> bitcast(insert_subvector(v, s, c2)) @@ -19654,19 +20534,18 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { EVT NewVT; SDLoc DL(N); SDValue NewIdx; - MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); LLVMContext &Ctx = *DAG.getContext(); unsigned NumElts = VT.getVectorNumElements(); unsigned EltSizeInBits = VT.getScalarSizeInBits(); if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) { unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits(); NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale); - NewIdx = DAG.getConstant(InsIdx * Scale, DL, IdxVT); + NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL); } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) { unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits; if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) { NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale); - NewIdx = DAG.getConstant(InsIdx / Scale, DL, IdxVT); + NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL); } } if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) { @@ -19682,8 +20561,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { // (insert_subvector (insert_subvector A, Idx0), Idx1) // -> (insert_subvector (insert_subvector A, Idx1), Idx0) if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() && - N1.getValueType() == N0.getOperand(1).getValueType() && - isa<ConstantSDNode>(N0.getOperand(2))) { + N1.getValueType() == N0.getOperand(1).getValueType()) { unsigned OtherIdx = N0.getConstantOperandVal(2); if (InsIdx < OtherIdx) { // Swap nodes. 
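The hunk above swaps the local ScaleShuffleMask lambda for the shared narrowShuffleMaskElts utility; both appear to perform the same expansion, turning each wide-element mask entry into Scale consecutive narrow-element entries. A standalone sketch of that expansion (plain C++, illustrative only):

#include <cstdio>
#include <vector>

// Each wide-element mask entry M expands to the Scale consecutive
// narrow-element entries Scale*M .. Scale*M+Scale-1; -1 (undef) stays undef.
std::vector<int> narrowMask(const std::vector<int> &Mask, int Scale) {
  std::vector<int> Out;
  for (int M : Mask)
    for (int S = 0; S != Scale; ++S)
      Out.push_back(M < 0 ? -1 : Scale * M + S);
  return Out;
}

int main() {
  // A v2i32 mask <1, -1> rewritten for v4i16 (Scale = 2) becomes
  // <2, 3, -1, -1>.
  for (int M : narrowMask({1, -1}, 2))
    std::printf("%d ", M);
  std::printf("\n");
}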
@@ -19700,10 +20578,8 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() && N0.getOperand(0).getValueType() == N1.getValueType()) { unsigned Factor = N1.getValueType().getVectorNumElements(); - SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end()); - Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1; - + Ops[InsIdx / Factor] = N1; return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); } @@ -19747,9 +20623,9 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) { // VECREDUCE over 1-element vector is just an extract. if (VT.getVectorNumElements() == 1) { SDLoc dl(N); - SDValue Res = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0, - DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue Res = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0, + DAG.getVectorIdxConstant(0, dl)); if (Res.getValueType() != N->getValueType(0)) Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res); return Res; @@ -19882,10 +20758,9 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) { return SDValue(); SDLoc DL(N); - SDValue IndexC = - DAG.getConstant(Index0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())); - SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC); - SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC); + SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL); + SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC); + SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC); SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags()); // If all lanes but 1 are undefined, no need to splat the scalar result. @@ -19915,6 +20790,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { SDValue Ops[] = {LHS, RHS}; EVT VT = N->getValueType(0); unsigned Opcode = N->getOpcode(); + SDNodeFlags Flags = N->getFlags(); // See if we can constant fold the vector operation. if (SDValue Fold = DAG.FoldConstantVectorArithmetic( @@ -19938,10 +20814,37 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) { SDLoc DL(N); SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0), - RHS.getOperand(0), N->getFlags()); + RHS.getOperand(0), Flags); SDValue UndefV = LHS.getOperand(1); return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask()); } + + // Try to sink a splat shuffle after a binop with a uniform constant. + // This is limited to cases where neither the shuffle nor the constant have + // undefined elements because that could be poison-unsafe or inhibit + // demanded elements analysis. It is further limited to not change a splat + // of an inserted scalar because that may be optimized better by + // load-folding or other target-specific behaviors. 
+ if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) && + Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() && + Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) { + // binop (splat X), (splat C) --> splat (binop X, C) + SDLoc DL(N); + SDValue X = Shuf0->getOperand(0); + SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags); + return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT), + Shuf0->getMask()); + } + if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) && + Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() && + Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) { + // binop (splat C), (splat X) --> splat (binop C, X) + SDLoc DL(N); + SDValue X = Shuf1->getOperand(0); + SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags); + return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT), + Shuf1->getMask()); + } } // The following pattern is likely to emerge with vector reduction ops. Moving @@ -20339,8 +21242,8 @@ SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset( // Create a ConstantArray of the two constants. Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()), - TD.getPrefTypeAlignment(FPTy)); - unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + TD.getPrefTypeAlign(FPTy)); + Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign(); // Get offsets to the 0 and 1 elements of the array, so we can select between // them. @@ -20775,7 +21678,10 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, EVT CCVT = getSetCCResultType(VT); ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT; DenormalMode DenormMode = DAG.getDenormalMode(VT); - if (DenormMode == DenormalMode::IEEE) { + if (DenormMode.Input == DenormalMode::IEEE) { + // This is specifically a check for the handling of denormal inputs, + // not the result. + // fabs(X) < SmallestNormal ? 0.0 : Est const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem); @@ -20827,9 +21733,11 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { : (LSN->getAddressingMode() == ISD::PRE_DEC) ? -1 * C->getSExtValue() : 0; + uint64_t Size = + MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize()); return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(), Offset /*base offset*/, - Optional<int64_t>(LSN->getMemoryVT().getStoreSize()), + Optional<int64_t>(Size), LSN->getMemOperand()}; } if (const auto *LN = cast<LifetimeSDNode>(N)) @@ -20889,21 +21797,24 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { // If we know required SrcValue1 and SrcValue2 have relatively large // alignment compared to the size and offset of the access, we may be able // to prove they do not alias. This check is conservative for now to catch - // cases created by splitting vector types. + // cases created by splitting vector types, it only works when the offsets are + // multiples of the size of the data. 
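  // Worked example (editorial, not part of the upstream change): two 4-byte
  // accesses with 16-byte base alignment at offsets 0 and 4 give
  // OffAlign0 = 0 and OffAlign1 = 4, so OffAlign0 + Size <= OffAlign1 proves
  // no overlap below; at offsets 0 and 2 the offset-is-a-multiple-of-size
  // precondition fails and the query falls through to the AA check instead.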
int64_t SrcValOffset0 = MUC0.MMO->getOffset(); int64_t SrcValOffset1 = MUC1.MMO->getOffset(); - unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment(); - unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment(); + Align OrigAlignment0 = MUC0.MMO->getBaseAlign(); + Align OrigAlignment1 = MUC1.MMO->getBaseAlign(); + auto &Size0 = MUC0.NumBytes; + auto &Size1 = MUC1.NumBytes; if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 && - MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() && - *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) { - int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0; - int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1; + Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 && + OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 && + SrcValOffset1 % *Size1 == 0) { + int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value(); + int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value(); // There is no overlap between these relatively aligned accesses of // similar size. Return no alias. - if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 || - (OffAlign1 + *MUC1.NumBytes) <= OffAlign0) + if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0) return false; } @@ -20916,11 +21827,12 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { UseAA = false; #endif - if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) { + if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && + Size0.hasValue() && Size1.hasValue()) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); - int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset; - int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset; + int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset; + int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset; AliasResult AAResult = AA->alias( MemoryLocation(MUC0.MMO->getValue(), Overlap0, UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()), @@ -21077,10 +21989,10 @@ bool operator!=(const UnitT &, const UnitT &) { return false; } // redundant, as this function gets called when visiting every store // node, so why not let the work be done on each store as it's visited? // -// I believe this is mainly important because MergeConsecutiveStores +// I believe this is mainly important because mergeConsecutiveStores // is unable to deal with merging stores of different sizes, so unless // we improve the chains of all the potential candidates up-front -// before running MergeConsecutiveStores, it might only see some of +// before running mergeConsecutiveStores, it might only see some of // the nodes that will eventually be candidates, and then not be able // to go from a partially-merged state to the desired final // fully-merged state. @@ -21109,6 +22021,12 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { if (BasePtr.getBase().isUndef()) return false; + // BaseIndexOffset assumes that offsets are fixed-size, which + // is not valid for scalable vectors where the offsets are + // scaled by `vscale`, so bail out early. + if (St->getMemoryVT().isScalableVector()) + return false; + // Add ST's interval. 
Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 6ecde9b43c07..fc6c3a145f13 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -68,7 +68,6 @@ #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -156,7 +155,7 @@ bool FastISel::lowerArguments() { for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(), E = FuncInfo.Fn->arg_end(); I != E; ++I) { - DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(&*I); + DenseMap<const Value *, Register>::iterator VI = LocalValueMap.find(&*I); assert(VI != LocalValueMap.end() && "Missed an argument?"); FuncInfo.ValueMap[&*I] = VI->second; } @@ -165,8 +164,8 @@ bool FastISel::lowerArguments() { /// Return the defined register if this instruction defines exactly one /// virtual register and uses no other virtual registers. Otherwise return 0. -static unsigned findSinkableLocalRegDef(MachineInstr &MI) { - unsigned RegDef = 0; +static Register findSinkableLocalRegDef(MachineInstr &MI) { + Register RegDef; for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; @@ -174,9 +173,9 @@ static unsigned findSinkableLocalRegDef(MachineInstr &MI) { if (RegDef) return 0; RegDef = MO.getReg(); - } else if (Register::isVirtualRegister(MO.getReg())) { + } else if (MO.getReg().isVirtual()) { // This is another use of a vreg. Don't try to sink it. - return 0; + return Register(); } } return RegDef; @@ -202,7 +201,7 @@ void FastISel::flushLocalValueMap() { bool Store = true; if (!LocalMI.isSafeToMove(nullptr, Store)) continue; - unsigned DefReg = findSinkableLocalRegDef(LocalMI); + Register DefReg = findSinkableLocalRegDef(LocalMI); if (DefReg == 0) continue; @@ -217,7 +216,7 @@ void FastISel::flushLocalValueMap() { LastFlushPoint = FuncInfo.InsertPt; } -static bool isRegUsedByPhiNodes(unsigned DefReg, +static bool isRegUsedByPhiNodes(Register DefReg, FunctionLoweringInfo &FuncInfo) { for (auto &P : FuncInfo.PHINodesToUpdate) if (P.second == DefReg) @@ -261,7 +260,7 @@ void FastISel::InstOrderMap::initialize( } void FastISel::sinkLocalValueMaterialization(MachineInstr &LocalMI, - unsigned DefReg, + Register DefReg, InstOrderMap &OrderMap) { // If this register is used by a register fixup, MRI will not contain all // the uses until after register fixups, so don't attempt to sink or DCE @@ -356,7 +355,7 @@ bool FastISel::hasTrivialKill(const Value *V) { // Even the value might have only one use in the LLVM IR, it is possible that // FastISel might fold the use into another instruction and now there is more // than one use at the Machine Instruction level. - unsigned Reg = lookUpRegForValue(V); + Register Reg = lookUpRegForValue(V); if (Reg && !MRI.use_empty(Reg)) return false; @@ -374,11 +373,11 @@ bool FastISel::hasTrivialKill(const Value *V) { cast<Instruction>(*I->user_begin())->getParent() == I->getParent(); } -unsigned FastISel::getRegForValue(const Value *V) { +Register FastISel::getRegForValue(const Value *V) { EVT RealVT = TLI.getValueType(DL, V->getType(), /*AllowUnknown=*/true); // Don't handle non-simple values in FastISel. 
if (!RealVT.isSimple()) - return 0; + return Register(); // Ignore illegal types. We must do this before looking up the value // in ValueMap because Arguments are given virtual registers regardless @@ -389,11 +388,11 @@ unsigned FastISel::getRegForValue(const Value *V) { if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT(); else - return 0; + return Register(); } // Look up the value to see if we already have a register for it. - unsigned Reg = lookUpRegForValue(V); + Register Reg = lookUpRegForValue(V); if (Reg) return Reg; @@ -415,8 +414,8 @@ unsigned FastISel::getRegForValue(const Value *V) { return Reg; } -unsigned FastISel::materializeConstant(const Value *V, MVT VT) { - unsigned Reg = 0; +Register FastISel::materializeConstant(const Value *V, MVT VT) { + Register Reg; if (const auto *CI = dyn_cast<ConstantInt>(V)) { if (CI->getValue().getActiveBits() <= 64) Reg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); @@ -443,9 +442,9 @@ unsigned FastISel::materializeConstant(const Value *V, MVT VT) { bool isExact; (void)Flt.convertToInteger(SIntVal, APFloat::rmTowardZero, &isExact); if (isExact) { - unsigned IntegerReg = + Register IntegerReg = getRegForValue(ConstantInt::get(V->getContext(), SIntVal)); - if (IntegerReg != 0) + if (IntegerReg) Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg, /*Kill=*/false); } @@ -467,8 +466,8 @@ unsigned FastISel::materializeConstant(const Value *V, MVT VT) { /// Helper for getRegForValue. This function is called when the value isn't /// already available in a register and must be materialized with new /// instructions. -unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { - unsigned Reg = 0; +Register FastISel::materializeRegForValue(const Value *V, MVT VT) { + Register Reg; // Give the target-specific code a try first. if (isa<Constant>(V)) Reg = fastMaterializeConstant(cast<Constant>(V)); @@ -487,25 +486,25 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { return Reg; } -unsigned FastISel::lookUpRegForValue(const Value *V) { +Register FastISel::lookUpRegForValue(const Value *V) { // Look up the value to see if we already have a register for it. We // cache values defined by Instructions across blocks, and other values // only locally. This is because Instructions already have the SSA // def-dominates-use requirement enforced. - DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V); + DenseMap<const Value *, Register>::iterator I = FuncInfo.ValueMap.find(V); if (I != FuncInfo.ValueMap.end()) return I->second; return LocalValueMap[V]; } -void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) { +void FastISel::updateValueMap(const Value *I, Register Reg, unsigned NumRegs) { if (!isa<Instruction>(I)) { LocalValueMap[I] = Reg; return; } - unsigned &AssignedReg = FuncInfo.ValueMap[I]; - if (AssignedReg == 0) + Register &AssignedReg = FuncInfo.ValueMap[I]; + if (!AssignedReg) // Use the new register. AssignedReg = Reg; else if (Reg != AssignedReg) { @@ -519,11 +518,11 @@ void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) { } } -std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) { - unsigned IdxN = getRegForValue(Idx); - if (IdxN == 0) +std::pair<Register, bool> FastISel::getRegForGEPIndex(const Value *Idx) { + Register IdxN = getRegForValue(Idx); + if (!IdxN) // Unhandled operand. Halt "fast" selection and bail. 
- return std::pair<unsigned, bool>(0, false); + return std::pair<Register, bool>(Register(), false); bool IdxNIsKill = hasTrivialKill(Idx); @@ -539,7 +538,7 @@ std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) { fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN, IdxNIsKill); IdxNIsKill = true; } - return std::pair<unsigned, bool>(IdxN, IdxNIsKill); + return std::pair<Register, bool>(IdxN, IdxNIsKill); } void FastISel::recomputeInsertPt() { @@ -620,12 +619,12 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { // we don't have anything that canonicalizes operand order. if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(0))) if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) { - unsigned Op1 = getRegForValue(I->getOperand(1)); + Register Op1 = getRegForValue(I->getOperand(1)); if (!Op1) return false; bool Op1IsKill = hasTrivialKill(I->getOperand(1)); - unsigned ResultReg = + Register ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, Op1IsKill, CI->getZExtValue(), VT.getSimpleVT()); if (!ResultReg) @@ -636,7 +635,7 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { return true; } - unsigned Op0 = getRegForValue(I->getOperand(0)); + Register Op0 = getRegForValue(I->getOperand(0)); if (!Op0) // Unhandled operand. Halt "fast" selection and bail. return false; bool Op0IsKill = hasTrivialKill(I->getOperand(0)); @@ -659,7 +658,7 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { ISDOpcode = ISD::AND; } - unsigned ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, + Register ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, Op0IsKill, Imm, VT.getSimpleVT()); if (!ResultReg) return false; @@ -669,13 +668,13 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { return true; } - unsigned Op1 = getRegForValue(I->getOperand(1)); + Register Op1 = getRegForValue(I->getOperand(1)); if (!Op1) // Unhandled operand. Halt "fast" selection and bail. return false; bool Op1IsKill = hasTrivialKill(I->getOperand(1)); // Now we have both operands in registers. Emit the instruction. - unsigned ResultReg = fastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), + Register ResultReg = fastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), ISDOpcode, Op0, Op0IsKill, Op1, Op1IsKill); if (!ResultReg) // Target-specific code wasn't able to find a machine opcode for @@ -688,7 +687,7 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { } bool FastISel::selectGetElementPtr(const User *I) { - unsigned N = getRegForValue(I->getOperand(0)); + Register N = getRegForValue(I->getOperand(0)); if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; bool NIsKill = hasTrivialKill(I->getOperand(0)); @@ -744,8 +743,8 @@ bool FastISel::selectGetElementPtr(const User *I) { // N = N + Idx * ElementSize; uint64_t ElementSize = DL.getTypeAllocSize(Ty); - std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); - unsigned IdxN = Pair.first; + std::pair<Register, bool> Pair = getRegForGEPIndex(Idx); + Register IdxN = Pair.first; bool IdxNIsKill = Pair.second; if (!IdxN) // Unhandled operand. Halt "fast" selection and bail. 
return false; @@ -793,7 +792,7 @@ bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops, else return false; } else { - unsigned Reg = getRegForValue(Val); + Register Reg = getRegForValue(Val); if (!Reg) return false; Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false)); @@ -886,7 +885,6 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx, Args.reserve(NumArgs); // Populate the argument list. - ImmutableCallSite CS(CI); for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs; ArgI != ArgE; ++ArgI) { Value *V = CI->getOperand(ArgI); @@ -895,7 +893,7 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx, ArgListEntry Entry; Entry.Val = V; Entry.Ty = V->getType(); - Entry.setAttributes(&CS, ArgI); + Entry.setAttributes(CI, ArgI); Args.push_back(Entry); } @@ -1002,7 +1000,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) { // place these in any free register. if (IsAnyRegCC) { for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) { - unsigned Reg = getRegForValue(I->getArgOperand(i)); + Register Reg = getRegForValue(I->getArgOperand(i)); if (!Reg) return false; Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false)); @@ -1119,10 +1117,8 @@ bool FastISel::lowerCallTo(const CallInst *CI, const char *SymName, bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol, unsigned NumArgs) { - ImmutableCallSite CS(CI); - - FunctionType *FTy = CS.getFunctionType(); - Type *RetTy = CS.getType(); + FunctionType *FTy = CI->getFunctionType(); + Type *RetTy = CI->getType(); ArgListTy Args; Args.reserve(NumArgs); @@ -1137,13 +1133,13 @@ bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol, ArgListEntry Entry; Entry.Val = V; Entry.Ty = V->getType(); - Entry.setAttributes(&CS, ArgI); + Entry.setAttributes(CI, ArgI); Args.push_back(Entry); } - TLI.markLibCallAttributes(MF, CS.getCallingConv(), Args); + TLI.markLibCallAttributes(MF, CI->getCallingConv(), Args); CallLoweringInfo CLI; - CLI.setCallee(RetTy, FTy, Symbol, std::move(Args), CS, NumArgs); + CLI.setCallee(RetTy, FTy, Symbol, std::move(Args), *CI, NumArgs); return lowerCallTo(CLI); } @@ -1218,7 +1214,16 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { // the various CC lowering callbacks. Flags.setByVal(); } - if (Arg.IsByVal || Arg.IsInAlloca) { + if (Arg.IsPreallocated) { + Flags.setPreallocated(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // preallocated. This way we can know how many bytes we should've + // allocated and how many bytes a callee cleanup function will pop. If we + // port preallocated to more targets, we'll have to add custom + // preallocated handling in the various CC lowering callbacks. + Flags.setByVal(); + } + if (Arg.IsByVal || Arg.IsInAlloca || Arg.IsPreallocated) { PointerType *Ty = cast<PointerType>(Arg.Ty); Type *ElementTy = Ty->getElementType(); unsigned FrameSize = @@ -1226,17 +1231,17 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { // For ByVal, alignment should come from FE. BE will guess if this info // is not there, but there are cases it cannot get right. 
- unsigned FrameAlign = Arg.Alignment; + MaybeAlign FrameAlign = Arg.Alignment; if (!FrameAlign) - FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL); + FrameAlign = Align(TLI.getByValTypeAlignment(ElementTy, DL)); Flags.setByValSize(FrameSize); - Flags.setByValAlign(Align(FrameAlign)); + Flags.setByValAlign(*FrameAlign); } if (Arg.IsNest) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); - Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty))); + Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty)); CLI.OutVals.push_back(Arg.Val); CLI.OutFlags.push_back(Flags); @@ -1249,29 +1254,26 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { assert(CLI.Call && "No call instruction specified."); CLI.Call->setPhysRegsDeadExcept(CLI.InRegs, TRI); - if (CLI.NumResultRegs && CLI.CS) - updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs); + if (CLI.NumResultRegs && CLI.CB) + updateValueMap(CLI.CB, CLI.ResultReg, CLI.NumResultRegs); // Set labels for heapallocsite call. - if (CLI.CS) - if (MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite")) + if (CLI.CB) + if (MDNode *MD = CLI.CB->getMetadata("heapallocsite")) CLI.Call->setHeapAllocMarker(*MF, MD); return true; } bool FastISel::lowerCall(const CallInst *CI) { - ImmutableCallSite CS(CI); - - FunctionType *FuncTy = CS.getFunctionType(); - Type *RetTy = CS.getType(); + FunctionType *FuncTy = CI->getFunctionType(); + Type *RetTy = CI->getType(); ArgListTy Args; ArgListEntry Entry; - Args.reserve(CS.arg_size()); + Args.reserve(CI->arg_size()); - for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); - i != e; ++i) { + for (auto i = CI->arg_begin(), e = CI->arg_end(); i != e; ++i) { Value *V = *i; // Skip empty types @@ -1282,14 +1284,14 @@ bool FastISel::lowerCall(const CallInst *CI) { Entry.Ty = V->getType(); // Skip the first return-type Attribute to get to params. - Entry.setAttributes(&CS, i - CS.arg_begin()); + Entry.setAttributes(CI, i - CI->arg_begin()); Args.push_back(Entry); } // Check if target-independent constraints permit a tail call here. // Target-dependent constraints are checked within fastLowerCall. bool IsTailCall = CI->isTailCall(); - if (IsTailCall && !isInTailCallPosition(CS, TM)) + if (IsTailCall && !isInTailCallPosition(*CI, TM)) IsTailCall = false; if (IsTailCall && MF->getFunction() .getFnAttribute("disable-tail-calls") @@ -1297,7 +1299,7 @@ bool FastISel::lowerCall(const CallInst *CI) { IsTailCall = false; CallLoweringInfo CLI; - CLI.setCallee(RetTy, FuncTy, CI->getCalledValue(), std::move(Args), CS) + CLI.setCallee(RetTy, FuncTy, CI->getCalledOperand(), std::move(Args), *CI) .setTailCall(IsTailCall); return lowerCallTo(CLI); @@ -1307,7 +1309,7 @@ bool FastISel::selectCall(const User *I) { const CallInst *Call = cast<CallInst>(I); // Handle simple inline asms. - if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledValue())) { + if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledOperand())) { // If the inline asm has side effects, then make sure that no local value // lives across by flushing the local value map. 
if (IA->hasSideEffects()) @@ -1322,12 +1324,19 @@ bool FastISel::selectCall(const User *I) { ExtraInfo |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) ExtraInfo |= InlineAsm::Extra_IsAlignStack; + if (Call->isConvergent()) + ExtraInfo |= InlineAsm::Extra_IsConvergent; ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::INLINEASM)) - .addExternalSymbol(IA->getAsmString().c_str()) - .addImm(ExtraInfo); + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::INLINEASM)); + MIB.addExternalSymbol(IA->getAsmString().c_str()); + MIB.addImm(ExtraInfo); + + const MDNode *SrcLoc = Call->getMetadata("srcloc"); + if (SrcLoc) + MIB.addMetadata(SrcLoc); + return true; } @@ -1365,13 +1374,15 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { const DbgDeclareInst *DI = cast<DbgDeclareInst>(II); assert(DI->getVariable() && "Missing variable"); if (!FuncInfo.MF->getMMI().hasDebugInfo()) { - LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI + << " (!hasDebugInfo)\n"); return true; } const Value *Address = DI->getAddress(); if (!Address || isa<UndefValue>(Address)) { - LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI + << " (bad/undef address)\n"); return true; } @@ -1383,7 +1394,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { return true; Optional<MachineOperand> Op; - if (unsigned Reg = lookUpRegForValue(Address)) + if (Register Reg = lookUpRegForValue(Address)) Op = MachineOperand::CreateReg(Reg, false); // If we have a VLA that has a "use" in a metadata node that's then used @@ -1414,7 +1425,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. - LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI + << " (no materialized reg for address)\n"); } return true; } @@ -1425,9 +1437,9 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { const Value *V = DI->getValue(); assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && "Expected inlined-at fields to agree"); - if (!V) { + if (!V || isa<UndefValue>(V)) { // Currently the optimizer can produce this; insert an undef to - // help debugging. Probably the optimizer should not do this. + // help debugging. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U, DI->getVariable(), DI->getExpression()); } else if (const auto *CI = dyn_cast<ConstantInt>(V)) { @@ -1449,14 +1461,13 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { .addImm(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); - } else if (unsigned Reg = lookUpRegForValue(V)) { + } else if (Register Reg = lookUpRegForValue(V)) { // FIXME: This does not handle register-indirect values at offset 0. bool IsIndirect = false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg, DI->getVariable(), DI->getExpression()); } else { - // We can't yet handle anything else here because it would require - // generating code, thus altering codegen because of debug info. + // We don't know how to handle other cases, so we drop. 
LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); } return true; @@ -1482,7 +1493,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: case Intrinsic::expect: { - unsigned ResultReg = getRegForValue(II->getArgOperand(0)); + Register ResultReg = getRegForValue(II->getArgOperand(0)); if (!ResultReg) return false; updateValueMap(II, ResultReg); @@ -1520,14 +1531,14 @@ bool FastISel::selectCast(const User *I, unsigned Opcode) { if (!TLI.isTypeLegal(SrcVT)) return false; - unsigned InputReg = getRegForValue(I->getOperand(0)); + Register InputReg = getRegForValue(I->getOperand(0)); if (!InputReg) // Unhandled operand. Halt "fast" selection and bail. return false; bool InputRegIsKill = hasTrivialKill(I->getOperand(0)); - unsigned ResultReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), + Register ResultReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opcode, InputReg, InputRegIsKill); if (!ResultReg) return false; @@ -1539,7 +1550,7 @@ bool FastISel::selectCast(const User *I, unsigned Opcode) { bool FastISel::selectBitCast(const User *I) { // If the bitcast doesn't change the type, just use the operand value. if (I->getType() == I->getOperand(0)->getType()) { - unsigned Reg = getRegForValue(I->getOperand(0)); + Register Reg = getRegForValue(I->getOperand(0)); if (!Reg) return false; updateValueMap(I, Reg); @@ -1556,13 +1567,13 @@ bool FastISel::selectBitCast(const User *I) { MVT SrcVT = SrcEVT.getSimpleVT(); MVT DstVT = DstEVT.getSimpleVT(); - unsigned Op0 = getRegForValue(I->getOperand(0)); + Register Op0 = getRegForValue(I->getOperand(0)); if (!Op0) // Unhandled operand. Halt "fast" selection and bail. return false; bool Op0IsKill = hasTrivialKill(I->getOperand(0)); // First, try to perform the bitcast by inserting a reg-reg copy. - unsigned ResultReg = 0; + Register ResultReg; if (SrcVT == DstVT) { const TargetRegisterClass *SrcClass = TLI.getRegClassFor(SrcVT); const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT); @@ -1585,6 +1596,27 @@ bool FastISel::selectBitCast(const User *I) { return true; } +bool FastISel::selectFreeze(const User *I) { + Register Reg = getRegForValue(I->getOperand(0)); + if (!Reg) + // Unhandled operand. + return false; + + EVT ETy = TLI.getValueType(DL, I->getOperand(0)->getType()); + if (ETy == MVT::Other || !TLI.isTypeLegal(ETy)) + // Unhandled type, bail out. + return false; + + MVT Ty = ETy.getSimpleVT(); + const TargetRegisterClass *TyRegClass = TLI.getRegClassFor(Ty); + Register ResultReg = createResultReg(TyRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(Reg); + + updateValueMap(I, ResultReg); + return true; +} + // Remove local value instructions starting from the instruction after // SavedLastLocalValue to the current function insert point. void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue) @@ -1620,9 +1652,9 @@ bool FastISel::selectInstruction(const Instruction *I) { } // FastISel does not handle any operand bundles except OB_funclet. 
- if (ImmutableCallSite CS = ImmutableCallSite(I)) - for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i) - if (CS.getOperandBundleAt(i).getTagID() != LLVMContext::OB_funclet) + if (auto *Call = dyn_cast<CallBase>(I)) + for (unsigned i = 0, e = Call->getNumOperandBundles(); i != e; ++i) + if (Call->getOperandBundleAt(i).getTagID() != LLVMContext::OB_funclet) return false; DbgLoc = I->getDebugLoc(); @@ -1723,14 +1755,14 @@ void FastISel::finishCondBranch(const BasicBlock *BranchBB, /// Emit an FNeg operation. bool FastISel::selectFNeg(const User *I, const Value *In) { - unsigned OpReg = getRegForValue(In); + Register OpReg = getRegForValue(In); if (!OpReg) return false; bool OpRegIsKill = hasTrivialKill(In); // If the target has ISD::FNEG, use it. EVT VT = TLI.getValueType(DL, I->getType()); - unsigned ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG, + Register ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG, OpReg, OpRegIsKill); if (ResultReg) { updateValueMap(I, ResultReg); @@ -1745,12 +1777,12 @@ bool FastISel::selectFNeg(const User *I, const Value *In) { if (!TLI.isTypeLegal(IntVT)) return false; - unsigned IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), + Register IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), ISD::BITCAST, OpReg, OpRegIsKill); if (!IntReg) return false; - unsigned IntResultReg = fastEmit_ri_( + Register IntResultReg = fastEmit_ri_( IntVT.getSimpleVT(), ISD::XOR, IntReg, /*IsKill=*/true, UINT64_C(1) << (VT.getSizeInBits() - 1), IntVT.getSimpleVT()); if (!IntResultReg) @@ -1784,7 +1816,7 @@ bool FastISel::selectExtractValue(const User *U) { // Get the base result register. unsigned ResultReg; - DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(Op0); + DenseMap<const Value *, Register>::iterator I = FuncInfo.ValueMap.find(Op0); if (I != FuncInfo.ValueMap.end()) ResultReg = I->second; else if (isa<Instruction>(Op0)) @@ -1916,7 +1948,7 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) { return selectCast(I, ISD::ZERO_EXTEND); if (DstVT.bitsLT(SrcVT)) return selectCast(I, ISD::TRUNCATE); - unsigned Reg = getRegForValue(I->getOperand(0)); + Register Reg = getRegForValue(I->getOperand(0)); if (!Reg) return false; updateValueMap(I, Reg); @@ -1926,6 +1958,9 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) { case Instruction::ExtractValue: return selectExtractValue(I); + case Instruction::Freeze: + return selectFreeze(I); + case Instruction::PHI: llvm_unreachable("FastISel shouldn't visit PHI nodes!"); @@ -1988,7 +2023,7 @@ unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/, /// instruction with an immediate operand using fastEmit_ri. /// If that fails, it materializes the immediate into a register and try /// fastEmit_rr instead. -unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, +Register FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, bool Op0IsKill, uint64_t Imm, MVT ImmType) { // If this is a multiply by a power of two, emit this as a shift left. if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) { @@ -2007,10 +2042,10 @@ unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, return 0; // First check if immediate type is legal. If not, we can't use the ri form. 
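// selectFNeg above falls back to an integer XOR of the sign bit when the
// target lacks a native FNEG: bitcast to the same-width integer type, XOR
// with 1 << (bits - 1), bitcast back. The scalar equivalent for f32
// (assumes IEEE-754, as the DAG expansion does):
#include <cstdint>
#include <cstring>

float fnegViaXor(float X) {
  uint32_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits)); // ISD::BITCAST f32 -> i32
  Bits ^= UINT32_C(1) << 31;            // ISD::XOR with the sign-bit mask
  std::memcpy(&X, &Bits, sizeof(X));    // ISD::BITCAST i32 -> f32
  return X;                             // correct even for NaN, Inf, -0.0
}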
- unsigned ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm); + Register ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm); if (ResultReg) return ResultReg; - unsigned MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm); + Register MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm); bool IsImmKill = true; if (!MaterialReg) { // This is a bit ugly/slow, but failing here means falling out of @@ -2031,19 +2066,19 @@ unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, IsImmKill); } -unsigned FastISel::createResultReg(const TargetRegisterClass *RC) { +Register FastISel::createResultReg(const TargetRegisterClass *RC) { return MRI.createVirtualRegister(RC); } -unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op, +Register FastISel::constrainOperandRegClass(const MCInstrDesc &II, Register Op, unsigned OpNum) { - if (Register::isVirtualRegister(Op)) { + if (Op.isVirtual()) { const TargetRegisterClass *RegClass = TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); if (!MRI.constrainRegClass(Op, RegClass)) { // If it's not legal to COPY between the register classes, something // has gone very wrong before we got here. - unsigned NewOp = createResultReg(RegClass); + Register NewOp = createResultReg(RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), NewOp).addReg(Op); return NewOp; @@ -2052,21 +2087,21 @@ unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op, return Op; } -unsigned FastISel::fastEmitInst_(unsigned MachineInstOpcode, +Register FastISel::fastEmitInst_(unsigned MachineInstOpcode, const TargetRegisterClass *RC) { - unsigned ResultReg = createResultReg(RC); + Register ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg); return ResultReg; } -unsigned FastISel::fastEmitInst_r(unsigned MachineInstOpcode, +Register FastISel::fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill) { const MCInstrDesc &II = TII.get(MachineInstOpcode); - unsigned ResultReg = createResultReg(RC); + Register ResultReg = createResultReg(RC); Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); if (II.getNumDefs() >= 1) @@ -2082,13 +2117,13 @@ unsigned FastISel::fastEmitInst_r(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_rr(unsigned MachineInstOpcode, +Register FastISel::fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill) { const MCInstrDesc &II = TII.get(MachineInstOpcode); - unsigned ResultReg = createResultReg(RC); + Register ResultReg = createResultReg(RC); Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); @@ -2106,14 +2141,14 @@ unsigned FastISel::fastEmitInst_rr(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode, +Register FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, unsigned Op2, bool Op2IsKill) { const MCInstrDesc &II = TII.get(MachineInstOpcode); - unsigned ResultReg = createResultReg(RC); + Register ResultReg = createResultReg(RC); Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); Op1 = 
constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2); @@ -2134,12 +2169,12 @@ unsigned FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_ri(unsigned MachineInstOpcode, +Register FastISel::fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm) { const MCInstrDesc &II = TII.get(MachineInstOpcode); - unsigned ResultReg = createResultReg(RC); + Register ResultReg = createResultReg(RC); Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); if (II.getNumDefs() >= 1) @@ -2156,13 +2191,13 @@ unsigned FastISel::fastEmitInst_ri(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode, +Register FastISel::fastEmitInst_rii(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm1, uint64_t Imm2) { const MCInstrDesc &II = TII.get(MachineInstOpcode); - unsigned ResultReg = createResultReg(RC); + Register ResultReg = createResultReg(RC); Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); if (II.getNumDefs() >= 1) @@ -2181,12 +2216,12 @@ unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_f(unsigned MachineInstOpcode, +Register FastISel::fastEmitInst_f(unsigned MachineInstOpcode, const TargetRegisterClass *RC, const ConstantFP *FPImm) { const MCInstrDesc &II = TII.get(MachineInstOpcode); - unsigned ResultReg = createResultReg(RC); + Register ResultReg = createResultReg(RC); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) @@ -2200,13 +2235,13 @@ unsigned FastISel::fastEmitInst_f(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode, +Register FastISel::fastEmitInst_rri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, uint64_t Imm) { const MCInstrDesc &II = TII.get(MachineInstOpcode); - unsigned ResultReg = createResultReg(RC); + Register ResultReg = createResultReg(RC); Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); @@ -2226,9 +2261,9 @@ unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode, +Register FastISel::fastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm) { - unsigned ResultReg = createResultReg(RC); + Register ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) @@ -2242,9 +2277,9 @@ unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, +Register FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx) { - unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + Register ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); assert(Register::isVirtualRegister(Op0) && "Cannot yet extract from physregs"); const TargetRegisterClass *RC = MRI.getRegClass(Op0); @@ -2256,7 +2291,7 @@ unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, /// Emit MachineInstrs to compute the value of Op with all but the least /// significant bit set to zero. 
-unsigned FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { +Register FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { return fastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1); } @@ -2318,7 +2353,7 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { if (const auto *Inst = dyn_cast<Instruction>(PHIOp)) DbgLoc = Inst->getDebugLoc(); - unsigned Reg = getRegForValue(PHIOp); + Register Reg = getRegForValue(PHIOp); if (!Reg) { FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); return false; @@ -2364,7 +2399,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { // Figure out which vreg this is going into. If there is no assigned vreg yet // then there actually was no reference to it. Perhaps the load is referenced // by a dead instruction. - unsigned LoadReg = getRegForValue(LI); + Register LoadReg = getRegForValue(LI); if (!LoadReg) return false; @@ -2407,18 +2442,18 @@ MachineMemOperand * FastISel::createMachineMemOperandFor(const Instruction *I) const { const Value *Ptr; Type *ValTy; - unsigned Alignment; + MaybeAlign Alignment; MachineMemOperand::Flags Flags; bool IsVolatile; if (const auto *LI = dyn_cast<LoadInst>(I)) { - Alignment = LI->getAlignment(); + Alignment = LI->getAlign(); IsVolatile = LI->isVolatile(); Flags = MachineMemOperand::MOLoad; Ptr = LI->getPointerOperand(); ValTy = LI->getType(); } else if (const auto *SI = dyn_cast<StoreInst>(I)) { - Alignment = SI->getAlignment(); + Alignment = SI->getAlign(); IsVolatile = SI->isVolatile(); Flags = MachineMemOperand::MOStore; Ptr = SI->getPointerOperand(); @@ -2434,8 +2469,8 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { AAMDNodes AAInfo; I->getAAMetadata(AAInfo); - if (Alignment == 0) // Ensure that codegen never sees alignment 0. - Alignment = DL.getABITypeAlignment(ValTy); + if (!Alignment) // Ensure that codegen never sees alignment 0. + Alignment = DL.getABITypeAlign(ValTy); unsigned Size = DL.getTypeStoreSize(ValTy); @@ -2449,7 +2484,7 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { Flags |= MachineMemOperand::MOInvariant; return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size, - Alignment, AAInfo, Ranges); + *Alignment, AAInfo, Ranges); } CmpInst::Predicate FastISel::optimizeCmpPredicate(const CmpInst *CI) const { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index fa33400cd4b3..5cf83cff3a90 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/ADT/APInt.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -85,7 +86,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); - unsigned StackAlign = TFI->getStackAlignment(); DA = DAG->getDivergenceAnalysis(); // Check whether the function can return without sret-demotion. 
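// fastEmitZExtFromI1 above reduces "zero-extend an i1" to a single
// AND-with-1, since only the low bit of a boolean register is defined.
// The scalar picture (illustrative only):
#include <cstdint>

uint32_t zextFromI1(uint32_t BoolReg) {
  return BoolReg & 1u; // clears all but the least significant bit
}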
@@ -130,19 +130,31 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines // them. + const Align StackAlign = TFI->getStackAlign(); for (const BasicBlock &BB : *Fn) { for (const Instruction &I : BB) { if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) { Type *Ty = AI->getAllocatedType(); - unsigned Align = - std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty), - AI->getAlignment()); + Align TyPrefAlign = MF->getDataLayout().getPrefTypeAlign(Ty); + // The "specified" alignment is the alignment written on the alloca, + // or the preferred alignment of the type if none is specified. + // + // (Unspecified alignment on allocas will be going away soon.) + Align SpecifiedAlign = AI->getAlign(); + + // If the preferred alignment of the type is higher than the specified + // alignment of the alloca, promote the alignment, as long as it doesn't + // require realigning the stack. + // + // FIXME: Do we really want to second-guess the IR in isel? + Align Alignment = + std::max(std::min(TyPrefAlign, StackAlign), SpecifiedAlign); // Static allocas can be folded into the initial stack frame // adjustment. For targets that don't realign the stack, don't // do this if there is an extra alignment requirement. if (AI->isStaticAlloca() && - (TFI->isStackRealignable() || (Align <= StackAlign))) { + (TFI->isStackRealignable() || (Alignment <= StackAlign))) { const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize()); uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty).getKnownMinSize(); @@ -154,15 +166,15 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, if (Iter != CatchObjects.end() && TLI->needsFixedCatchObjects()) { FrameIndex = MF->getFrameInfo().CreateFixedObject( TySize, 0, /*IsImmutable=*/false, /*isAliased=*/true); - MF->getFrameInfo().setObjectAlignment(FrameIndex, Align); + MF->getFrameInfo().setObjectAlignment(FrameIndex, Alignment); } else { - FrameIndex = - MF->getFrameInfo().CreateStackObject(TySize, Align, false, AI); + FrameIndex = MF->getFrameInfo().CreateStackObject(TySize, Alignment, + false, AI); } // Scalable vectors may need a special StackID to distinguish // them from other (fixed size) stack objects. - if (Ty->isVectorTy() && Ty->getVectorIsScalable()) + if (isa<ScalableVectorType>(Ty)) MF->getFrameInfo().setStackID(FrameIndex, TFI->getStackIDForScalableVectors()); @@ -176,21 +188,20 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // FIXME: Overaligned static allocas should be grouped into // a single dynamic allocation instead of using a separate // stack allocation for each one. - if (Align <= StackAlign) - Align = 0; // Inform the Frame Information that we have variable-sized objects. - MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, AI); + MF->getFrameInfo().CreateVariableSizedObject( + Alignment <= StackAlign ? Align(1) : Alignment, AI); } } // Look for inline asm that clobbers the SP register. 
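// The rewritten alloca handling above clamps the promoted alignment as
//   Alignment = max(min(TyPrefAlign, StackAlign), SpecifiedAlign)
// i.e. promote toward the type's preferred alignment only as far as the
// stack alignment allows, but never drop below what the alloca explicitly
// requested. A toy model with power-of-two integers standing in for
// llvm::Align:
#include <algorithm>
#include <cstdint>

uint64_t promotedAllocaAlign(uint64_t TyPrefAlign, uint64_t StackAlign,
                             uint64_t SpecifiedAlign) {
  return std::max(std::min(TyPrefAlign, StackAlign), SpecifiedAlign);
}
// promotedAllocaAlign(32, 16, 4)  == 16  (promoted, capped by the stack)
// promotedAllocaAlign(32, 16, 32) == 32  (explicit request always wins)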
- if (isa<CallInst>(I) || isa<InvokeInst>(I)) { - ImmutableCallSite CS(&I); - if (isa<InlineAsm>(CS.getCalledValue())) { + if (auto *Call = dyn_cast<CallBase>(&I)) { + if (Call->isInlineAsm()) { unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); std::vector<TargetLowering::AsmOperandInfo> Ops = - TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI, CS); + TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI, + *Call); for (TargetLowering::AsmOperandInfo &Op : Ops) { if (Op.Type == InlineAsm::isClobber) { // Clobbers don't have SDValue operands, hence SDValue(). @@ -354,7 +365,7 @@ void FunctionLoweringInfo::clear() { } /// CreateReg - Allocate a single virtual register for the given type. -unsigned FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) { +Register FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) { return RegInfo->createVirtualRegister( MF->getSubtarget().getTargetLowering()->getRegClassFor(VT, isDivergent)); } @@ -366,29 +377,29 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) { /// In the case that the given value has struct or array type, this function /// will assign registers for each member or element. /// -unsigned FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) { +Register FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) { const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs); - unsigned FirstReg = 0; + Register FirstReg; for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { EVT ValueVT = ValueVTs[Value]; MVT RegisterVT = TLI->getRegisterType(Ty->getContext(), ValueVT); unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT); for (unsigned i = 0; i != NumRegs; ++i) { - unsigned R = CreateReg(RegisterVT, isDivergent); + Register R = CreateReg(RegisterVT, isDivergent); if (!FirstReg) FirstReg = R; } } return FirstReg; } -unsigned FunctionLoweringInfo::CreateRegs(const Value *V) { - return CreateRegs(V->getType(), DA && !TLI->requiresUniformRegister(*MF, V) && - DA->isDivergent(V)); +Register FunctionLoweringInfo::CreateRegs(const Value *V) { + return CreateRegs(V->getType(), DA && DA->isDivergent(V) && + !TLI->requiresUniformRegister(*MF, V)); } /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the @@ -397,7 +408,7 @@ unsigned FunctionLoweringInfo::CreateRegs(const Value *V) { /// the larger bit width by zero extension. The bit width must be no smaller /// than the LiveOutInfo's existing bit width. 
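// CreateRegs above allocates one virtual register per legal register piece
// of a possibly-aggregate type and returns the first one. A toy model of
// that counting; the {2, 1} example imagines an {i64, i32} value on a
// 32-bit target (an illustrative assumption, not taken from the patch):
#include <vector>

unsigned createRegsModel(const std::vector<unsigned> &RegsPerValue,
                         unsigned &NextVReg) {
  unsigned FirstReg = 0;
  for (unsigned NumRegs : RegsPerValue)
    for (unsigned i = 0; i != NumRegs; ++i) {
      unsigned R = ++NextVReg; // stand-in for CreateReg(RegisterVT, ...)
      if (!FirstReg)
        FirstReg = R;          // remember only the first piece
    }
  return FirstReg;             // e.g. {2, 1} -> vregs 1..3, returns 1
}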
const FunctionLoweringInfo::LiveOutInfo * -FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) { +FunctionLoweringInfo::GetLiveOutRegInfo(Register Reg, unsigned BitWidth) { if (!LiveOutRegInfo.inBounds(Reg)) return nullptr; @@ -407,7 +418,7 @@ FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) { if (BitWidth > LOI->Known.getBitWidth()) { LOI->NumSignBits = 1; - LOI->Known = LOI->Known.zext(BitWidth, false /* => any extend */); + LOI->Known = LOI->Known.anyext(BitWidth); } return LOI; @@ -431,7 +442,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { IntVT = TLI->getTypeToTransformTo(PN->getContext(), IntVT); unsigned BitWidth = IntVT.getSizeInBits(); - unsigned DestReg = ValueMap[PN]; + Register DestReg = ValueMap[PN]; if (!Register::isVirtualRegister(DestReg)) return; LiveOutRegInfo.grow(DestReg); @@ -452,7 +463,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { } else { assert(ValueMap.count(V) && "V should have been placed in ValueMap when its" "CopyToReg node was created."); - unsigned SrcReg = ValueMap[V]; + Register SrcReg = ValueMap[V]; if (!Register::isVirtualRegister(SrcReg)) { DestLOI.IsValid = false; return; @@ -487,8 +498,8 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { assert(ValueMap.count(V) && "V should have been placed in ValueMap when " "its CopyToReg node was created."); - unsigned SrcReg = ValueMap[V]; - if (!Register::isVirtualRegister(SrcReg)) { + Register SrcReg = ValueMap[V]; + if (!SrcReg.isVirtual()) { DestLOI.IsValid = false; return; } @@ -522,11 +533,11 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) { return INT_MAX; } -unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg( +Register FunctionLoweringInfo::getCatchPadExceptionPointerVReg( const Value *CPI, const TargetRegisterClass *RC) { MachineRegisterInfo &MRI = MF->getRegInfo(); auto I = CatchPadExceptionPointers.insert({CPI, 0}); - unsigned &VReg = I.first->second; + Register &VReg = I.first->second; if (I.second) VReg = MRI.createVirtualRegister(RC); assert(VReg && "null vreg in exception pointer table!"); @@ -534,7 +545,7 @@ unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg( } const Value * -FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) { +FunctionLoweringInfo::getValueFromVirtualReg(Register Vreg) { if (VirtReg2Value.empty()) { SmallVector<EVT, 4> ValueVTs; for (auto &P : ValueMap) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 176d71643e1a..0e4e99214aa2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -28,6 +29,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; #define DEBUG_TYPE "instr-emitter" @@ -84,9 +86,9 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses, /// implicit physical register output. 
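// GetLiveOutRegInfo above widens cached known bits with KnownBits::anyext,
// which zero-extends the Zero and One masks so every new high bit is
// *unknown* (neither mask set), unlike zext(), which would record them as
// known zero. A small check against the real type (assumes the LLVM
// headers from this tree are available):
#include "llvm/Support/KnownBits.h"
#include <cassert>

void anyextLeavesTopBitsUnknown() {
  llvm::KnownBits K(8);
  K.setAllZero();                   // all 8 bits known to be 0
  llvm::KnownBits W = K.anyext(16);
  assert(!W.Zero[15] && !W.One[15] && "extended bits are unknown");
}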
void InstrEmitter:: EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, - unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) { - unsigned VRBase = 0; - if (Register::isVirtualRegister(SrcReg)) { + Register SrcReg, DenseMap<SDValue, Register> &VRBaseMap) { + Register VRBase; + if (SrcReg.isVirtual()) { // Just use the input register directly! SDValue Op(Node, ResNo); if (IsClone) @@ -113,8 +115,8 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == ResNo) { - unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (Register::isVirtualRegister(DestReg)) { + Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (DestReg.isVirtual()) { VRBase = DestReg; Match = false; } else if (DestReg != SrcReg) @@ -190,16 +192,19 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstrBuilder &MIB, const MCInstrDesc &II, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap) { + DenseMap<SDValue, Register> &VRBaseMap) { assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && "IMPLICIT_DEF should have been handled as a special case elsewhere!"); unsigned NumResults = CountResults(Node); - for (unsigned i = 0; i < II.getNumDefs(); ++i) { + bool HasVRegVariadicDefs = !MF->getTarget().usesPhysRegsForValues() && + II.isVariadic() && II.variadicOpsAreDefs(); + unsigned NumVRegs = HasVRegVariadicDefs ? NumResults : II.getNumDefs(); + for (unsigned i = 0; i < NumVRegs; ++i) { // If the specific node value is only used by a CopyToReg and the dest reg // is a vreg in the same register class, use the CopyToReg'd destination // register instead of creating a new vreg. - unsigned VRBase = 0; + Register VRBase; const TargetRegisterClass *RC = TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF)); // Always let the value type influence the used register class. The @@ -216,10 +221,10 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, RC = VTRC; } - if (II.OpInfo[i].isOptionalDef()) { + if (II.OpInfo != nullptr && II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg(); - assert(Register::isPhysicalRegister(VRBase)); + assert(VRBase.isPhysical()); MIB.addReg(VRBase, RegState::Define); } @@ -263,8 +268,8 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, /// getVR - Return the virtual register corresponding to the specified result /// of the specified node. -unsigned InstrEmitter::getVR(SDValue Op, - DenseMap<SDValue, unsigned> &VRBaseMap) { +Register InstrEmitter::getVR(SDValue Op, + DenseMap<SDValue, Register> &VRBaseMap) { if (Op.isMachineOpcode() && Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Add an IMPLICIT_DEF instruction before every use. 
@@ -278,7 +283,7 @@ unsigned InstrEmitter::getVR(SDValue Op, return VReg; } - DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op); + DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op); assert(I != VRBaseMap.end() && "Node emitted out of order - late"); return I->second; } @@ -292,13 +297,13 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - DenseMap<SDValue, unsigned> &VRBaseMap, + DenseMap<SDValue, Register> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Glue && "Chain and glue operands should occur at end of operand list!"); // Get/emit the operand. - unsigned VReg = getVR(Op, VRBaseMap); + Register VReg = getVR(Op, VRBaseMap); const MCInstrDesc &MCID = MIB->getDesc(); bool isOptDef = IIOpNum < MCID.getNumOperands() && @@ -363,7 +368,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - DenseMap<SDValue, unsigned> &VRBaseMap, + DenseMap<SDValue, Register> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { if (Op.isMachineOpcode()) { AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap, @@ -373,7 +378,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) { MIB.addFPImm(F->getConstantFPValue()); } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { - unsigned VReg = R->getReg(); + Register VReg = R->getReg(); MVT OpVT = Op.getSimpleValueType(); const TargetRegisterClass *IIRC = II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF)) @@ -409,23 +414,14 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, MIB.addJumpTableIndex(JT->getIndex(), JT->getTargetFlags()); } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) { int Offset = CP->getOffset(); - unsigned Align = CP->getAlignment(); - Type *Type = CP->getType(); - // MachineConstantPool wants an explicit alignment. - if (Align == 0) { - Align = MF->getDataLayout().getPrefTypeAlignment(Type); - if (Align == 0) { - // Alignment of vector types. FIXME! - Align = MF->getDataLayout().getTypeAllocSize(Type); - } - } + Align Alignment = CP->getAlign(); unsigned Idx; MachineConstantPool *MCP = MF->getConstantPool(); if (CP->isMachineConstantPoolEntry()) - Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align); + Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Alignment); else - Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align); + Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Alignment); MIB.addConstantPoolIndex(Idx, Offset, CP->getTargetFlags()); } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) { MIB.addExternalSymbol(ES->getSymbol(), ES->getTargetFlags()); @@ -446,7 +442,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, } } -unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx, +Register InstrEmitter::ConstrainForSubReg(Register VReg, unsigned SubIdx, MVT VT, bool isDivergent, const DebugLoc &DL) { const TargetRegisterClass *VRC = MRI->getRegClass(VReg); const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx); @@ -473,9 +469,9 @@ unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx, /// EmitSubregNode - Generate machine code for subreg nodes. 
/// void InstrEmitter::EmitSubregNode(SDNode *Node, - DenseMap<SDValue, unsigned> &VRBaseMap, + DenseMap<SDValue, Register> &VRBaseMap, bool IsClone, bool IsCloned) { - unsigned VRBase = 0; + Register VRBase; unsigned Opc = Node->getMachineOpcode(); // If the node is only used by a CopyToReg and the dest reg is a vreg, use @@ -483,8 +479,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, for (SDNode *User : Node->uses()) { if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node) { - unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (Register::isVirtualRegister(DestReg)) { + Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (DestReg.isVirtual()) { VRBase = DestReg; break; } @@ -499,7 +495,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, const TargetRegisterClass *TRC = TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent()); - unsigned Reg; + Register Reg; MachineInstr *DefMI; RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0)); if (R && Register::isPhysicalRegister(R->getReg())) { @@ -510,7 +506,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, DefMI = MRI->getVRegDef(Reg); } - unsigned SrcReg, DstReg, DefSubIdx; + Register SrcReg, DstReg; + unsigned DefSubIdx; if (DefMI && TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) && SubIdx == DefSubIdx && @@ -528,19 +525,19 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // Reg may not support a SubIdx sub-register, and we may need to // constrain its register class or issue a COPY to a compatible register // class. - if (Register::isVirtualRegister(Reg)) + if (Reg.isVirtual()) Reg = ConstrainForSubReg(Reg, SubIdx, Node->getOperand(0).getSimpleValueType(), Node->isDivergent(), Node->getDebugLoc()); // Create the destreg if it is missing. - if (VRBase == 0) + if (!VRBase) VRBase = MRI->createVirtualRegister(TRC); // Create the extract_subreg machine instruction. MachineInstrBuilder CopyMI = BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), VRBase); - if (Register::isVirtualRegister(Reg)) + if (Reg.isVirtual()) CopyMI.addReg(Reg, 0, SubIdx); else CopyMI.addReg(TRI->getSubReg(Reg, SubIdx)); @@ -606,7 +603,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, /// void InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, - DenseMap<SDValue, unsigned> &VRBaseMap) { + DenseMap<SDValue, Register> &VRBaseMap) { unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); // Create the new VReg in the destination class and emit a copy. @@ -626,7 +623,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. /// void InstrEmitter::EmitRegSequence(SDNode *Node, - DenseMap<SDValue, unsigned> &VRBaseMap, + DenseMap<SDValue, Register> &VRBaseMap, bool IsClone, bool IsCloned) { unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); @@ -675,7 +672,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, /// MachineInstr * InstrEmitter::EmitDbgValue(SDDbgValue *SD, - DenseMap<SDValue, unsigned> &VRBaseMap) { + DenseMap<SDValue, Register> &VRBaseMap) { MDNode *Var = SD->getVariable(); MDNode *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); @@ -720,7 +717,7 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, // they happen and transfer the debug info, but trying to guarantee that // in all cases would be very fragile; this is a safeguard for any // that were missed. 
- DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op); + DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op); if (I==VRBaseMap.end()) MIB.addReg(0U); // undef else @@ -781,7 +778,7 @@ InstrEmitter::EmitDbgLabel(SDDbgLabel *SD) { /// void InstrEmitter:: EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap) { + DenseMap<SDValue, Register> &VRBaseMap) { unsigned Opc = Node->getMachineOpcode(); // Handle subreg insert/extract specially @@ -829,7 +826,10 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, unsigned NumImpUses = 0; unsigned NodeOperands = countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses); - bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=nullptr; + bool HasVRegVariadicDefs = !MF->getTarget().usesPhysRegsForValues() && + II.isVariadic() && II.variadicOpsAreDefs(); + bool HasPhysRegOuts = NumResults > NumDefs && + II.getImplicitDefs() != nullptr && !HasVRegVariadicDefs; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; if (II.isVariadic()) @@ -979,7 +979,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, /// needed dependencies. void InstrEmitter:: EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap) { + DenseMap<SDValue, Register> &VRBaseMap) { switch (Node->getOpcode()) { default: #ifndef NDEBUG @@ -992,7 +992,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, case ISD::TokenFactor: // fall thru break; case ISD::CopyToReg: { - unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + Register DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); SDValue SrcVal = Node->getOperand(2); if (Register::isVirtualRegister(DestReg) && SrcVal.isMachineOpcode() && SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { @@ -1002,7 +1002,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, TII->get(TargetOpcode::IMPLICIT_DEF), DestReg); break; } - unsigned SrcReg; + Register SrcReg; if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal)) SrcReg = R->getReg(); else diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h index cfe99dd977b5..c3567eae9161 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -17,13 +17,15 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" namespace llvm { class MachineInstrBuilder; class MCInstrDesc; +class SDDbgLabel; class SDDbgValue; +class TargetLowering; class LLVM_LIBRARY_VISIBILITY InstrEmitter { MachineFunction *MF; @@ -39,19 +41,19 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// implicit physical register output. void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, - unsigned SrcReg, - DenseMap<SDValue, unsigned> &VRBaseMap); + Register SrcReg, + DenseMap<SDValue, Register> &VRBaseMap); void CreateVirtualRegisters(SDNode *Node, MachineInstrBuilder &MIB, const MCInstrDesc &II, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap); + DenseMap<SDValue, Register> &VRBaseMap); /// getVR - Return the virtual register corresponding to the specified result /// of the specified node. 
- unsigned getVR(SDValue Op, - DenseMap<SDValue, unsigned> &VRBaseMap); + Register getVR(SDValue Op, + DenseMap<SDValue, Register> &VRBaseMap); /// AddRegisterOperand - Add the specified register as an operand to the /// specified machine instr. Insert register copies if the register is @@ -60,7 +62,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - DenseMap<SDValue, unsigned> &VRBaseMap, + DenseMap<SDValue, Register> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); /// AddOperand - Add the specified operand to the specified machine instr. II @@ -71,18 +73,18 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - DenseMap<SDValue, unsigned> &VRBaseMap, + DenseMap<SDValue, Register> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); /// ConstrainForSubReg - Try to constrain VReg to a register class that /// supports SubIdx sub-registers. Emit a copy if that isn't possible. /// Return the virtual register to use. - unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, MVT VT, + Register ConstrainForSubReg(Register VReg, unsigned SubIdx, MVT VT, bool isDivergent, const DebugLoc &DL); /// EmitSubregNode - Generate machine code for subreg nodes. /// - void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap, + void EmitSubregNode(SDNode *Node, DenseMap<SDValue, Register> &VRBaseMap, bool IsClone, bool IsCloned); /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. @@ -90,11 +92,11 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// register is constrained to be in a particular register class. /// void EmitCopyToRegClassNode(SDNode *Node, - DenseMap<SDValue, unsigned> &VRBaseMap); + DenseMap<SDValue, Register> &VRBaseMap); /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. /// - void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap, + void EmitRegSequence(SDNode *Node, DenseMap<SDValue, Register> &VRBaseMap, bool IsClone, bool IsCloned); public: /// CountResults - The results of target nodes have register or immediate @@ -105,7 +107,7 @@ public: /// EmitDbgValue - Generate machine instruction for a dbg_value node. /// MachineInstr *EmitDbgValue(SDDbgValue *SD, - DenseMap<SDValue, unsigned> &VRBaseMap); + DenseMap<SDValue, Register> &VRBaseMap); /// Generate machine instruction for a dbg_label node. MachineInstr *EmitDbgLabel(SDDbgLabel *SD); @@ -113,7 +115,7 @@ public: /// EmitNode - Generate machine code for a node and needed dependencies. 
/// void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap) { + DenseMap<SDValue, Register> &VRBaseMap) { if (Node->isMachineOpcode()) EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap); else @@ -132,9 +134,9 @@ public: private: void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap); + DenseMap<SDValue, Register> &VRBaseMap); void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap); + DenseMap<SDValue, Register> &VRBaseMap); }; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 91404ee7728b..6a6004c158bb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -328,7 +328,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy(DAG.getDataLayout())); - unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign(); if (Extend) { SDValue Result = DAG.getExtLoad( ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx, @@ -348,7 +348,7 @@ SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) { EVT VT = CP->getValueType(0); SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(), TLI.getPointerTy(DAG.getDataLayout())); - unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign(); SDValue Result = DAG.getLoad( VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment); @@ -387,7 +387,9 @@ SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec, SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, Tmp3); // Store the scalar value. - Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT); + Ch = DAG.getTruncStore( + Ch, dl, Tmp2, StackPtr2, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT); // Load the updated vector. return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), SPFI)); @@ -434,7 +436,6 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { // We generally can't do this one for long doubles. SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); - unsigned Alignment = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); SDLoc dl(ST); @@ -444,8 +445,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Con = DAG.getConstant(CFP->getValueAPF(). bitcastToAPInt().zextOrTrunc(32), SDLoc(CFP), MVT::i32); - return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), Alignment, - MMOFlags, AAInfo); + return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), + ST->getOriginalAlign(), MMOFlags, AAInfo); } if (CFP->getValueType(0) == MVT::f64) { @@ -454,7 +455,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). 
zextOrTrunc(64), SDLoc(CFP), MVT::i64); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), - Alignment, MMOFlags, AAInfo); + ST->getOriginalAlign(), MMOFlags, AAInfo); } if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) { @@ -467,12 +468,12 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), Alignment, - MMOFlags, AAInfo); + Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), + ST->getOriginalAlign(), MMOFlags, AAInfo); Ptr = DAG.getMemBasePlusOffset(Ptr, 4, dl); Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), - MinAlign(Alignment, 4U), MMOFlags, AAInfo); + ST->getOriginalAlign(), MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -487,7 +488,6 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { SDValue Ptr = ST->getBasePtr(); SDLoc dl(Node); - unsigned Alignment = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); @@ -528,9 +528,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { assert(NVT.getSizeInBits() == VT.getSizeInBits() && "Can only promote stores to same size type"); Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value); - SDValue Result = - DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - Alignment, MMOFlags, AAInfo); + SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + ST->getOriginalAlign(), MMOFlags, AAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -553,7 +552,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Value = DAG.getZeroExtendInReg(Value, dl, StVT); SDValue Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT, - Alignment, MMOFlags, AAInfo); + ST->getOriginalAlign(), MMOFlags, AAInfo); ReplaceNode(SDValue(Node, 0), Result); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. @@ -575,7 +574,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) // Store the bottom RoundWidth bits. Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - RoundVT, Alignment, MMOFlags, AAInfo); + RoundVT, ST->getOriginalAlign(), MMOFlags, AAInfo); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -584,10 +583,9 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(RoundWidth, dl, TLI.getShiftAmountTy(Value.getValueType(), DL))); - Hi = DAG.getTruncStore( - Chain, dl, Hi, Ptr, - ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, ST->getOriginalAlign(), MMOFlags, AAInfo); } else { // Big endian - avoid unaligned stores. 
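// OptimizeFloatStore above turns a store of an f64 constant into two i32
// stores of its bit pattern, swapping the words on big-endian targets. A
// scalar model (assumes a little-endian host and IEEE-754 doubles):
#include <cstdint>
#include <cstring>

void storeF64AsTwoI32(double V, uint32_t *P) {
  uint64_t Bits;
  std::memcpy(&Bits, &V, sizeof(Bits)); // bitcastToAPInt()
  P[0] = (uint32_t)Bits;                // lo word at Ptr
  P[1] = (uint32_t)(Bits >> 32);        // hi word at Ptr + 4
}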
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X @@ -596,18 +594,17 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(ExtraWidth, dl, TLI.getShiftAmountTy(Value.getValueType(), DL))); - Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), - RoundVT, Alignment, MMOFlags, AAInfo); + Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), RoundVT, + ST->getOriginalAlign(), MMOFlags, AAInfo); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); - Lo = DAG.getTruncStore( - Chain, dl, Value, Ptr, - ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, ST->getOriginalAlign(), MMOFlags, AAInfo); } // The order of the stores doesn't matter. @@ -643,15 +640,16 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { if (TLI.isTypeLegal(StVT)) { Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - Alignment, MMOFlags, AAInfo); + ST->getOriginalAlign(), MMOFlags, AAInfo); } else { // The in-memory type isn't legal. Truncate to the type it would promote // to, and then do a truncstore. Value = DAG.getNode(ISD::TRUNCATE, dl, TLI.getTypeToTransformTo(*DAG.getContext(), StVT), Value); - Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - StVT, Alignment, MMOFlags, AAInfo); + Result = + DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), StVT, + ST->getOriginalAlign(), MMOFlags, AAInfo); } ReplaceNode(SDValue(Node, 0), Result); @@ -721,7 +719,6 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { LLVM_DEBUG(dbgs() << "Legalizing extending load operation\n"); EVT SrcVT = LD->getMemoryVT(); unsigned SrcWidth = SrcVT.getSizeInBits(); - unsigned Alignment = LD->getAlignment(); MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); @@ -748,9 +745,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { ISD::LoadExtType NewExtType = ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; - SDValue Result = - DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Chain, Ptr, - LD->getPointerInfo(), NVT, Alignment, MMOFlags, AAInfo); + SDValue Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Chain, Ptr, LD->getPointerInfo(), NVT, + LD->getOriginalAlign(), MMOFlags, AAInfo); Ch = Result.getValue(1); // The chain. @@ -788,16 +785,15 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) // Load the bottom RoundWidth bits. Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, - LD->getPointerInfo(), RoundVT, Alignment, MMOFlags, - AAInfo); + LD->getPointerInfo(), RoundVT, LD->getOriginalAlign(), + MMOFlags, AAInfo); // Load the remaining ExtraWidth bits. 
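// The hunks above split a non-power-of-two i24 access into a 16-bit piece
// and an 8-bit piece. For the little-endian load case the comment spells
// out (EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)), the scalar
// equivalent with zero-extension is:
#include <cstdint>

uint32_t loadI24LittleEndian(const uint8_t *P) {
  uint32_t Lo = (uint32_t)P[0] | ((uint32_t)P[1] << 8); // ZEXTLOAD:i16
  uint32_t Hi = (uint32_t)P[2];                         // load i8 at +2
  return Lo | (Hi << 16);                               // shl + or
}
// The matching truncstore runs the same split in reverse: low 16 bits at
// +0, (value >> 16) stored as a single byte at +2.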
IncrementSize = RoundWidth / 8; Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, - AAInfo); + ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of // the other one. @@ -817,16 +813,15 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 // Load the top RoundWidth bits. Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, - LD->getPointerInfo(), RoundVT, Alignment, MMOFlags, - AAInfo); + LD->getPointerInfo(), RoundVT, LD->getOriginalAlign(), + MMOFlags, AAInfo); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, - AAInfo); + ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of // the other one. @@ -933,7 +928,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Result.getValueType(), Result, DAG.getValueType(SrcVT)); else - ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); + ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT); Value = ValRes; Chain = Result.getValue(1); break; @@ -1009,6 +1004,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(0).getValueType()); break; + case ISD::STRICT_FP_TO_FP16: case ISD::STRICT_SINT_TO_FP: case ISD::STRICT_UINT_TO_FP: case ISD::STRICT_LRINT: @@ -1131,7 +1127,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::UMULFIX: case ISD::UMULFIXSAT: case ISD::SDIVFIX: - case ISD::UDIVFIX: { + case ISD::SDIVFIXSAT: + case ISD::UDIVFIX: + case ISD::UDIVFIXSAT: { unsigned Scale = Node->getConstantOperandVal(2); Action = TLI.getFixedPointOperationAction(Node->getOpcode(), Node->getValueType(0), Scale); @@ -1383,19 +1381,26 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { SDValue SubStackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); // Store the subvector. - Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, MachinePointerInfo()); + Ch = DAG.getStore( + Ch, dl, Part, SubStackPtr, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); // Finally, load the updated vector. return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo); } SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { + assert((Node->getOpcode() == ISD::BUILD_VECTOR || + Node->getOpcode() == ISD::CONCAT_VECTORS) && + "Unexpected opcode!"); + // We can't handle this case efficiently. Allocate a sufficiently - // aligned object on the stack, store each element into it, then load + // aligned object on the stack, store each operand into it, then load // the result as a vector. // Create the stack frame object. EVT VT = Node->getValueType(0); - EVT EltVT = VT.getVectorElementType(); + EVT MemVT = isa<BuildVectorSDNode>(Node) ? 
VT.getVectorElementType() + : Node->getOperand(0).getValueType(); SDLoc dl(Node); SDValue FIPtr = DAG.CreateStackTemporary(VT); int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); @@ -1404,7 +1409,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { // Emit a store of each element to the stack slot. SmallVector<SDValue, 8> Stores; - unsigned TypeByteSize = EltVT.getSizeInBits() / 8; + unsigned TypeByteSize = MemVT.getSizeInBits() / 8; assert(TypeByteSize > 0 && "Vector element type too small for stack store!"); // Store (in the right endianness) the elements to memory. for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { @@ -1413,16 +1418,15 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { unsigned Offset = TypeByteSize*i; - SDValue Idx = DAG.getConstant(Offset, dl, FIPtr.getValueType()); - Idx = DAG.getMemBasePlusOffset(FIPtr, Idx, dl); + SDValue Idx = DAG.getMemBasePlusOffset(FIPtr, Offset, dl); // If the destination vector element type is narrower than the source // element type, only store the bits necessary. - if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) { + if (MemVT.bitsLT(Node->getOperand(i).getValueType())) Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(i), Idx, - PtrInfo.getWithOffset(Offset), EltVT)); - } else + PtrInfo.getWithOffset(Offset), MemVT)); + else Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i), Idx, PtrInfo.getWithOffset(Offset))); } @@ -1600,13 +1604,17 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, SDValue Size = Tmp2.getOperand(1); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); - unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue(); - unsigned StackAlign = - DAG.getSubtarget().getFrameLowering()->getStackAlignment(); - Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value - if (Align > StackAlign) + Align Alignment = cast<ConstantSDNode>(Tmp3)->getAlignValue(); + const TargetFrameLowering *TFL = DAG.getSubtarget().getFrameLowering(); + unsigned Opc = + TFL->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ? + ISD::ADD : ISD::SUB; + + Align StackAlign = TFL->getStackAlign(); + Tmp1 = DAG.getNode(Opc, dl, VT, SP, Size); // Value + if (Alignment > StackAlign) Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, - DAG.getConstant(-(uint64_t)Align, dl, VT)); + DAG.getConstant(-Alignment.value(), dl, VT)); Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), @@ -1968,7 +1976,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { Constant *CP = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy(DAG.getDataLayout())); - unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign(); return DAG.getLoad( VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), @@ -2360,36 +2368,34 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node, // Get the stack frame index of a 8 byte buffer. 
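// ExpandDYNAMIC_STACKALLOC above bumps SP with ADD or SUB depending on the
// target's stack growth direction, then rounds with an AND mask when the
// requested alignment exceeds the natural stack alignment. A scalar model
// of the common downward-growing case:
#include <cassert>
#include <cstdint>

uint64_t dynStackAlloc(uint64_t SP, uint64_t Size, uint64_t Alignment,
                       uint64_t StackAlign) {
  assert(Alignment && (Alignment & (Alignment - 1)) == 0 && "power of two");
  uint64_t NewSP = SP - Size;      // StackGrowsDown -> ISD::SUB
  if (Alignment > StackAlign)
    NewSP &= ~(Alignment - 1);     // same as AND with -Alignment.value()
  return NewSP;                    // both the result pointer and the new SP
}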
SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64); - // word offset constant for Hi/Lo address computation - SDValue WordOff = DAG.getConstant(sizeof(int), dl, - StackSlot.getValueType()); - // set up Hi and Lo (into buffer) address based on endian - SDValue Hi = StackSlot; - SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(), - StackSlot, WordOff); - if (DAG.getDataLayout().isLittleEndian()) - std::swap(Hi, Lo); - + SDValue Lo = Op0; // if signed map to unsigned space - SDValue Op0Mapped; if (isSigned) { - // constant used to invert sign bit (signed to unsigned mapping) - SDValue SignBit = DAG.getConstant(0x80000000u, dl, MVT::i32); - Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit); - } else { - Op0Mapped = Op0; + // Invert sign bit (signed to unsigned mapping). + Lo = DAG.getNode(ISD::XOR, dl, MVT::i32, Lo, + DAG.getConstant(0x80000000u, dl, MVT::i32)); } - // store the lo of the constructed double - based on integer input - SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op0Mapped, Lo, + // Initial hi portion of constructed double. + SDValue Hi = DAG.getConstant(0x43300000u, dl, MVT::i32); + + // If this a big endian target, swap the lo and high data. + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + + SDValue MemChain = DAG.getEntryNode(); + + // Store the lo of the constructed double. + SDValue Store1 = DAG.getStore(MemChain, dl, Lo, StackSlot, MachinePointerInfo()); - // initial hi portion of constructed double - SDValue InitialHi = DAG.getConstant(0x43300000u, dl, MVT::i32); - // store the hi of the constructed double - biased exponent + // Store the hi of the constructed double. + SDValue HiPtr = DAG.getMemBasePlusOffset(StackSlot, 4, dl); SDValue Store2 = - DAG.getStore(Store1, dl, InitialHi, Hi, MachinePointerInfo()); + DAG.getStore(MemChain, dl, Hi, HiPtr, MachinePointerInfo()); + MemChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); + // load the constructed double SDValue Load = - DAG.getLoad(MVT::f64, dl, Store2, StackSlot, MachinePointerInfo()); + DAG.getLoad(MVT::f64, dl, MemChain, StackSlot, MachinePointerInfo()); // FP constant to bias correct the final result SDValue Bias = DAG.getConstantFP(isSigned ? BitsToDouble(0x4330000080000000ULL) : @@ -2417,10 +2423,65 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node, } return Result; } - assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); // Code below here assumes !isSigned without checking again. - // FIXME: This can produce slightly incorrect results. See details in - // FIXME: https://reviews.llvm.org/D69275 + assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); + + // TODO: Generalize this for use with other types. + if ((SrcVT == MVT::i32 || SrcVT == MVT::i64) && DestVT == MVT::f32) { + LLVM_DEBUG(dbgs() << "Converting unsigned i32/i64 to f32\n"); + // For unsigned conversions, convert them to signed conversions using the + // algorithm from the x86_64 __floatundisf in compiler_rt. That method + // should be valid for i32->f32 as well. + + // TODO: This really should be implemented using a branch rather than a + // select. We happen to get lucky and machinesink does the right + // thing most of the time. This would be a good candidate for a + // pseudo-op, or, even better, for whole-function isel. 
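The store/store/load sequence at the top of this hunk is the classic exponent-bias trick. A host-side demonstration of why it works for the unsigned i32 case (a sketch assuming IEEE-754 doubles, not the DAG code itself):

    #include <cstdint>
    #include <cstring>

    double UIntToDoubleViaBias(uint32_t X) {
      // Hi word 0x43300000 is the exponent of 2^52, so the buffer holds
      // exactly 2^52 + X once the integer is placed in the low word.
      uint64_t Bits = (uint64_t{0x43300000} << 32) | X;
      double D;
      std::memcpy(&D, &Bits, sizeof(D));  // "load the constructed double"
      return D - 4503599627370496.0;      // subtract the 2^52 bias
    }

The signed path flips the sign bit first and subtracts BitsToDouble(0x4330000080000000ULL), i.e. 2^52 + 2^31, instead.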
+ EVT SetCCVT = getSetCCResultType(SrcVT); + + SDValue SignBitTest = DAG.getSetCC( + dl, SetCCVT, Op0, DAG.getConstant(0, dl, SrcVT), ISD::SETLT); + + EVT ShiftVT = TLI.getShiftAmountTy(SrcVT, DAG.getDataLayout()); + SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT); + SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Op0, ShiftConst); + SDValue AndConst = DAG.getConstant(1, dl, SrcVT); + SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Op0, AndConst); + SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr); + + SDValue Slow, Fast; + if (Node->isStrictFPOpcode()) { + // In strict mode, we must avoid spurious exceptions, and therefore + // must make sure to only emit a single STRICT_SINT_TO_FP. + SDValue InCvt = DAG.getSelect(dl, SrcVT, SignBitTest, Or, Op0); + Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DestVT, MVT::Other }, + { Node->getOperand(0), InCvt }); + Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DestVT, MVT::Other }, + { Fast.getValue(1), Fast, Fast }); + Chain = Slow.getValue(1); + // The STRICT_SINT_TO_FP inherits the exception mode from the + // incoming STRICT_UINT_TO_FP node; the STRICT_FADD node can + // never raise any exception. + SDNodeFlags Flags; + Flags.setNoFPExcept(Node->getFlags().hasNoFPExcept()); + Fast->setFlags(Flags); + Flags.setNoFPExcept(true); + Slow->setFlags(Flags); + } else { + SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Or); + Slow = DAG.getNode(ISD::FADD, dl, DestVT, SignCvt, SignCvt); + Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); + } + + return DAG.getSelect(dl, DestVT, SignBitTest, Slow, Fast); + } + + // The following optimization is valid only if every value in SrcVT (when + // treated as signed) is representable in DestVT. Check that the mantissa + // size of DestVT is >= than the number of bits in SrcVT -1. 
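A plain C++ mirror of the compiler-rt __floatundisf-style fallback being built here (an assumed host-side model, not the DAG API): when the sign bit is set, halve with the low bit folded back in (round-to-odd), convert as signed, then double the result.

    #include <cstdint>

    float UIntToFloat(uint64_t X) {
      if (static_cast<int64_t>(X) >= 0)                      // SignBitTest
        return static_cast<float>(static_cast<int64_t>(X));  // fast path
      uint64_t Or = (X >> 1) | (X & 1);  // Shr | And: round-to-odd halving
      float F = static_cast<float>(static_cast<int64_t>(Or));
      return F + F;                      // slow path: the FADD doubles it back
    }

For the sources that fall through, the precision assertion below guards that a plain SINT_TO_FP is lossless.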
+ assert(APFloat::semanticsPrecision(DAG.EVTToAPFloatSemantics(DestVT)) >= + SrcVT.getSizeInBits() - 1 && + "Cannot perform lossless SINT_TO_FP!"); SDValue Tmp1; if (Node->isStrictFPOpcode()) { @@ -2454,9 +2515,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node, SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy(DAG.getDataLayout())); - unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign(); CPIdx = DAG.getNode(ISD::ADD, dl, CPIdx.getValueType(), CPIdx, CstOffset); - Alignment = std::min(Alignment, 4u); + Alignment = commonAlignment(Alignment, 4); SDValue FudgeInReg; if (DestVT == MVT::f32) FudgeInReg = DAG.getLoad( @@ -2765,6 +2826,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } case ISD::FLT_ROUNDS_: Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0))); + Results.push_back(Node->getOperand(0)); break; case ISD::EH_RETURN: case ISD::EH_LABEL: @@ -3090,14 +3152,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } unsigned Idx = Mask[i]; if (Idx < NumElems) - Ops.push_back(DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0, + DAG.getVectorIdxConstant(Idx, dl))); else - Ops.push_back(DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1, - DAG.getConstant(Idx - NumElems, dl, - TLI.getVectorIdxTy(DAG.getDataLayout())))); + Ops.push_back( + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1, + DAG.getVectorIdxConstant(Idx - NumElems, dl))); } Tmp1 = DAG.getBuildVector(VT, dl, Ops); @@ -3219,6 +3279,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res)); } break; + case ISD::STRICT_FP16_TO_FP: + if (Node->getValueType(0) != MVT::f32) { + // We can extend to types bigger than f32 in two steps without changing + // the result. Since "f16 -> f32" is much more commonly available, give + // CodeGen the option of emitting that before resorting to a libcall. + SDValue Res = + DAG.getNode(ISD::STRICT_FP16_TO_FP, dl, {MVT::f32, MVT::Other}, + {Node->getOperand(0), Node->getOperand(1)}); + Res = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, + {Node->getValueType(0), MVT::Other}, + {Res.getValue(1), Res}); + Results.push_back(Res); + Results.push_back(Res.getValue(1)); + } + break; case ISD::FP_TO_FP16: LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n"); if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) { @@ -3273,26 +3348,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::UREM: - case ISD::SREM: { - EVT VT = Node->getValueType(0); - bool isSigned = Node->getOpcode() == ISD::SREM; - unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV; - unsigned DivRemOpc = isSigned ? 
ISD::SDIVREM : ISD::UDIVREM; - Tmp2 = Node->getOperand(0); - Tmp3 = Node->getOperand(1); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { - SDVTList VTs = DAG.getVTList(VT, VT); - Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); - Results.push_back(Tmp1); - } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { - // X % Y -> X-X/Y*Y - Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3); - Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3); - Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1); + case ISD::SREM: + if (TLI.expandREM(Node, Tmp1, DAG)) Results.push_back(Tmp1); - } break; - } case ISD::UDIV: case ISD::SDIV: { bool isSigned = Node->getOpcode() == ISD::SDIV; @@ -3420,7 +3479,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(TLI.expandFixedPointMul(Node, DAG)); break; case ISD::SDIVFIX: + case ISD::SDIVFIXSAT: case ISD::UDIVFIX: + case ISD::UDIVFIXSAT: if (SDValue V = TLI.expandFixedPointDiv(Node->getOpcode(), SDLoc(Node), Node->getOperand(0), Node->getOperand(1), @@ -3457,8 +3518,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Overflow = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC); // Add of the sum and the carry. + SDValue One = DAG.getConstant(1, dl, VT); SDValue CarryExt = - DAG.getZeroExtendInReg(DAG.getZExtOrTrunc(Carry, dl, VT), dl, MVT::i1); + DAG.getNode(ISD::AND, dl, VT, DAG.getZExtOrTrunc(Carry, dl, VT), One); SDValue Sum2 = DAG.getNode(Op, dl, VT, Sum, CarryExt); // Second check for overflow. If we are adding, we can only overflow if the @@ -3780,12 +3842,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SmallVector<SDValue, 8> Scalars; for (unsigned Idx = 0; Idx < NumElem; Idx++) { - SDValue Ex = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(0), - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - SDValue Sh = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(1), - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue Ex = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), + Node->getOperand(0), DAG.getVectorIdxConstant(Idx, dl)); + SDValue Sh = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), + Node->getOperand(1), DAG.getVectorIdxConstant(Idx, dl)); Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, VT.getScalarType(), Ex, Sh)); } @@ -4038,6 +4100,14 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::ROUND_F128, RTLIB::ROUND_PPCF128, Results); break; + case ISD::FROUNDEVEN: + case ISD::STRICT_FROUNDEVEN: + ExpandFPLibCall(Node, RTLIB::ROUNDEVEN_F32, + RTLIB::ROUNDEVEN_F64, + RTLIB::ROUNDEVEN_F80, + RTLIB::ROUNDEVEN_F128, + RTLIB::ROUNDEVEN_PPCF128, Results); + break; case ISD::FPOWI: case ISD::STRICT_FPOWI: { RTLIB::Libcall LC; @@ -4132,6 +4202,17 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); } break; + case ISD::STRICT_FP16_TO_FP: { + if (Node->getValueType(0) == MVT::f32) { + TargetLowering::MakeLibCallOptions CallOptions; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall( + DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Node->getOperand(1), CallOptions, + SDLoc(Node), Node->getOperand(0)); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + } + break; + } case ISD::FP_TO_FP16: { RTLIB::Libcall LC = RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16); @@ -4139,6 +4220,19 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { 
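A few hunks up, the UREM/SREM case was rewritten to defer to TLI.expandREM, which still falls back to the identity the deleted code open-coded; as a scalar sketch (Y assumed non-zero):

    #include <cstdint>

    uint32_t ExpandURem(uint32_t X, uint32_t Y) {
      return X - (X / Y) * Y;  // X % Y rebuilt from div, mul and sub
    }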
Results.push_back(ExpandLibCall(LC, Node, false)); break; } + case ISD::STRICT_FP_TO_FP16: { + RTLIB::Libcall LC = + RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16); + assert(LC != RTLIB::UNKNOWN_LIBCALL && + "Unable to expand strict_fp_to_fp16"); + TargetLowering::MakeLibCallOptions CallOptions; + std::pair<SDValue, SDValue> Tmp = + TLI.makeLibCall(DAG, LC, Node->getValueType(0), Node->getOperand(1), + CallOptions, SDLoc(Node), Node->getOperand(0)); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + break; + } case ISD::FSUB: case ISD::STRICT_FSUB: ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, @@ -4240,8 +4334,13 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: case ISD::CTPOP: - // Zero extend the argument. - Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); + // Zero extend the argument unless its cttz, then use any_extend. + if (Node->getOpcode() == ISD::CTTZ || + Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF) + Tmp1 = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Node->getOperand(0)); + else + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); + if (Node->getOpcode() == ISD::CTTZ) { // The count is the same in the promoted type except if the original // value was zero. This can be handled by setting the bit just off @@ -4503,6 +4602,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FROUND: + case ISD::FROUNDEVEN: case ISD::FTRUNC: case ISD::FNEG: case ISD::FSQRT: diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index f191160dee4f..7e8ad28f9b14 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -113,6 +113,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; case ISD::STRICT_FROUND: case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break; + case ISD::STRICT_FROUNDEVEN: + case ISD::FROUNDEVEN: R = SoftenFloatRes_FROUNDEVEN(N); break; case ISD::STRICT_FSIN: case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; case ISD::STRICT_FSQRT: @@ -125,6 +127,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; + case ISD::FREEZE: R = SoftenFloatRes_FREEZE(N); break; case ISD::STRICT_SINT_TO_FP: case ISD::STRICT_UINT_TO_FP: case ISD::SINT_TO_FP: @@ -184,6 +187,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { return BitConvertToInteger(N->getOperand(0)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FREEZE(SDNode *N) { + EVT Ty = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getNode(ISD::FREEZE, SDLoc(N), Ty, + GetSoftenedFloat(N->getOperand(0))); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo) { SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); @@ -609,6 +618,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { RTLIB::ROUND_PPCF128)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FROUNDEVEN(SDNode *N) { + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::ROUNDEVEN_F32, + RTLIB::ROUNDEVEN_F64, + 
RTLIB::ROUNDEVEN_F80, + RTLIB::ROUNDEVEN_F128, + RTLIB::ROUNDEVEN_PPCF128)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::SIN_F32, @@ -658,8 +676,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { if (L->getExtensionType() == ISD::NON_EXTLOAD) { NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), - L->getPointerInfo(), NVT, L->getAlignment(), MMOFlags, - L->getAAInfo()); + L->getPointerInfo(), NVT, L->getOriginalAlign(), + MMOFlags, L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -669,8 +687,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { // Do a non-extending load followed by FP_EXTEND. NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, L->getMemoryVT(), dl, L->getChain(), L->getBasePtr(), L->getOffset(), - L->getPointerInfo(), L->getMemoryVT(), L->getAlignment(), - MMOFlags, L->getAAInfo()); + L->getPointerInfo(), L->getMemoryVT(), + L->getOriginalAlign(), MMOFlags, L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -1166,10 +1184,13 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break; case ISD::STRICT_FPOWI: case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break; + case ISD::FREEZE: ExpandFloatRes_FREEZE(N, Lo, Hi); break; case ISD::STRICT_FRINT: case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break; case ISD::STRICT_FROUND: case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break; + case ISD::STRICT_FROUNDEVEN: + case ISD::FROUNDEVEN: ExpandFloatRes_FROUNDEVEN(N, Lo, Hi); break; case ISD::STRICT_FSIN: case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; case ISD::STRICT_FSQRT: @@ -1459,6 +1480,17 @@ void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N, RTLIB::POWI_PPCF128), Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FREEZE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + assert(N->getValueType(0) == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + + SDLoc dl(N); + GetExpandedFloat(N->getOperand(0), Lo, Hi); + Lo = DAG.getNode(ISD::FREEZE, dl, Lo.getValueType(), Lo); + Hi = DAG.getNode(ISD::FREEZE, dl, Hi.getValueType(), Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), @@ -1485,6 +1517,16 @@ void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N, RTLIB::ROUND_PPCF128), Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FROUNDEVEN(SDNode *N, + SDValue &Lo, SDValue &Hi) { + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::ROUNDEVEN_F32, + RTLIB::ROUNDEVEN_F64, + RTLIB::ROUNDEVEN_F80, + RTLIB::ROUNDEVEN_F128, + RTLIB::ROUNDEVEN_PPCF128), Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), @@ -2117,6 +2159,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FNEG: case ISD::FRINT: case ISD::FROUND: + case ISD::FROUNDEVEN: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: @@ -2328,12 +2371,10 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) { // Load the value as an integer value with the same number of bits. 
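A host-side picture of that comment (an assumed illustration, not the SelectionDAG API): the softened load fetches the bits as a same-width integer, and a bitcast recovers the float when needed.

    #include <cstdint>
    #include <cstring>

    float LoadF32AsI32(const void *P) {
      uint32_t Bits;
      std::memcpy(&Bits, P, sizeof(Bits));  // integer load, same width
      float F;
      std::memcpy(&F, &Bits, sizeof(F));    // reinterpret as float
      return F;
    }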
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); - SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), IVT, - SDLoc(N), L->getChain(), L->getBasePtr(), - L->getOffset(), L->getPointerInfo(), IVT, - L->getAlignment(), - L->getMemOperand()->getFlags(), - L->getAAInfo()); + SDValue newL = DAG.getLoad( + L->getAddressingMode(), L->getExtensionType(), IVT, SDLoc(N), + L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), IVT, + L->getOriginalAlign(), L->getMemOperand()->getFlags(), L->getAAInfo()); // Legalize the chain result by replacing uses of the old value chain with the // new one ReplaceValueWith(SDValue(N, 1), newL.getValue(1)); @@ -2412,3 +2453,421 @@ SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) { } +//===----------------------------------------------------------------------===// +// Half Result Soft Promotion +//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { + LLVM_DEBUG(dbgs() << "Soft promote half result " << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"); + SDValue R = SDValue(); + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) { + LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n"); + return; + } + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "SoftPromoteHalfResult #" << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to soft promote this operator's result!"); + + case ISD::BITCAST: R = SoftPromoteHalfRes_BITCAST(N); break; + case ISD::ConstantFP: R = SoftPromoteHalfRes_ConstantFP(N); break; + case ISD::EXTRACT_VECTOR_ELT: + R = SoftPromoteHalfRes_EXTRACT_VECTOR_ELT(N); break; + case ISD::FCOPYSIGN: R = SoftPromoteHalfRes_FCOPYSIGN(N); break; + case ISD::STRICT_FP_ROUND: + case ISD::FP_ROUND: R = SoftPromoteHalfRes_FP_ROUND(N); break; + + // Unary FP Operations + case ISD::FABS: + case ISD::FCBRT: + case ISD::FCEIL: + case ISD::FCOS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FFLOOR: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FREEZE: + case ISD::FRINT: + case ISD::FROUND: + case ISD::FROUNDEVEN: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: + case ISD::FCANONICALIZE: R = SoftPromoteHalfRes_UnaryOp(N); break; + + // Binary FP Operations + case ISD::FADD: + case ISD::FDIV: + case ISD::FMAXIMUM: + case ISD::FMINIMUM: + case ISD::FMAXNUM: + case ISD::FMINNUM: + case ISD::FMUL: + case ISD::FPOW: + case ISD::FREM: + case ISD::FSUB: R = SoftPromoteHalfRes_BinOp(N); break; + + case ISD::FMA: // FMA is same as FMAD + case ISD::FMAD: R = SoftPromoteHalfRes_FMAD(N); break; + + case ISD::FPOWI: R = SoftPromoteHalfRes_FPOWI(N); break; + + case ISD::LOAD: R = SoftPromoteHalfRes_LOAD(N); break; + case ISD::SELECT: R = SoftPromoteHalfRes_SELECT(N); break; + case ISD::SELECT_CC: R = SoftPromoteHalfRes_SELECT_CC(N); break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: R = SoftPromoteHalfRes_XINT_TO_FP(N); break; + case ISD::UNDEF: R = SoftPromoteHalfRes_UNDEF(N); break; + case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; + } + + if (R.getNode()) + SetSoftPromotedHalf(SDValue(N, ResNo), R); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BITCAST(SDNode *N) { + return BitConvertToInteger(N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_ConstantFP(SDNode *N) { 
+ ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N); + + // Get the (bit-cast) APInt of the APFloat and build an integer constant + return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN), + MVT::i16); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), + NewOp.getValueType().getVectorElementType(), NewOp, + N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FCOPYSIGN(SDNode *N) { + SDValue LHS = GetSoftPromotedHalf(N->getOperand(0)); + SDValue RHS = BitConvertToInteger(N->getOperand(1)); + SDLoc dl(N); + + EVT LVT = LHS.getValueType(); + EVT RVT = RHS.getValueType(); + + unsigned LSize = LVT.getSizeInBits(); + unsigned RSize = RVT.getSizeInBits(); + + // First get the sign bit of second operand. + SDValue SignBit = DAG.getNode( + ISD::SHL, dl, RVT, DAG.getConstant(1, dl, RVT), + DAG.getConstant(RSize - 1, dl, + TLI.getShiftAmountTy(RVT, DAG.getDataLayout()))); + SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit); + + // Shift right or sign-extend it if the two operands have different types. + int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits(); + if (SizeDiff > 0) { + SignBit = + DAG.getNode(ISD::SRL, dl, RVT, SignBit, + DAG.getConstant(SizeDiff, dl, + TLI.getShiftAmountTy(SignBit.getValueType(), + DAG.getDataLayout()))); + SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit); + } else if (SizeDiff < 0) { + SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit); + SignBit = + DAG.getNode(ISD::SHL, dl, LVT, SignBit, + DAG.getConstant(-SizeDiff, dl, + TLI.getShiftAmountTy(SignBit.getValueType(), + DAG.getDataLayout()))); + } + + // Clear the sign bit of the first operand. + SDValue Mask = DAG.getNode( + ISD::SHL, dl, LVT, DAG.getConstant(1, dl, LVT), + DAG.getConstant(LSize - 1, dl, + TLI.getShiftAmountTy(LVT, DAG.getDataLayout()))); + Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, dl, LVT)); + LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask); + + // Or the value with the sign bit. + return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FMAD(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0)); + SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1)); + SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2)); + SDLoc dl(N); + + // Promote to the larger FP type. + Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0); + Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1); + Op2 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op2); + + SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1, Op2); + + // Convert back to FP16 as an integer. + return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FPOWI(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0)); + SDValue Op1 = N->getOperand(1); + SDLoc dl(N); + + Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0); + + SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1); + + // Convert back to FP16 as an integer. 
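Earlier in this hunk, SoftPromoteHalfRes_FCOPYSIGN does the whole job with integer bit surgery on the i16 representations. A scalar sketch of the equal-width case (the DAG code adds the extra shifts needed when the operand widths differ):

    #include <cstdint>

    uint16_t CopySignHalfBits(uint16_t LHS, uint16_t RHS) {
      uint16_t Sign = RHS & 0x8000u;  // SignBit: 1 << (RSize - 1), ANDed in
      uint16_t Mask = 0x7FFFu;        // (1 << (LSize - 1)) - 1
      return static_cast<uint16_t>((LHS & Mask) | Sign);  // clear, then OR
    }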
+ return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) { + if (N->isStrictFPOpcode()) { + SDValue Res = + DAG.getNode(ISD::STRICT_FP_TO_FP16, SDLoc(N), {MVT::i16, MVT::Other}, + {N->getOperand(0), N->getOperand(1)}); + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; + } + + return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), MVT::i16, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_LOAD(SDNode *N) { + LoadSDNode *L = cast<LoadSDNode>(N); + + // Load the value as an integer value with the same number of bits. + assert(L->getExtensionType() == ISD::NON_EXTLOAD && "Unexpected extension!"); + SDValue NewL = + DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), MVT::i16, + SDLoc(N), L->getChain(), L->getBasePtr(), L->getOffset(), + L->getPointerInfo(), MVT::i16, L->getOriginalAlign(), + L->getMemOperand()->getFlags(), L->getAAInfo()); + // Legalize the chain result by replacing uses of the old value chain with the + // new one + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + return NewL; +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_SELECT(SDNode *N) { + SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1)); + SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2)); + return DAG.getSelect(SDLoc(N), Op1.getValueType(), N->getOperand(0), Op1, + Op2); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_SELECT_CC(SDNode *N) { + SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2)); + SDValue Op3 = GetSoftPromotedHalf(N->getOperand(3)); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), Op2.getValueType(), + N->getOperand(0), N->getOperand(1), Op2, Op3, + N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_XINT_TO_FP(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + + SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); + + // Round the value to the softened type. + return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(MVT::i16); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UnaryOp(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftPromotedHalf(N->getOperand(0)); + SDLoc dl(N); + + // Promote to the larger FP type. + Op = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op); + + SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op); + + // Convert back to FP16 as an integer. + return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BinOp(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0)); + SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1)); + SDLoc dl(N); + + // Promote to the larger FP type. + Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0); + Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1); + + SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1); + + // Convert back to FP16 as an integer. 
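That final conversion completes the shape every SoftPromoteHalfRes_* routine shares: keep f16 values as raw i16 bits, widen to the promoted type to compute, then narrow back. An assumed host-side analogue, with HalfToFloat/FloatToHalf standing in for FP16_TO_FP and FP_TO_FP16:

    #include <cstdint>

    extern float HalfToFloat(uint16_t Bits);  // assumed helpers mirroring
    extern uint16_t FloatToHalf(float F);     // FP16_TO_FP / FP_TO_FP16

    uint16_t SoftHalfAdd(uint16_t A, uint16_t B) {
      float FA = HalfToFloat(A);     // promote to the larger FP type
      float FB = HalfToFloat(B);
      return FloatToHalf(FA + FB);   // convert back to FP16 as an integer
    }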
+ return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res); +} + +//===----------------------------------------------------------------------===// +// Half Operand Soft Promotion +//===----------------------------------------------------------------------===// + +bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) { + LLVM_DEBUG(dbgs() << "Soft promote half operand " << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"); + SDValue Res = SDValue(); + + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) { + LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n"); + return false; + } + + // Nodes that use a promotion-requiring floating point operand, but doesn't + // produce a soft promotion-requiring floating point result, need to be + // legalized to use the soft promoted float operand. Nodes that produce at + // least one soft promotion-requiring floating point result have their + // operands legalized as a part of PromoteFloatResult. + switch (N->getOpcode()) { + default: + #ifndef NDEBUG + dbgs() << "SoftPromoteHalfOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; + #endif + llvm_unreachable("Do not know how to soft promote this operator's operand!"); + + case ISD::BITCAST: Res = SoftPromoteHalfOp_BITCAST(N); break; + case ISD::FCOPYSIGN: Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break; + case ISD::STRICT_FP_EXTEND: + case ISD::FP_EXTEND: Res = SoftPromoteHalfOp_FP_EXTEND(N); break; + case ISD::SELECT_CC: Res = SoftPromoteHalfOp_SELECT_CC(N, OpNo); break; + case ISD::SETCC: Res = SoftPromoteHalfOp_SETCC(N); break; + case ISD::STORE: Res = SoftPromoteHalfOp_STORE(N, OpNo); break; + } + + if (!Res.getNode()) + return false; + + assert(Res.getNode() != N && "Expected a new node!"); + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_BITCAST(SDNode *N) { + SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0)); + + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, + unsigned OpNo) { + assert(OpNo == 1 && "Only Operand 1 must need promotion here"); + SDValue Op1 = N->getOperand(1); + SDLoc dl(N); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op1.getValueType()); + + Op1 = GetSoftPromotedHalf(Op1); + Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1); + + return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), N->getOperand(0), + Op1); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) { + bool IsStrict = N->isStrictFPOpcode(); + SDValue Op = GetSoftPromotedHalf(N->getOperand(IsStrict ? 
1 : 0)); + + if (IsStrict) { + SDValue Res = + DAG.getNode(ISD::STRICT_FP16_TO_FP, SDLoc(N), + {N->getValueType(0), MVT::Other}, {N->getOperand(0), Op}); + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); + } + + return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), Op); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) { + SDValue Op = N->getOperand(0); + SDLoc dl(N); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()); + + Op = GetSoftPromotedHalf(Op); + + SDValue Res = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op); + + return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Res); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_SELECT_CC(SDNode *N, + unsigned OpNo) { + assert(OpNo == 0 && "Can only soften the comparison values"); + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + SDLoc dl(N); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op0.getValueType()); + + Op0 = GetSoftPromotedHalf(Op0); + Op1 = GetSoftPromotedHalf(Op1); + + // Promote to the larger FP type. + Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0); + Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1); + + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0), Op0, Op1, + N->getOperand(2), N->getOperand(3), N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_SETCC(SDNode *N) { + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + SDLoc dl(N); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op0.getValueType()); + + Op0 = GetSoftPromotedHalf(Op0); + Op1 = GetSoftPromotedHalf(Op1); + + // Promote to the larger FP type. 
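The operand-side routines follow the same recipe; for SETCC and SELECT_CC both compare operands are widened first, which is safe because every f16 value is exactly representable in f32. In scalar form, with the same assumed helper:

    #include <cstdint>

    extern float HalfToFloat(uint16_t Bits);  // assumed helper, as above

    bool SoftHalfLess(uint16_t A, uint16_t B) {
      return HalfToFloat(A) < HalfToFloat(B);  // SETCC in the promoted type
    }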
+ Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0); + Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1); + + return DAG.getSetCC(SDLoc(N), N->getValueType(0), Op0, Op1, CCCode); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo) { + assert(OpNo == 1 && "Can only soften the stored value!"); + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Val = ST->getValue(); + SDLoc dl(N); + + assert(!ST->isTruncatingStore() && "Unexpected truncating store."); + SDValue Promoted = GetSoftPromotedHalf(Val); + return DAG.getStore(ST->getChain(), dl, Promoted, ST->getBasePtr(), + ST->getMemOperand()); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 015b3d99fb0f..74071f763dbf 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -91,6 +91,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break; case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break; case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break; + case ISD::VSCALE: Res = PromoteIntRes_VSCALE(N); break; case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break; @@ -161,7 +162,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break; case ISD::SDIVFIX: - case ISD::UDIVFIX: Res = PromoteIntRes_DIVFIX(N); break; + case ISD::SDIVFIXSAT: + case ISD::UDIVFIX: + case ISD::UDIVFIXSAT: Res = PromoteIntRes_DIVFIX(N); break; case ISD::ABS: Res = PromoteIntRes_ABS(N); break; @@ -198,6 +201,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::VECREDUCE_UMIN: Res = PromoteIntRes_VECREDUCE(N); break; + + case ISD::FREEZE: + Res = PromoteIntRes_FREEZE(N); + break; } // If the result is null then the sub-method took care of registering it. @@ -275,14 +282,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, // target's atomic operations. Op3 is merely stored and so can be left alone. SDValue Op2 = N->getOperand(2); SDValue Op3 = GetPromotedInteger(N->getOperand(3)); - if (TLI.getTargetMachine().getTargetTriple().isRISCV()) { - // The comparison argument must be sign-extended for RISC-V. This is - // abstracted using a new TargetLowering hook in the main LLVM development - // branch, but handled here directly in order to fix the codegen bug for - // 10.x without breaking the libLLVM.so ABI. + switch (TLI.getExtendForAtomicCmpSwapArg()) { + case ISD::SIGN_EXTEND: Op2 = SExtPromotedInteger(Op2); - } else { + break; + case ISD::ZERO_EXTEND: + Op2 = ZExtPromotedInteger(Op2); + break; + case ISD::ANY_EXTEND: Op2 = GetPromotedInteger(Op2); + break; + default: + llvm_unreachable("Invalid atomic op extension"); } SDVTList VTs = @@ -315,6 +326,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { case TargetLowering::TypeSoftenFloat: // Promote the integer operand by hand. return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp)); + case TargetLowering::TypeSoftPromoteHalf: + // Promote the integer operand by hand. + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftPromotedHalf(InOp)); case TargetLowering::TypePromoteFloat: { // Convert the promoted float by hand. 
if (!NOutVT.isVector()) @@ -330,6 +344,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, BitConvertToInteger(GetScalarizedVector(InOp))); break; + case TargetLowering::TypeScalarizeScalableVector: + report_fatal_error("Scalarization of scalable vectors is not supported."); case TargetLowering::TypeSplitVector: { if (!NOutVT.isVector()) { // For example, i32 = BITCAST v2i16 on alpha. Convert the split @@ -382,9 +398,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { OutVT.getVectorNumElements() * Scale); if (isTypeLegal(WideOutVT)) { InOp = DAG.getBitcast(WideOutVT, GetWidenedVector(InOp)); - MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, InOp, - DAG.getConstant(0, dl, IdxTy)); + DAG.getVectorIdxConstant(0, dl)); return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp); } } @@ -408,6 +423,12 @@ static EVT getShiftAmountTyForConstant(EVT VT, const TargetLowering &TLI, return ShiftVT; } +SDValue DAGTypeLegalizer::PromoteIntRes_FREEZE(SDNode *N) { + SDValue V = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::FREEZE, SDLoc(N), + V.getValueType(), V); +} + SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); EVT OVT = N->getValueType(0); @@ -570,7 +591,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FLT_ROUNDS(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); - return DAG.getNode(N->getOpcode(), dl, NVT); + SDValue Res = + DAG.getNode(N->getOpcode(), dl, {NVT, MVT::Other}, N->getOperand(0)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; } SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { @@ -590,8 +617,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res, DAG.getValueType(N->getOperand(0).getValueType())); if (N->getOpcode() == ISD::ZERO_EXTEND) - return DAG.getZeroExtendInReg(Res, dl, - N->getOperand(0).getValueType().getScalarType()); + return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType()); assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!"); return Res; } @@ -793,22 +819,51 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { N->getOperand(2)); } +static SDValue SaturateWidenedDIVFIX(SDValue V, SDLoc &dl, + unsigned SatW, bool Signed, + const TargetLowering &TLI, + SelectionDAG &DAG) { + EVT VT = V.getValueType(); + unsigned VTW = VT.getScalarSizeInBits(); + + if (!Signed) { + // Saturate to the unsigned maximum by getting the minimum of V and the + // maximum. + return DAG.getNode(ISD::UMIN, dl, VT, V, + DAG.getConstant(APInt::getLowBitsSet(VTW, SatW), + dl, VT)); + } + + // Saturate to the signed maximum (the low SatW - 1 bits) by taking the + // signed minimum of it and V. + V = DAG.getNode(ISD::SMIN, dl, VT, V, + DAG.getConstant(APInt::getLowBitsSet(VTW, SatW - 1), + dl, VT)); + // Saturate to the signed minimum (the high SatW + 1 bits) by taking the + // signed maximum of it and V. 
+ V = DAG.getNode(ISD::SMAX, dl, VT, V, + DAG.getConstant(APInt::getHighBitsSet(VTW, VTW - SatW + 1), + dl, VT)); + return V; +} + static SDValue earlyExpandDIVFIX(SDNode *N, SDValue LHS, SDValue RHS, - unsigned Scale, const TargetLowering &TLI, - SelectionDAG &DAG) { + unsigned Scale, const TargetLowering &TLI, + SelectionDAG &DAG, unsigned SatW = 0) { EVT VT = LHS.getValueType(); - bool Signed = N->getOpcode() == ISD::SDIVFIX; + unsigned VTSize = VT.getScalarSizeInBits(); + bool Signed = N->getOpcode() == ISD::SDIVFIX || + N->getOpcode() == ISD::SDIVFIXSAT; + bool Saturating = N->getOpcode() == ISD::SDIVFIXSAT || + N->getOpcode() == ISD::UDIVFIXSAT; SDLoc dl(N); - // See if we can perform the division in this type without widening. - if (SDValue V = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale, - DAG)) - return V; - - // If that didn't work, double the type width and try again. That must work, - // or something is wrong. - EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), - VT.getScalarSizeInBits() * 2); + // Widen the types by a factor of two. This is guaranteed to expand, since it + // will always have enough high bits in the LHS to shift into. + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2); + if (VT.isVector()) + WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT, + VT.getVectorElementCount()); if (Signed) { LHS = DAG.getSExtOrTrunc(LHS, dl, WideVT); RHS = DAG.getSExtOrTrunc(RHS, dl, WideVT); @@ -817,18 +872,28 @@ static SDValue earlyExpandDIVFIX(SDNode *N, SDValue LHS, SDValue RHS, RHS = DAG.getZExtOrTrunc(RHS, dl, WideVT); } - // TODO: Saturation. - SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale, DAG); assert(Res && "Expanding DIVFIX with wide type failed?"); + if (Saturating) { + // If the caller has told us to saturate at something less, use that width + // instead of the type before doubling. However, it cannot be more than + // what we just widened! + assert(SatW <= VTSize && + "Tried to saturate to more than the original type?"); + Res = SaturateWidenedDIVFIX(Res, dl, SatW == 0 ? VTSize : SatW, Signed, + TLI, DAG); + } return DAG.getZExtOrTrunc(Res, dl, VT); } SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) { SDLoc dl(N); SDValue Op1Promoted, Op2Promoted; - bool Signed = N->getOpcode() == ISD::SDIVFIX; + bool Signed = N->getOpcode() == ISD::SDIVFIX || + N->getOpcode() == ISD::SDIVFIXSAT; + bool Saturating = N->getOpcode() == ISD::SDIVFIXSAT || + N->getOpcode() == ISD::UDIVFIXSAT; if (Signed) { Op1Promoted = SExtPromotedInteger(N->getOperand(0)); Op2Promoted = SExtPromotedInteger(N->getOperand(1)); @@ -839,23 +904,41 @@ SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) { EVT PromotedType = Op1Promoted.getValueType(); unsigned Scale = N->getConstantOperandVal(2); - SDValue Res; // If the type is already legal and the operation is legal in that type, we // should not early expand. 
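Taken together, SaturateWidenedDIVFIX and earlyExpandDIVFIX implement a widen-divide-clamp scheme. A scalar model (assumed; it truncates where expandFixedPointDiv also applies rounding corrections for signed operands):

    #include <algorithm>
    #include <cstdint>

    int32_t SDivFixSat32(int32_t X, int32_t Y, unsigned Scale, unsigned SatW) {
      int64_t Wide = static_cast<int64_t>(X) << Scale;  // always enough high bits
      int64_t Res = Wide / Y;                           // exact in twice the width
      int64_t Max = (int64_t{1} << (SatW - 1)) - 1;     // low SatW-1 bits set
      int64_t Min = -(int64_t{1} << (SatW - 1));        // high bits set
      return static_cast<int32_t>(std::min(std::max(Res, Min), Max)); // SMIN/SMAX
    }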
if (TLI.isTypeLegal(PromotedType)) { TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction(N->getOpcode(), PromotedType, Scale); - if (Action == TargetLowering::Legal || Action == TargetLowering::Custom) - Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, - Op2Promoted, N->getOperand(2)); + if (Action == TargetLowering::Legal || Action == TargetLowering::Custom) { + EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); + unsigned Diff = PromotedType.getScalarSizeInBits() - + N->getValueType(0).getScalarSizeInBits(); + if (Saturating) + Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, + DAG.getConstant(Diff, dl, ShiftTy)); + SDValue Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, + Op2Promoted, N->getOperand(2)); + if (Saturating) + Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, PromotedType, Res, + DAG.getConstant(Diff, dl, ShiftTy)); + return Res; + } } - if (!Res) - Res = earlyExpandDIVFIX(N, Op1Promoted, Op2Promoted, Scale, TLI, DAG); - - // TODO: Saturation. - - return Res; + // See if we can perform the division in this type without expanding. + if (SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, Op1Promoted, + Op2Promoted, Scale, DAG)) { + if (Saturating) + Res = SaturateWidenedDIVFIX(Res, dl, + N->getValueType(0).getScalarSizeInBits(), + Signed, TLI, DAG); + return Res; + } + // If we cannot, expand it to twice the type width. If we are saturating, give + // it the original width as a saturating width so we don't need to emit + // two saturations. + return earlyExpandDIVFIX(N, Op1Promoted, Op2Promoted, Scale, TLI, DAG, + N->getValueType(0).getScalarSizeInBits()); } SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { @@ -1060,8 +1143,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { SDValue WideExt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, WideTrunc); // Extract the low NVT subvector. - MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); - SDValue ZeroIdx = DAG.getConstant(0, dl, IdxTy); + SDValue ZeroIdx = DAG.getVectorIdxConstant(0, dl); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, WideExt, ZeroIdx); } } @@ -1088,7 +1170,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { // Calculate the overflow flag: zero extend the arithmetic result from // the original type. - SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT.getScalarType()); + SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT); // Overflowed if and only if this is not equal to Res. Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE); @@ -1193,6 +1275,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) { N->getValueType(0))); } +SDValue DAGTypeLegalizer::PromoteIntRes_VSCALE(SDNode *N) { + EVT VT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + + APInt MulImm = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue(); + return DAG.getVScale(SDLoc(N), VT, MulImm.sextOrSelf(VT.getSizeInBits())); +} + SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { SDValue Chain = N->getOperand(0); // Get the chain. SDValue Ptr = N->getOperand(1); // Get the pointer. 
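PromoteIntRes_UADDSUBO above detects overflow by re-narrowing: do the add in the promoted type, zero-extend the result in the original width, and compare. In scalar form:

    #include <cstdint>

    bool UAddO16ViaI32(uint16_t A, uint16_t B, uint16_t &Out) {
      uint32_t Res = uint32_t(A) + uint32_t(B);  // add in the promoted type
      Out = static_cast<uint16_t>(Res);
      return (Res & 0xFFFFu) != Res;  // zext-in-reg != Res => overflow (SETNE)
    }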
@@ -1318,7 +1407,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::UMULFIX: case ISD::UMULFIXSAT: case ISD::SDIVFIX: - case ISD::UDIVFIX: Res = PromoteIntOp_FIX(N); break; + case ISD::SDIVFIXSAT: + case ISD::UDIVFIX: + case ISD::UDIVFIXSAT: Res = PromoteIntOp_FIX(N); break; case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break; @@ -1632,7 +1723,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); NewOps[OpNo] = Mask; - return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); + SDNode *Res = DAG.UpdateNodeOperands(N, NewOps); + if (Res == N) + return SDValue(Res, 0); + + // Update triggered CSE, do our own replacement since caller can't. + ReplaceValueWith(SDValue(N, 0), SDValue(Res, 0)); + ReplaceValueWith(SDValue(N, 1), SDValue(Res, 1)); + return SDValue(); } SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, @@ -1653,7 +1751,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, } else NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); - return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); + SDNode *Res = DAG.UpdateNodeOperands(N, NewOps); + if (Res == N) + return SDValue(Res, 0); + + // Update triggered CSE, do our own replacement since caller can't. + ReplaceValueWith(SDValue(N, 0), SDValue(Res, 0)); + ReplaceValueWith(SDValue(N, 1), SDValue(Res, 1)); + return SDValue(); } SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, @@ -1694,8 +1799,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { SDLoc dl(N); SDValue Op = GetPromotedInteger(N->getOperand(0)); Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); - return DAG.getZeroExtendInReg(Op, dl, - N->getOperand(0).getValueType().getScalarType()); + return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType()); } SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) { @@ -1804,6 +1908,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; + case ISD::FREEZE: SplitRes_FREEZE(N, Lo, Hi); break; case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break; case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; @@ -1926,7 +2031,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break; case ISD::SDIVFIX: - case ISD::UDIVFIX: ExpandIntRes_DIVFIX(N, Lo, Hi); break; + case ISD::SDIVFIXSAT: + case ISD::UDIVFIX: + case ISD::UDIVFIXSAT: ExpandIntRes_DIVFIX(N, Lo, Hi); break; case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: @@ -2684,10 +2791,15 @@ void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo, unsigned NBitWidth = NVT.getSizeInBits(); EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); - Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, NVT); + Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, {NVT, MVT::Other}, N->getOperand(0)); + SDValue Chain = Lo.getValue(1); // The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, DAG.getConstant(NBitWidth - 1, dl, ShiftAmtTy)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
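Just above, ExpandIntRes_FLT_ROUNDS keeps the payload in the low part and derives the high part by sign replication, since -1 is a legal FLT_ROUNDS value. The scalar equivalent:

    #include <cstdint>

    int32_t HighWordOfFltRounds(int32_t Lo) {
      return Lo >> 31;  // ISD::SRA by NBitWidth - 1: yields 0 or -1
    }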
+ ReplaceValueWith(SDValue(N, 1), Chain); } void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, @@ -2701,6 +2813,12 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) Op = GetPromotedFloat(Op); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftPromoteHalf) { + EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()); + Op = GetSoftPromotedHalf(Op); + Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op); + } + RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -2724,6 +2842,12 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) Op = GetPromotedFloat(Op); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftPromoteHalf) { + EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()); + Op = GetSoftPromotedHalf(Op); + Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op); + } + RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -2818,7 +2942,6 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); ISD::LoadExtType ExtType = N->getExtensionType(); - unsigned Alignment = N->getAlignment(); MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags(); AAMDNodes AAInfo = N->getAAInfo(); SDLoc dl(N); @@ -2829,7 +2952,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, EVT MemVT = N->getMemoryVT(); Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), MemVT, - Alignment, MMOFlags, AAInfo); + N->getOriginalAlign(), MMOFlags, AAInfo); // Remember the chain. Ch = Lo.getValue(1); @@ -2851,8 +2974,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } } else if (DAG.getDataLayout().isLittleEndian()) { // Little-endian - low bits are at low addresses. - Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), Alignment, MMOFlags, - AAInfo); + Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), + N->getOriginalAlign(), MMOFlags, AAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -2863,7 +2986,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + N->getOriginalAlign(), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -2881,7 +3004,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), - Alignment, MMOFlags, AAInfo); + N->getOriginalAlign(), MMOFlags, AAInfo); // Increment the pointer to the other half. 
Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); @@ -2889,7 +3012,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + N->getOriginalAlign(), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -3244,8 +3367,15 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandIntRes_DIVFIX(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Res = earlyExpandDIVFIX(N, N->getOperand(0), N->getOperand(1), - N->getConstantOperandVal(2), TLI, DAG); + SDLoc dl(N); + // Try expanding in the existing type first. + SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, N->getOperand(0), + N->getOperand(1), + N->getConstantOperandVal(2), DAG); + + if (!Res) + Res = earlyExpandDIVFIX(N, N->getOperand(0), N->getOperand(1), + N->getConstantOperandVal(2), TLI, DAG); SplitInteger(Res, Lo, Hi); } @@ -4089,7 +4219,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); - unsigned Alignment = N->getAlignment(); MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags(); AAMDNodes AAInfo = N->getAAInfo(); SDLoc dl(N); @@ -4100,15 +4229,16 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { if (N->getMemoryVT().bitsLE(NVT)) { GetExpandedInteger(N->getValue(), Lo, Hi); return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), - N->getMemoryVT(), Alignment, MMOFlags, AAInfo); + N->getMemoryVT(), N->getOriginalAlign(), MMOFlags, + AAInfo); } if (DAG.getDataLayout().isLittleEndian()) { // Little-endian - low bits are at low addresses. GetExpandedInteger(N->getValue(), Lo, Hi); - Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags, - AAInfo); + Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), + N->getOriginalAlign(), MMOFlags, AAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -4117,9 +4247,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); - Hi = DAG.getTruncStore( - Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), + NEVT, N->getOriginalAlign(), MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -4147,8 +4277,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { } // Store both the high bits and maybe some of the low bits. - Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), HiVT, Alignment, - MMOFlags, AAInfo); + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), HiVT, + N->getOriginalAlign(), MMOFlags, AAInfo); // Increment the pointer to the other half. 
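The split store above places the halves according to endianness; on a little-endian target the memory-level picture is (an assumed host-side model):

    #include <cstdint>
    #include <cstring>

    void StoreI64AsHalvesLE(unsigned char *P, uint64_t V) {
      uint32_t Lo = static_cast<uint32_t>(V);
      uint32_t Hi = static_cast<uint32_t>(V >> 32);
      std::memcpy(P, &Lo, sizeof(Lo));      // low bits at the low address
      std::memcpy(P + 4, &Hi, sizeof(Hi));  // IncrementSize = NVT bits / 8
    }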
Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); @@ -4156,7 +4286,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + N->getOriginalAlign(), MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -4204,18 +4334,43 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); - unsigned OutNumElems = OutVT.getVectorNumElements(); EVT NOutVTElem = NOutVT.getVectorElementType(); SDLoc dl(N); SDValue BaseIdx = N->getOperand(1); + // TODO: We may be able to use this for types other than scalable + // vectors and fix those tests that expect BUILD_VECTOR to be used + if (OutVT.isScalableVector()) { + SDValue InOp0 = N->getOperand(0); + EVT InVT = InOp0.getValueType(); + + // Promote operands and see if this is handled by target lowering, + // Otherwise, use the BUILD_VECTOR approach below + if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) { + // Collect the (promoted) operands + SDValue Ops[] = { GetPromotedInteger(InOp0), BaseIdx }; + + EVT PromEltVT = Ops[0].getValueType().getVectorElementType(); + assert(PromEltVT.bitsLE(NOutVTElem) && + "Promoted operand has an element type greater than result"); + + EVT ExtVT = NOutVT.changeVectorElementType(PromEltVT); + SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), ExtVT, Ops); + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Ext); + } + } + + if (OutVT.isScalableVector()) + report_fatal_error("Unable to promote scalable types using BUILD_VECTOR"); + SDValue InOp0 = N->getOperand(0); if (getTypeAction(InOp0.getValueType()) == TargetLowering::TypePromoteInteger) InOp0 = GetPromotedInteger(N->getOperand(0)); EVT InVT = InOp0.getValueType(); + unsigned OutNumElems = OutVT.getVectorNumElements(); SmallVector<SDValue, 8> Ops; Ops.reserve(OutNumElems); for (unsigned i = 0; i != OutNumElems; ++i) { @@ -4337,9 +4492,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { "Unexpected number of elements"); for (unsigned j = 0; j < NumElem; ++j) { - SDValue Ext = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Op, - DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Op, + DAG.getVectorIdxConstant(j, dl)); Ops[i * NumElem + j] = DAG.getAnyExtOrTrunc(Ext, dl, OutElemTy); } } @@ -4447,9 +4601,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { for (unsigned i=0; i<NumElem; ++i) { // Extract element from incoming vector - SDValue Ex = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Incoming, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Incoming, + DAG.getVectorIdxConstant(i, dl)); SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex); NewOps.push_back(Tr); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 63ddb59fce68..ae087d3bbd8c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ 
b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -124,6 +124,8 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { Mapped |= 128; if (ResId && PromotedFloats.find(ResId) != PromotedFloats.end()) Mapped |= 256; + if (ResId && SoftPromotedHalfs.find(ResId) != SoftPromotedHalfs.end()) + Mapped |= 512; if (Node.getNodeId() != Processed) { // Since we allow ReplacedValues to map deleted nodes, it may map nodes @@ -168,12 +170,15 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { dbgs() << " WidenedVectors"; if (Mapped & 256) dbgs() << " PromotedFloats"; + if (Mapped & 512) + dbgs() << " SoftPromoteHalfs"; dbgs() << "\n"; llvm_unreachable(nullptr); } } } +#ifndef NDEBUG // Checked that NewNodes are only used by other NewNodes. for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) { SDNode *N = NewNodes[i]; @@ -181,6 +186,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { UI != UE; ++UI) assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!"); } +#endif } /// This is the main entry point for the type legalizer. This does a top-down @@ -239,6 +245,9 @@ bool DAGTypeLegalizer::run() { case TargetLowering::TypeLegal: LLVM_DEBUG(dbgs() << "Legal result type\n"); break; + case TargetLowering::TypeScalarizeScalableVector: + report_fatal_error( + "Scalarization of scalable vectors is not supported."); // The following calls must take care of *all* of the node's results, // not just the illegal result they were passed (this includes results // with a legal type). Results can be remapped using ReplaceValueWith, @@ -276,6 +285,10 @@ bool DAGTypeLegalizer::run() { PromoteFloatResult(N, i); Changed = true; goto NodeDone; + case TargetLowering::TypeSoftPromoteHalf: + SoftPromoteHalfResult(N, i); + Changed = true; + goto NodeDone; } } @@ -297,6 +310,9 @@ ScanOperands: case TargetLowering::TypeLegal: LLVM_DEBUG(dbgs() << "Legal operand\n"); continue; + case TargetLowering::TypeScalarizeScalableVector: + report_fatal_error( + "Scalarization of scalable vectors is not supported."); // The following calls must either replace all of the node's results // using ReplaceValueWith, and return "false"; or update the node's // operands in place, and return "true". @@ -332,6 +348,10 @@ ScanOperands: NeedsReanalyzing = PromoteFloatOperand(N, i); Changed = true; break; + case TargetLowering::TypeSoftPromoteHalf: + NeedsReanalyzing = SoftPromoteHalfOperand(N, i); + Changed = true; + break; } break; } @@ -719,6 +739,16 @@ void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) { OpIdEntry = getTableId(Result); } +void DAGTypeLegalizer::SetSoftPromotedHalf(SDValue Op, SDValue Result) { + assert(Result.getValueType() == MVT::i16 && + "Invalid type for soft-promoted half"); + AnalyzeNewValue(Result); + + auto &OpIdEntry = SoftPromotedHalfs[getTableId(Op)]; + assert((OpIdEntry == 0) && "Node is already promoted!"); + OpIdEntry = getTableId(Result); +} + void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { // Note that in some cases vector operation operands may be greater than // the vector element type. 
For example BUILD_VECTOR of type <1 x i1> with @@ -805,9 +835,9 @@ void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo, void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi) { assert(Lo.getValueType().getVectorElementType() == - Op.getValueType().getVectorElementType() && - 2*Lo.getValueType().getVectorNumElements() == - Op.getValueType().getVectorNumElements() && + Op.getValueType().getVectorElementType() && + Lo.getValueType().getVectorElementCount() * 2 == + Op.getValueType().getVectorElementCount() && Hi.getValueType() == Lo.getValueType() && "Invalid type for split vector"); // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. @@ -859,12 +889,19 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, SDLoc dl(Op); // Create the stack frame object. Make sure it is aligned for both // the source and destination types. - SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT); + + // In cases where the vector is illegal it will be broken down into parts + // and stored in parts - we should use the alignment for the smallest part. + Align DestAlign = DAG.getReducedAlign(DestVT, /*UseABI=*/false); + Align OpAlign = DAG.getReducedAlign(Op.getValueType(), /*UseABI=*/false); + Align Align = std::max(DestAlign, OpAlign); + SDValue StackPtr = + DAG.CreateStackTemporary(Op.getValueType().getStoreSize(), Align); // Emit a store to the stack slot. - SDValue Store = - DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, MachinePointerInfo()); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, + MachinePointerInfo(), Align); // Result is a load from the stack slot. - return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo()); + return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(), Align); } /// Replace the node's results with custom code provided by the target and @@ -890,17 +927,6 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) { // The target didn't want to custom lower it after all. return false; - // When called from DAGTypeLegalizer::ExpandIntegerResult, we might need to - // provide the same kind of custom splitting behavior. - if (Results.size() == N->getNumValues() + 1 && LegalizeResult) { - // We've legalized a return type by splitting it. If there is a chain, - // replace that too. - SetExpandedInteger(SDValue(N, 0), Results[0], Results[1]); - if (N->getNumValues() > 1) - ReplaceValueWith(SDValue(N, 1), Results[2]); - return true; - } - // Make everything that once used N's values now use those in Results instead. assert(Results.size() == N->getNumValues() && "Custom lowering returned the wrong number of results!"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index faae14444d51..0fa6d653a836 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -109,6 +109,10 @@ private: /// supported precision, this map indicates what promoted value to use. SmallDenseMap<TableId, TableId, 8> PromotedFloats; + /// For floating-point nodes that have a smaller precision than the smallest + /// supported precision, this map indicates the converted value to use. + SmallDenseMap<TableId, TableId, 8> SoftPromotedHalfs; + /// For float nodes that need to be expanded this map indicates which operands /// are the expanded version of the input. 
SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> ExpandedFloats; @@ -155,7 +159,9 @@ private: const SDValue &getSDValue(TableId &Id) { RemapId(Id); assert(Id && "TableId should be non-zero"); - return IdToValueMap[Id]; + auto I = IdToValueMap.find(Id); + assert(I != IdToValueMap.end() && "cannot find Id in map"); + return I->second; } public: @@ -172,24 +178,30 @@ public: bool run(); void NoteDeletion(SDNode *Old, SDNode *New) { + assert(Old != New && "node replaced with self"); for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) { TableId NewId = getTableId(SDValue(New, i)); TableId OldId = getTableId(SDValue(Old, i)); - if (OldId != NewId) + if (OldId != NewId) { ReplacedValues[OldId] = NewId; - // Delete Node from tables. + // Delete Node from tables. We cannot do this when OldId == NewId, + // because NewId can still have table references to it in + // ReplacedValues. + IdToValueMap.erase(OldId); + PromotedIntegers.erase(OldId); + ExpandedIntegers.erase(OldId); + SoftenedFloats.erase(OldId); + PromotedFloats.erase(OldId); + SoftPromotedHalfs.erase(OldId); + ExpandedFloats.erase(OldId); + ScalarizedVectors.erase(OldId); + SplitVectors.erase(OldId); + WidenedVectors.erase(OldId); + } + ValueToIdMap.erase(SDValue(Old, i)); - IdToValueMap.erase(OldId); - PromotedIntegers.erase(OldId); - ExpandedIntegers.erase(OldId); - SoftenedFloats.erase(OldId); - PromotedFloats.erase(OldId); - ExpandedFloats.erase(OldId); - ScalarizedVectors.erase(OldId); - SplitVectors.erase(OldId); - WidenedVectors.erase(OldId); } } @@ -260,7 +272,7 @@ private: EVT OldVT = Op.getValueType(); SDLoc dl(Op); Op = GetPromotedInteger(Op); - return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType()); + return DAG.getZeroExtendInReg(Op, dl, OldVT); } // Get a promoted operand and sign or zero extend it to the final size @@ -274,7 +286,7 @@ private: if (TLI.isSExtCheaperThanZExt(OldVT, Op.getValueType())) return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), Op, DAG.getValueType(OldVT)); - return DAG.getZeroExtendInReg(Op, DL, OldVT.getScalarType()); + return DAG.getZeroExtendInReg(Op, DL, OldVT); } // Integer Result Promotion. 
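Note on the getSDValue hunk above: it replaces an operator[] lookup with find() plus an assert. For a remapping table like this the difference matters, because operator[] on a map default-inserts on a miss, so a stale TableId would quietly come back as a null value instead of failing loudly. A minimal standalone sketch of the two behaviours, with std::unordered_map standing in for LLVM's DenseMap (names here are illustrative only):

#include <cassert>
#include <iostream>
#include <unordered_map>

using TableId = int;

// operator[] default-inserts on a miss: a stale id quietly yields a
// value-initialized entry (0 here) and mutates the table as a side effect.
int lookupUnchecked(std::unordered_map<TableId, int> &IdToValue, TableId Id) {
  return IdToValue[Id];
}

// find() plus an assert turns the same stale id into an immediate failure
// in debug builds, and never mutates the table.
int lookupChecked(const std::unordered_map<TableId, int> &IdToValue,
                  TableId Id) {
  auto It = IdToValue.find(Id);
  assert(It != IdToValue.end() && "cannot find Id in map");
  return It->second;
}

int main() {
  std::unordered_map<TableId, int> IdToValue{{1, 42}};
  std::cout << lookupChecked(IdToValue, 1) << '\n';   // 42
  std::cout << lookupUnchecked(IdToValue, 2) << '\n'; // 0 -- miss goes unnoticed
  std::cout << IdToValue.size() << '\n';              // 2 -- the table grew
}

The NoteDeletion change in the same hunk is the complementary fix: table entries are now erased only when the id was actually remapped, since, per the new comment, an id equal to its replacement may still be referenced from ReplacedValues.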
@@ -304,6 +316,7 @@ private: SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); SDValue PromoteIntRes_FP_TO_FP16(SDNode *N); + SDValue PromoteIntRes_FREEZE(SDNode *N); SDValue PromoteIntRes_INT_EXTEND(SDNode *N); SDValue PromoteIntRes_LOAD(LoadSDNode *N); SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N); @@ -326,6 +339,7 @@ private: SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_UNDEF(SDNode *N); SDValue PromoteIntRes_VAARG(SDNode *N); + SDValue PromoteIntRes_VSCALE(SDNode *N); SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_ADDSUBSAT(SDNode *N); SDValue PromoteIntRes_MULFIX(SDNode *N); @@ -512,9 +526,11 @@ private: SDValue SoftenFloatRes_FP_ROUND(SDNode *N); SDValue SoftenFloatRes_FPOW(SDNode *N); SDValue SoftenFloatRes_FPOWI(SDNode *N); + SDValue SoftenFloatRes_FREEZE(SDNode *N); SDValue SoftenFloatRes_FREM(SDNode *N); SDValue SoftenFloatRes_FRINT(SDNode *N); SDValue SoftenFloatRes_FROUND(SDNode *N); + SDValue SoftenFloatRes_FROUNDEVEN(SDNode *N); SDValue SoftenFloatRes_FSIN(SDNode *N); SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); @@ -584,9 +600,11 @@ private: void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FROUNDEVEN(SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -651,6 +669,43 @@ private: SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo); //===--------------------------------------------------------------------===// + // Half soft promotion support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + SDValue GetSoftPromotedHalf(SDValue Op) { + TableId &PromotedId = SoftPromotedHalfs[getTableId(Op)]; + SDValue PromotedOp = getSDValue(PromotedId); + assert(PromotedOp.getNode() && "Operand wasn't promoted?"); + return PromotedOp; + } + void SetSoftPromotedHalf(SDValue Op, SDValue Result); + + void SoftPromoteHalfResult(SDNode *N, unsigned ResNo); + SDValue SoftPromoteHalfRes_BinOp(SDNode *N); + SDValue SoftPromoteHalfRes_BITCAST(SDNode *N); + SDValue SoftPromoteHalfRes_ConstantFP(SDNode *N); + SDValue SoftPromoteHalfRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue SoftPromoteHalfRes_FCOPYSIGN(SDNode *N); + SDValue SoftPromoteHalfRes_FMAD(SDNode *N); + SDValue SoftPromoteHalfRes_FPOWI(SDNode *N); + SDValue SoftPromoteHalfRes_FP_ROUND(SDNode *N); + SDValue SoftPromoteHalfRes_LOAD(SDNode *N); + SDValue SoftPromoteHalfRes_SELECT(SDNode *N); + SDValue SoftPromoteHalfRes_SELECT_CC(SDNode *N); + SDValue SoftPromoteHalfRes_UnaryOp(SDNode *N); + SDValue SoftPromoteHalfRes_XINT_TO_FP(SDNode *N); + SDValue SoftPromoteHalfRes_UNDEF(SDNode *N); + + bool SoftPromoteHalfOperand(SDNode *N, unsigned OpNo); + SDValue SoftPromoteHalfOp_BITCAST(SDNode *N); + SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo); + SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N); + SDValue 
SoftPromoteHalfOp_FP_TO_XINT(SDNode *N); + SDValue SoftPromoteHalfOp_SETCC(SDNode *N); + SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo); + SDValue SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo); + + //===--------------------------------------------------------------------===// // Scalarization Support: LegalizeVectorTypes.cpp //===--------------------------------------------------------------------===// @@ -721,6 +776,11 @@ private: void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi); void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi); + // Helper function for incrementing the pointer when splitting + // memory operations + void IncrementPointer(MemSDNode *N, EVT MemVT, + MachinePointerInfo &MPI, SDValue &Ptr); + // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>. void SplitVectorResult(SDNode *N, unsigned ResNo); void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -918,6 +978,7 @@ private: void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVSETCC(const SDNode *N); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index c45c62cabc05..9cd3b8f76d6c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -50,6 +50,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypePromoteInteger: break; case TargetLowering::TypePromoteFloat: + case TargetLowering::TypeSoftPromoteHalf: llvm_unreachable("Bitcast of a promotion-needing float should never need" "expansion"); case TargetLowering::TypeSoftenFloat: @@ -82,6 +83,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; + case TargetLowering::TypeScalarizeScalableVector: + report_fatal_error("Scalarization of scalable vectors is not supported."); case TargetLowering::TypeWidenVector: { assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST"); InOp = GetWidenedVector(InOp); @@ -119,9 +122,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { SmallVector<SDValue, 8> Vals; for (unsigned i = 0; i < NumElems; ++i) - Vals.push_back(DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, CastInOp, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); + Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, + CastInOp, DAG.getVectorIdxConstant(i, dl))); // Build Lo, Hi pair by pairing extracted elements if needed. unsigned Slot = 0; @@ -154,9 +156,13 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Create the stack frame object. Make sure it is aligned for both // the source and expanded destination types. - unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment( - NOutVT.getTypeForEVT(*DAG.getContext())); - SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); + + // In cases where the vector is illegal it will be broken down into parts + // and stored in parts - we should use the alignment for the smallest part. 
+ Align InAlign = DAG.getReducedAlign(InVT, /*UseABI=*/false); + Align NOutAlign = DAG.getReducedAlign(NOutVT, /*UseABI=*/false); + Align Align = std::max(InAlign, NOutAlign); + SDValue StackPtr = DAG.CreateStackTemporary(InVT.getStoreSize(), Align); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); @@ -165,7 +171,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo); // Load the first half from the stack slot. - Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo); + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, NOutAlign); // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; @@ -173,8 +179,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, - PtrInfo.getWithOffset(IncrementSize), - MinAlign(Alignment, IncrementSize)); + PtrInfo.getWithOffset(IncrementSize), NOutAlign); // Handle endianness of the load. if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout())) @@ -251,21 +256,20 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); - unsigned Alignment = LD->getAlignment(); AAMDNodes AAInfo = LD->getAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); - Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), Alignment, - LD->getMemOperand()->getFlags(), AAInfo); + Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), + LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), + AAInfo); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); - Hi = DAG.getLoad(NVT, dl, Chain, Ptr, - LD->getPointerInfo().getWithOffset(IncrementSize), - MinAlign(Alignment, IncrementSize), - LD->getMemOperand()->getFlags(), AAInfo); + Hi = DAG.getLoad( + NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), + LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), AAInfo); // Build a factor node to remember that this load is independent of the // other one. 
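Several hunks above and below retire the MinAlign(Alignment, IncrementSize) computation in favour of the memory node's original alignment; the MachineMemOperand machinery can derive the effective alignment of the offset half from the base alignment and the pointer-info offset, which makes the manual MinAlign redundant. For reference, MinAlign returns the largest power of two dividing both values, i.e. the alignment that can still be guaranteed at base + offset. A standalone sketch of the arithmetic, mirroring the llvm::MinAlign bit trick:

#include <cstdint>
#include <iostream>

// Largest power of two dividing both A and B: if a pointer is A-aligned,
// then pointer + B is at least minAlign(A, B)-aligned.
constexpr uint64_t minAlign(uint64_t A, uint64_t B) {
  // Isolate the lowest set bit of A | B (two's-complement trick).
  return (A | B) & (1 + ~(A | B));
}

int main() {
  // A 16-byte-aligned 128-bit load split into two 64-bit halves:
  // the half at offset 8 is still provably 8-byte aligned.
  std::cout << minAlign(16, 8) << '\n'; // 8
  // A 4-byte-aligned base caps the second half at 4, whatever the offset.
  std::cout << minAlign(4, 8) << '\n';  // 4
}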
@@ -462,7 +466,6 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); SDValue Chain = St->getChain(); SDValue Ptr = St->getBasePtr(); - unsigned Alignment = St->getAlignment(); AAMDNodes AAInfo = St->getAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -474,14 +477,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout())) std::swap(Lo, Hi); - Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), Alignment, - St->getMemOperand()->getFlags(), AAInfo); + Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), + St->getOriginalAlign(), St->getMemOperand()->getFlags(), + AAInfo); Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); - Hi = DAG.getStore(Chain, dl, Hi, Ptr, - St->getPointerInfo().getWithOffset(IncrementSize), - MinAlign(Alignment, IncrementSize), - St->getMemOperand()->getFlags(), AAInfo); + Hi = DAG.getStore( + Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize), + St->getOriginalAlign(), St->getMemOperand()->getFlags(), AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -558,3 +561,12 @@ void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) { Lo = DAG.getUNDEF(LoVT); Hi = DAG.getUNDEF(HiVT); } + +void DAGTypeLegalizer::SplitRes_FREEZE(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue L, H; + SDLoc dl(N); + GetSplitOp(N->getOperand(0), L, H); + + Lo = DAG.getNode(ISD::FREEZE, dl, L.getValueType(), L); + Hi = DAG.getNode(ISD::FREEZE, dl, H.getValueType(), H); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 7d0b1ee6ae07..6409f924920d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -142,9 +142,10 @@ class VectorLegalizer { void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results); - SDValue ExpandFixedPointDiv(SDNode *Node); + void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results); SDValue ExpandStrictFPOp(SDNode *Node); void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results); + void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results); void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results); @@ -182,9 +183,7 @@ bool VectorLegalizer::Run() { E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) { // Check if the values of the nodes contain vectors. We don't need to check // the operands because we are going to check their values at some point. - for (SDNode::value_iterator J = I->value_begin(), E = I->value_end(); - J != E; ++J) - HasVectors |= J->isVector(); + HasVectors = llvm::any_of(I->values(), [](EVT T) { return T.isVector(); }); // If we found a vector node we can start the legalization. 
if (HasVectors) @@ -318,12 +317,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { } } - bool HasVectorValueOrOp = false; - for (auto J = Node->value_begin(), E = Node->value_end(); J != E; ++J) - HasVectorValueOrOp |= J->isVector(); - for (const SDValue &Oper : Node->op_values()) - HasVectorValueOrOp |= Oper.getValueType().isVector(); - + bool HasVectorValueOrOp = + llvm::any_of(Node->values(), [](EVT T) { return T.isVector(); }) || + llvm::any_of(Node->op_values(), + [](SDValue O) { return O.getValueType().isVector(); }); if (!HasVectorValueOrOp) return TranslateLegalizeResults(Op, Node); @@ -339,7 +336,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { if (Action == TargetLowering::Legal) Action = TargetLowering::Expand; break; -#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ +#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: #include "llvm/IR/ConstrainedOps.def" ValVT = Node->getValueType(0); @@ -431,6 +428,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FROUND: + case ISD::FROUNDEVEN: case ISD::FFLOOR: case ISD::FP_ROUND: case ISD::FP_EXTEND: @@ -463,7 +461,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::UMULFIX: case ISD::UMULFIXSAT: case ISD::SDIVFIX: - case ISD::UDIVFIX: { + case ISD::SDIVFIXSAT: + case ISD::UDIVFIX: + case ISD::UDIVFIXSAT: { unsigned Scale = Node->getConstantOperandVal(2); Action = TLI.getFixedPointOperationAction(Node->getOpcode(), Node->getValueType(0), Scale); @@ -704,132 +704,7 @@ void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node, std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); - - EVT SrcVT = LD->getMemoryVT(); - EVT SrcEltVT = SrcVT.getScalarType(); - unsigned NumElem = SrcVT.getVectorNumElements(); - - SDValue NewChain; - SDValue Value; - if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { - SDLoc dl(N); - - SmallVector<SDValue, 8> Vals; - SmallVector<SDValue, 8> LoadChains; - - EVT DstEltVT = LD->getValueType(0).getScalarType(); - SDValue Chain = LD->getChain(); - SDValue BasePTR = LD->getBasePtr(); - ISD::LoadExtType ExtType = LD->getExtensionType(); - - // When elements in a vector is not byte-addressable, we cannot directly - // load each element by advancing pointer, which could only address bytes. - // Instead, we load all significant words, mask bits off, and concatenate - // them to form each element. Finally, they are extended to destination - // scalar type to build the destination vector. - EVT WideVT = TLI.getPointerTy(DAG.getDataLayout()); - - assert(WideVT.isRound() && - "Could not handle the sophisticated case when the widest integer is" - " not power of 2."); - assert(WideVT.bitsGE(SrcEltVT) && - "Type is not legalized?"); - - unsigned WideBytes = WideVT.getStoreSize(); - unsigned Offset = 0; - unsigned RemainingBytes = SrcVT.getStoreSize(); - SmallVector<SDValue, 8> LoadVals; - while (RemainingBytes > 0) { - SDValue ScalarLoad; - unsigned LoadBytes = WideBytes; - - if (RemainingBytes >= LoadBytes) { - ScalarLoad = - DAG.getLoad(WideVT, dl, Chain, BasePTR, - LD->getPointerInfo().getWithOffset(Offset), - MinAlign(LD->getAlignment(), Offset), - LD->getMemOperand()->getFlags(), LD->getAAInfo()); - } else { - EVT LoadVT = WideVT; - while (RemainingBytes < LoadBytes) { - LoadBytes >>= 1; // Reduce the load size by half. 
- LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3); - } - ScalarLoad = - DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, - LD->getPointerInfo().getWithOffset(Offset), LoadVT, - MinAlign(LD->getAlignment(), Offset), - LD->getMemOperand()->getFlags(), LD->getAAInfo()); - } - - RemainingBytes -= LoadBytes; - Offset += LoadBytes; - - BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes); - - LoadVals.push_back(ScalarLoad.getValue(0)); - LoadChains.push_back(ScalarLoad.getValue(1)); - } - - unsigned BitOffset = 0; - unsigned WideIdx = 0; - unsigned WideBits = WideVT.getSizeInBits(); - - // Extract bits, pack and extend/trunc them into destination type. - unsigned SrcEltBits = SrcEltVT.getSizeInBits(); - SDValue SrcEltBitMask = DAG.getConstant( - APInt::getLowBitsSet(WideBits, SrcEltBits), dl, WideVT); - - for (unsigned Idx = 0; Idx != NumElem; ++Idx) { - assert(BitOffset < WideBits && "Unexpected offset!"); - - SDValue ShAmt = DAG.getConstant( - BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); - SDValue Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt); - - BitOffset += SrcEltBits; - if (BitOffset >= WideBits) { - WideIdx++; - BitOffset -= WideBits; - if (BitOffset > 0) { - ShAmt = DAG.getConstant( - SrcEltBits - BitOffset, dl, - TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); - SDValue Hi = - DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt); - Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi); - } - } - - Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask); - - switch (ExtType) { - default: llvm_unreachable("Unknown extended-load op!"); - case ISD::EXTLOAD: - Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT); - break; - case ISD::ZEXTLOAD: - Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT); - break; - case ISD::SEXTLOAD: - ShAmt = - DAG.getConstant(WideBits - SrcEltBits, dl, - TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); - Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt); - Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt); - Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT); - break; - } - Vals.push_back(Lo); - } - - NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); - Value = DAG.getBuildVector(N->getValueType(0), dl, Vals); - } else { - std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG); - } - - return std::make_pair(Value, NewChain); + return TLI.scalarizeVectorLoad(LD, DAG); } SDValue VectorLegalizer::ExpandStore(SDNode *N) { @@ -968,9 +843,12 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { break; case ISD::SDIVFIX: case ISD::UDIVFIX: - Results.push_back(ExpandFixedPointDiv(Node)); + ExpandFixedPointDiv(Node, Results); return; -#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::SDIVFIXSAT: + case ISD::UDIVFIXSAT: + break; +#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: #include "llvm/IR/ConstrainedOps.def" ExpandStrictFPOp(Node, Results); @@ -990,6 +868,10 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { case ISD::VECREDUCE_FMIN: Results.push_back(TLI.expandVecReduce(Node, DAG)); return; + case ISD::SREM: + case ISD::UREM: + ExpandREM(Node, Results); + return; } Results.push_back(DAG.UnrollVectorOp(Node)); @@ -1087,9 +969,8 @@ SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) { NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits(); SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NumSrcElements); - Src = 
DAG.getNode( - ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), Src, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), + Src, DAG.getVectorIdxConstant(0, DL)); } // Build a base mask of undef shuffles. @@ -1147,9 +1028,8 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) { NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits(); SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NumSrcElements); - Src = DAG.getNode( - ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), Src, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), + Src, DAG.getVectorIdxConstant(0, DL)); } // Build up a zero vector to blend into this one. @@ -1456,12 +1336,12 @@ void VectorLegalizer::ExpandMULO(SDNode *Node, Results.push_back(Overflow); } -SDValue VectorLegalizer::ExpandFixedPointDiv(SDNode *Node) { +void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { SDNode *N = Node; if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N), N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG)) - return Expanded; - return DAG.UnrollVectorOp(N); + Results.push_back(Expanded); } void VectorLegalizer::ExpandStrictFPOp(SDNode *Node, @@ -1478,6 +1358,17 @@ void VectorLegalizer::ExpandStrictFPOp(SDNode *Node, UnrollStrictFPOp(Node, Results); } +void VectorLegalizer::ExpandREM(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) && + "Expected REM node"); + + SDValue Result; + if (!TLI.expandREM(Node, Result, DAG)) + Result = DAG.UnrollVectorOp(Node); + Results.push_back(Result); +} + void VectorLegalizer::UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results) { EVT VT = Node->getValueType(0); @@ -1500,8 +1391,7 @@ void VectorLegalizer::UnrollStrictFPOp(SDNode *Node, SmallVector<SDValue, 32> OpChains; for (unsigned i = 0; i < NumElems; ++i) { SmallVector<SDValue, 4> Opers; - SDValue Idx = DAG.getConstant(i, dl, - TLI.getVectorIdxTy(DAG.getDataLayout())); + SDValue Idx = DAG.getVectorIdxConstant(i, dl); // The Chain is the first operand. 
Opers.push_back(Chain); @@ -1551,12 +1441,10 @@ SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) { SDLoc dl(Node); SmallVector<SDValue, 8> Ops(NumElems); for (unsigned i = 0; i < NumElems; ++i) { - SDValue LHSElem = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - SDValue RHSElem = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, + DAG.getVectorIdxConstant(i, dl)); + SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, + DAG.getVectorIdxConstant(i, dl)); Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), TmpEltVT), diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index d809139d3807..414ba25ffd5f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -20,10 +20,11 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TypeSize.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; #define DEBUG_TYPE "legalize-types" @@ -88,11 +89,13 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FLOG2: case ISD::FNEARBYINT: case ISD::FNEG: + case ISD::FREEZE: case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FRINT: case ISD::FROUND: + case ISD::FROUNDEVEN: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: @@ -147,7 +150,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_TernaryOp(N); break; -#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ +#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: #include "llvm/IR/ConstrainedOps.def" R = ScalarizeVecRes_StrictFPOp(N); @@ -166,7 +169,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::UMULFIX: case ISD::UMULFIXSAT: case ISD::SDIVFIX: + case ISD::SDIVFIXSAT: case ISD::UDIVFIX: + case ISD::UDIVFIXSAT: R = ScalarizeVecRes_FIX(N); break; } @@ -187,8 +192,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { SDValue Op0 = GetScalarizedVector(N->getOperand(0)); SDValue Op1 = GetScalarizedVector(N->getOperand(1)); SDValue Op2 = GetScalarizedVector(N->getOperand(2)); - return DAG.getNode(N->getOpcode(), SDLoc(N), - Op0.getValueType(), Op0, Op1, Op2); + return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1, + Op2, N->getFlags()); } SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) { @@ -196,7 +201,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) { SDValue Op1 = GetScalarizedVector(N->getOperand(1)); SDValue Op2 = N->getOperand(2); return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1, - Op2); + Op2, N->getFlags()); } SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) { @@ -221,7 +226,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) { Opers[i] = Oper; } - SDValue Result = 
DAG.getNode(N->getOpcode(), dl, ValueVTs, Opers); + SDValue Result = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(ValueVTs), + Opers, N->getFlags()); // Legalize the chain result - switch anything that used the old chain to // use the new one. @@ -251,6 +257,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_OverflowOp(SDNode *N, ResVT.getVectorElementType(), OvVT.getVectorElementType()); SDNode *ScalarNode = DAG.getNode( N->getOpcode(), DL, ScalarVTs, ScalarLHS, ScalarRHS).getNode(); + ScalarNode->setFlags(N->getFlags()); // Replace the other vector result not being explicitly scalarized here. unsigned OtherNo = 1 - ResNo; @@ -331,8 +338,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { N->getValueType(0).getVectorElementType(), SDLoc(N), N->getChain(), N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()), N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), - N->getOriginalAlignment(), N->getMemOperand()->getFlags(), - N->getAAInfo()); + N->getOriginalAlign(), N->getMemOperand()->getFlags(), N->getAAInfo()); // Legalize the chain result - switch anything that used the old chain to // use the new one. @@ -357,11 +363,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { Op = GetScalarizedVector(Op); } else { EVT VT = OpVT.getVectorElementType(); - Op = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, + DAG.getVectorIdxConstant(0, DL)); } - return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op); + return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op, N->getFlags()); } SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) { @@ -383,9 +388,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VecInregOp(SDNode *N) { if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { Op = GetScalarizedVector(Op); } else { - Op = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, OpEltVT, Op, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpEltVT, Op, + DAG.getVectorIdxConstant(0, DL)); } switch (N->getOpcode()) { @@ -421,9 +425,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { Cond = GetScalarizedVector(Cond); } else { EVT VT = OpVT.getVectorElementType(); - Cond = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Cond = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond, + DAG.getVectorIdxConstant(0, DL)); } SDValue LHS = GetScalarizedVector(N->getOperand(1)); @@ -523,12 +526,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { RHS = GetScalarizedVector(RHS); } else { EVT VT = OpVT.getVectorElementType(); - LHS = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); - RHS = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS, + DAG.getVectorIdxConstant(0, DL)); + RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS, + DAG.getVectorIdxConstant(0, DL)); } // Turn it into a scalar SETCC. 
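The repeated rewrite in these hunks is mechanical: DAG.getVectorIdxConstant(Idx, DL) bundles up the former getConstant(Idx, DL, TLI.getVectorIdxTy(DAG.getDataLayout())) pattern. The scalarization logic around it reduces to "extract each lane, apply the scalar op, repack". A standalone model of the scalarized SETCC shape in plain C++ over std::array rather than SelectionDAG (types and names are illustrative):

#include <array>
#include <cstddef>
#include <iostream>

// Scalarized vector SETCC: a <N x i32> SETLT becomes N scalar compares,
// one per extracted lane, whose results form the boolean result vector.
template <std::size_t N>
std::array<bool, N> setccLT(const std::array<int, N> &LHS,
                            const std::array<int, N> &RHS) {
  std::array<bool, N> Res{};
  for (std::size_t I = 0; I != N; ++I) {
    int L = LHS[I]; // EXTRACT_VECTOR_ELT, lane I
    int R = RHS[I]; // EXTRACT_VECTOR_ELT, lane I
    Res[I] = L < R; // scalar SETCC with condition SETLT
  }
  return Res;
}

int main() {
  std::array<int, 4> A{1, 5, 3, 7}, B{2, 4, 3, 9};
  for (bool V : setccLT(A, B))
    std::cout << V; // prints 1001
  std::cout << '\n';
}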
@@ -749,12 +750,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ return DAG.getTruncStore( N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), N->getBasePtr(), N->getPointerInfo(), - N->getMemoryVT().getVectorElementType(), N->getAlignment(), + N->getMemoryVT().getVectorElementType(), N->getOriginalAlign(), N->getMemOperand()->getFlags(), N->getAAInfo()); return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), N->getBasePtr(), N->getPointerInfo(), - N->getOriginalAlignment(), N->getMemOperand()->getFlags(), + N->getOriginalAlign(), N->getMemOperand()->getFlags(), N->getAAInfo()); } @@ -881,12 +882,14 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FLOG2: case ISD::FNEARBYINT: case ISD::FNEG: + case ISD::FREEZE: case ISD::FP_EXTEND: case ISD::FP_ROUND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FRINT: case ISD::FROUND: + case ISD::FROUNDEVEN: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: @@ -942,7 +945,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_TernaryOp(N, Lo, Hi); break; -#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ +#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: #include "llvm/IR/ConstrainedOps.def" SplitVecRes_StrictFPOp(N, Lo, Hi); @@ -961,7 +964,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::UMULFIX: case ISD::UMULFIXSAT: case ISD::SDIVFIX: + case ISD::SDIVFIXSAT: case ISD::UDIVFIX: + case ISD::UDIVFIXSAT: SplitVecRes_FIX(N, Lo, Hi); break; } @@ -971,6 +976,25 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SetSplitVector(SDValue(N, ResNo), Lo, Hi); } +void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, + MachinePointerInfo &MPI, + SDValue &Ptr) { + SDLoc DL(N); + unsigned IncrementSize = MemVT.getSizeInBits().getKnownMinSize() / 8; + + if (MemVT.isScalableVector()) { + SDValue BytesIncrement = DAG.getVScale( + DL, Ptr.getValueType(), + APInt(Ptr.getValueSizeInBits().getFixedSize(), IncrementSize)); + MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace()); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, BytesIncrement); + } else { + MPI = N->getPointerInfo().getWithOffset(IncrementSize); + // Increment the pointer to the other half. 
+ Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize); + } +} + void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHSLo, LHSHi; @@ -995,10 +1019,10 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi); SDLoc dl(N); - Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), - Op0Lo, Op1Lo, Op2Lo); - Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(), - Op0Hi, Op1Hi, Op2Hi); + Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), Op0Lo, Op1Lo, + Op2Lo, N->getFlags()); + Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(), Op0Hi, Op1Hi, + Op2Hi, N->getFlags()); } void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) { @@ -1010,8 +1034,10 @@ void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Op2 = N->getOperand(2); unsigned Opcode = N->getOpcode(); - Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Op2); - Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Op2); + Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Op2, + N->getFlags()); + Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Op2, + N->getFlags()); } void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, @@ -1030,6 +1056,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, case TargetLowering::TypeLegal: case TargetLowering::TypePromoteInteger: case TargetLowering::TypePromoteFloat: + case TargetLowering::TypeSoftPromoteHalf: case TargetLowering::TypeSoftenFloat: case TargetLowering::TypeScalarizeVector: case TargetLowering::TypeWidenVector: @@ -1055,6 +1082,8 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); return; + case TargetLowering::TypeScalarizeScalableVector: + report_fatal_error("Scalarization of scalable vectors is not supported."); } // In the general case, convert the input to an integer and split it by hand. @@ -1116,9 +1145,9 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, - DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), dl, - TLI.getVectorIdxTy(DAG.getDataLayout()))); + Hi = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, + DAG.getVectorIdxConstant(IdxVal + LoVT.getVectorNumElements(), dl)); } void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, @@ -1137,40 +1166,45 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, // boundary between the halves, we can avoid spilling the vector, and insert // into the lower half of the split vector directly. // TODO: The IdxVal == 0 constraint is artificial, we could do this whenever - // the index is constant and there is no boundary crossing. But those cases - // don't seem to get hit in practice. - if (ConstantSDNode *ConstIdx = dyn_cast<ConstantSDNode>(Idx)) { - unsigned IdxVal = ConstIdx->getZExtValue(); - if ((IdxVal == 0) && (IdxVal + SubElems <= VecElems / 2)) { - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx); - return; - } + // there is no boundary crossing. But those cases don't seem to get hit in + // practice. 
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + if ((IdxVal == 0) && (IdxVal + SubElems <= VecElems / 2)) { + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx); + return; } // Spill the vector to the stack. - SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = - DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); + // In cases where the vector is illegal it will be broken down into parts + // and stored in parts - we should use the alignment for the smallest part. + Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false); + SDValue StackPtr = + DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign); + auto &MF = DAG.getMachineFunction(); + auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); + + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, + SmallestAlign); // Store the new subvector into the specified index. SDValue SubVecPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); - Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); - unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); - Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo()); + Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, + MachinePointerInfo::getUnknownStack(MF)); // Load the Lo part from the stack slot. - Lo = - DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo()); + Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, PtrInfo, + SmallestAlign); // Increment the pointer to the other part. unsigned IncrementSize = Lo.getValueSizeInBits() / 8; StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl); // Load the Hi part from the stack slot. - Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), - MinAlign(Alignment, IncrementSize)); + Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, + PtrInfo.getWithOffset(IncrementSize), SmallestAlign); } void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, @@ -1291,8 +1325,10 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, EVT LoValueVTs[] = {LoVT, MVT::Other}; EVT HiValueVTs[] = {HiVT, MVT::Other}; - Lo = DAG.getNode(N->getOpcode(), dl, LoValueVTs, OpsLo); - Hi = DAG.getNode(N->getOpcode(), dl, HiValueVTs, OpsHi); + Lo = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(LoValueVTs), OpsLo, + N->getFlags()); + Hi = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(HiValueVTs), OpsHi, + N->getFlags()); // Build a factor node to remember that this Op is independent of the // other one. 
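When the subvector insert above cannot be folded into the low half, the fallback goes through memory: spill the whole vector into a stack temporary (now created with the reduced "smallest part" alignment), overwrite the slot at the element offset, then reload Lo and Hi separately. A standalone model of that spill/insert/reload idiom, with a plain byte buffer standing in for the stack slot (sizes and index are arbitrary examples):

#include <array>
#include <cstring>
#include <iostream>

int main() {
  std::array<int, 8> Vec{0, 1, 2, 3, 4, 5, 6, 7}; // <8 x i32> being split
  std::array<int, 2> Sub{100, 101};               // <2 x i32> to insert
  unsigned Idx = 3;                               // constant insert index

  alignas(16) unsigned char Slot[sizeof(Vec)];    // the stack temporary

  std::memcpy(Slot, Vec.data(), sizeof(Vec));     // spill the vector
  std::memcpy(Slot + Idx * sizeof(int), Sub.data(),
              sizeof(Sub));                       // store Sub at its offset

  std::array<int, 4> Lo, Hi;                      // the two split halves
  std::memcpy(Lo.data(), Slot, sizeof(Lo));               // load the Lo part
  std::memcpy(Hi.data(), Slot + sizeof(Lo), sizeof(Hi));  // load the Hi part

  for (int V : Lo) std::cout << V << ' '; // 0 1 2 100
  for (int V : Hi) std::cout << V << ' '; // 101 5 6 7
  std::cout << '\n';
}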
@@ -1332,10 +1368,8 @@ SDValue DAGTypeLegalizer::UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE) { EVT OperandVT = Operand.getValueType(); if (OperandVT.isVector()) { EVT OperandEltVT = OperandVT.getVectorElementType(); - Operands[j] = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, - DAG.getConstant(i, dl, TLI.getVectorIdxTy( - DAG.getDataLayout()))); + Operands[j] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, + Operand, DAG.getVectorIdxConstant(i, dl)); } else { Operands[j] = Operand; } @@ -1384,6 +1418,8 @@ void DAGTypeLegalizer::SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo, SDVTList HiVTs = DAG.getVTList(HiResVT, HiOvVT); SDNode *LoNode = DAG.getNode(Opcode, dl, LoVTs, LoLHS, LoRHS).getNode(); SDNode *HiNode = DAG.getNode(Opcode, dl, HiVTs, HiLHS, HiRHS).getNode(); + LoNode->setFlags(N->getFlags()); + HiNode->setFlags(N->getFlags()); Lo = SDValue(LoNode, ResNo); Hi = SDValue(HiNode, ResNo); @@ -1417,10 +1453,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Lo.getValueType(), Lo, Elt, Idx); else - Hi = - DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, - DAG.getConstant(IdxVal - LoNumElts, dl, - TLI.getVectorIdxTy(DAG.getDataLayout()))); + Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, + DAG.getVectorIdxConstant(IdxVal - LoNumElts, dl)); return; } @@ -1442,36 +1476,38 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, } // Spill the vector to the stack. - SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + // In cases where the vector is illegal it will be broken down into parts + // and stored in parts - we should use the alignment for the smallest part. + Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false); + SDValue StackPtr = + DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign); auto &MF = DAG.getMachineFunction(); auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); + + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, + SmallestAlign); // Store the new element. This may be larger than the vector element type, // so use a truncating store. SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); - Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); - unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); - Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, - MachinePointerInfo::getUnknownStack(MF), EltVT); + Store = DAG.getTruncStore( + Store, dl, Elt, EltPtr, MachinePointerInfo::getUnknownStack(MF), EltVT, + commonAlignment(SmallestAlign, EltVT.getSizeInBits() / 8)); EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT); // Load the Lo part from the stack slot. - Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo); + Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo, SmallestAlign); // Increment the pointer to the other part. unsigned IncrementSize = LoVT.getSizeInBits() / 8; - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getConstant(IncrementSize, dl, - StackPtr.getValueType())); + StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl); // Load the Hi part from the stack slot. 
Hi = DAG.getLoad(HiVT, dl, Store, StackPtr, - PtrInfo.getWithOffset(IncrementSize), - MinAlign(Alignment, IncrementSize)); + PtrInfo.getWithOffset(IncrementSize), SmallestAlign); // If we adjusted the original type, we need to truncate the results. std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); @@ -1502,21 +1538,29 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue Ptr = LD->getBasePtr(); SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); EVT MemoryVT = LD->getMemoryVT(); - unsigned Alignment = LD->getOriginalAlignment(); MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized()) { + SDValue Value, NewChain; + std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG); + std::tie(Lo, Hi) = DAG.SplitVector(Value, dl); + ReplaceValueWith(SDValue(LD, 1), NewChain); + return; + } + Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, - LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo); + LD->getPointerInfo(), LoMemVT, LD->getOriginalAlign(), + MMOFlags, AAInfo); + + MachinePointerInfo MPI; + IncrementPointer(LD, LoMemVT, MPI, Ptr); - unsigned IncrementSize = LoMemVT.getSizeInBits()/8; - Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); - Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, - LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT, - Alignment, MMOFlags, AAInfo); + Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, MPI, + HiMemVT, LD->getOriginalAlign(), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -1541,7 +1585,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, assert(Offset.isUndef() && "Unexpected indexed masked load offset"); SDValue Mask = MLD->getMask(); SDValue PassThru = MLD->getPassThru(); - unsigned Alignment = MLD->getOriginalAlignment(); + Align Alignment = MLD->getOriginalAlign(); ISD::LoadExtType ExtType = MLD->getExtensionType(); // Split Mask operand @@ -1557,7 +1601,9 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, EVT MemoryVT = MLD->getMemoryVT(); EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + bool HiIsEmpty = false; + std::tie(LoMemVT, HiMemVT) = + DAG.GetDependentSplitDestVTs(MemoryVT, LoVT, &HiIsEmpty); SDValue PassThruLo, PassThruHi; if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector) @@ -1565,27 +1611,33 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, else std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl); - MachineMemOperand *MMO = DAG.getMachineFunction(). 
- getMachineMemOperand(MLD->getPointerInfo(), - MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), - Alignment, MLD->getAAInfo(), MLD->getRanges()); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MLD->getAAInfo(), MLD->getRanges()); Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT, MMO, MLD->getAddressingMode(), ExtType, MLD->isExpandingLoad()); - Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, - MLD->isExpandingLoad()); - unsigned HiOffset = LoMemVT.getStoreSize(); - - MMO = DAG.getMachineFunction().getMachineMemOperand( - MLD->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOLoad, - HiMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), - MLD->getRanges()); - - Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi, HiMemVT, - MMO, MLD->getAddressingMode(), ExtType, - MLD->isExpandingLoad()); + if (HiIsEmpty) { + // The hi masked load has zero storage size. We therefore simply set it to + // the low masked load and rely on subsequent removal from the chain. + Hi = Lo; + } else { + // Generate hi masked load. + Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, + MLD->isExpandingLoad()); + unsigned HiOffset = LoMemVT.getStoreSize(); + + MMO = DAG.getMachineFunction().getMachineMemOperand( + MLD->getPointerInfo().getWithOffset(HiOffset), + MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment, + MLD->getAAInfo(), MLD->getRanges()); + + Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi, + HiMemVT, MMO, MLD->getAddressingMode(), ExtType, + MLD->isExpandingLoad()); + } // Build a factor node to remember that this load is independent of the // other one. @@ -1610,7 +1662,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue PassThru = MGT->getPassThru(); SDValue Index = MGT->getIndex(); SDValue Scale = MGT->getScale(); - unsigned Alignment = MGT->getOriginalAlignment(); + Align Alignment = MGT->getOriginalAlign(); // Split Mask operand SDValue MaskLo, MaskHi; @@ -1623,11 +1675,6 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); } - EVT MemoryVT = MGT->getMemoryVT(); - EVT LoMemVT, HiMemVT; - // Split MemoryVT - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - SDValue PassThruLo, PassThruHi; if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector) GetSplitVector(PassThru, PassThruLo, PassThruHi); @@ -1640,10 +1687,10 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, else std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); - MachineMemOperand *MMO = DAG.getMachineFunction(). 
- getMachineMemOperand(MGT->getPointerInfo(), - MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), - Alignment, MGT->getAAInfo(), MGT->getRanges()); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MGT->getPointerInfo(), MachineMemOperand::MOLoad, + MemoryLocation::UnknownSize, Alignment, MGT->getAAInfo(), + MGT->getRanges()); SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, @@ -1708,11 +1755,13 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, OpNo); if (N->getOpcode() == ISD::FP_ROUND) { - Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); - Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1)); + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1), + N->getFlags()); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1), + N->getFlags()); } else { - Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); - Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getFlags()); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getFlags()); } } @@ -1737,8 +1786,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, // more effectively move in the right direction and prevent falling down // to scalarization in many cases due to the input vector being split too // far. - unsigned NumElements = SrcVT.getVectorNumElements(); - if ((NumElements & 1) == 0 && + if ((SrcVT.getVectorMinNumElements() & 1) == 0 && SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) { LLVMContext &Ctx = *DAG.getContext(); EVT NewSrcVT = SrcVT.widenIntegerVectorElementType(Ctx); @@ -1851,9 +1899,9 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, Idx -= Input * NewElts; // Extract the vector element by hand. - SVOps.push_back(DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Inputs[Input], - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); + SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + Inputs[Input], + DAG.getVectorIdxConstant(Idx, dl))); } // Construct the Lo/Hi output using a BUILD_VECTOR. @@ -1882,11 +1930,11 @@ void DAGTypeLegalizer::SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue SV = N->getOperand(2); SDLoc dl(N); - const unsigned Alignment = DAG.getDataLayout().getABITypeAlignment( - NVT.getTypeForEVT(*DAG.getContext())); + const Align Alignment = + DAG.getDataLayout().getABITypeAlign(NVT.getTypeForEVT(*DAG.getContext())); - Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, SV, Alignment); - Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, SV, Alignment); + Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, SV, Alignment.value()); + Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, SV, Alignment.value()); Chain = Hi.getValue(1); // Modified the chain - switch anything that used the old chain to use @@ -2160,8 +2208,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); } else { return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi, - DAG.getConstant(IdxVal - LoElts, dl, - Idx.getValueType())); + DAG.getVectorIdxConstant(IdxVal - LoElts, dl)); } } @@ -2200,11 +2247,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { } // Store the vector to the stack. 
- SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + // In cases where the vector is illegal it will be broken down into parts + // and stored in parts - we should use the alignment for the smallest part. + Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false); + SDValue StackPtr = + DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign); auto &MF = DAG.getMachineFunction(); auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, + SmallestAlign); // Load back the required element. StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); @@ -2219,7 +2271,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { return DAG.getExtLoad( ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, - MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT); + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT, + commonAlignment(SmallestAlign, EltVT.getSizeInBits() / 8)); } SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) { @@ -2244,7 +2297,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue Scale = MGT->getScale(); SDValue Mask = MGT->getMask(); SDValue PassThru = MGT->getPassThru(); - unsigned Alignment = MGT->getOriginalAlignment(); + Align Alignment = MGT->getOriginalAlign(); SDValue MaskLo, MaskHi; if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) @@ -2269,21 +2322,15 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, else std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); - MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MGT->getPointerInfo(), - MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), - Alignment, MGT->getAAInfo(), MGT->getRanges()); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MGT->getPointerInfo(), MachineMemOperand::MOLoad, + MemoryLocation::UnknownSize, Alignment, MGT->getAAInfo(), + MGT->getRanges()); SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, MMO, MGT->getIndexType()); - MMO = DAG.getMachineFunction(). - getMachineMemOperand(MGT->getPointerInfo(), - MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), - Alignment, MGT->getAAInfo(), - MGT->getRanges()); - SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale}; SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, MMO, MGT->getIndexType()); @@ -2312,13 +2359,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, assert(Offset.isUndef() && "Unexpected indexed masked store offset"); SDValue Mask = N->getMask(); SDValue Data = N->getValue(); - EVT MemoryVT = N->getMemoryVT(); - unsigned Alignment = N->getOriginalAlignment(); + Align Alignment = N->getOriginalAlign(); SDLoc DL(N); - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - SDValue DataLo, DataHi; if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) // Split Data operand @@ -2337,32 +2380,45 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); } - SDValue Lo, Hi; - MachineMemOperand *MMO = DAG.getMachineFunction(). 
- getMachineMemOperand(N->getPointerInfo(), - MachineMemOperand::MOStore, LoMemVT.getStoreSize(), - Alignment, N->getAAInfo(), N->getRanges()); + EVT MemoryVT = N->getMemoryVT(); + EVT LoMemVT, HiMemVT; + bool HiIsEmpty = false; + std::tie(LoMemVT, HiMemVT) = + DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty); + + SDValue Lo, Hi, Res; + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + N->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, N->getAAInfo(), N->getRanges()); Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(), N->isCompressingStore()); - Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, - N->isCompressingStore()); - unsigned HiOffset = LoMemVT.getStoreSize(); + if (HiIsEmpty) { + // The hi masked store has zero storage size. + // Only the lo masked store is needed. + Res = Lo; + } else { - MMO = DAG.getMachineFunction().getMachineMemOperand( - N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore, - HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), - N->getRanges()); + Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, + N->isCompressingStore()); + unsigned HiOffset = LoMemVT.getStoreSize(); - Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO, - N->getAddressingMode(), N->isTruncatingStore(), - N->isCompressingStore()); + MMO = DAG.getMachineFunction().getMachineMemOperand( + N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore, + HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - // Build a factor node to remember that this store is independent of the - // other one. - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); + Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO, + N->getAddressingMode(), N->isTruncatingStore(), + N->isCompressingStore()); + + // Build a factor node to remember that this store is independent of the + // other one. + Res = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); + } + + return Res; } SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, @@ -2373,13 +2429,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, SDValue Index = N->getIndex(); SDValue Scale = N->getScale(); SDValue Data = N->getValue(); - EVT MemoryVT = N->getMemoryVT(); - unsigned Alignment = N->getOriginalAlignment(); + Align Alignment = N->getOriginalAlign(); SDLoc DL(N); // Split all operands - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue DataLo, DataHi; if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) @@ -2406,20 +2459,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); SDValue Lo; - MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(N->getPointerInfo(), - MachineMemOperand::MOStore, LoMemVT.getStoreSize(), - Alignment, N->getAAInfo(), N->getRanges()); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + N->getPointerInfo(), MachineMemOperand::MOStore, + MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges()); SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), DL, OpsLo, MMO, N->getIndexType()); - MMO = DAG.getMachineFunction(). 
- getMachineMemOperand(N->getPointerInfo(), - MachineMemOperand::MOStore, HiMemVT.getStoreSize(), - Alignment, N->getAAInfo(), N->getRanges()); - // The order of the Scatter operation after split is well defined. The "Hi" // part comes after the "Lo". So these two operations should be chained one // after another. @@ -2437,7 +2484,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); EVT MemoryVT = N->getMemoryVT(); - unsigned Alignment = N->getOriginalAlignment(); + Align Alignment = N->getOriginalAlign(); MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags(); AAMDNodes AAInfo = N->getAAInfo(); SDValue Lo, Hi; @@ -2450,8 +2497,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized()) return TLI.scalarizeVectorStore(N, DAG); - unsigned IncrementSize = LoMemVT.getSizeInBits()/8; - if (isTruncating) Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo); @@ -2459,17 +2504,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags, AAInfo); - // Increment the pointer to the other half. - Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize); + MachinePointerInfo MPI; + IncrementPointer(N, LoMemVT, MPI, Ptr); if (isTruncating) - Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, - N->getPointerInfo().getWithOffset(IncrementSize), + Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, MPI, HiMemVT, Alignment, MMOFlags, AAInfo); else - Hi = DAG.getStore(Ch, DL, Hi, Ptr, - N->getPointerInfo().getWithOffset(IncrementSize), - Alignment, MMOFlags, AAInfo); + Hi = DAG.getStore(Ch, DL, Hi, Ptr, MPI, Alignment, MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); } @@ -2487,9 +2529,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { for (const SDValue &Op : N->op_values()) { for (unsigned i = 0, e = Op.getValueType().getVectorNumElements(); i != e; ++i) { - Elts.push_back(DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op, - DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())))); + Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op, + DAG.getVectorIdxConstant(i, DL))); } } @@ -2565,9 +2606,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { SDValue Chain; if (N->isStrictFPOpcode()) { HalfLo = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other}, - {N->getOperand(0), HalfLo}); + {N->getOperand(0), InLoVec}); HalfHi = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other}, - {N->getOperand(0), HalfHi}); + {N->getOperand(0), InHiVec}); // Legalize the chain result - switch anything that used the old chain to // use the new one. 
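Two details in this hunk are worth spelling out: the half-width strict operations were corrected to consume the split input halves, InLoVec and InHiVec, rather than their own not-yet-defined results, and the TokenFactor just below merges the two output chains into one. The split-operate-concatenate shape, reduced to a toy standalone C++ program (names made up; an integer truncation stands in for the strict FP op):

#include <cstdint>
#include <iostream>
#include <vector>

// Stand-in for the half-width op: truncate each 32-bit lane to 8 bits.
static std::vector<uint8_t> truncHalf(const std::vector<uint32_t> &In) {
  std::vector<uint8_t> Out;
  for (uint32_t V : In)
    Out.push_back(static_cast<uint8_t>(V));
  return Out;
}

int main() {
  std::vector<uint32_t> In{256, 257, 258, 259};
  std::vector<uint32_t> InLoVec(In.begin(), In.begin() + 2); // split input
  std::vector<uint32_t> InHiVec(In.begin() + 2, In.end());
  std::vector<uint8_t> HalfLo = truncHalf(InLoVec); // feed the split inputs
  std::vector<uint8_t> HalfHi = truncHalf(InHiVec);
  HalfLo.insert(HalfLo.end(), HalfHi.begin(), HalfHi.end()); // concatenate
  for (unsigned V : HalfLo)
    std::cout << V << ' '; // 0 1 2 3
  std::cout << '\n';
}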
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, HalfLo.getValue(1), @@ -2611,9 +2652,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { SDLoc DL(N); GetSplitVector(N->getOperand(0), Lo0, Hi0); GetSplitVector(N->getOperand(1), Lo1, Hi1); - unsigned PartElements = Lo0.getValueType().getVectorNumElements(); - EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartElements); - EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*PartElements); + auto PartEltCnt = Lo0.getValueType().getVectorElementCount(); + + LLVMContext &Context = *DAG.getContext(); + EVT PartResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt); + EVT WideResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt*2); LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2)); HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2)); @@ -2753,7 +2796,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_BinaryWithExtraScalarOp(N); break; -#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ +#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: #include "llvm/IR/ConstrainedOps.def" Res = WidenVecRes_StrictFP(N); @@ -2813,6 +2856,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FNEARBYINT: case ISD::FRINT: case ISD::FROUND: + case ISD::FROUNDEVEN: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: { @@ -2842,6 +2886,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: case ISD::FNEG: + case ISD::FREEZE: case ISD::FCANONICALIZE: Res = WidenVecRes_Unary(N); break; @@ -2924,9 +2969,8 @@ static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI, SDValue VecOp = DAG.getUNDEF(NextVT); unsigned NumToInsert = ConcatEnd - Idx - 1; for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { - VecOp = DAG.getNode( - ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, ConcatOps[OpIdx], - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, + ConcatOps[OpIdx], DAG.getVectorIdxConstant(i, dl)); } ConcatOps[Idx+1] = VecOp; ConcatEnd = Idx + 2; @@ -3008,12 +3052,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { // } while (CurNumElts != 0) { while (CurNumElts >= NumElts) { - SDValue EOp1 = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - SDValue EOp2 = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, + DAG.getVectorIdxConstant(Idx, dl)); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, + DAG.getVectorIdxConstant(Idx, dl)); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags); Idx += NumElts; CurNumElts -= NumElts; @@ -3025,12 +3067,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { if (NumElts == 1) { for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { - SDValue EOp1 = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp1, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - SDValue EOp2 = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 
dl, WidenEltVT, + InOp1, DAG.getVectorIdxConstant(Idx, dl)); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + InOp2, DAG.getVectorIdxConstant(Idx, dl)); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2, Flags); } @@ -3108,14 +3148,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) { while (CurNumElts != 0) { while (CurNumElts >= NumElts) { SmallVector<SDValue, 4> EOps; - + for (unsigned i = 0; i < NumOpers; ++i) { SDValue Op = InOps[i]; - - if (Op.getValueType().isVector()) - Op = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, VT, Op, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + if (Op.getValueType().isVector()) + Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Op, + DAG.getVectorIdxConstant(Idx, dl)); EOps.push_back(Op); } @@ -3140,10 +3179,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) { SDValue Op = InOps[i]; if (Op.getValueType().isVector()) - Op = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, Op, - DAG.getConstant(Idx, dl, - TLI.getVectorIdxTy(DAG.getDataLayout()))); + Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, Op, + DAG.getVectorIdxConstant(Idx, dl)); EOps.push_back(Op); } @@ -3190,8 +3227,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) { *DAG.getContext(), ResVT.getVectorElementType(), WideOvVT.getVectorNumElements()); - SDValue Zero = DAG.getConstant( - 0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())); + SDValue Zero = DAG.getVectorIdxConstant(0, DL); WideLHS = DAG.getNode( ISD::INSERT_SUBVECTOR, DL, WideResVT, DAG.getUNDEF(WideResVT), N->getOperand(0), Zero); @@ -3210,8 +3246,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) { if (getTypeAction(OtherVT) == TargetLowering::TypeWidenVector) { SetWidenedVector(SDValue(N, OtherNo), SDValue(WideNode, OtherNo)); } else { - SDValue Zero = DAG.getConstant( - 0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())); + SDValue Zero = DAG.getVectorIdxConstant(0, DL); SDValue OtherVal = DAG.getNode( ISD::EXTRACT_SUBVECTOR, DL, OtherVT, SDValue(WideNode, OtherNo), Zero); ReplaceValueWith(SDValue(N, OtherNo), OtherVal); @@ -3274,9 +3309,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { } if (InVTNumElts % WidenNumElts == 0) { - SDValue InVal = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp, + DAG.getVectorIdxConstant(0, DL)); // Extract the input and convert the shorten input vector. if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVal); @@ -3291,9 +3325,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { // necessary. 
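The loop below unrolls the conversion lane by lane, but only over the element count of the original result type, so the extra lanes introduced by widening are never converted. The bookkeeping, as a self-contained C++ toy (invented values; zeros stand in for the undef padding lanes):

#include <iostream>
#include <vector>

int main() {
  const unsigned MinElts = 3, WidenElts = 4;    // v3f32 widened to v4f32
  std::vector<float> In{1.9f, 2.9f, 3.9f, 0.f}; // lane 3 is padding
  std::vector<int> Out(WidenElts, 0);           // stand-in for UNDEF lanes
  for (unsigned i = 0; i < MinElts; ++i)        // original count only
    Out[i] = static_cast<int>(In[i]);           // one scalar convert per lane
  for (int V : Out)
    std::cout << V << ' '; // 1 2 3 0
  std::cout << '\n';
}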
unsigned MinElts = N->getValueType(0).getVectorNumElements(); for (unsigned i=0; i < MinElts; ++i) { - SDValue Val = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, - DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, + DAG.getVectorIdxConstant(i, DL)); if (N->getNumOperands() == 1) Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); else @@ -3310,7 +3343,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); - SmallVector<EVT, 2> WidenVTs = { WidenVT, MVT::Other }; EVT InVT = InOp.getValueType(); EVT InEltVT = InVT.getVectorElementType(); @@ -3321,16 +3353,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) { // Otherwise unroll into some nasty scalar code and rebuild the vector. EVT EltVT = WidenVT.getVectorElementType(); - SmallVector<EVT, 2> EltVTs = { EltVT, MVT::Other }; + std::array<EVT, 2> EltVTs = {{EltVT, MVT::Other}}; SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT)); SmallVector<SDValue, 32> OpChains; // Use the original element count so we don't do more scalar opts than // necessary. unsigned MinElts = N->getValueType(0).getVectorNumElements(); for (unsigned i=0; i < MinElts; ++i) { - NewOps[1] = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, - DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + NewOps[1] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, + DAG.getVectorIdxConstant(i, DL)); Ops[i] = DAG.getNode(Opcode, DL, EltVTs, NewOps); OpChains.push_back(Ops[i].getValue(1)); } @@ -3370,7 +3401,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) { SmallVector<SDValue, 16> Ops; for (unsigned i = 0, e = std::min(InVTNumElts, WidenNumElts); i != e; ++i) { SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InSVT, InOp, - DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + DAG.getVectorIdxConstant(i, DL)); switch (Opcode) { case ISD::ANY_EXTEND_VECTOR_INREG: Val = DAG.getNode(ISD::ANY_EXTEND, DL, WidenSVT, Val); @@ -3463,6 +3494,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { switch (getTypeAction(InVT)) { case TargetLowering::TypeLegal: break; + case TargetLowering::TypeScalarizeScalableVector: + report_fatal_error("Scalarization of scalable vectors is not supported."); case TargetLowering::TypePromoteInteger: { // If the incoming type is a vector that is being promoted, then // we know that the elements are arranged differently and that we @@ -3492,6 +3525,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { } case TargetLowering::TypeSoftenFloat: case TargetLowering::TypePromoteFloat: + case TargetLowering::TypeSoftPromoteHalf: case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: case TargetLowering::TypeScalarizeVector: @@ -3626,10 +3660,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { SDValue InOp = N->getOperand(i); if (InputWidened) InOp = GetWidenedVector(InOp); - for (unsigned j=0; j < NumInElts; ++j) - Ops[Idx++] = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + for (unsigned j = 0; j < NumInElts; ++j) + Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getVectorIdxConstant(j, dl)); } SDValue UndefVal = DAG.getUNDEF(EltVT); for (; Idx < 
WidenNumElts; ++Idx) @@ -3666,11 +3699,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { EVT EltVT = VT.getVectorElementType(); unsigned NumElts = VT.getVectorNumElements(); unsigned i; - for (i=0; i < NumElts; ++i) - Ops[i] = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(IdxVal + i, dl, - TLI.getVectorIdxTy(DAG.getDataLayout()))); + for (i = 0; i < NumElts; ++i) + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getVectorIdxConstant(IdxVal + i, dl)); SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) @@ -3689,6 +3720,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); ISD::LoadExtType ExtType = LD->getExtensionType(); + // A vector must always be stored in memory as-is, i.e. without any padding + // between the elements, since various code depend on it, e.g. in the + // handling of a bitcast of a vector type to int, which may be done with a + // vector store followed by an integer load. A vector that does not have + // elements that are byte-sized must therefore be stored as an integer + // built out of the extracted vector elements. + if (!LD->getMemoryVT().isByteSized()) { + SDValue Value, NewChain; + std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG); + ReplaceValueWith(SDValue(LD, 0), Value); + ReplaceValueWith(SDValue(LD, 1), NewChain); + return SDValue(); + } + SDValue Result; SmallVector<SDValue, 16> LdChain; // Chain for the series of load if (ExtType != ISD::NON_EXTLOAD) @@ -3877,8 +3922,7 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT, // Adjust Mask to the right number of elements. unsigned CurrMaskNumEls = Mask->getValueType(0).getVectorNumElements(); if (CurrMaskNumEls > ToMaskVT.getVectorNumElements()) { - MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); - SDValue ZeroIdx = DAG.getConstant(0, SDLoc(Mask), IdxTy); + SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(Mask)); Mask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Mask), ToMaskVT, Mask, ZeroIdx); } else if (CurrMaskNumEls < ToMaskVT.getVectorNumElements()) { @@ -4144,12 +4188,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) { SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getUNDEF(EltVT)); SmallVector<SDValue, 8> Chains(NumElts); for (unsigned i = 0; i != NumElts; ++i) { - SDValue LHSElem = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - SDValue RHSElem = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, + DAG.getVectorIdxConstant(i, dl)); + SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, + DAG.getVectorIdxConstant(i, dl)); Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other}, {Chain, LHSElem, RHSElem, CC}); @@ -4288,13 +4330,12 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() && "We can't have the same type as we started with!"); if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements()) - InOp = DAG.getNode( - ISD::INSERT_SUBVECTOR, DL, FixedVT, DAG.getUNDEF(FixedVT), InOp, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT, + DAG.getUNDEF(FixedVT), InOp, + DAG.getVectorIdxConstant(0, DL)); else 
- InOp = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, + DAG.getVectorIdxConstant(0, DL)); break; } } @@ -4363,9 +4404,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { else Res = DAG.getNode(Opcode, dl, WideVT, InOp); } - return DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, VT, Res, - DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res, + DAG.getVectorIdxConstant(0, dl)); } EVT InEltVT = InVT.getVectorElementType(); @@ -4376,9 +4416,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); SmallVector<SDValue, 32> OpChains; for (unsigned i=0; i < NumElts; ++i) { - NewOps[1] = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + NewOps[1] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getVectorIdxConstant(i, dl)); Ops[i] = DAG.getNode(Opcode, dl, { EltVT, MVT::Other }, NewOps); OpChains.push_back(Ops[i].getValue(1)); } @@ -4386,11 +4425,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { ReplaceValueWith(SDValue(N, 1), NewChain); } else { for (unsigned i = 0; i < NumElts; ++i) - Ops[i] = DAG.getNode( - Opcode, dl, EltVT, - DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); + Ops[i] = DAG.getNode(Opcode, dl, EltVT, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, + InOp, DAG.getVectorIdxConstant(i, dl))); } return DAG.getBuildVector(VT, dl, Ops); @@ -4411,9 +4448,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); if (TLI.isTypeLegal(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); - return DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, + DAG.getVectorIdxConstant(0, dl)); } } @@ -4430,7 +4466,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { if (TLI.isTypeLegal(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, BitOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + DAG.getVectorIdxConstant(0, dl)); } } } @@ -4470,10 +4506,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { TargetLowering::TypeWidenVector && "Unexpected type action"); InOp = GetWidenedVector(InOp); - for (unsigned j=0; j < NumInElts; ++j) - Ops[Idx++] = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + for (unsigned j = 0; j < NumInElts; ++j) + Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getVectorIdxConstant(j, dl)); } return DAG.getBuildVector(VT, dl, Ops); } @@ -4630,9 +4665,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { EVT ResVT = EVT::getVectorVT(*DAG.getContext(), SVT.getVectorElementType(), VT.getVectorNumElements()); - SDValue CC = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, - DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, + DAG.getVectorIdxConstant(0, dl)); EVT OpVT = 
N->getOperand(0).getValueType(); ISD::NodeType ExtendCode = @@ -4657,12 +4691,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_STRICT_FSETCC(SDNode *N) { SmallVector<SDValue, 8> Chains(NumElts); for (unsigned i = 0; i != NumElts; ++i) { - SDValue LHSElem = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - SDValue RHSElem = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, + DAG.getVectorIdxConstant(i, dl)); + SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, + DAG.getVectorIdxConstant(i, dl)); Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other}, {Chain, LHSElem, RHSElem, CC}); @@ -4729,7 +4761,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) { unsigned WideElts = WideVT.getVectorNumElements(); for (unsigned Idx = OrigElts; Idx < WideElts; Idx++) Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + DAG.getVectorIdxConstant(Idx, dl)); return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Op, N->getFlags()); } @@ -4748,9 +4780,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) { SDValue Select = DAG.getNode(N->getOpcode(), DL, LeftIn.getValueType(), Cond, LeftIn, RightIn); - return DAG.getNode( - ISD::EXTRACT_SUBVECTOR, DL, VT, Select, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Select, + DAG.getVectorIdxConstant(0, DL)); } //===----------------------------------------------------------------------===// @@ -4836,7 +4867,6 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, SmallVectorImpl<SDValue> &LdOps, unsigned Start, unsigned End) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc dl(LdOps[Start]); EVT LdTy = LdOps[Start].getValueType(); unsigned Width = VecTy.getSizeInBits(); @@ -4856,9 +4886,8 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits(); LdTy = NewLdTy; } - VecOp = DAG.getNode( - ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], - DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], + DAG.getVectorIdxConstant(Idx++, dl)); } return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); } @@ -4879,19 +4908,19 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, // Load information SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); - unsigned Align = LD->getAlignment(); MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; - unsigned LdAlign = (!LD->isSimple()) ? 0 : Align; // Allow wider loads. + // Allow wider loads. + unsigned LdAlign = (!LD->isSimple()) ? 0 : LD->getAlignment(); // Find the vector type that can load from. 
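FindMemType returns the widest type that still fits the bits remaining to load (subject to alignment and legality), so the surrounding loop covers an odd-sized load with progressively narrower pieces. Its greedy core reduces to roughly this, under deliberately simplified assumptions (standalone C++, power-of-two widths only, no vector candidates, no alignment checks):

#include <iostream>

int main() {
  int LdWidth = 96; // bits still to load, e.g. a v3i32
  int Width = 128;  // widest candidate type
  while (LdWidth > 0) {
    while (Width > LdWidth)
      Width /= 2;                               // shrink until it fits
    std::cout << "load " << Width << " bits\n"; // emits 64, then 32
    LdWidth -= Width;
  }
}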
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); int NewVTWidth = NewVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), - Align, MMOFlags, AAInfo); + LD->getOriginalAlign(), MMOFlags, AAInfo); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction. @@ -4934,7 +4963,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, NewVTWidth = NewVT.getSizeInBits(); L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), - MinAlign(Align, Increment), MMOFlags, AAInfo); + LD->getOriginalAlign(), MMOFlags, AAInfo); LdChain.push_back(L.getValue(1)); if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) { // Later code assumes the vector loads produced will be mergeable, so we @@ -4952,7 +4981,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, } else { L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), - MinAlign(Align, Increment), MMOFlags, AAInfo); + LD->getOriginalAlign(), MMOFlags, AAInfo); LdChain.push_back(L.getValue(1)); } @@ -5029,7 +5058,6 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, // Load information SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); - unsigned Align = LD->getAlignment(); MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); @@ -5043,14 +5071,14 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, unsigned Increment = LdEltVT.getSizeInBits() / 8; Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(), - LdEltVT, Align, MMOFlags, AAInfo); + LdEltVT, LD->getOriginalAlign(), MMOFlags, AAInfo); LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset); Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, LD->getPointerInfo().getWithOffset(Offset), LdEltVT, - Align, MMOFlags, AAInfo); + LD->getOriginalAlign(), MMOFlags, AAInfo); LdChain.push_back(Ops[i].getValue(1)); } @@ -5069,7 +5097,6 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, // element type or scalar stores. SDValue Chain = ST->getChain(); SDValue BasePtr = ST->getBasePtr(); - unsigned Align = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); @@ -5093,12 +5120,11 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, if (NewVT.isVector()) { unsigned NumVTElts = NewVT.getVectorNumElements(); do { - SDValue EOp = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, + DAG.getVectorIdxConstant(Idx, dl)); StChain.push_back(DAG.getStore( Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), - MinAlign(Align, Offset), MMOFlags, AAInfo)); + ST->getOriginalAlign(), MMOFlags, AAInfo)); StWidth -= NewVTWidth; Offset += Increment; Idx += NumVTElts; @@ -5113,13 +5139,11 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, // Readjust index position based on new vector type. 
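When the store loop falls back to a narrower type, the running index must be rescaled so it still names the same bit offset in the new units; that is all the next statement computes. The arithmetic in isolation (standalone C++, numbers invented):

#include <iostream>

int main() {
  unsigned Idx = 4;          // position, counted in 32-bit units
  unsigned ValEltWidth = 32; // old granularity, in bits
  unsigned NewVTWidth = 16;  // new granularity, in bits
  Idx = Idx * ValEltWidth / NewVTWidth; // same bit offset, new units
  std::cout << Idx << '\n';  // prints 8
}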
Idx = Idx * ValEltWidth / NewVTWidth; do { - SDValue EOp = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, - DAG.getConstant(Idx++, dl, - TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, + DAG.getVectorIdxConstant(Idx++, dl)); StChain.push_back(DAG.getStore( Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), - MinAlign(Align, Offset), MMOFlags, AAInfo)); + ST->getOriginalAlign(), MMOFlags, AAInfo)); StWidth -= NewVTWidth; Offset += Increment; BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment); @@ -5137,7 +5161,6 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, // and then store it. Instead, we extract each element and then store it. SDValue Chain = ST->getChain(); SDValue BasePtr = ST->getBasePtr(); - unsigned Align = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); @@ -5157,21 +5180,19 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, EVT ValEltVT = ValVT.getVectorElementType(); unsigned Increment = ValEltVT.getSizeInBits() / 8; unsigned NumElts = StVT.getVectorNumElements(); - SDValue EOp = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, - ST->getPointerInfo(), StEltVT, Align, - MMOFlags, AAInfo)); + SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, + DAG.getVectorIdxConstant(0, dl)); + StChain.push_back( + DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), StEltVT, + ST->getOriginalAlign(), MMOFlags, AAInfo)); unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset); - SDValue EOp = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, + DAG.getVectorIdxConstant(0, dl)); StChain.push_back(DAG.getTruncStore( Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset), - StEltVT, MinAlign(Align, Offset), MMOFlags, AAInfo)); + StEltVT, ST->getOriginalAlign(), MMOFlags, AAInfo)); } } @@ -5206,9 +5227,8 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT, } if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) - return DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, + DAG.getVectorIdxConstant(0, dl)); // Fall back to extract and build. SmallVector<SDValue, 16> Ops(WidenNumElts); @@ -5216,9 +5236,8 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT, unsigned MinNumElts = std::min(WidenNumElts, InNumElts); unsigned Idx; for (Idx = 0; Idx < MinNumElts; ++Idx) - Ops[Idx] = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getVectorIdxConstant(Idx, dl)); SDValue FillVal = FillWithZeroes ? 
DAG.getConstant(0, dl, EltVT) : DAG.getUNDEF(EltVT); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 34660e3a48ec..55fe26eb64cd 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -19,9 +19,13 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/ResourcePriorityQueue.h" +#include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 7ee44c808fcb..2902c96c7658 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -761,7 +761,7 @@ void ScheduleDAGLinearize::Schedule() { MachineBasicBlock* ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(BB, InsertPos); - DenseMap<SDValue, unsigned> VRBaseMap; + DenseMap<SDValue, Register> VRBaseMap; LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; }); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index ff806bdb822c..72e68a5045c6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -279,7 +279,7 @@ private: SUnit *NewNode = newSUnit(N); // Update the topological ordering. if (NewNode->NodeNum >= NumSUnits) - Topo.MarkDirty(); + Topo.AddSUnitWithoutPredecessors(NewNode); return NewNode; } @@ -289,7 +289,7 @@ private: SUnit *NewNode = Clone(N); // Update the topological ordering. if (NewNode->NodeNum >= NumSUnits) - Topo.MarkDirty(); + Topo.AddSUnitWithoutPredecessors(NewNode); return NewNode; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 0e4d783e3505..ce20d506586f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -31,6 +31,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; #define DEBUG_TYPE "pre-RA-sched" @@ -198,10 +199,10 @@ static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) { /// outputs to ensure they are scheduled together and in order. This /// optimization may benefit some targets by improving cache locality. 
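The function below walks users of a load's chain to find neighboring loads worth clustering. Since an SDNode can produce several results, the chain is now carried as an SDValue, and a use only counts as a chain user if its result number matches; uses of the data result are skipped. That filter, modeled with toy types in standalone C++ (nothing here is the real SelectionDAG use list):

#include <iostream>
#include <vector>

// A use records which result of the defining node it consumes.
struct Use {
  int UserId;
  unsigned ResNo;
};

int main() {
  const unsigned ChainResNo = 1; // the load also produces data result 0
  std::vector<Use> Uses{{10, 0}, {11, 1}, {12, 0}, {13, 1}};
  for (const Use &U : Uses) {
    if (U.ResNo != ChainResNo)
      continue; // consumes the data value, not the chain
    std::cout << "chain user " << U.UserId << '\n'; // 11 and 13
  }
}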
void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { - SDNode *Chain = nullptr; + SDValue Chain; unsigned NumOps = Node->getNumOperands(); if (Node->getOperand(NumOps-1).getValueType() == MVT::Other) - Chain = Node->getOperand(NumOps-1).getNode(); + Chain = Node->getOperand(NumOps-1); if (!Chain) return; @@ -234,6 +235,9 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { unsigned UseCount = 0; for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end(); I != E && UseCount < 100; ++I, ++UseCount) { + if (I.getUse().getResNo() != Chain.getResNo()) + continue; + SDNode *User = *I; if (User == Node || !Visited.insert(User).second) continue; @@ -471,6 +475,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDNode *OpN = N->getOperand(i).getNode(); + unsigned DefIdx = N->getOperand(i).getResNo(); if (isPassiveNode(OpN)) continue; // Not scheduled. SUnit *OpSU = &SUnits[OpN->getNodeId()]; assert(OpSU && "Node has no SUnit!"); @@ -505,7 +510,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { Dep.setLatency(OpLatency); if (!isChain && !UnitLatencies) { computeOperandLatency(OpN, N, i, Dep); - ST.adjustSchedDependency(OpSU, SU, Dep); + ST.adjustSchedDependency(OpSU, DefIdx, SU, i, Dep); } if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) { @@ -731,7 +736,7 @@ void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) { static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders, - DenseMap<SDValue, unsigned> &VRBaseMap, unsigned Order) { + DenseMap<SDValue, Register> &VRBaseMap, unsigned Order) { if (!N->getHasDebugValue()) return; @@ -758,9 +763,9 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, // instructions in the right order. static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, - DenseMap<SDValue, unsigned> &VRBaseMap, + DenseMap<SDValue, Register> &VRBaseMap, SmallVectorImpl<std::pair<unsigned, MachineInstr *>> &Orders, - SmallSet<unsigned, 8> &Seen, MachineInstr *NewInsn) { + SmallSet<Register, 8> &Seen, MachineInstr *NewInsn) { unsigned Order = N->getIROrder(); if (!Order || Seen.count(Order)) { // Process any valid SDDbgValues even if node does not have any order @@ -784,17 +789,17 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, } void ScheduleDAGSDNodes:: -EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, +EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap, MachineBasicBlock::iterator InsertPos) { for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { if (I->isCtrl()) continue; // ignore chain preds if (I->getSUnit()->CopyDstRC) { // Copy to physical register. - DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit()); + DenseMap<SUnit*, Register>::iterator VRI = VRBaseMap.find(I->getSUnit()); assert(VRI != VRBaseMap.end() && "Node emitted out of order - late"); // Find the destination physical register. 
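The next hunk is one instance of a migration that recurs through these schedulers: bare unsigned register numbers become the Register wrapper, whose default-constructed state explicitly means no register. A minimal stand-in for the idea in plain C++ (not LLVM's actual Register class):

#include <cassert>
#include <iostream>

class Reg {
  unsigned Id = 0; // 0 encodes "no register assigned yet"
public:
  Reg() = default;
  explicit Reg(unsigned I) : Id(I) {}
  explicit operator bool() const { return Id != 0; }
  unsigned id() const {
    assert(Id != 0 && "querying an unassigned register");
    return Id;
  }
};

int main() {
  Reg R;         // default: no register
  if (!R)
    R = Reg(42); // assign once the destination is found
  std::cout << R.id() << '\n'; // prints 42
}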
- unsigned Reg = 0; + Register Reg; for (SUnit::const_succ_iterator II = SU->Succs.begin(), EE = SU->Succs.end(); II != EE; ++II) { if (II->isCtrl()) continue; // ignore chain preds @@ -826,17 +831,17 @@ EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, MachineBasicBlock *ScheduleDAGSDNodes:: EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(BB, InsertPos); - DenseMap<SDValue, unsigned> VRBaseMap; - DenseMap<SUnit*, unsigned> CopyVRBaseMap; + DenseMap<SDValue, Register> VRBaseMap; + DenseMap<SUnit*, Register> CopyVRBaseMap; SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders; - SmallSet<unsigned, 8> Seen; + SmallSet<Register, 8> Seen; bool HasDbg = DAG->hasDebugValues(); // Emit a node, and determine where its first instruction is for debuginfo. // Zero, one, or multiple instructions can be created when emitting a node. auto EmitNode = [&](SDNode *Node, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap) -> MachineInstr * { + DenseMap<SDValue, Register> &VRBaseMap) -> MachineInstr * { // Fetch instruction prior to this, or end() if nonexistant. auto GetPrevInsn = [&](MachineBasicBlock::iterator I) { if (I == BB->begin()) @@ -863,9 +868,14 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { MI = &*std::next(Before); } - if (MI->isCall() && DAG->getTarget().Options.EnableDebugEntryValues) + if (MI->isCandidateForCallSiteEntry() && + DAG->getTarget().Options.EmitCallSiteInfo) MF.addCallArgsForwardingRegs(MI, DAG->getSDCallSiteInfo(Node)); + if (DAG->getNoMergeSiteInfo(Node)) { + MI->setFlag(MachineInstr::MIFlag::NoMerge); + } + return MI; }; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 183ce4b0652d..8c28ce403c9b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -184,7 +184,7 @@ class InstrItineraryData; void BuildSchedUnits(); void AddSchedEdges(); - void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, + void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap, MachineBasicBlock::iterator InsertPos); }; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 313e07b5fdd6..592c09c10fb0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -38,6 +38,7 @@ #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -543,7 +544,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { case ISD::ConstantPool: case ISD::TargetConstantPool: { const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N); - ID.AddInteger(CP->getAlignment()); + ID.AddInteger(CP->getAlign().value()); ID.AddInteger(CP->getOffset()); if (CP->isMachineConstantPoolEntry()) CP->getMachineCPVal()->addSelectionDAGCSEId(ID); @@ -1000,12 +1001,12 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops, return Node; } -unsigned SelectionDAG::getEVTAlignment(EVT VT) const { +Align 
SelectionDAG::getEVTAlign(EVT VT) const { Type *Ty = VT == MVT::iPTR ? PointerType::get(Type::getInt8Ty(*getContext()), 0) : VT.getTypeForEVT(*getContext()); - return getDataLayout().getABITypeAlignment(Ty); + return getDataLayout().getABITypeAlign(Ty); } // EntryNode could meaningfully have debug info if we can find it... @@ -1167,15 +1168,21 @@ SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, } SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { - assert(!VT.isVector() && - "getZeroExtendInReg should use the vector element type instead of " - "the vector type!"); - if (Op.getValueType().getScalarType() == VT) return Op; - unsigned BitWidth = Op.getScalarValueSizeInBits(); - APInt Imm = APInt::getLowBitsSet(BitWidth, - VT.getSizeInBits()); - return getNode(ISD::AND, DL, Op.getValueType(), Op, - getConstant(Imm, DL, Op.getValueType())); + EVT OpVT = Op.getValueType(); + assert(VT.isInteger() && OpVT.isInteger() && + "Cannot getZeroExtendInReg FP types"); + assert(VT.isVector() == OpVT.isVector() && + "getZeroExtendInReg type should be vector iff the operand " + "type is vector!"); + assert((!VT.isVector() || + VT.getVectorElementCount() == OpVT.getVectorElementCount()) && + "Vector element counts must match in getZeroExtendInReg"); + assert(VT.bitsLE(OpVT) && "Not extending!"); + if (OpVT == VT) + return Op; + APInt Imm = APInt::getLowBitsSet(OpVT.getScalarSizeInBits(), + VT.getScalarSizeInBits()); + return getNode(ISD::AND, DL, OpVT, Op, getConstant(Imm, DL, OpVT)); } SDValue SelectionDAG::getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { @@ -1332,10 +1339,16 @@ SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL, SDValue SelectionDAG::getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes) { + assert(VT.isInteger() && "Shift amount is not an integer type!"); EVT ShiftVT = TLI->getShiftAmountTy(VT, getDataLayout(), LegalTypes); return getConstant(Val, DL, ShiftVT); } +SDValue SelectionDAG::getVectorIdxConstant(uint64_t Val, const SDLoc &DL, + bool isTarget) { + return getConstant(Val, DL, TLI->getVectorIdxTy(getDataLayout()), isTarget); +} + SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT, bool isTarget) { return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget); @@ -1381,7 +1394,7 @@ SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT, else if (EltVT == MVT::f64) return getConstantFP(APFloat(Val), DL, VT, isTarget); else if (EltVT == MVT::f80 || EltVT == MVT::f128 || EltVT == MVT::ppcf128 || - EltVT == MVT::f16) { + EltVT == MVT::f16 || EltVT == MVT::bf16) { bool Ignored; APFloat APF = APFloat(Val); APF.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, @@ -1459,19 +1472,18 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, } SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, - unsigned Alignment, int Offset, - bool isTarget, - unsigned TargetFlags) { + MaybeAlign Alignment, int Offset, + bool isTarget, unsigned TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); - if (Alignment == 0) + if (!Alignment) Alignment = shouldOptForSize() - ? getDataLayout().getABITypeAlignment(C->getType()) - : getDataLayout().getPrefTypeAlignment(C->getType()); + ? getDataLayout().getABITypeAlign(C->getType()) + : getDataLayout().getPrefTypeAlign(C->getType()); unsigned Opc = isTarget ? 
ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), None); - ID.AddInteger(Alignment); + ID.AddInteger(Alignment->value()); ID.AddInteger(Offset); ID.AddPointer(C); ID.AddInteger(TargetFlags); @@ -1479,25 +1491,26 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment, + auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, *Alignment, TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); - return SDValue(N, 0); + SDValue V = SDValue(N, 0); + NewSDValueDbgMsg(V, "Creating new constant pool: ", this); + return V; } SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, - unsigned Alignment, int Offset, - bool isTarget, - unsigned TargetFlags) { + MaybeAlign Alignment, int Offset, + bool isTarget, unsigned TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); - if (Alignment == 0) - Alignment = getDataLayout().getPrefTypeAlignment(C->getType()); + if (!Alignment) + Alignment = getDataLayout().getPrefTypeAlign(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), None); - ID.AddInteger(Alignment); + ID.AddInteger(Alignment->value()); ID.AddInteger(Offset); C->addSelectionDAGCSEId(ID); ID.AddInteger(TargetFlags); @@ -1505,7 +1518,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment, + auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, *Alignment, TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); @@ -1861,9 +1874,6 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, } SDValue SelectionDAG::getSrcValue(const Value *V) { - assert((!V || V->getType()->isPointerTy()) && - "SrcValue is not a pointer?"); - FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None); ID.AddPointer(V); @@ -1921,6 +1931,10 @@ SDValue SelectionDAG::getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, return SDValue(N, 0); } +SDValue SelectionDAG::getFreeze(SDValue V) { + return getNode(ISD::FREEZE, SDLoc(V), V.getValueType(), V); +} + /// getShiftAmountOperand - Return the specified value casted to /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { @@ -1979,28 +1993,54 @@ SDValue SelectionDAG::expandVACopy(SDNode *Node) { MachinePointerInfo(VD)); } -SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { - MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); - unsigned ByteSize = VT.getStoreSize(); +Align SelectionDAG::getReducedAlign(EVT VT, bool UseABI) { + const DataLayout &DL = getDataLayout(); Type *Ty = VT.getTypeForEVT(*getContext()); - unsigned StackAlign = - std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign); + Align RedAlign = UseABI ? 
DL.getABITypeAlign(Ty) : DL.getPrefTypeAlign(Ty); + + if (TLI->isTypeLegal(VT) || !VT.isVector()) + return RedAlign; + + const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + const Align StackAlign = TFI->getStackAlign(); + + // See if we can choose a smaller ABI alignment in cases where it's an + // illegal vector type that will get broken down. + if (RedAlign > StackAlign) { + EVT IntermediateVT; + MVT RegisterVT; + unsigned NumIntermediates; + TLI->getVectorTypeBreakdown(*getContext(), VT, IntermediateVT, + NumIntermediates, RegisterVT); + Ty = IntermediateVT.getTypeForEVT(*getContext()); + Align RedAlign2 = UseABI ? DL.getABITypeAlign(Ty) : DL.getPrefTypeAlign(Ty); + if (RedAlign2 < RedAlign) + RedAlign = RedAlign2; + } + + return RedAlign; +} - int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false); +SDValue SelectionDAG::CreateStackTemporary(TypeSize Bytes, Align Alignment) { + MachineFrameInfo &MFI = MF->getFrameInfo(); + int FrameIdx = MFI.CreateStackObject(Bytes, Alignment, false); return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); } +SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { + Type *Ty = VT.getTypeForEVT(*getContext()); + Align StackAlign = + std::max(getDataLayout().getPrefTypeAlign(Ty), Align(minAlign)); + return CreateStackTemporary(VT.getStoreSize(), StackAlign); +} + SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { - unsigned Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize()); + TypeSize Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize()); Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); const DataLayout &DL = getDataLayout(); - unsigned Align = - std::max(DL.getPrefTypeAlignment(Ty1), DL.getPrefTypeAlignment(Ty2)); - - MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); - int FrameIdx = MFI.CreateStackObject(Bytes, Align, false); - return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); + Align Align = std::max(DL.getPrefTypeAlign(Ty1), DL.getPrefTypeAlign(Ty2)); + return CreateStackTemporary(Bytes, Align); } SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, @@ -2179,21 +2219,16 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, const APInt &DemandedElts) { switch (V.getOpcode()) { default: + return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts, + *this, 0); break; case ISD::Constant: { - auto *CV = cast<ConstantSDNode>(V.getNode()); - assert(CV && "Const value should be ConstSDNode."); - const APInt &CVal = CV->getAPIntValue(); + const APInt &CVal = cast<ConstantSDNode>(V)->getAPIntValue(); APInt NewVal = CVal & DemandedBits; if (NewVal != CVal) return getConstant(NewVal, SDLoc(V), V.getValueType()); break; } - case ISD::OR: - case ISD::XOR: - case ISD::SIGN_EXTEND_INREG: - return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts, - *this, 0); case ISD::SRL: // Only look at single-use SRLs. if (!V.getNode()->hasOneUse()) @@ -2224,19 +2259,6 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, } break; } - case ISD::ANY_EXTEND: { - SDValue Src = V.getOperand(0); - unsigned SrcBitWidth = Src.getScalarValueSizeInBits(); - // Being conservative here - only peek through if we only demand bits in the - // non-extended source (even though the extended bits are technically - // undef). 
- if (DemandedBits.getActiveBits() > SrcBitWidth) - break; - APInt SrcDemandedBits = DemandedBits.trunc(SrcBitWidth); - if (SDValue DemandedSrc = GetDemandedBits(Src, SrcDemandedBits)) - return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc); - break; - } } return SDValue(); } @@ -2253,11 +2275,7 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { /// for bits that V cannot have. bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask, unsigned Depth) const { - EVT VT = V.getValueType(); - APInt DemandedElts = VT.isVector() - ? APInt::getAllOnesValue(VT.getVectorNumElements()) - : APInt(1, 1); - return MaskedValueIsZero(V, Mask, DemandedElts, Depth); + return Mask.isSubsetOf(computeKnownBits(V, Depth).Zero); } /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero in @@ -2276,15 +2294,42 @@ bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask, } /// isSplatValue - Return true if the vector V has the same value -/// across all DemandedElts. +/// across all DemandedElts. For scalable vectors it does not make +/// sense to specify which elements are demanded or undefined, therefore +/// they are simply ignored. bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts) { - if (!DemandedElts) - return false; // No demanded elts, better to assume we don't know anything. - EVT VT = V.getValueType(); assert(VT.isVector() && "Vector type expected"); + if (!VT.isScalableVector() && !DemandedElts) + return false; // No demanded elts, better to assume we don't know anything. + + // Deal with some common cases here that work for both fixed and scalable + // vector types. + switch (V.getOpcode()) { + case ISD::SPLAT_VECTOR: + return true; + case ISD::ADD: + case ISD::SUB: + case ISD::AND: { + APInt UndefLHS, UndefRHS; + SDValue LHS = V.getOperand(0); + SDValue RHS = V.getOperand(1); + if (isSplatValue(LHS, DemandedElts, UndefLHS) && + isSplatValue(RHS, DemandedElts, UndefRHS)) { + UndefElts = UndefLHS | UndefRHS; + return true; + } + break; + } + } + + // We don't support other cases than those above for scalable vectors at + // the moment. + if (VT.isScalableVector()) + return false; + unsigned NumElts = VT.getVectorNumElements(); assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch"); UndefElts = APInt::getNullValue(NumElts); @@ -2326,30 +2371,14 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, return true; } case ISD::EXTRACT_SUBVECTOR: { + // Offset the demanded elts by the subvector index. SDValue Src = V.getOperand(0); - ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(V.getOperand(1)); + uint64_t Idx = V.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { - // Offset the demanded elts by the subvector index. 
- uint64_t Idx = SubIdx->getZExtValue(); - APInt UndefSrcElts; - APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - if (isSplatValue(Src, DemandedSrc, UndefSrcElts)) { - UndefElts = UndefSrcElts.extractBits(NumElts, Idx); - return true; - } - } - break; - } - case ISD::ADD: - case ISD::SUB: - case ISD::AND: { - APInt UndefLHS, UndefRHS; - SDValue LHS = V.getOperand(0); - SDValue RHS = V.getOperand(1); - if (isSplatValue(LHS, DemandedElts, UndefLHS) && - isSplatValue(RHS, DemandedElts, UndefRHS)) { - UndefElts = UndefLHS | UndefRHS; + APInt UndefSrcElts; + APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts)) { + UndefElts = UndefSrcElts.extractBits(NumElts, Idx); return true; } break; @@ -2363,10 +2392,13 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) { EVT VT = V.getValueType(); assert(VT.isVector() && "Vector type expected"); - unsigned NumElts = VT.getVectorNumElements(); APInt UndefElts; - APInt DemandedElts = APInt::getAllOnesValue(NumElts); + APInt DemandedElts; + + // For now we don't support this with scalable vectors. + if (!VT.isScalableVector()) + DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); return isSplatValue(V, DemandedElts, UndefElts) && (AllowUndefs || !UndefElts); } @@ -2379,19 +2411,35 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) { switch (Opcode) { default: { APInt UndefElts; - APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); + APInt DemandedElts; + + if (!VT.isScalableVector()) + DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); + if (isSplatValue(V, DemandedElts, UndefElts)) { - // Handle case where all demanded elements are UNDEF. - if (DemandedElts.isSubsetOf(UndefElts)) { + if (VT.isScalableVector()) { + // DemandedElts and UndefElts are ignored for scalable vectors, since + // the only supported cases are SPLAT_VECTOR nodes. SplatIdx = 0; - return getUNDEF(VT); + } else { + // Handle case where all demanded elements are UNDEF. + if (DemandedElts.isSubsetOf(UndefElts)) { + SplatIdx = 0; + return getUNDEF(VT); + } + SplatIdx = (UndefElts & DemandedElts).countTrailingOnes(); } - SplatIdx = (UndefElts & DemandedElts).countTrailingOnes(); return V; } break; } + case ISD::SPLAT_VECTOR: + SplatIdx = 0; + return V; case ISD::VECTOR_SHUFFLE: { + if (VT.isScalableVector()) + return SDValue(); + // Check if this is a shuffle node doing a splat. // TODO - remove this and rely purely on SelectionDAG::isSplatValue, // getTargetVShiftNode currently struggles without the splat source. @@ -2413,14 +2461,16 @@ SDValue SelectionDAG::getSplatValue(SDValue V) { if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx)) return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), SrcVector.getValueType().getScalarType(), SrcVector, - getIntPtrConstant(SplatIdx, SDLoc(V))); + getVectorIdxConstant(SplatIdx, SDLoc(V))); return SDValue(); } -/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that -/// is less than the element bit-width of the shift node, return it. 
-static const APInt *getValidShiftAmountConstant(SDValue V, - const APInt &DemandedElts) { +const APInt * +SelectionDAG::getValidShiftAmountConstant(SDValue V, + const APInt &DemandedElts) const { + assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || + V.getOpcode() == ISD::SRA) && + "Unknown shift node"); unsigned BitWidth = V.getScalarValueSizeInBits(); if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1), DemandedElts)) { // Shifting more than the bitwidth is not valid. @@ -2431,10 +2481,13 @@ static const APInt *getValidShiftAmountConstant(SDValue V, return nullptr; } -/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less -/// than the element bit-width of the shift node, return the minimum value. -static const APInt * -getValidMinimumShiftAmountConstant(SDValue V, const APInt &DemandedElts) { +const APInt *SelectionDAG::getValidMinimumShiftAmountConstant( + SDValue V, const APInt &DemandedElts) const { + assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || + V.getOpcode() == ISD::SRA) && + "Unknown shift node"); + if (const APInt *ValidAmt = getValidShiftAmountConstant(V, DemandedElts)) + return ValidAmt; unsigned BitWidth = V.getScalarValueSizeInBits(); auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1)); if (!BV) @@ -2457,10 +2510,13 @@ getValidMinimumShiftAmountConstant(SDValue V, const APInt &DemandedElts) { return MinShAmt; } -/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less -/// than the element bit-width of the shift node, return the maximum value. -static const APInt * -getValidMaximumShiftAmountConstant(SDValue V, const APInt &DemandedElts) { +const APInt *SelectionDAG::getValidMaximumShiftAmountConstant( + SDValue V, const APInt &DemandedElts) const { + assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || + V.getOpcode() == ISD::SRA) && + "Unknown shift node"); + if (const APInt *ValidAmt = getValidShiftAmountConstant(V, DemandedElts)) + return ValidAmt; unsigned BitWidth = V.getScalarValueSizeInBits(); auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1)); if (!BV) @@ -2488,6 +2544,14 @@ getValidMaximumShiftAmountConstant(SDValue V, const APInt &DemandedElts) { /// every vector element. KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const { EVT VT = Op.getValueType(); + + // TOOD: Until we have a plan for how to represent demanded elements for + // scalable vectors, we can just bail out for now. + if (Op.getValueType().isScalableVector()) { + unsigned BitWidth = Op.getScalarValueSizeInBits(); + return KnownBits(BitWidth); + } + APInt DemandedElts = VT.isVector() ? APInt::getAllOnesValue(VT.getVectorNumElements()) : APInt(1, 1); @@ -2503,6 +2567,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, KnownBits Known(BitWidth); // Don't know anything. + // TOOD: Until we have a plan for how to represent demanded elements for + // scalable vectors, we can just bail out for now. + if (Op.getValueType().isScalableVector()) + return Known; + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { // We know all of the bits for a constant! Known.One = C->getAPIntValue(); @@ -2622,52 +2691,40 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; } case ISD::INSERT_SUBVECTOR: { - // If we know the element index, demand any elements from the subvector and - // the remainder from the src its inserted into, otherwise demand them all. 
+ // Demand any elements from the subvector and the remainder from the src its + // inserted into. SDValue Src = Op.getOperand(0); SDValue Sub = Op.getOperand(1); - ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + uint64_t Idx = Op.getConstantOperandVal(2); unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); - if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) { - Known.One.setAllBits(); - Known.Zero.setAllBits(); - uint64_t Idx = SubIdx->getZExtValue(); - APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); - if (!!DemandedSubElts) { - Known = computeKnownBits(Sub, DemandedSubElts, Depth + 1); - if (Known.isUnknown()) - break; // early-out. - } - APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts); - APInt DemandedSrcElts = DemandedElts & ~SubMask; - if (!!DemandedSrcElts) { - Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1); - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; - } - } else { - Known = computeKnownBits(Sub, Depth + 1); + APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); + APInt DemandedSrcElts = DemandedElts; + DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); + + Known.One.setAllBits(); + Known.Zero.setAllBits(); + if (!!DemandedSubElts) { + Known = computeKnownBits(Sub, DemandedSubElts, Depth + 1); if (Known.isUnknown()) break; // early-out. - Known2 = computeKnownBits(Src, Depth + 1); + } + if (!!DemandedSrcElts) { + Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1); Known.One &= Known2.One; Known.Zero &= Known2.Zero; } break; } case ISD::EXTRACT_SUBVECTOR: { - // If we know the element index, just demand that subvector elements, - // otherwise demand them all. + // Offset the demanded elts by the subvector index. SDValue Src = Op.getOperand(0); - ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + // Bail until we can represent demanded elements for scalable vectors. + if (Src.getValueType().isScalableVector()) + break; + uint64_t Idx = Op.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts); - if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { - // Offset the demanded elts by the subvector index. - uint64_t Idx = SubIdx->getZExtValue(); - DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - } - Known = computeKnownBits(Src, DemandedSrc, Depth + 1); + APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + Known = computeKnownBits(Src, DemandedSrcElts, Depth + 1); break; } case ISD::SCALAR_TO_VECTOR: { @@ -2753,35 +2810,23 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; } case ISD::AND: - // If either the LHS or the RHS are Zero, the result is zero. Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - // Output known-1 bits are only known if set in both the LHS & RHS. - Known.One &= Known2.One; - // Output known-0 are known to be clear if zero in either the LHS | RHS. - Known.Zero |= Known2.Zero; + Known &= Known2; break; case ISD::OR: Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - // Output known-0 bits are only known if clear in both the LHS & RHS. - Known.Zero &= Known2.Zero; - // Output known-1 are known to be set if set in either the LHS | RHS. 
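[Aside: both subvector cases above now assume a constant index and simply relocate the demanded-elements mask: INSERT_SUBVECTOR splits the demand between the inserted range and the rest of the source, while EXTRACT_SUBVECTOR shifts the demand up into source-lane space. A standalone sketch of that bookkeeping with 64-bit lane masks in place of APInt (helper names are made up for illustration):

#include <cassert>
#include <cstdint>

// INSERT_SUBVECTOR(Src, Sub, Idx): split the demanded lanes between the
// inserted subvector and the surrounding source vector.
void splitInsertDemand(uint64_t Demanded, unsigned NumSubElts, unsigned Idx,
                       uint64_t &DemandedSub, uint64_t &DemandedSrc) {
  uint64_t SubMask = NumSubElts >= 64 ? ~0ull : (1ull << NumSubElts) - 1;
  DemandedSub = (Demanded >> Idx) & SubMask;  // extractBits(NumSubElts, Idx)
  DemandedSrc = Demanded & ~(SubMask << Idx); // clear the inserted range
}

// EXTRACT_SUBVECTOR(Src, Idx): offset the demanded lanes by the subvector
// index, i.e. DemandedElts.zextOrSelf(NumSrcElts).shl(Idx).
uint64_t offsetExtractDemand(uint64_t Demanded, unsigned Idx) {
  return Demanded << Idx;
}

int main() {
  // 8-wide source, 4-wide subvector inserted at lane 2; lanes 1, 3 demanded.
  uint64_t DemandedSub, DemandedSrc;
  splitInsertDemand(/*Demanded=*/0b00001010, /*NumSubElts=*/4, /*Idx=*/2,
                    DemandedSub, DemandedSrc);
  assert(DemandedSub == 0b0010);     // result lane 3 is lane 1 of Sub
  assert(DemandedSrc == 0b00000010); // lane 1 still comes from Src
  assert(offsetExtractDemand(0b0011, 2) == 0b1100);
}
]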
- Known.One |= Known2.One; + Known |= Known2; break; - case ISD::XOR: { + case ISD::XOR: Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - // Output known-0 bits are known if clear or set in both the LHS & RHS. - APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); - // Output known-1 are known to be set if set in only one of the LHS, RHS. - Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); - Known.Zero = KnownZeroOut; + Known ^= Known2; break; - } case ISD::MUL: { Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); @@ -3075,12 +3120,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, EVT InVT = Op.getOperand(0).getValueType(); APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements()); Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); - Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */); + Known = Known.zext(BitWidth); break; } case ISD::ZERO_EXTEND: { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */); + Known = Known.zext(BitWidth); break; } case ISD::SIGN_EXTEND_VECTOR_INREG: { @@ -3099,9 +3144,16 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = Known.sext(BitWidth); break; } + case ISD::ANY_EXTEND_VECTOR_INREG: { + EVT InVT = Op.getOperand(0).getValueType(); + APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements()); + Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); + Known = Known.anyext(BitWidth); + break; + } case ISD::ANY_EXTEND: { - Known = computeKnownBits(Op.getOperand(0), Depth+1); - Known = Known.zext(BitWidth, false /* ExtendedBitsAreKnownZero */); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known = Known.anyext(BitWidth); break; } case ISD::TRUNCATE: { @@ -3117,6 +3169,15 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.One &= (~Known.Zero); break; } + case ISD::AssertAlign: { + unsigned LogOfAlign = Log2(cast<AssertAlignSDNode>(Op)->getAlign()); + assert(LogOfAlign != 0); + // If a node is guaranteed to be aligned, set low zero bits accordingly as + // well as clearing one bits. + Known.Zero.setLowBits(LogOfAlign); + Known.One.clearLowBits(LogOfAlign); + break; + } case ISD::FGETSIGN: // All bits are zero except the low bit. Known.Zero.setBitsFrom(1); @@ -3134,6 +3195,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, LLVM_FALLTHROUGH; case ISD::SUB: case ISD::SUBC: { + assert(Op.getResNo() == 0 && + "We only compute knownbits for the difference here."); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false, @@ -3245,57 +3309,51 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, EVT VecVT = InVec.getValueType(); const unsigned EltBitWidth = VecVT.getScalarSizeInBits(); const unsigned NumSrcElts = VecVT.getVectorNumElements(); + // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know // anything about the extended bits. 
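[Aside: the AND/OR/XOR cases above replace the hand-written mask algebra with KnownBits' bitwise operators. The underlying rules those operators implement, as a standalone sketch:

#include <cassert>
#include <cstdint>

struct Known { uint64_t Zero = 0, One = 0; };

// AND: a result bit is known 1 only if both inputs are known 1, and known 0
// if either input is known 0.
Known knownAnd(Known A, Known B) { return {A.Zero | B.Zero, A.One & B.One}; }
// OR: known 1 if either input is known 1; known 0 only if both are known 0.
Known knownOr(Known A, Known B) { return {A.Zero & B.Zero, A.One | B.One}; }
// XOR: known only where both inputs are known; 1 iff exactly one input is 1.
Known knownXor(Known A, Known B) {
  uint64_t Zero = (A.Zero & B.Zero) | (A.One & B.One);
  uint64_t One = (A.Zero & B.One) | (A.One & B.Zero);
  return {Zero, One};
}

int main() {
  Known A{/*Zero=*/0xF0, /*One=*/0x0F}; // low byte fully known: 0x0F
  Known B{/*Zero=*/0x0F, /*One=*/0xF0}; // low byte fully known: 0xF0
  assert(knownAnd(A, B).Zero == 0xFF);  // 0x0F & 0xF0 == 0x00
  assert(knownOr(A, B).One == 0xFF);    // 0x0F | 0xF0 == 0xFF
  assert(knownXor(A, B).One == 0xFF);   // 0x0F ^ 0xF0 == 0xFF
}
]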
if (BitWidth > EltBitWidth) Known = Known.trunc(EltBitWidth); - ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); - if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) { - // If we know the element index, just demand that vector element. - unsigned Idx = ConstEltNo->getZExtValue(); - APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx); - Known = computeKnownBits(InVec, DemandedElt, Depth + 1); - } else { - // Unknown element index, so ignore DemandedElts and demand them all. - Known = computeKnownBits(InVec, Depth + 1); - } + + // If we know the element index, just demand that vector element, else for + // an unknown element index, ignore DemandedElts and demand them all. + APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); + auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); + if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) + DemandedSrcElts = + APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue()); + + Known = computeKnownBits(InVec, DemandedSrcElts, Depth + 1); if (BitWidth > EltBitWidth) - Known = Known.zext(BitWidth, false /* => any extend */); + Known = Known.anyext(BitWidth); break; } case ISD::INSERT_VECTOR_ELT: { + // If we know the element index, split the demand between the + // source vector and the inserted element, otherwise assume we need + // the original demanded vector elements and the value. SDValue InVec = Op.getOperand(0); SDValue InVal = Op.getOperand(1); SDValue EltNo = Op.getOperand(2); - - ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo); + bool DemandedVal = true; + APInt DemandedVecElts = DemandedElts; + auto *CEltNo = dyn_cast<ConstantSDNode>(EltNo); if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) { - // If we know the element index, split the demand between the - // source vector and the inserted element. - Known.Zero = Known.One = APInt::getAllOnesValue(BitWidth); unsigned EltIdx = CEltNo->getZExtValue(); - - // If we demand the inserted element then add its common known bits. - if (DemandedElts[EltIdx]) { - Known2 = computeKnownBits(InVal, Depth + 1); - Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth()); - Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth()); - } - - // If we demand the source vector then add its common known bits, ensuring - // that we don't demand the inserted element. - APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx)); - if (!!VectorElts) { - Known2 = computeKnownBits(InVec, VectorElts, Depth + 1); - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; - } - } else { - // Unknown element index, so ignore DemandedElts and demand them all. 
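[Aside: the EXTRACT_VECTOR_ELT and INSERT_VECTOR_ELT rewrites above share one pattern: with a constant in-range index the demand narrows to a single lane (APInt::getOneBitSet), otherwise every source lane must be demanded. A small standalone sketch of that choice (the helper name is hypothetical):

#include <cassert>
#include <cstdint>
#include <optional>

// With a known in-range index, demand exactly one source lane; with an
// unknown or out-of-range index, conservatively demand every lane.
uint64_t demandedSrcLanes(std::optional<unsigned> Idx, unsigned NumSrcElts) {
  uint64_t All = NumSrcElts >= 64 ? ~0ull : (1ull << NumSrcElts) - 1;
  if (Idx && *Idx < NumSrcElts)
    return 1ull << *Idx;
  return All;
}

int main() {
  assert(demandedSrcLanes(3u, 8) == 0b00001000);
  assert(demandedSrcLanes(std::nullopt, 4) == 0b1111);
}
]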
- Known = computeKnownBits(InVec, Depth + 1); + DemandedVal = !!DemandedElts[EltIdx]; + DemandedVecElts.clearBit(EltIdx); + } + Known.One.setAllBits(); + Known.Zero.setAllBits(); + if (DemandedVal) { Known2 = computeKnownBits(InVal, Depth + 1); - Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth()); - Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth()); + Known.One &= Known2.One.zextOrTrunc(BitWidth); + Known.Zero &= Known2.Zero.zextOrTrunc(BitWidth); + } + if (!!DemandedVecElts) { + Known2 = computeKnownBits(InVec, DemandedVecElts, Depth + 1); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; } break; } @@ -3399,7 +3457,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } case ISD::FrameIndex: case ISD::TargetFrameIndex: - TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth); + TLI->computeKnownBitsForFrameIndex(cast<FrameIndexSDNode>(Op)->getIndex(), + Known, getMachineFunction()); break; default: @@ -3492,6 +3551,11 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { EVT VT = Op.getValueType(); + + // TODO: Assume we don't know anything for now. + if (VT.isScalableVector()) + return 1; + APInt DemandedElts = VT.isVector() ? APInt::getAllOnesValue(VT.getVectorNumElements()) : APInt(1, 1); @@ -3515,7 +3579,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, if (Depth >= MaxRecursionDepth) return 1; // Limit search depth. - if (!DemandedElts) + if (!DemandedElts || VT.isScalableVector()) return 1; // No demanded elts, better to assume we don't know anything. unsigned Opcode = Op.getOpcode(); @@ -3535,7 +3599,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, continue; SDValue SrcOp = Op.getOperand(i); - Tmp2 = ComputeNumSignBits(Op.getOperand(i), Depth + 1); + Tmp2 = ComputeNumSignBits(SrcOp, Depth + 1); // BUILD_VECTOR can implicitly truncate sources, we must handle this. if (SrcOp.getValueSizeInBits() != VTBits) { @@ -3646,23 +3710,17 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::SRA: Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); // SRA X, C -> adds C sign bits. - if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) - Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits); - else if (const APInt *ShAmt = - getValidMinimumShiftAmountConstant(Op, DemandedElts)) + if (const APInt *ShAmt = + getValidMinimumShiftAmountConstant(Op, DemandedElts)) Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits); return Tmp; case ISD::SHL: - if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) { + if (const APInt *ShAmt = + getValidMaximumShiftAmountConstant(Op, DemandedElts)) { // shl destroys sign bits, ensure it doesn't shift out all sign bits. Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); if (ShAmt->ult(Tmp)) return Tmp - ShAmt->getZExtValue(); - } else if (const APInt *ShAmt = - getValidMaximumShiftAmountConstant(Op, DemandedElts)) { - Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); - if (ShAmt->ult(Tmp)) - return Tmp - ShAmt->getZExtValue(); } break; case ISD::AND: @@ -3712,18 +3770,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, } // Fallback - just get the minimum number of sign bits of the operands. 
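[Aside: because the min/max shift-amount helpers now try the uniform-constant case first, the SRA and SHL cases above each need only one query: a minimum shift bound for SRA, which can only add sign bits, and a maximum for SHL, which can only destroy them. A standalone scalar illustration of the two rules (signBits is a toy helper, not an LLVM API):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Number of redundant sign bits in a 32-bit value (always at least 1).
unsigned signBits(int32_t V) {
  uint32_t U = (uint32_t)V;
  uint32_t Sign = U >> 31;
  unsigned N = 1;
  while (N < 32 && ((U >> (31 - N)) & 1) == Sign)
    ++N;
  return N;
}

int main() {
  int32_t X = -2; // 0b111...10 -> 31 sign bits
  assert(signBits(X) == 31);
  // SRA adds shift-amount sign bits, clamped to the bit width.
  assert(signBits(X >> 4) == std::min(31u + 4, 32u));
  // SHL removes sign bits, as long as it doesn't shift them all out.
  int32_t Shl = (int32_t)((uint32_t)X << 4); // X << 4 without signed-shift UB
  assert(signBits(Shl) == 31 - 4);
}
]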
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); if (Tmp == 1) return 1; // Early out. - Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); + Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); return std::min(Tmp, Tmp2); } case ISD::UMIN: case ISD::UMAX: - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); if (Tmp == 1) return 1; // Early out. - Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); + Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); return std::min(Tmp, Tmp2); case ISD::SADDO: case ISD::UADDO: @@ -3753,7 +3811,14 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, } case ISD::ROTL: case ISD::ROTR: - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + + // If we're rotating an 0/-1 value, then it stays an 0/-1 value. + if (Tmp == VTBits) + return VTBits; + + if (ConstantSDNode *C = + isConstOrConstSplat(Op.getOperand(1), DemandedElts)) { unsigned RotAmt = C->getAPIntValue().urem(VTBits); // Handle rotate right by N like a rotate left by 32-N. @@ -3762,7 +3827,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // If we aren't rotating out all of the known-in sign bits, return the // number that are left. This handles rotl(sext(x), 1) for example. - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); if (Tmp > (RotAmt + 1)) return (Tmp - RotAmt); } break; @@ -3770,13 +3834,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::ADDC: // Add can have at most one carry bit. Thus we know that the output // is, at worst, one more bit than the inputs. - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); - if (Tmp == 1) return 1; // Early out. + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + if (Tmp == 1) return 1; // Early out. // Special case decrementing a value (ADD X, -1): - if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + if (ConstantSDNode *CRHS = + isConstOrConstSplat(Op.getOperand(1), DemandedElts)) if (CRHS->isAllOnesValue()) { - KnownBits Known = computeKnownBits(Op.getOperand(0), Depth+1); + KnownBits Known = + computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. @@ -3789,18 +3855,19 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return Tmp; } - Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); - if (Tmp2 == 1) return 1; - return std::min(Tmp, Tmp2)-1; - + Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); + if (Tmp2 == 1) return 1; // Early out. + return std::min(Tmp, Tmp2) - 1; case ISD::SUB: - Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); - if (Tmp2 == 1) return 1; + Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); + if (Tmp2 == 1) return 1; // Early out. // Handle NEG. 
- if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) + if (ConstantSDNode *CLHS = + isConstOrConstSplat(Op.getOperand(0), DemandedElts)) if (CLHS->isNullValue()) { - KnownBits Known = computeKnownBits(Op.getOperand(1), Depth+1); + KnownBits Known = + computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. if ((Known.Zero | 1).isAllOnesValue()) @@ -3816,9 +3883,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // Sub can have at most one carry bit. Thus we know that the output // is, at worst, one more bit than the inputs. - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); - if (Tmp == 1) return 1; // Early out. - return std::min(Tmp, Tmp2)-1; + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + if (Tmp == 1) return 1; // Early out. + return std::min(Tmp, Tmp2) - 1; case ISD::MUL: { // The output of the Mul can be at most twice the valid bits in the inputs. unsigned SignBitsOp0 = ComputeNumSignBits(Op.getOperand(0), Depth + 1); @@ -3853,39 +3920,32 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0); } case ISD::INSERT_VECTOR_ELT: { + // If we know the element index, split the demand between the + // source vector and the inserted element, otherwise assume we need + // the original demanded vector elements and the value. SDValue InVec = Op.getOperand(0); SDValue InVal = Op.getOperand(1); SDValue EltNo = Op.getOperand(2); - - ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo); + bool DemandedVal = true; + APInt DemandedVecElts = DemandedElts; + auto *CEltNo = dyn_cast<ConstantSDNode>(EltNo); if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) { - // If we know the element index, split the demand between the - // source vector and the inserted element. unsigned EltIdx = CEltNo->getZExtValue(); - - // If we demand the inserted element then get its sign bits. - Tmp = std::numeric_limits<unsigned>::max(); - if (DemandedElts[EltIdx]) { - // TODO - handle implicit truncation of inserted elements. - if (InVal.getScalarValueSizeInBits() != VTBits) - break; - Tmp = ComputeNumSignBits(InVal, Depth + 1); - } - - // If we demand the source vector then get its sign bits, and determine - // the minimum. - APInt VectorElts = DemandedElts; - VectorElts.clearBit(EltIdx); - if (!!VectorElts) { - Tmp2 = ComputeNumSignBits(InVec, VectorElts, Depth + 1); - Tmp = std::min(Tmp, Tmp2); - } - } else { - // Unknown element index, so ignore DemandedElts and demand them all. - Tmp = ComputeNumSignBits(InVec, Depth + 1); + DemandedVal = !!DemandedElts[EltIdx]; + DemandedVecElts.clearBit(EltIdx); + } + Tmp = std::numeric_limits<unsigned>::max(); + if (DemandedVal) { + // TODO - handle implicit truncation of inserted elements. + if (InVal.getScalarValueSizeInBits() != VTBits) + break; Tmp2 = ComputeNumSignBits(InVal, Depth + 1); Tmp = std::min(Tmp, Tmp2); } + if (!!DemandedVecElts) { + Tmp2 = ComputeNumSignBits(InVec, DemandedVecElts, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); return Tmp; } @@ -3906,7 +3966,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // If we know the element index, just demand that vector element, else for // an unknown element index, ignore DemandedElts and demand them all. 
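[Aside: the decrement (ADD x, -1) and negation (SUB 0, x) special cases above both hinge on the test (Known.Zero | 1).isAllOnesValue(): if every bit above bit 0 is known zero, the value is 0 or 1, so x - 1 and 0 - x are 0 or -1, meaning every output bit is a sign bit. A standalone check of that reasoning:

#include <cassert>
#include <cstdint>

struct Known { uint64_t Zero = 0, One = 0; };

// True if the value is provably 0 or 1: all bits above bit 0 known zero.
bool isBool(const Known &K) { return (K.Zero | 1) == ~0ull; }

int main() {
  Known SetCC{~1ull, 0}; // e.g. a SETCC result: bits 63..1 known zero
  assert(isBool(SetCC));
  for (int64_t X : {0, 1}) {
    assert(X - 1 == 0 || X - 1 == -1); // decrement of a bool is 0 or -1
    assert(0 - X == 0 || 0 - X == -1); // negation of a bool is 0 or -1
  }
  Known Unknown{};
  assert(!isBool(Unknown));
}
]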
APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); - ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); + auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue()); @@ -3914,18 +3974,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1); } case ISD::EXTRACT_SUBVECTOR: { - // If we know the element index, just demand that subvector elements, - // otherwise demand them all. + // Offset the demanded elts by the subvector index. SDValue Src = Op.getOperand(0); - ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + // Bail until we can represent demanded elements for scalable vectors. + if (Src.getValueType().isScalableVector()) + break; + uint64_t Idx = Op.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts); - if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { - // Offset the demanded elts by the subvector index. - uint64_t Idx = SubIdx->getZExtValue(); - DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - } - return ComputeNumSignBits(Src, DemandedSrc, Depth + 1); + APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + return ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1); } case ISD::CONCAT_VECTORS: { // Determine the minimum number of sign bits across all demanded @@ -3946,35 +4003,26 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return Tmp; } case ISD::INSERT_SUBVECTOR: { - // If we know the element index, demand any elements from the subvector and - // the remainder from the src its inserted into, otherwise demand them all. + // Demand any elements from the subvector and the remainder from the src its + // inserted into. SDValue Src = Op.getOperand(0); SDValue Sub = Op.getOperand(1); - auto *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + uint64_t Idx = Op.getConstantOperandVal(2); unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); - if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) { - Tmp = std::numeric_limits<unsigned>::max(); - uint64_t Idx = SubIdx->getZExtValue(); - APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); - if (!!DemandedSubElts) { - Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1); - if (Tmp == 1) return 1; // early-out - } - APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts); - APInt DemandedSrcElts = DemandedElts & ~SubMask; - if (!!DemandedSrcElts) { - Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1); - Tmp = std::min(Tmp, Tmp2); - } - assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); - return Tmp; - } + APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); + APInt DemandedSrcElts = DemandedElts; + DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); - // Not able to determine the index so just assume worst case. 
- Tmp = ComputeNumSignBits(Sub, Depth + 1); - if (Tmp == 1) return 1; // early-out - Tmp2 = ComputeNumSignBits(Src, Depth + 1); - Tmp = std::min(Tmp, Tmp2); + Tmp = std::numeric_limits<unsigned>::max(); + if (!!DemandedSubElts) { + Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1); + if (Tmp == 1) + return 1; // early-out + } + if (!!DemandedSrcElts) { + Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); return Tmp; } @@ -4052,13 +4100,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return FirstAnswer; } - // Okay, we know that the sign bit in Mask is set. Use CLZ to determine + // Okay, we know that the sign bit in Mask is set. Use CLO to determine // the number of identical bits in the top of the input value. - Mask = ~Mask; Mask <<= Mask.getBitWidth()-VTBits; - // Return # leading zeros. We use 'min' here in case Val was zero before - // shifting. We don't want to return '64' as for an i32 "0". - return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros())); + return std::max(FirstAnswer, Mask.countLeadingOnes()); } bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { @@ -4109,6 +4154,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const case ISD::FFLOOR: case ISD::FCEIL: case ISD::FROUND: + case ISD::FROUNDEVEN: case ISD::FRINT: case ISD::FNEARBYINT: { if (SNaN) @@ -4249,6 +4295,8 @@ static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT, SelectionDAG &DAG) { int NumOps = Ops.size(); assert(NumOps != 0 && "Can't build an empty vector!"); + assert(!VT.isScalableVector() && + "BUILD_VECTOR cannot be used with scalable types"); assert(VT.getVectorNumElements() == (unsigned)NumOps && "Incorrect element count in BUILD_VECTOR!"); @@ -4287,8 +4335,8 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, return Ops[0].getValueType() == Op.getValueType(); }) && "Concatenation of vectors with inconsistent value types!"); - assert((Ops.size() * Ops[0].getValueType().getVectorNumElements()) == - VT.getVectorNumElements() && + assert((Ops[0].getValueType().getVectorElementCount() * Ops.size()) == + VT.getVectorElementCount() && "Incorrect element count in vector concatenation!"); if (Ops.size() == 1) @@ -4305,11 +4353,10 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, bool IsIdentity = true; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { SDValue Op = Ops[i]; - unsigned IdentityIndex = i * Op.getValueType().getVectorNumElements(); + unsigned IdentityIndex = i * Op.getValueType().getVectorMinNumElements(); if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR || Op.getOperand(0).getValueType() != VT || (IdentitySrc && Op.getOperand(0) != IdentitySrc) || - !isa<ConstantSDNode>(Op.getOperand(1)) || Op.getConstantOperandVal(1) != IdentityIndex) { IsIdentity = false; break; @@ -4323,6 +4370,11 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, return IdentitySrc; } + // The code below this point is only designed to work for fixed width + // vectors, so we bail out for now. + if (VT.isScalableVector()) + return SDValue(); + // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be // simplified to one big BUILD_VECTOR. // FIXME: Add support for SCALAR_TO_VECTOR as well. @@ -4508,7 +4560,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // FIXME need to be more flexible about rounding mode. 
(void)V.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored); - return getConstant(V.bitcastToAPInt(), DL, VT); + return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); } } } @@ -4553,6 +4605,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, unsigned OpOpcode = Operand.getNode()->getOpcode(); switch (Opcode) { + case ISD::FREEZE: + assert(VT == Operand.getValueType() && "Unexpected VT!"); + break; case ISD::TokenFactor: case ISD::MERGE_VALUES: case ISD::CONCAT_VECTORS: @@ -4597,8 +4652,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "type is vector!"); if (Operand.getValueType() == VT) return Operand; // noop extension assert((!VT.isVector() || - VT.getVectorNumElements() == - Operand.getValueType().getVectorNumElements()) && + VT.getVectorElementCount() == + Operand.getValueType().getVectorElementCount()) && "Vector element count mismatch!"); assert(Operand.getValueType().bitsLT(VT) && "Invalid sext node, dst < src!"); @@ -4616,8 +4671,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "type is vector!"); if (Operand.getValueType() == VT) return Operand; // noop extension assert((!VT.isVector() || - VT.getVectorNumElements() == - Operand.getValueType().getVectorNumElements()) && + VT.getVectorElementCount() == + Operand.getValueType().getVectorElementCount()) && "Vector element count mismatch!"); assert(Operand.getValueType().bitsLT(VT) && "Invalid zext node, dst < src!"); @@ -4635,8 +4690,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "type is vector!"); if (Operand.getValueType() == VT) return Operand; // noop extension assert((!VT.isVector() || - VT.getVectorNumElements() == - Operand.getValueType().getVectorNumElements()) && + VT.getVectorElementCount() == + Operand.getValueType().getVectorElementCount()) && "Vector element count mismatch!"); assert(Operand.getValueType().bitsLT(VT) && "Invalid anyext node, dst < src!"); @@ -4665,8 +4720,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "type is vector!"); if (Operand.getValueType() == VT) return Operand; // noop truncate assert((!VT.isVector() || - VT.getVectorNumElements() == - Operand.getValueType().getVectorNumElements()) && + VT.getVectorElementCount() == + Operand.getValueType().getVectorElementCount()) && "Vector element count mismatch!"); assert(Operand.getValueType().bitsGT(VT) && "Invalid truncate node, src < dst!"); @@ -4753,6 +4808,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X) return getNode(ISD::FABS, DL, VT, Operand.getOperand(0)); break; + case ISD::VSCALE: + assert(VT == Operand.getValueType() && "Unexpected VT!"); + break; } SDNode *N; @@ -4824,17 +4882,6 @@ static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1, return llvm::None; } -SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, - EVT VT, const ConstantSDNode *C1, - const ConstantSDNode *C2) { - if (C1->isOpaque() || C2->isOpaque()) - return SDValue(); - if (Optional<APInt> Folded = - FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue())) - return getConstant(Folded.getValue(), DL, VT); - return SDValue(); -} - SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT, const GlobalAddressSDNode *GA, const SDNode *N2) { @@ -4881,20 +4928,37 @@ bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) { } SDValue SelectionDAG::FoldConstantArithmetic(unsigned 
Opcode, const SDLoc &DL, - EVT VT, SDNode *N1, SDNode *N2) { + EVT VT, ArrayRef<SDValue> Ops) { // If the opcode is a target-specific ISD node, there's nothing we can // do here and the operand rules may not line up with the below, so // bail early. if (Opcode >= ISD::BUILTIN_OP_END) return SDValue(); - if (isUndef(Opcode, {SDValue(N1, 0), SDValue(N2, 0)})) + // For now, the array Ops should only contain two values. + // This enforcement will be removed once this function is merged with + // FoldConstantVectorArithmetic + if (Ops.size() != 2) + return SDValue(); + + if (isUndef(Opcode, Ops)) return getUNDEF(VT); + SDNode *N1 = Ops[0].getNode(); + SDNode *N2 = Ops[1].getNode(); + // Handle the case of two scalars. if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) { if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) { - SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, C1, C2); + if (C1->isOpaque() || C2->isOpaque()) + return SDValue(); + + Optional<APInt> FoldAttempt = + FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue()); + if (!FoldAttempt) + return SDValue(); + + SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT); assert((!Folded || !VT.isVector()) && "Can't fold vectors ops with scalar operands"); return Folded; @@ -4908,8 +4972,14 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2)) return FoldSymbolOffset(Opcode, VT, GA, N1); - // For vectors, extract each constant element and fold them individually. - // Either input may be an undef value. + // TODO: All the folds below are performed lane-by-lane and assume a fixed + // vector width, however we should be able to do constant folds involving + // splat vector nodes too. + if (VT.isScalableVector()) + return SDValue(); + + // For fixed width vectors, extract each constant element and fold them + // individually. Either input may be an undef value. auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); if (!BV1 && !N1->isUndef()) return SDValue(); @@ -4985,6 +5055,13 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, if (!VT.isVector()) return SDValue(); + // TODO: All the folds below are performed lane-by-lane and assume a fixed + // vector width, however we should be able to do constant folds involving + // splat vector nodes too. + if (VT.isScalableVector()) + return SDValue(); + + // From this point onwards all vectors are assumed to be fixed width. unsigned NumElts = VT.getVectorNumElements(); auto IsScalarOrSameVectorSize = [&](const SDValue &Op) { @@ -5107,8 +5184,13 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL, } switch (Opcode) { - case ISD::FADD: case ISD::FSUB: + // -0.0 - undef --> undef (consistent with "fneg undef") + if (N1CFP && N1CFP->getValueAPF().isNegZero() && N2.isUndef()) + return getUNDEF(VT); + LLVM_FALLTHROUGH; + + case ISD::FADD: case ISD::FMUL: case ISD::FDIV: case ISD::FREM: @@ -5122,6 +5204,34 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL, return SDValue(); } +SDValue SelectionDAG::getAssertAlign(const SDLoc &DL, SDValue Val, Align A) { + assert(Val.getValueType().isInteger() && "Invalid AssertAlign!"); + + // There's no need to assert on a byte-aligned pointer. All pointers are at + // least byte aligned. 
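[Aside: the FoldConstantArithmetic rewrite above funnels the two-scalar case through FoldValue, which yields an Optional so that undefined folds (division by zero, opaque constants) simply decline to build a node. A hedged standalone sketch of that shape, using std::optional in place of llvm::Optional and a made-up opcode enum:

#include <cassert>
#include <cstdint>
#include <optional>

enum class Opc { Add, UDiv };

// Returns std::nullopt when the fold is not defined, mirroring how FoldValue
// returns llvm::None instead of asserting.
std::optional<uint64_t> foldValue(Opc Op, uint64_t C1, uint64_t C2) {
  switch (Op) {
  case Opc::Add:
    return C1 + C2;
  case Opc::UDiv:
    return C2 ? std::optional<uint64_t>(C1 / C2) : std::nullopt;
  }
  return std::nullopt;
}

int main() {
  assert(foldValue(Opc::Add, 2, 3) == 5u);
  assert(!foldValue(Opc::UDiv, 1, 0)); // no constant-folded node is built
}
]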
+ if (A == Align(1)) + return Val; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::AssertAlign, getVTList(Val.getValueType()), {Val}); + ID.AddInteger(A.value()); + + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) + return SDValue(E, 0); + + auto *N = newSDNode<AssertAlignSDNode>(DL.getIROrder(), DL.getDebugLoc(), + Val.getValueType(), A); + createOperands(N, {Val}); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, SDValue N2, const SDNodeFlags Flags) { ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); @@ -5186,11 +5296,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (N2C && N2C->isNullValue()) return N1; break; + case ISD::MUL: + assert(VT.isInteger() && "This operator does not apply to FP types!"); + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) { + APInt MulImm = cast<ConstantSDNode>(N1->getOperand(0))->getAPIntValue(); + APInt N2CImm = N2C->getAPIntValue(); + return getVScale(DL, VT, MulImm * N2CImm); + } + break; case ISD::UDIV: case ISD::UREM: case ISD::MULHU: case ISD::MULHS: - case ISD::MUL: case ISD::SDIV: case ISD::SREM: case ISD::SMIN: @@ -5213,7 +5332,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); - if (SDValue V = simplifyFPBinop(Opcode, N1, N2)) + if (SDValue V = simplifyFPBinop(Opcode, N1, N2, Flags)) return V; break; case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match. @@ -5223,6 +5342,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "Invalid FCOPYSIGN!"); break; case ISD::SHL: + if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) { + APInt MulImm = cast<ConstantSDNode>(N1->getOperand(0))->getAPIntValue(); + APInt ShiftImm = N2C->getAPIntValue(); + return getVScale(DL, VT, MulImm << ShiftImm); + } + LLVM_FALLTHROUGH; case ISD::SRA: case ISD::SRL: if (SDValue V = simplifyShift(N1, N2)) @@ -5240,7 +5365,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // amounts. This catches things like trying to shift an i1024 value by an // i8, which is easy to fall into in generic code that uses // TLI.getShiftAmount(). 
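[Aside: the new MUL and SHL cases above fold arithmetic on VSCALE into the node's immediate: (vscale * C1) * C2 becomes vscale * (C1 * C2), and (vscale * C1) << C2 becomes vscale * (C1 << C2), guarded by the no-signed-wrap flag. The algebraic identities themselves, checked in a standalone sketch where the runtime vscale is modelled as a parameter:

#include <cassert>
#include <cstdint>

uint64_t vscaleTimes(uint64_t VScale, uint64_t MulImm) {
  return VScale * MulImm;
}

int main() {
  for (uint64_t VScale : {1u, 2u, 16u}) {
    uint64_t C1 = 4, C2 = 3;
    // (vscale * C1) * C2 == vscale * (C1 * C2)
    assert(vscaleTimes(VScale, C1) * C2 == vscaleTimes(VScale, C1 * C2));
    // (vscale * C1) << C2 == vscale * (C1 << C2)
    assert((vscaleTimes(VScale, C1) << C2) == vscaleTimes(VScale, C1 << C2));
  }
}
]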
- assert(N2.getValueSizeInBits() >= Log2_32_Ceil(N1.getValueSizeInBits()) && + assert(N2.getValueType().getScalarSizeInBits().getFixedSize() >= + Log2_32_Ceil(VT.getScalarSizeInBits().getFixedSize()) && "Invalid use of small shift amount with oversized value!"); // Always fold shifts of i1 values so the code generator doesn't need to @@ -5281,7 +5407,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "SIGN_EXTEND_INREG type should be vector iff the operand " "type is vector!"); assert((!EVT.isVector() || - EVT.getVectorNumElements() == VT.getVectorNumElements()) && + EVT.getVectorElementCount() == VT.getVectorElementCount()) && "Vector element counts must match in SIGN_EXTEND_INREG"); assert(EVT.bitsLE(VT) && "Not extending!"); if (EVT == VT) return N1; // Not actually extending @@ -5323,27 +5449,36 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (N1.isUndef() || N2.isUndef()) return getUNDEF(VT); - // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF - if (N2C && N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements())) + // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF for fixed length + // vectors. For scalable vectors we will provide appropriate support for + // dealing with arbitrary indices. + if (N2C && N1.getValueType().isFixedLengthVector() && + N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements())) return getUNDEF(VT); // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is - // expanding copies of large vectors from registers. - if (N2C && - N1.getOpcode() == ISD::CONCAT_VECTORS && - N1.getNumOperands() > 0) { + // expanding copies of large vectors from registers. This only works for + // fixed length vectors, since we need to know the exact number of + // elements. + if (N2C && N1.getOperand(0).getValueType().isFixedLengthVector() && + N1.getOpcode() == ISD::CONCAT_VECTORS && N1.getNumOperands() > 0) { unsigned Factor = N1.getOperand(0).getValueType().getVectorNumElements(); return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(N2C->getZExtValue() / Factor), - getConstant(N2C->getZExtValue() % Factor, DL, - N2.getValueType())); + getVectorIdxConstant(N2C->getZExtValue() % Factor, DL)); } - // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is - // expanding large vector constants. - if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) { - SDValue Elt = N1.getOperand(N2C->getZExtValue()); + // EXTRACT_VECTOR_ELT of BUILD_VECTOR or SPLAT_VECTOR is often formed while + // lowering is expanding large vector constants. + if (N2C && (N1.getOpcode() == ISD::BUILD_VECTOR || + N1.getOpcode() == ISD::SPLAT_VECTOR)) { + assert((N1.getOpcode() != ISD::BUILD_VECTOR || + N1.getValueType().isFixedLengthVector()) && + "BUILD_VECTOR used for scalable vectors"); + unsigned Index = + N1.getOpcode() == ISD::BUILD_VECTOR ? N2C->getZExtValue() : 0; + SDValue Elt = N1.getOperand(Index); if (VT != Elt.getValueType()) // If the vector element type is not legal, the BUILD_VECTOR operands @@ -5377,8 +5512,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // EXTRACT_VECTOR_ELT of v1iX EXTRACT_SUBVECTOR could be formed // when vector types are scalarized and v1iX is legal. - // vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx) + // vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx). 
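[Aside: the EXTRACT_VECTOR_ELT-of-CONCAT_VECTORS fold above relies on simple index arithmetic: with parts of Factor lanes each, result lane Idx lives in operand Idx / Factor at lane Idx % Factor, which is why it only works for fixed-length vectors where Factor is known. A standalone sketch of the mapping:

#include <cassert>
#include <vector>

// CONCAT_VECTORS of equal-width parts: result lane Idx lives in part
// Idx / Factor at lane Idx % Factor.
template <typename T>
T extractFromConcat(const std::vector<std::vector<T>> &Parts, unsigned Idx) {
  unsigned Factor = (unsigned)Parts[0].size();
  return Parts[Idx / Factor][Idx % Factor];
}

int main() {
  std::vector<std::vector<int>> Parts = {{0, 1, 2, 3}, {4, 5, 6, 7}};
  assert(extractFromConcat(Parts, 5) == 5); // part 1, lane 1
  assert(extractFromConcat(Parts, 2) == 2); // part 0, lane 2
}
]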
+ // Here we are completely ignoring the extract element index (N2), + // which is fine for fixed width vectors, since any index other than 0 + // is undefined anyway. However, this cannot be ignored for scalable + // vectors - in theory we could support this, but we don't want to do this + // without a profitability check. if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && + N1.getValueType().isFixedLengthVector() && N1.getValueType().getVectorNumElements() == 1) { return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N1.getOperand(1)); @@ -5406,50 +5547,48 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } break; case ISD::EXTRACT_SUBVECTOR: - if (VT.isSimple() && N1.getValueType().isSimple()) { - assert(VT.isVector() && N1.getValueType().isVector() && - "Extract subvector VTs must be a vectors!"); - assert(VT.getVectorElementType() == - N1.getValueType().getVectorElementType() && - "Extract subvector VTs must have the same element type!"); - assert(VT.getSimpleVT() <= N1.getSimpleValueType() && - "Extract subvector must be from larger vector to smaller vector!"); - - if (N2C) { - assert((VT.getVectorNumElements() + N2C->getZExtValue() - <= N1.getValueType().getVectorNumElements()) - && "Extract subvector overflow!"); - } - - // Trivial extraction. - if (VT.getSimpleVT() == N1.getSimpleValueType()) - return N1; - - // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF. - if (N1.isUndef()) - return getUNDEF(VT); + EVT N1VT = N1.getValueType(); + assert(VT.isVector() && N1VT.isVector() && + "Extract subvector VTs must be vectors!"); + assert(VT.getVectorElementType() == N1VT.getVectorElementType() && + "Extract subvector VTs must have the same element type!"); + assert((VT.isFixedLengthVector() || N1VT.isScalableVector()) && + "Cannot extract a scalable vector from a fixed length vector!"); + assert((VT.isScalableVector() != N1VT.isScalableVector() || + VT.getVectorMinNumElements() <= N1VT.getVectorMinNumElements()) && + "Extract subvector must be from larger vector to smaller vector!"); + assert(N2C && "Extract subvector index must be a constant"); + assert((VT.isScalableVector() != N1VT.isScalableVector() || + (VT.getVectorMinNumElements() + N2C->getZExtValue()) <= + N1VT.getVectorMinNumElements()) && + "Extract subvector overflow!"); + + // Trivial extraction. + if (VT == N1VT) + return N1; - // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of - // the concat have the same type as the extract. - if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS && - N1.getNumOperands() > 0 && - VT == N1.getOperand(0).getValueType()) { - unsigned Factor = VT.getVectorNumElements(); - return N1.getOperand(N2C->getZExtValue() / Factor); - } + // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF. + if (N1.isUndef()) + return getUNDEF(VT); - // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created - // during shuffle legalization. - if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) && - VT == N1.getOperand(1).getValueType()) - return N1.getOperand(1); + // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of + // the concat have the same type as the extract. + if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS && + N1.getNumOperands() > 0 && VT == N1.getOperand(0).getValueType()) { + unsigned Factor = VT.getVectorMinNumElements(); + return N1.getOperand(N2C->getZExtValue() / Factor); } + + // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created + // during shuffle legalization. 
+ if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) && + VT == N1.getOperand(1).getValueType()) + return N1.getOperand(1); break; } // Perform trivial constant folding. - if (SDValue SV = - FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode())) + if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2})) return SV; if (SDValue V = foldConstantFPMath(Opcode, DL, VT, N1, N2)) @@ -5571,8 +5710,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "SETCC operands must have the same type!"); assert(VT.isVector() == N1.getValueType().isVector() && "SETCC type should be vector iff the operand type is vector!"); - assert((!VT.isVector() || - VT.getVectorNumElements() == N1.getValueType().getVectorNumElements()) && + assert((!VT.isVector() || VT.getVectorElementCount() == + N1.getValueType().getVectorElementCount()) && "SETCC vector element counts must match!"); // Use FoldSetCC to simplify SETCC's. if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL)) @@ -5594,8 +5733,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, llvm_unreachable("should use getVectorShuffle constructor!"); case ISD::INSERT_VECTOR_ELT: { ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3); - // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF - if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements()) + // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF, except + // for scalable vectors where we will generate appropriate code to + // deal with out-of-bounds cases correctly. + if (N3C && N1.getValueType().isFixedLengthVector() && + N3C->getZExtValue() >= N1.getValueType().getVectorNumElements()) return getUNDEF(VT); // Undefined index can be assumed out-of-bounds, so that's UNDEF too. @@ -5612,33 +5754,34 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // Inserting undef into undef is still undef. if (N1.isUndef() && N2.isUndef()) return getUNDEF(VT); - SDValue Index = N3; - if (VT.isSimple() && N1.getValueType().isSimple() - && N2.getValueType().isSimple()) { - assert(VT.isVector() && N1.getValueType().isVector() && - N2.getValueType().isVector() && - "Insert subvector VTs must be a vectors"); - assert(VT == N1.getValueType() && - "Dest and insert subvector source types must match!"); - assert(N2.getSimpleValueType() <= N1.getSimpleValueType() && - "Insert subvector must be from smaller vector to larger vector!"); - if (isa<ConstantSDNode>(Index)) { - assert((N2.getValueType().getVectorNumElements() + - cast<ConstantSDNode>(Index)->getZExtValue() - <= VT.getVectorNumElements()) - && "Insert subvector overflow!"); - } - // Trivial insertion. 
- if (VT.getSimpleVT() == N2.getSimpleValueType()) - return N2; + EVT N2VT = N2.getValueType(); + assert(VT == N1.getValueType() && + "Dest and insert subvector source types must match!"); + assert(VT.isVector() && N2VT.isVector() && + "Insert subvector VTs must be vectors!"); + assert((VT.isScalableVector() || N2VT.isFixedLengthVector()) && + "Cannot insert a scalable vector into a fixed length vector!"); + assert((VT.isScalableVector() != N2VT.isScalableVector() || + VT.getVectorMinNumElements() >= N2VT.getVectorMinNumElements()) && + "Insert subvector must be from smaller vector to larger vector!"); + assert(isa<ConstantSDNode>(N3) && + "Insert subvector index must be constant"); + assert((VT.isScalableVector() != N2VT.isScalableVector() || + (N2VT.getVectorMinNumElements() + + cast<ConstantSDNode>(N3)->getZExtValue()) <= + VT.getVectorMinNumElements()) && + "Insert subvector overflow!"); + + // Trivial insertion. + if (VT == N2VT) + return N2; - // If this is an insert of an extracted vector into an undef vector, we - // can just use the input to the extract. - if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR && - N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT) - return N2.getOperand(0); - } + // If this is an insert of an extracted vector into an undef vector, we + // can just use the input to the extract. + if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR && + N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT) + return N2.getOperand(0); break; } case ISD::BITCAST: @@ -5867,7 +6010,7 @@ static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - uint64_t Size, unsigned Alignment, + uint64_t Size, Align Alignment, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { @@ -5891,37 +6034,38 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; - unsigned SrcAlign = DAG.InferPtrAlignment(Src); - if (Alignment > SrcAlign) + MaybeAlign SrcAlign = DAG.InferPtrAlign(Src); + if (!SrcAlign || Alignment > *SrcAlign) SrcAlign = Alignment; + assert(SrcAlign && "SrcAlign must be set"); ConstantDataArraySlice Slice; bool CopyFromConstant = isMemSrcFromConstant(Src, Slice); bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); - + const MemOp Op = isZeroConstant + ? MemOp::Set(Size, DstAlignCanChange, Alignment, + /*IsZeroMemset*/ true, isVol) + : MemOp::Copy(Size, DstAlignCanChange, Alignment, + *SrcAlign, isVol, CopyFromConstant); if (!TLI.findOptimalMemOpLowering( - MemOps, Limit, Size, (DstAlignCanChange ? 0 : Alignment), - (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false, - /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant, - /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), + MemOps, Limit, Op, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes())) return SDValue(); if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(C); - unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty); + Align NewAlign = DL.getABITypeAlign(Ty); // Don't promote to an alignment that would require dynamic stack // realignment. 
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) - while (NewAlign > Alignment && - DL.exceedsNaturalStackAlignment(Align(NewAlign))) - NewAlign /= 2; + while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) + NewAlign = NewAlign / 2; if (NewAlign > Alignment) { // Give the stack frame object a larger alignment if needed. - if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) + if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) MFI.setObjectAlignment(FI->getIndex(), NewAlign); Alignment = NewAlign; } @@ -5968,7 +6112,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (Value.getNode()) { Store = DAG.getStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags); + DstPtrInfo.getWithOffset(DstOff), Alignment.value(), MMOFlags); OutChains.push_back(Store); } } @@ -5991,12 +6135,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), SrcPtrInfo.getWithOffset(SrcOff), VT, - MinAlign(SrcAlign, SrcOff), SrcMMOFlags); + commonAlignment(*SrcAlign, SrcOff).value(), + SrcMMOFlags); OutLoadChains.push_back(Value.getValue(1)); Store = DAG.getTruncStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags); + DstPtrInfo.getWithOffset(DstOff), VT, Alignment.value(), MMOFlags); OutStoreChains.push_back(Store); } SrcOff += VTSize; @@ -6052,7 +6197,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - uint64_t Size, unsigned Align, + uint64_t Size, Align Alignment, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { @@ -6074,29 +6219,27 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; - unsigned SrcAlign = DAG.InferPtrAlignment(Src); - if (Align > SrcAlign) - SrcAlign = Align; + MaybeAlign SrcAlign = DAG.InferPtrAlign(Src); + if (!SrcAlign || Alignment > *SrcAlign) + SrcAlign = Alignment; + assert(SrcAlign && "SrcAlign must be set"); unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); - // FIXME: `AllowOverlap` should really be `!isVol` but there is a bug in - // findOptimalMemOpLowering. Meanwhile, setting it to `false` produces the - // correct code. - bool AllowOverlap = false; if (!TLI.findOptimalMemOpLowering( - MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), SrcAlign, - /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, - AllowOverlap, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), + MemOps, Limit, + MemOp::Copy(Size, DstAlignCanChange, Alignment, *SrcAlign, + /*IsVolatile*/ true), + DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes())) return SDValue(); if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(C); - unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty); - if (NewAlign > Align) { + Align NewAlign = DL.getABITypeAlign(Ty); + if (NewAlign > Alignment) { // Give the stack frame object a larger alignment if needed. 
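A sketch of the alignment-promotion clamp above, with exceedsNaturalStackAlignment modeled as a plain compare (an assumption for illustration; the real query consults the DataLayout):

    #include <cstdint>

    // Halve the candidate (a power of two) while it both improves on the
    // current alignment and would force dynamic stack realignment; promote
    // only if something better survives.
    uint64_t promoteAlign(uint64_t Cur, uint64_t Candidate,
                          uint64_t NaturalStackAlign) {
      while (Candidate > Cur && Candidate > NaturalStackAlign)
        Candidate /= 2; // halving keeps it a power of two
      return Candidate > Cur ? Candidate : Cur;
    }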
- if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) + if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) MFI.setObjectAlignment(FI->getIndex(), NewAlign); - Align = NewAlign; + Alignment = NewAlign; } } @@ -6118,9 +6261,9 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (isDereferenceable) SrcMMOFlags |= MachineMemOperand::MODereferenceable; - Value = - DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), - SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags); + Value = DAG.getLoad( + VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), + SrcPtrInfo.getWithOffset(SrcOff), SrcAlign->value(), SrcMMOFlags); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -6132,9 +6275,9 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, unsigned VTSize = VT.getSizeInBits() / 8; SDValue Store; - Store = DAG.getStore(Chain, dl, LoadValues[i], - DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags); + Store = DAG.getStore( + Chain, dl, LoadValues[i], DAG.getMemBasePlusOffset(Dst, DstOff, dl), + DstPtrInfo.getWithOffset(DstOff), Alignment.value(), MMOFlags); OutChains.push_back(Store); DstOff += VTSize; } @@ -6151,7 +6294,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, /// \param Dst Pointer to destination memory location. /// \param Src Value of byte to write into the memory. /// \param Size Number of bytes to write. -/// \param Align Alignment of the destination in bytes. +/// \param Alignment Alignment of the destination in bytes. /// \param isVol True if destination is volatile. /// \param DstPtrInfo IR information on the memory pointer. /// \returns New head in the control flow, if lowering was successful, empty @@ -6162,7 +6305,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, /// memory size. static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - uint64_t Size, unsigned Align, bool isVol, + uint64_t Size, Align Alignment, bool isVol, MachinePointerInfo DstPtrInfo) { // Turn a memset of undef to nop. // FIXME: We need to honor volatile even is Src is undef. @@ -6183,21 +6326,19 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, bool IsZeroVal = isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); if (!TLI.findOptimalMemOpLowering( - MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, - (DstAlignCanChange ? 0 : Align), 0, /*IsMemset=*/true, - /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false, - /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), ~0u, - MF.getFunction().getAttributes())) + MemOps, TLI.getMaxStoresPerMemset(OptSize), + MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol), + DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes())) return SDValue(); if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); - if (NewAlign > Align) { + Align NewAlign = DAG.getDataLayout().getABITypeAlign(Ty); + if (NewAlign > Alignment) { // Give the stack frame object a larger alignment if needed. 
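The memmove lowering above emits every load before any store (and passes /*IsVolatile*/ true so the operations are not combined), because source and destination may overlap. A byte-level sketch of the same ordering discipline, not from the patch:

    #include <cstddef>
    #include <vector>

    void memmoveSketch(unsigned char *Dst, const unsigned char *Src,
                       size_t N) {
      std::vector<unsigned char> Tmp(Src, Src + N); // all loads first
      for (size_t I = 0; I != N; ++I)
        Dst[I] = Tmp[I]; // then all stores; overlap can no longer corrupt
    }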
- if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) + if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) MFI.setObjectAlignment(FI->getIndex(), NewAlign); - Align = NewAlign; + Alignment = NewAlign; } } @@ -6235,7 +6376,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, assert(Value.getValueType() == VT && "Value with wrong type."); SDValue Store = DAG.getStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), Align, + DstPtrInfo.getWithOffset(DstOff), Alignment.value(), isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone); OutChains.push_back(Store); DstOff += VT.getSizeInBits() / 8; @@ -6256,12 +6397,10 @@ static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI, } SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, - SDValue Src, SDValue Size, unsigned Align, + SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { - assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); - // Check to see if we should lower the memcpy to loads and stores first. // For cases within the target-specified limits, this is the best choice. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -6270,9 +6409,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, if (ConstantSize->isNullValue()) return Chain; - SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, - ConstantSize->getZExtValue(),Align, - isVol, false, DstPtrInfo, SrcPtrInfo); + SDValue Result = getMemcpyLoadsAndStores( + *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, + isVol, false, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; } @@ -6281,7 +6420,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, // code. If the target chooses to do this, this is the next best. if (TSI) { SDValue Result = TSI->EmitTargetCodeForMemcpy( - *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline, + *this, dl, Chain, Dst, Src, Size, Alignment, isVol, AlwaysInline, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; @@ -6292,8 +6431,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, if (AlwaysInline) { assert(ConstantSize && "AlwaysInline requires a constant size!"); return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, - ConstantSize->getZExtValue(), Align, isVol, - true, DstPtrInfo, SrcPtrInfo); + ConstantSize->getZExtValue(), Alignment, + isVol, true, DstPtrInfo, SrcPtrInfo); } checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); @@ -6372,12 +6511,10 @@ SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl, } SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, - SDValue Src, SDValue Size, unsigned Align, + SDValue Src, SDValue Size, Align Alignment, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { - assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); - // Check to see if we should lower the memmove to loads and stores first. // For cases within the target-specified limits, this is the best choice. 
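getMemcpy above tries three strategies in order: fold a known-zero size, expand small constant sizes inline, then defer to a target hook and finally the libcall. A stand-alone sketch of that tiering (InlineLimit is a made-up stand-in for getMaxStoresPerMemcpy):

    #include <cstddef>
    #include <cstring>

    static const size_t InlineLimit = 32; // stand-in for the target limit

    void lowerMemcpySketch(void *Dst, const void *Src, size_t N) {
      if (N == 0)
        return; // nothing to copy: the DAG just returns the chain
      if (N <= InlineLimit) {
        // Inline expansion; a byte loop stands in for the load/store
        // sequence the DAG would emit.
        auto *D = static_cast<unsigned char *>(Dst);
        auto *S = static_cast<const unsigned char *>(Src);
        for (size_t I = 0; I != N; ++I)
          D[I] = S[I];
        return;
      }
      std::memcpy(Dst, Src, N); // fall back to the library call
    }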
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -6386,10 +6523,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, if (ConstantSize->isNullValue()) return Chain; - SDValue Result = - getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src, - ConstantSize->getZExtValue(), Align, isVol, - false, DstPtrInfo, SrcPtrInfo); + SDValue Result = getMemmoveLoadsAndStores( + *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, + isVol, false, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; } @@ -6397,8 +6533,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, // Then check to see if we should lower the memmove with target-specific // code. If the target chooses to do this, this is the next best. if (TSI) { - SDValue Result = TSI->EmitTargetCodeForMemmove( - *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo); + SDValue Result = + TSI->EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, + Alignment, isVol, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; } @@ -6476,11 +6613,9 @@ SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl, } SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, - SDValue Src, SDValue Size, unsigned Align, + SDValue Src, SDValue Size, Align Alignment, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo) { - assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); - // Check to see if we should lower the memset to stores first. // For cases within the target-specified limits, this is the best choice. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -6489,9 +6624,9 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, if (ConstantSize->isNullValue()) return Chain; - SDValue Result = - getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), - Align, isVol, DstPtrInfo); + SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), Alignment, + isVol, DstPtrInfo); if (Result.getNode()) return Result; @@ -6501,7 +6636,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, // code. If the target chooses to do this, this is the next best. 
if (TSI) { SDValue Result = TSI->EmitTargetCodeForMemset( - *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo); + *this, dl, Chain, Dst, Src, Size, Alignment, isVol, DstPtrInfo); if (Result.getNode()) return Result; } @@ -6662,11 +6797,8 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) { SDValue SelectionDAG::getMemIntrinsicNode( unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, - EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, + EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags, uint64_t Size, const AAMDNodes &AAInfo) { - if (Align == 0) // Ensure that codegen never sees alignment 0 - Align = getEVTAlignment(MemVT); - if (!Size && MemVT.isScalableVector()) Size = MemoryLocation::UnknownSize; else if (!Size) @@ -6674,7 +6806,7 @@ SDValue SelectionDAG::getMemIntrinsicNode( MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrInfo, Flags, Size, Align, AAInfo); + MF.getMachineMemOperand(PtrInfo, Flags, Size, Alignment, AAInfo); return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO); } @@ -6686,8 +6818,6 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, assert((Opcode == ISD::INTRINSIC_VOID || Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::PREFETCH || - Opcode == ISD::LIFETIME_START || - Opcode == ISD::LIFETIME_END || ((int)Opcode <= std::numeric_limits<int>::max() && (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && "Opcode is not a memory-accessing opcode!"); @@ -6795,13 +6925,11 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, MachinePointerInfo PtrInfo, EVT MemVT, - unsigned Alignment, + Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getEVTAlignment(MemVT); MMOFlags |= MachineMemOperand::MOLoad; assert((MMOFlags & MachineMemOperand::MOStore) == 0); @@ -6810,9 +6938,10 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, if (PtrInfo.V.isNull()) PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); + uint64_t Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize()); MachineFunction &MF = getMachineFunction(); - MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, MMOFlags, MemVT.getStoreSize(), Alignment, AAInfo, Ranges); + MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, + Alignment, AAInfo, Ranges); return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); } @@ -6867,7 +6996,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, - unsigned Alignment, + MaybeAlign Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges) { SDValue Undef = getUNDEF(Ptr.getValueType()); @@ -6885,7 +7014,7 @@ SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, - unsigned Alignment, + MaybeAlign Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo) { SDValue Undef = 
getUNDEF(Ptr.getValueType()); @@ -6918,12 +7047,10 @@ SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, - unsigned Alignment, + Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getEVTAlignment(Val.getValueType()); MMOFlags |= MachineMemOperand::MOStore; assert((MMOFlags & MachineMemOperand::MOLoad) == 0); @@ -6932,8 +7059,10 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); MachineFunction &MF = getMachineFunction(); - MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, MMOFlags, Val.getValueType().getStoreSize(), Alignment, AAInfo); + uint64_t Size = + MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize()); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); return getStore(Chain, dl, Val, Ptr, MMO); } @@ -6969,13 +7098,11 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, - EVT SVT, unsigned Alignment, + EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getEVTAlignment(SVT); MMOFlags |= MachineMemOperand::MOStore; assert((MMOFlags & MachineMemOperand::MOLoad) == 0); @@ -7288,9 +7415,24 @@ SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) { return SDValue(); } -// TODO: Use fast-math-flags to enable more simplifications. -SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y) { +SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, + SDNodeFlags Flags) { + // If this operation has 'nnan' or 'ninf' and at least 1 disallowed operand + // (an undef operand can be chosen to be Nan/Inf), then the result of this + // operation is poison. That result can be relaxed to undef. 
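A sketch of the new nnan/ninf folds in simplifyFPBinop, with concrete doubles standing in for constant/splat operands (undef operands, which the real code also accepts, are omitted for brevity):

    #include <cmath>

    struct FMFSketch { bool NoNaNs; bool NoInfs; };
    enum class FPFold { Undef, NoFold };

    FPFold simplifyFPBinopSketch(FMFSketch F, double X, double Y) {
      if (F.NoNaNs && (std::isnan(X) || std::isnan(Y)))
        return FPFold::Undef; // nnan with a NaN operand: poison -> undef
      if (F.NoInfs && (std::isinf(X) || std::isinf(Y)))
        return FPFold::Undef; // ninf with an Inf operand: poison -> undef
      return FPFold::NoFold;  // continue to the opcode-specific folds
    }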
+ ConstantFPSDNode *XC = isConstOrConstSplatFP(X, /* AllowUndefs */ true); ConstantFPSDNode *YC = isConstOrConstSplatFP(Y, /* AllowUndefs */ true); + bool HasNan = (XC && XC->getValueAPF().isNaN()) || + (YC && YC->getValueAPF().isNaN()); + bool HasInf = (XC && XC->getValueAPF().isInfinity()) || + (YC && YC->getValueAPF().isInfinity()); + + if (Flags.hasNoNaNs() && (HasNan || X.isUndef() || Y.isUndef())) + return getUNDEF(X.getValueType()); + + if (Flags.hasNoInfs() && (HasInf || X.isUndef() || Y.isUndef())) + return getUNDEF(X.getValueType()); + if (!YC) return SDValue(); @@ -7394,6 +7536,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, createOperands(N, Ops); } + N->setFlags(Flags); InsertNode(N); SDValue V(N, 0); NewSDValueDbgMsg(V, "Creating new node: ", this); @@ -7406,7 +7549,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, - ArrayRef<SDValue> Ops) { + ArrayRef<SDValue> Ops, const SDNodeFlags Flags) { if (VTList.NumVTs == 1) return getNode(Opcode, DL, VTList.VTs[0], Ops); @@ -7481,6 +7624,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList); createOperands(N, Ops); } + + N->setFlags(Flags); InsertNode(N); SDValue V(N, 0); NewSDValueDbgMsg(V, "Creating new node: ", this); @@ -7919,7 +8064,7 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { switch (OrigOpc) { default: llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!"); -#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ +#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: NewOpc = ISD::DAGN; break; #define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: NewOpc = ISD::SETCC; break; @@ -9196,9 +9341,8 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, if (!TLI->isExtractSubvectorCheap(SubVT, OpVT, 0)) return SDValue(); BinOp = (ISD::NodeType)CandidateBinOp; - return getNode( - ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op, - getConstant(0, SDLoc(Op), TLI->getVectorIdxTy(getDataLayout()))); + return getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op, + getVectorIdxConstant(0, SDLoc(Op))); }; // At each stage, we're looking for something that looks like: @@ -9246,6 +9390,28 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, PrevOp = Op; } + // Handle subvector reductions, which tend to appear after the shuffle + // reduction stages. + while (Op.getOpcode() == CandidateBinOp) { + unsigned NumElts = Op.getValueType().getVectorNumElements(); + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + if (Op0.getOpcode() != ISD::EXTRACT_SUBVECTOR || + Op1.getOpcode() != ISD::EXTRACT_SUBVECTOR || + Op0.getOperand(0) != Op1.getOperand(0)) + break; + SDValue Src = Op0.getOperand(0); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + if (NumSrcElts != (2 * NumElts)) + break; + if (!(Op0.getConstantOperandAPInt(1) == 0 && + Op1.getConstantOperandAPInt(1) == NumElts) && + !(Op1.getConstantOperandAPInt(1) == 0 && + Op0.getConstantOperandAPInt(1) == NumElts)) + break; + Op = Src; + } + BinOp = (ISD::NodeType)CandidateBinOp; return Op; } @@ -9276,9 +9442,8 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { if (OperandVT.isVector()) { // A vector operand; extract a single element. 
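The new loop in matchBinOpReduction above peels binops whose operands extract the low and high halves of a common source. A sketch of the halving shape it recognizes, not from the patch:

    #include <cassert>
    #include <vector>

    int reduceAddSketch(std::vector<int> V) {
      assert(!V.empty() && (V.size() & (V.size() - 1)) == 0 &&
             "power-of-two reduction");
      while (V.size() > 1) {
        size_t Half = V.size() / 2;
        // binop(extract_subvector(src, 0), extract_subvector(src, Half))
        for (size_t I = 0; I != Half; ++I)
          V[I] += V[I + Half];
        V.resize(Half);
      }
      return V[0];
    }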
EVT OperandEltVT = OperandVT.getVectorElementType(); - Operands[j] = - getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, - getConstant(i, dl, TLI->getVectorIdxTy(getDataLayout()))); + Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, + Operand, getVectorIdxConstant(i, dl)); } else { // A scalar operand; just use it as is. Operands[j] = Operand; @@ -9395,9 +9560,9 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, return false; } -/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if -/// it cannot be inferred. -unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { +/// InferPtrAlignment - Infer alignment of a load / store address. Return None +/// if it cannot be inferred. +MaybeAlign SelectionDAG::InferPtrAlign(SDValue Ptr) const { // If this is a GlobalAddress + cst, return the alignment. const GlobalValue *GV = nullptr; int64_t GVOffset = 0; @@ -9406,9 +9571,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { KnownBits Known(PtrWidth); llvm::computeKnownBits(GV, Known, getDataLayout()); unsigned AlignBits = Known.countMinTrailingZeros(); - unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; - if (Align) - return MinAlign(Align, GVOffset); + if (AlignBits) + return commonAlignment(Align(1ull << std::min(31U, AlignBits)), GVOffset); } // If this is a direct reference to a stack slot, use information about the @@ -9426,12 +9590,10 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { if (FrameIdx != INT_MIN) { const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); - unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), - FrameOffset); - return FIInfoAlign; + return commonAlignment(MFI.getObjectAlign(FrameIdx), FrameOffset); } - return 0; + return None; } /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type @@ -9447,20 +9609,58 @@ std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const { return std::make_pair(LoVT, HiVT); } +/// GetDependentSplitDestVTs - Compute the VTs needed for the low/hi parts of a +/// type, dependent on an enveloping VT that has been split into two identical +/// pieces. Sets the HiIsEmpty flag when hi type has zero storage size. +std::pair<EVT, EVT> +SelectionDAG::GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT, + bool *HiIsEmpty) const { + EVT EltTp = VT.getVectorElementType(); + bool IsScalable = VT.isScalableVector(); + // Examples: + // custom VL=8 with enveloping VL=8/8 yields 8/0 (hi empty) + // custom VL=9 with enveloping VL=8/8 yields 8/1 + // custom VL=10 with enveloping VL=8/8 yields 8/2 + // etc. + unsigned VTNumElts = VT.getVectorNumElements(); + unsigned EnvNumElts = EnvVT.getVectorNumElements(); + EVT LoVT, HiVT; + if (VTNumElts > EnvNumElts) { + LoVT = EnvVT; + HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts, + IsScalable); + *HiIsEmpty = false; + } else { + // Flag that hi type has zero storage size, but return split envelop type + // (this would be easier if vector types with zero elements were allowed). + LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts, IsScalable); + HiVT = EnvVT; + *HiIsEmpty = true; + } + return std::make_pair(LoVT, HiVT); +} + /// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the /// low/high part. 
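A sketch of GetDependentSplitDestVTs above with a simplified element-count struct (not the real EVT API); it reproduces the worked examples in the comment, e.g. VL=10 against an 8/8 envelope yields 8/2, while VL=8 yields 8/0 with HiIsEmpty set:

    #include <utility>

    struct ECSketch { unsigned Min; bool Scalable; };

    std::pair<ECSketch, ECSketch>
    dependentSplitSketch(ECSketch VT, ECSketch Env, bool &HiIsEmpty) {
      if (VT.Min > Env.Min) {
        HiIsEmpty = false;
        return {Env, {VT.Min - Env.Min, VT.Scalable}};
      }
      // Hi has zero storage; report the envelope type anyway, since vector
      // types with zero elements cannot be expressed.
      HiIsEmpty = true;
      return {{VT.Min, VT.Scalable}, Env};
    }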
std::pair<SDValue, SDValue> SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT) { - assert(LoVT.getVectorNumElements() + HiVT.getVectorNumElements() <= - N.getValueType().getVectorNumElements() && + assert(LoVT.isScalableVector() == HiVT.isScalableVector() && + LoVT.isScalableVector() == N.getValueType().isScalableVector() && + "Splitting vector with an invalid mixture of fixed and scalable " + "vector types"); + assert(LoVT.getVectorMinNumElements() + HiVT.getVectorMinNumElements() <= + N.getValueType().getVectorMinNumElements() && "More vector elements requested than available!"); SDValue Lo, Hi; - Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, - getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout()))); + Lo = + getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, getVectorIdxConstant(0, DL)); + // For scalable vectors it is safe to use LoVT.getVectorMinNumElements() + // (rather than having to use ElementCount), because EXTRACT_SUBVECTOR scales + // IDX with the runtime scaling factor of the result vector type. For + // fixed-width result vectors, that runtime scaling factor is 1. Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N, - getConstant(LoVT.getVectorNumElements(), DL, - TLI->getVectorIdxTy(getDataLayout()))); + getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL)); return std::make_pair(Lo, Hi); } @@ -9470,22 +9670,22 @@ SDValue SelectionDAG::WidenVector(const SDValue &N, const SDLoc &DL) { EVT WideVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(), NextPowerOf2(VT.getVectorNumElements())); return getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, getUNDEF(WideVT), N, - getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout()))); + getVectorIdxConstant(0, DL)); } void SelectionDAG::ExtractVectorElements(SDValue Op, SmallVectorImpl<SDValue> &Args, - unsigned Start, unsigned Count) { + unsigned Start, unsigned Count, + EVT EltVT) { EVT VT = Op.getValueType(); if (Count == 0) Count = VT.getVectorNumElements(); - - EVT EltVT = VT.getVectorElementType(); - EVT IdxTy = TLI->getVectorIdxTy(getDataLayout()); + if (EltVT == EVT()) + EltVT = VT.getVectorElementType(); SDLoc SL(Op); for (unsigned i = Start, e = Start + Count; i != e; ++i) { - Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, - Op, getConstant(i, SL, IdxTy))); + Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Op, + getVectorIdxConstant(i, SL))); } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 421ff3e7d472..1d596c89c911 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -69,7 +69,6 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" @@ -136,6 +135,11 @@ using namespace SwitchCG; /// some float libcalls (6, 8 or 12 bits). 
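The comment in SplitVector above relies on EXTRACT_SUBVECTOR scaling a constant index by the runtime vscale. A tiny sketch of why the minimum element count is the right Hi index for both fixed and scalable vectors (vscale == 1 models the fixed-width case):

    #include <cassert>

    // Runtime element index at which the Hi half begins.
    unsigned hiHalfFirstElement(unsigned LoMinElts, unsigned VScale) {
      assert(VScale >= 1 && "vscale is a positive runtime constant");
      return LoMinElts * VScale;
    }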
static unsigned LimitFloatPrecision; +static cl::opt<bool> + InsertAssertAlign("insert-assert-align", cl::init(true), + cl::desc("Insert the experimental `assertalign` node."), + cl::ReallyHidden); + static cl::opt<unsigned, true> LimitFPPrecision("limit-float-precision", cl::desc("Generate low-precision inline sequences " @@ -206,12 +210,17 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, MVT PartVT, EVT ValueVT, const Value *V, Optional<CallingConv::ID> CC = None, Optional<ISD::NodeType> AssertOp = None) { + // Let the target assemble the parts if it wants to + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts, + PartVT, ValueVT, CC)) + return Val; + if (ValueVT.isVector()) return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V, CC); assert(NumParts > 0 && "No parts to assemble!"); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Val = Parts[0]; if (NumParts > 1) { @@ -347,7 +356,7 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, const char *AsmError = ", possible invalid constraint for vector type"; if (const CallInst *CI = dyn_cast<CallInst>(I)) - if (isa<InlineAsm>(CI->getCalledValue())) + if (CI->isInlineAsm()) return Ctx.emitError(I, ErrMsg + AsmError); return Ctx.emitError(I, ErrMsg); @@ -415,10 +424,13 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the // intermediate operands. EVT BuiltVectorTy = - EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(), - (IntermediateVT.isVector() - ? IntermediateVT.getVectorNumElements() * NumParts - : NumIntermediates)); + IntermediateVT.isVector() + ? EVT::getVectorVT( + *DAG.getContext(), IntermediateVT.getScalarType(), + IntermediateVT.getVectorElementCount() * NumParts) + : EVT::getVectorVT(*DAG.getContext(), + IntermediateVT.getScalarType(), + NumIntermediates); Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL, BuiltVectorTy, Ops); @@ -436,18 +448,20 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the // elements we want. if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { - assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && + assert((PartEVT.getVectorElementCount().Min > + ValueVT.getVectorElementCount().Min) && + (PartEVT.getVectorElementCount().Scalable == + ValueVT.getVectorElementCount().Scalable) && "Cannot narrow, it would be a lossy transformation"); - return DAG.getNode( - ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getVectorIdxConstant(0, DL)); } // Vector/Vector bitcast. 
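The BuiltVectorTy computation above multiplies an element count by the number of register parts. A sketch of the ElementCount arithmetic this assumes, using the public Min/Scalable fields the patch itself reads (the struct here is a stand-in, not the LLVM class):

    struct ElementCountSketch {
      unsigned Min;  // minimum number of elements
      bool Scalable; // multiplied by vscale at run time
    };

    // Scaling by the part count scales only the minimum; scalability is
    // preserved, so N parts of <vscale x 4 x f32> form <vscale x 4N x f32>.
    ElementCountSketch operator*(ElementCountSketch EC, unsigned Parts) {
      return {EC.Min * Parts, EC.Scalable};
    }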
if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() && + assert(PartEVT.getVectorElementCount() == ValueVT.getVectorElementCount() && "Cannot handle this kind of promotion"); // Promoted vector extract return DAG.getAnyExtOrTrunc(Val, DL, ValueVT); @@ -472,9 +486,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(), ValueVT.getVectorElementType(), Elts); Val = DAG.getBitcast(WiderVecType, Val); - return DAG.getNode( - ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getVectorIdxConstant(0, DL)); } diagnosePossiblyInvalidConstraint( @@ -484,9 +497,14 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, // Handle cases such as i8 -> <1 x i1> EVT ValueSVT = ValueVT.getVectorElementType(); - if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) - Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT) - : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT); + if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) { + if (ValueSVT.getSizeInBits() == PartEVT.getSizeInBits()) + Val = DAG.getNode(ISD::BITCAST, DL, ValueSVT, Val); + else + Val = ValueVT.isFloatingPoint() + ? DAG.getFPExtendOrRound(Val, DL, ValueSVT) + : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT); + } return DAG.getBuildVector(ValueVT, DL, Val); } @@ -504,6 +522,11 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, const Value *V, Optional<CallingConv::ID> CallConv = None, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { + // Let the target split the parts if it wants to + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT, + CallConv)) + return; EVT ValueVT = Val.getValueType(); // Handle the vector case separately. @@ -633,7 +656,7 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val, const SDLoc &DL, EVT PartVT) { - if (!PartVT.isVector()) + if (!PartVT.isFixedLengthVector()) return SDValue(); EVT ValueVT = Val.getValueType(); @@ -679,16 +702,16 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, Val = Widened; } else if (PartVT.isVector() && PartEVT.getVectorElementType().bitsGE( - ValueVT.getVectorElementType()) && - PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { + ValueVT.getVectorElementType()) && + PartEVT.getVectorElementCount() == + ValueVT.getVectorElementCount()) { // Promoted vector extract Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } else { if (ValueVT.getVectorNumElements() == 1) { - Val = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, + DAG.getVectorIdxConstant(0, DL)); } else { assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() && "lossy conversion of vector to scalar type"); @@ -723,15 +746,18 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); - unsigned IntermediateNumElts = IntermediateVT.isVector() ? 
- IntermediateVT.getVectorNumElements() : 1; + assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() && + "Mixing scalable and fixed vectors when copying in parts"); - // Convert the vector to the appropriate type if necessary. - unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts; + ElementCount DestEltCnt; + + if (IntermediateVT.isVector()) + DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates; + else + DestEltCnt = ElementCount(NumIntermediates, false); EVT BuiltVectorTy = EVT::getVectorVT( - *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts); - MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); + *DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt); if (ValueVT != BuiltVectorTy) { if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) Val = Widened; @@ -743,12 +769,15 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, SmallVector<SDValue, 8> Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { if (IntermediateVT.isVector()) { - Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, - DAG.getConstant(i * IntermediateNumElts, DL, IdxVT)); + // This does something sensible for scalable vectors - see the + // definition of EXTRACT_SUBVECTOR for further details. + unsigned IntermediateNumElts = IntermediateVT.getVectorMinNumElements(); + Ops[i] = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, + DAG.getVectorIdxConstant(i * IntermediateNumElts, DL)); } else { - Ops[i] = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val, - DAG.getConstant(i, DL, IdxVT)); + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val, + DAG.getVectorIdxConstant(i, DL)); } } @@ -1112,32 +1141,26 @@ void SelectionDAGBuilder::visit(const Instruction &I) { visit(I.getOpcode(), I); if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) { - // Propagate the fast-math-flags of this IR instruction to the DAG node that - // maps to this instruction. - // TODO: We could handle all flags (nsw, etc) here. - // TODO: If an IR instruction maps to >1 node, only the final node will have - // flags set. - if (SDNode *Node = getNodeForIRValue(&I)) { - SDNodeFlags IncomingFlags; - IncomingFlags.copyFMF(*FPMO); - if (!Node->getFlags().isDefined()) - Node->setFlags(IncomingFlags); - else - Node->intersectFlagsWith(IncomingFlags); - } - } - // Constrained FP intrinsics with fpexcept.ignore should also get - // the NoFPExcept flag. - if (auto *FPI = dyn_cast<ConstrainedFPIntrinsic>(&I)) - if (FPI->getExceptionBehavior() == fp::ExceptionBehavior::ebIgnore) + // ConstrainedFPIntrinsics handle their own FMF. + if (!isa<ConstrainedFPIntrinsic>(&I)) { + // Propagate the fast-math-flags of this IR instruction to the DAG node that + // maps to this instruction. + // TODO: We could handle all flags (nsw, etc) here. + // TODO: If an IR instruction maps to >1 node, only the final node will have + // flags set. 
if (SDNode *Node = getNodeForIRValue(&I)) { - SDNodeFlags Flags = Node->getFlags(); - Flags.setNoFPExcept(true); - Node->setFlags(Flags); + SDNodeFlags IncomingFlags; + IncomingFlags.copyFMF(*FPMO); + if (!Node->getFlags().isDefined()) + Node->setFlags(IncomingFlags); + else + Node->intersectFlagsWith(IncomingFlags); } + } + } if (!I.isTerminator() && !HasTailCall && - !isStatepoint(&I)) // statepoints handle their exports internally + !isa<GCStatepointInst>(I)) // statepoints handle their exports internally CopyToExportRegsIfNeeded(&I); CurInst = nullptr; @@ -1399,11 +1422,11 @@ void SelectionDAGBuilder::resolveOrClearDbgInfo() { /// getCopyFromRegs - If there was virtual register allocated for the value V /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { - DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); + DenseMap<const Value *, Register>::iterator It = FuncInfo.ValueMap.find(V); SDValue Result; if (It != FuncInfo.ValueMap.end()) { - unsigned InReg = It->second; + Register InReg = It->second; RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), DAG.getDataLayout(), InReg, Ty, @@ -1437,12 +1460,6 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { return Val; } -// Return true if SDValue exists for the given Value -bool SelectionDAGBuilder::findValue(const Value *V) const { - return (NodeMap.find(V) != NodeMap.end()) || - (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end()); -} - /// getNonRegisterValue - Return an SDValue for the given Value, but /// don't look in FuncInfo.ValueMap for a virtual register. SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { @@ -1486,6 +1503,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { TLI.getPointerTy(DAG.getDataLayout(), AS)); } + if (match(C, m_VScale(DAG.getDataLayout()))) + return DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1)); + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) return DAG.getConstantFP(*CFP, getCurSDLoc(), VT); @@ -1558,16 +1578,17 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { return DAG.getBlockAddress(BA, VT); VectorType *VecTy = cast<VectorType>(V->getType()); - unsigned NumElements = VecTy->getNumElements(); // Now that we know the number and type of the elements, get that number of // elements into the Ops array based on what kind of constant it is. - SmallVector<SDValue, 16> Ops; if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) { + SmallVector<SDValue, 16> Ops; + unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements(); for (unsigned i = 0; i != NumElements; ++i) Ops.push_back(getValue(CV->getOperand(i))); - } else { - assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); + + return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); + } else if (isa<ConstantAggregateZero>(C)) { EVT EltVT = TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType()); @@ -1576,11 +1597,16 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT); else Op = DAG.getConstant(0, getCurSDLoc(), EltVT); - Ops.assign(NumElements, Op); - } - // Create a BUILD_VECTOR node. 
- return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); + if (isa<ScalableVectorType>(VecTy)) + return NodeMap[V] = DAG.getSplatVector(VT, getCurSDLoc(), Op); + else { + SmallVector<SDValue, 16> Ops; + Ops.assign(cast<FixedVectorType>(VecTy)->getNumElements(), Op); + return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); + } + } + llvm_unreachable("Unknown vector constant"); } // If this is a static alloca, generate it as the frameindex instead of @@ -1603,6 +1629,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } + if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V)) { + return DAG.getMDNode(cast<MDNode>(MD->getMetadata())); + } llvm_unreachable("Can't get register for value!"); } @@ -1611,17 +1640,12 @@ void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) { bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX; bool IsCoreCLR = Pers == EHPersonality::CoreCLR; bool IsSEH = isAsynchronousEHPersonality(Pers); - bool IsWasmCXX = Pers == EHPersonality::Wasm_CXX; MachineBasicBlock *CatchPadMBB = FuncInfo.MBB; if (!IsSEH) CatchPadMBB->setIsEHScopeEntry(); // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues. if (IsMSVCCXX || IsCoreCLR) CatchPadMBB->setIsEHFuncletEntry(); - // Wasm does not need catchpads anymore - if (!IsWasmCXX) - DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, - getControlRoot())); } void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { @@ -1835,6 +1859,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { unsigned NumValues = ValueVTs.size(); SmallVector<SDValue, 4> Chains(NumValues); + Align BaseAlign = DL.getPrefTypeAlign(I.getOperand(0)->getType()); for (unsigned i = 0; i != NumValues; ++i) { // An aggregate return value cannot wrap around the address space, so // offsets to its parts don't wrap either. @@ -1843,9 +1868,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SDValue Val = RetOp.getValue(RetOp.getResNo() + i); if (MemVTs[i] != ValueVTs[i]) Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]); - Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val, + Chains[i] = DAG.getStore( + Chain, getCurSDLoc(), Val, // FIXME: better loc info would be nice. - Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); + Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), + commonAlignment(BaseAlign, Offsets[i])); } Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), @@ -1964,7 +1991,7 @@ void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { if (V->getType()->isEmptyTy()) return; - DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); + DenseMap<const Value *, Register>::iterator VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) { assert(!V->use_empty() && "Unused value assigned virtual registers!"); CopyValueToVirtualRegister(V, VMI->second); @@ -2277,7 +2304,9 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // If this is a series of conditions that are or'd or and'd together, emit // this as a sequence of branches instead of setcc's with and/or operations. - // As long as jumps are not expensive, this should improve performance. + // As long as jumps are not expensive (exceptions for multi-use logic ops, + // unpredictable branches, and vector extracts because those jumps are likely + // expensive for any target), this should improve performance. 
// For example, instead of something like: // cmp A, B // C = seteq @@ -2292,9 +2321,12 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // jle foo if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { Instruction::BinaryOps Opcode = BOp->getOpcode(); + Value *Vec, *BOp0 = BOp->getOperand(0), *BOp1 = BOp->getOperand(1); if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && !I.hasMetadata(LLVMContext::MD_unpredictable) && - (Opcode == Instruction::And || Opcode == Instruction::Or)) { + (Opcode == Instruction::And || Opcode == Instruction::Or) && + !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) && + match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode, getEdgeProbability(BrMBB, Succ0MBB), @@ -2516,7 +2548,7 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL, auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable; MachineMemOperand *MemRef = MF.getMachineMemOperand( - MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlignment(PtrTy)); + MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlign(PtrTy)); DAG.setNodeMemRefs(Node, {MemRef}); } if (PtrTy != PtrMemTy) @@ -2597,17 +2629,13 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, MachineMemOperand::MOVolatile); } - // Perform the comparison via a subtract/getsetcc. - EVT VT = Guard.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal); - + // Perform the comparison via a getsetcc. SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(0, dl, VT), ISD::SETNE); + Guard.getValueType()), + Guard, GuardVal, ISD::SETNE); - // If the sub is not 0, then we know the guard/stackslot do not equal, so - // branch to failure MBB. + // If the guard/stackslot do not equal, branch to failure MBB. SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, GuardVal.getOperand(0), Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); @@ -2640,6 +2668,11 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { // Passing 'true' for doesNotReturn above won't generate the trap for us. if (TM.getTargetTriple().isPS4CPU()) Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain); + // WebAssembly needs an unreachable instruction after a non-returning call, + // because the function return type can be different from __stack_chk_fail's + // return type (void). + if (TM.getTargetTriple().isWasm()) + Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain); DAG.setRoot(Chain); } @@ -2778,14 +2811,16 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't // have to do anything here to lower funclet bundles. 
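The stack-protector hunk above drops the explicit subtract and compares the guard against the stack slot directly. A trivial sketch of the simplification (types simplified):

    // Old form: Sub = Guard - Slot; setcc(Sub, 0, SETNE)
    // New form: setcc(Guard, Slot, SETNE)
    bool guardMismatch(unsigned long Guard, unsigned long Slot) {
      return Guard != Slot;
    }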
assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt, + LLVMContext::OB_gc_transition, + LLVMContext::OB_gc_live, LLVMContext::OB_funclet, LLVMContext::OB_cfguardtarget}) && "Cannot lower invokes with arbitrary operand bundles yet!"); - const Value *Callee(I.getCalledValue()); + const Value *Callee(I.getCalledOperand()); const Function *Fn = dyn_cast<Function>(Callee); if (isa<InlineAsm>(Callee)) - visitInlineAsm(&I); + visitInlineAsm(I); else if (Fn && Fn->isIntrinsic()) { switch (Fn->getIntrinsicID()) { default: @@ -2795,10 +2830,10 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { break; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: - visitPatchpoint(&I, EHPadBB); + visitPatchpoint(I, EHPadBB); break; case Intrinsic::experimental_gc_statepoint: - LowerStatepoint(ImmutableStatepoint(&I), EHPadBB); + LowerStatepoint(cast<GCStatepointInst>(I), EHPadBB); break; case Intrinsic::wasm_rethrow_in_catch: { // This is usually done in visitTargetIntrinsic, but this intrinsic is @@ -2822,14 +2857,14 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { // with deopt state. LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB); } else { - LowerCallTo(&I, getValue(Callee), false, EHPadBB); + LowerCallTo(I, getValue(Callee), false, EHPadBB); } // If the value of the invoke is used outside of its defining block, make it // available as a virtual register. // We already took care of the exported value for the statepoint instruction // during call to the LowerStatepoint. - if (!isStatepoint(I)) { + if (!isa<GCStatepointInst>(I)) { CopyToExportRegsIfNeeded(&I); } @@ -2862,18 +2897,19 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) { {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && "Cannot lower callbrs with arbitrary operand bundles yet!"); - assert(isa<InlineAsm>(I.getCalledValue()) && - "Only know how to handle inlineasm callbr"); - visitInlineAsm(&I); + assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr"); + visitInlineAsm(I); + CopyToExportRegsIfNeeded(&I); // Retrieve successors. MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()]; // Update successor info. - addSuccessorWithProb(CallBrMBB, Return); + addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne()); for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) { MachineBasicBlock *Target = FuncInfo.MBBMap[I.getIndirectDest(i)]; - addSuccessorWithProb(CallBrMBB, Target); + addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero()); + Target->setIsInlineAsmBrIndirectTarget(); } CallBrMBB->normalizeSuccProbs(); @@ -3003,133 +3039,6 @@ void SelectionDAGBuilder::visitFSub(const User &I) { visitBinary(I, ISD::FSUB); } -/// Checks if the given instruction performs a vector reduction, in which case -/// we have the freedom to alter the elements in the result as long as the -/// reduction of them stays unchanged. 
-static bool isVectorReductionOp(const User *I) { - const Instruction *Inst = dyn_cast<Instruction>(I); - if (!Inst || !Inst->getType()->isVectorTy()) - return false; - - auto OpCode = Inst->getOpcode(); - switch (OpCode) { - case Instruction::Add: - case Instruction::Mul: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - break; - case Instruction::FAdd: - case Instruction::FMul: - if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) - if (FPOp->getFastMathFlags().isFast()) - break; - LLVM_FALLTHROUGH; - default: - return false; - } - - unsigned ElemNum = Inst->getType()->getVectorNumElements(); - // Ensure the reduction size is a power of 2. - if (!isPowerOf2_32(ElemNum)) - return false; - - unsigned ElemNumToReduce = ElemNum; - - // Do DFS search on the def-use chain from the given instruction. We only - // allow four kinds of operations during the search until we reach the - // instruction that extracts the first element from the vector: - // - // 1. The reduction operation of the same opcode as the given instruction. - // - // 2. PHI node. - // - // 3. ShuffleVector instruction together with a reduction operation that - // does a partial reduction. - // - // 4. ExtractElement that extracts the first element from the vector, and we - // stop searching the def-use chain here. - // - // 3 & 4 above perform a reduction on all elements of the vector. We push defs - // from 1-3 to the stack to continue the DFS. The given instruction is not - // a reduction operation if we meet any other instructions other than those - // listed above. - - SmallVector<const User *, 16> UsersToVisit{Inst}; - SmallPtrSet<const User *, 16> Visited; - bool ReduxExtracted = false; - - while (!UsersToVisit.empty()) { - auto User = UsersToVisit.back(); - UsersToVisit.pop_back(); - if (!Visited.insert(User).second) - continue; - - for (const auto *U : User->users()) { - auto Inst = dyn_cast<Instruction>(U); - if (!Inst) - return false; - - if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) { - if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) - if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast()) - return false; - UsersToVisit.push_back(U); - } else if (const ShuffleVectorInst *ShufInst = - dyn_cast<ShuffleVectorInst>(U)) { - // Detect the following pattern: A ShuffleVector instruction together - // with a reduction that do partial reduction on the first and second - // ElemNumToReduce / 2 elements, and store the result in - // ElemNumToReduce / 2 elements in another vector. - - unsigned ResultElements = ShufInst->getType()->getVectorNumElements(); - if (ResultElements < ElemNum) - return false; - - if (ElemNumToReduce == 1) - return false; - if (!isa<UndefValue>(U->getOperand(1))) - return false; - for (unsigned i = 0; i < ElemNumToReduce / 2; ++i) - if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2)) - return false; - for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i) - if (ShufInst->getMaskValue(i) != -1) - return false; - - // There is only one user of this ShuffleVector instruction, which - // must be a reduction operation. - if (!U->hasOneUse()) - return false; - - auto U2 = dyn_cast<Instruction>(*U->user_begin()); - if (!U2 || U2->getOpcode() != OpCode) - return false; - - // Check operands of the reduction operation. 
- if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) || - (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) { - UsersToVisit.push_back(U2); - ElemNumToReduce /= 2; - } else - return false; - } else if (isa<ExtractElementInst>(U)) { - // At this moment we should have reduced all elements in the vector. - if (ElemNumToReduce != 1) - return false; - - const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1)); - if (!Val || !Val->isZero()) - return false; - - ReduxExtracted = true; - } else - return false; - } - } - return ReduxExtracted; -} - void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) { SDNodeFlags Flags; @@ -3148,17 +3057,6 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) { if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) { Flags.setExact(ExactOp->isExact()); } - if (isVectorReductionOp(&I)) { - Flags.setVectorReduction(true); - LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n"); - - // If no flags are set we will propagate the incoming flags, if any flags - // are set, we will intersect them with the incoming flag and so we need to - // copy the FMF flags here. - if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) { - Flags.copyFMF(*FPOp); - } - } SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); @@ -3296,9 +3194,9 @@ void SelectionDAGBuilder::visitSelect(const User &I) { SDValue Cond = getValue(I.getOperand(0)); SDValue LHSVal = getValue(I.getOperand(1)); SDValue RHSVal = getValue(I.getOperand(2)); - auto BaseOps = {Cond}; - ISD::NodeType OpCode = Cond.getValueType().isVector() ? - ISD::VSELECT : ISD::SELECT; + SmallVector<SDValue, 1> BaseOps(1, Cond); + ISD::NodeType OpCode = + Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT; bool IsUnaryAbs = false; @@ -3381,13 +3279,13 @@ void SelectionDAGBuilder::visitSelect(const User &I) { OpCode = Opc; LHSVal = getValue(LHS); RHSVal = getValue(RHS); - BaseOps = {}; + BaseOps.clear(); } if (IsUnaryAbs) { OpCode = Opc; LHSVal = getValue(LHS); - BaseOps = {}; + BaseOps.clear(); } } @@ -3577,19 +3475,22 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); - Constant *MaskV = cast<Constant>(I.getOperand(2)); + ArrayRef<int> Mask; + if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I)) + Mask = SVI->getShuffleMask(); + else + Mask = cast<ConstantExpr>(I).getShuffleMask(); SDLoc DL = getCurSDLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); EVT SrcVT = Src1.getValueType(); - unsigned SrcNumElts = SrcVT.getVectorNumElements(); - if (MaskV->isNullValue() && VT.isScalableVector()) { + if (all_of(Mask, [](int Elem) { return Elem == 0; }) && + VT.isScalableVector()) { // Canonical splat form of first element of first input vector. - SDValue FirstElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - SrcVT.getScalarType(), Src1, - DAG.getConstant(0, DL, - TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue FirstElt = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcVT.getScalarType(), Src1, + DAG.getVectorIdxConstant(0, DL)); setValue(&I, DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, FirstElt)); return; } @@ -3599,8 +3500,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // for targets that support a SPLAT_VECTOR for non-scalable vector types. 
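visitShuffleVector above detects the canonical splat form by checking that every mask element is zero. A sketch of that predicate (the real code additionally requires a scalable result type before emitting SPLAT_VECTOR):

    #include <algorithm>

    // A shuffle whose mask is all zeros reads element 0 of the first input
    // everywhere, i.e. it is a splat of that element.
    bool isSplatOfFirstElement(const int *Mask, unsigned NumElts) {
      return std::all_of(Mask, Mask + NumElts,
                         [](int M) { return M == 0; });
    }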
assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle"); - SmallVector<int, 8> Mask; - ShuffleVectorInst::getShuffleMask(MaskV, Mask); + unsigned SrcNumElts = SrcVT.getVectorNumElements(); unsigned MaskNumElts = Mask.size(); if (SrcNumElts == MaskNumElts) { @@ -3683,9 +3583,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // If the concatenated vector was padded, extract a subvector with the // correct number of elements. if (MaskNumElts != PaddedMaskNumElts) - Result = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, DL, VT, Result, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Result, + DAG.getVectorIdxConstant(0, DL)); setValue(&I, Result); return; @@ -3729,10 +3628,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { if (StartIdx[Input] < 0) Src = DAG.getUNDEF(VT); else { - Src = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, DL, VT, Src, - DAG.getConstant(StartIdx[Input], DL, - TLI.getVectorIdxTy(DAG.getDataLayout()))); + Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src, + DAG.getVectorIdxConstant(StartIdx[Input], DL)); } } @@ -3754,7 +3651,6 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // replacing the shuffle with extract and build vector. // to insert and build vector. EVT EltVT = VT.getVectorElementType(); - EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); SmallVector<SDValue,8> Ops; for (int Idx : Mask) { SDValue Res; @@ -3765,8 +3661,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - EltVT, Src, DAG.getConstant(Idx, DL, IdxVT)); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src, + DAG.getVectorIdxConstant(Idx, DL)); } Ops.push_back(Res); @@ -3882,13 +3778,18 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // Normalize Vector GEP - all scalar operands should be converted to the // splat vector. - unsigned VectorWidth = I.getType()->isVectorTy() ? - I.getType()->getVectorNumElements() : 0; + bool IsVectorGEP = I.getType()->isVectorTy(); + ElementCount VectorElementCount = + IsVectorGEP ? cast<VectorType>(I.getType())->getElementCount() + : ElementCount(0, false); - if (VectorWidth && !N.getValueType().isVector()) { + if (IsVectorGEP && !N.getValueType().isVector()) { LLVMContext &Context = *DAG.getContext(); - EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth); - N = DAG.getSplatBuildVector(VT, dl, N); + EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount); + if (VectorElementCount.Scalable) + N = DAG.getSplatVector(VT, dl, N); + else + N = DAG.getSplatBuildVector(VT, dl, N); } for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I); @@ -3910,9 +3811,16 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { DAG.getConstant(Offset, dl, N.getValueType()), Flags); } } else { + // IdxSize is the width of the arithmetic according to IR semantics. + // In SelectionDAG, we may prefer to do arithmetic in a wider bitwidth + // (and fix up the result later). unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS); MVT IdxTy = MVT::getIntegerVT(IdxSize); - APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType())); + TypeSize ElementSize = DL->getTypeAllocSize(GTI.getIndexedType()); + // We intentionally mask away the high bits here; ElementSize may not + // fit in IdxTy. 
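// (getKnownMinSize() is the byte size for a fixed type and the per-vscale
// unit for a scalable one; the scalable case is multiplied by ISD::VSCALE
// on the ElementScalable path below.)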
+ APInt ElementMul(IdxSize, ElementSize.getKnownMinSize()); + bool ElementScalable = ElementSize.isScalable(); // If this is a scalar constant or a splat vector of constants, // handle it quickly. @@ -3920,14 +3828,18 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (C && isa<VectorType>(C->getType())) C = C->getSplatValue(); - if (const auto *CI = dyn_cast_or_null<ConstantInt>(C)) { - if (CI->isZero()) - continue; - APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize); + const auto *CI = dyn_cast_or_null<ConstantInt>(C); + if (CI && CI->isZero()) + continue; + if (CI && !ElementScalable) { + APInt Offs = ElementMul * CI->getValue().sextOrTrunc(IdxSize); LLVMContext &Context = *DAG.getContext(); - SDValue OffsVal = VectorWidth ? - DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) : - DAG.getConstant(Offs, dl, IdxTy); + SDValue OffsVal; + if (IsVectorGEP) + OffsVal = DAG.getConstant( + Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorElementCount)); + else + OffsVal = DAG.getConstant(Offs, dl, IdxTy); // In an inbounds GEP with an offset that is nonnegative even when // interpreted as signed, assume there is no unsigned overflow. @@ -3941,31 +3853,45 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { continue; } - // N = N + Idx * ElementSize; + // N = N + Idx * ElementMul; SDValue IdxN = getValue(Idx); - if (!IdxN.getValueType().isVector() && VectorWidth) { - EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth); - IdxN = DAG.getSplatBuildVector(VT, dl, IdxN); + if (!IdxN.getValueType().isVector() && IsVectorGEP) { + EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), + VectorElementCount); + if (VectorElementCount.Scalable) + IdxN = DAG.getSplatVector(VT, dl, IdxN); + else + IdxN = DAG.getSplatBuildVector(VT, dl, IdxN); } // If the index is smaller or larger than intptr_t, truncate or extend // it. IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); - // If this is a multiply by a power of two, turn it into a shl - // immediately. This is a very common case. - if (ElementSize != 1) { - if (ElementSize.isPowerOf2()) { - unsigned Amt = ElementSize.logBase2(); - IdxN = DAG.getNode(ISD::SHL, dl, - N.getValueType(), IdxN, - DAG.getConstant(Amt, dl, IdxN.getValueType())); - } else { - SDValue Scale = DAG.getConstant(ElementSize.getZExtValue(), dl, - IdxN.getValueType()); - IdxN = DAG.getNode(ISD::MUL, dl, - N.getValueType(), IdxN, Scale); + if (ElementScalable) { + EVT VScaleTy = N.getValueType().getScalarType(); + SDValue VScale = DAG.getNode( + ISD::VSCALE, dl, VScaleTy, + DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy)); + if (IsVectorGEP) + VScale = DAG.getSplatVector(N.getValueType(), dl, VScale); + IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale); + } else { + // If this is a multiply by a power of two, turn it into a shl + // immediately. This is a very common case. 
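// For instance, indexing an i64 array gives ElementMul == 8, so the
// scaling below is emitted as IdxN << 3 instead of a generic multiply.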
+ if (ElementMul != 1) { + if (ElementMul.isPowerOf2()) { + unsigned Amt = ElementMul.logBase2(); + IdxN = DAG.getNode(ISD::SHL, dl, + N.getValueType(), IdxN, + DAG.getConstant(Amt, dl, IdxN.getValueType())); + } else { + SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl, + IdxN.getValueType()); + IdxN = DAG.getNode(ISD::MUL, dl, + N.getValueType(), IdxN, Scale); + } } } @@ -3991,8 +3917,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); auto &DL = DAG.getDataLayout(); uint64_t TySize = DL.getTypeAllocSize(Ty); - unsigned Align = - std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment()); + MaybeAlign Alignment = std::max(DL.getPrefTypeAlign(Ty), I.getAlign()); SDValue AllocSize = getValue(I.getArraySize()); @@ -4007,25 +3932,26 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. - unsigned StackAlign = - DAG.getSubtarget().getFrameLowering()->getStackAlignment(); - if (Align <= StackAlign) - Align = 0; + Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign(); + if (*Alignment <= StackAlign) + Alignment = None; + const uint64_t StackAlignMask = StackAlign.value() - 1U; // Round the size of the allocation up to the stack alignment size // by add SA-1 to the size. This doesn't overflow because we're computing // an address inside an alloca. SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, - DAG.getConstant(StackAlign - 1, dl, IntPtr), Flags); + DAG.getConstant(StackAlignMask, dl, IntPtr), Flags); // Mask out the low bits for alignment purposes. - AllocSize = - DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize, - DAG.getConstant(~(uint64_t)(StackAlign - 1), dl, IntPtr)); + AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize, + DAG.getConstant(~StackAlignMask, dl, IntPtr)); - SDValue Ops[] = {getRoot(), AllocSize, DAG.getConstant(Align, dl, IntPtr)}; + SDValue Ops[] = { + getRoot(), AllocSize, + DAG.getConstant(Alignment ? 
Alignment->value() : 0, dl, IntPtr)}; SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops); setValue(&I, DSA); @@ -4057,13 +3983,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SDValue Ptr = getValue(SV); Type *Ty = I.getType(); - - bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.hasMetadata(LLVMContext::MD_nontemporal); - bool isInvariant = I.hasMetadata(LLVMContext::MD_invariant_load); - bool isDereferenceable = - isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout()); - unsigned Alignment = I.getAlignment(); + Align Alignment = I.getAlign(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); @@ -4076,6 +3996,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (NumValues == 0) return; + bool isVolatile = I.isVolatile(); + SDValue Root; bool ConstantMemory = false; if (isVolatile) @@ -4109,6 +4031,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SmallVector<SDValue, 4> Values(NumValues); SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); EVT PtrVT = Ptr.getValueType(); + + MachineMemOperand::Flags MMOFlags + = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout()); + unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // Serializing loads here may result in excessive register pressure, and @@ -4128,16 +4054,6 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { PtrVT, Ptr, DAG.getConstant(Offsets[i], dl, PtrVT), Flags); - auto MMOFlags = MachineMemOperand::MONone; - if (isVolatile) - MMOFlags |= MachineMemOperand::MOVolatile; - if (isNonTemporal) - MMOFlags |= MachineMemOperand::MONonTemporal; - if (isInvariant) - MMOFlags |= MachineMemOperand::MOInvariant; - if (isDereferenceable) - MMOFlags |= MachineMemOperand::MODereferenceable; - MMOFlags |= TLI.getMMOFlags(I); SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), Alignment, @@ -4260,16 +4176,11 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot(); SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); SDLoc dl = getCurSDLoc(); - unsigned Alignment = I.getAlignment(); + Align Alignment = I.getAlign(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); - auto MMOFlags = MachineMemOperand::MONone; - if (I.isVolatile()) - MMOFlags |= MachineMemOperand::MOVolatile; - if (I.hasMetadata(LLVMContext::MD_nontemporal)) - MMOFlags |= MachineMemOperand::MONonTemporal; - MMOFlags |= TLI.getMMOFlags(I); + auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout()); // An aggregate load cannot wrap around the address space, so offsets to its // parts don't wrap either. 
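The visitLoad/visitStore hunks above replace hand-assembled MachineMemOperand flags with single TLI queries (getLoadMemOperandFlags / getStoreMemOperandFlags). As a minimal sketch of what the load-side query centralizes, reconstructed from the deleted lines (the free function and its name are illustrative, not the actual hook):

#include "llvm/Analysis/Loads.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

static MachineMemOperand::Flags
loadMMOFlagsSketch(const LoadInst &I, const DataLayout &DL) {
  // Mirrors the per-load logic deleted above; MOLoad itself is attached
  // when the load node is built.
  auto Flags = MachineMemOperand::MONone;
  if (I.isVolatile())
    Flags |= MachineMemOperand::MOVolatile;
  if (I.hasMetadata(LLVMContext::MD_nontemporal))
    Flags |= MachineMemOperand::MONonTemporal;
  if (I.hasMetadata(LLVMContext::MD_invariant_load))
    Flags |= MachineMemOperand::MOInvariant;
  if (isDereferenceablePointer(I.getPointerOperand(), I.getType(), DL))
    Flags |= MachineMemOperand::MODereferenceable;
  return Flags; // the real hook also ORs in target-specific flags
}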
@@ -4304,25 +4215,25 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, bool IsCompressing) { SDLoc sdl = getCurSDLoc(); - auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, - unsigned& Alignment) { + auto getMaskedStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, + MaybeAlign &Alignment) { // llvm.masked.store.*(Src0, Ptr, alignment, Mask) Src0 = I.getArgOperand(0); Ptr = I.getArgOperand(1); - Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue(); + Alignment = cast<ConstantInt>(I.getArgOperand(2))->getMaybeAlignValue(); Mask = I.getArgOperand(3); }; - auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, - unsigned& Alignment) { + auto getCompressingStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, + MaybeAlign &Alignment) { // llvm.masked.compressstore.*(Src0, Ptr, Mask) Src0 = I.getArgOperand(0); Ptr = I.getArgOperand(1); Mask = I.getArgOperand(2); - Alignment = 0; + Alignment = None; }; Value *PtrOperand, *MaskOperand, *Src0Operand; - unsigned Alignment; + MaybeAlign Alignment; if (IsCompressing) getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment); else @@ -4335,19 +4246,16 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, EVT VT = Src0.getValueType(); if (!Alignment) - Alignment = DAG.getEVTAlignment(VT); + Alignment = DAG.getEVTAlign(VT); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); - MachineMemOperand *MMO = - DAG.getMachineFunction(). - getMachineMemOperand(MachinePointerInfo(PtrOperand), - MachineMemOperand::MOStore, - // TODO: Make MachineMemOperands aware of scalable - // vectors. - VT.getStoreSize().getKnownMinSize(), - Alignment, AAInfo); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, + // TODO: Make MachineMemOperands aware of scalable + // vectors. + VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo); SDValue StoreNode = DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO, ISD::UNINDEXED, false /* Truncating */, IsCompressing); @@ -4370,78 +4278,51 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, // are looking for. If first operand of the GEP is a splat vector - we // extract the splat value and use it as a uniform base. // In all other cases the function returns 'false'. -static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index, +static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index, ISD::MemIndexType &IndexType, SDValue &Scale, - SelectionDAGBuilder *SDB) { + SelectionDAGBuilder *SDB, const BasicBlock *CurBB) { SelectionDAG& DAG = SDB->DAG; - LLVMContext &Context = *DAG.getContext(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type"); - const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); - if (!GEP) - return false; - const Value *GEPPtr = GEP->getPointerOperand(); - if (!GEPPtr->getType()->isVectorTy()) - Ptr = GEPPtr; - else if (!(Ptr = getSplatValue(GEPPtr))) - return false; - - unsigned FinalIndex = GEP->getNumOperands() - 1; - Value *IndexVal = GEP->getOperand(FinalIndex); - gep_type_iterator GTI = gep_type_begin(*GEP); - - // Ensure all the other indices are 0. - for (unsigned i = 1; i < FinalIndex; ++i, ++GTI) { - auto *C = dyn_cast<Constant>(GEP->getOperand(i)); + // Handle splat constant pointer. 
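// For example, a gather whose pointer operand is the constant splat
//   <4 x i32*> <i32* @g, i32* @g, i32* @g, i32* @g>
// is handled here as Base = @g, an all-zero index vector, and scale 1
// (example IR, illustrative).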
+ if (auto *C = dyn_cast<Constant>(Ptr)) { + C = C->getSplatValue(); if (!C) return false; - if (isa<VectorType>(C->getType())) - C = C->getSplatValue(); - auto *CI = dyn_cast_or_null<ConstantInt>(C); - if (!CI || !CI->isZero()) - return false; + + Base = SDB->getValue(C); + + unsigned NumElts = cast<FixedVectorType>(Ptr->getType())->getNumElements(); + EVT VT = EVT::getVectorVT(*DAG.getContext(), TLI.getPointerTy(DL), NumElts); + Index = DAG.getConstant(0, SDB->getCurSDLoc(), VT); + IndexType = ISD::SIGNED_SCALED; + Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL)); + return true; } - // The operands of the GEP may be defined in another basic block. - // In this case we'll not find nodes for the operands. - if (!SDB->findValue(Ptr)) + const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); + if (!GEP || GEP->getParent() != CurBB) return false; - Constant *C = dyn_cast<Constant>(IndexVal); - if (!C && !SDB->findValue(IndexVal)) + + if (GEP->getNumOperands() != 2) return false; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - const DataLayout &DL = DAG.getDataLayout(); - StructType *STy = GTI.getStructTypeOrNull(); - - if (STy) { - const StructLayout *SL = DL.getStructLayout(STy); - if (isa<VectorType>(C->getType())) { - C = C->getSplatValue(); - // FIXME: If getSplatValue may return nullptr for a structure? - // If not, the following check can be removed. - if (!C) - return false; - } - auto *CI = cast<ConstantInt>(C); - Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL)); - Index = DAG.getConstant(SL->getElementOffset(CI->getZExtValue()), - SDB->getCurSDLoc(), TLI.getPointerTy(DL)); - } else { - Scale = DAG.getTargetConstant( - DL.getTypeAllocSize(GEP->getResultElementType()), - SDB->getCurSDLoc(), TLI.getPointerTy(DL)); - Index = SDB->getValue(IndexVal); - } - Base = SDB->getValue(Ptr); - IndexType = ISD::SIGNED_SCALED; + const Value *BasePtr = GEP->getPointerOperand(); + const Value *IndexVal = GEP->getOperand(GEP->getNumOperands() - 1); - if (STy || !Index.getValueType().isVector()) { - unsigned GEPWidth = GEP->getType()->getVectorNumElements(); - EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); - Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index); - } + // Make sure the base is scalar and the index is a vector. + if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy()) + return false; + + Base = SDB->getValue(BasePtr); + Index = SDB->getValue(IndexVal); + IndexType = ISD::SIGNED_SCALED; + Scale = DAG.getTargetConstant( + DL.getTypeAllocSize(GEP->getResultElementType()), + SDB->getCurSDLoc(), TLI.getPointerTy(DL)); return true; } @@ -4453,9 +4334,9 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDValue Src0 = getValue(I.getArgOperand(0)); SDValue Mask = getValue(I.getArgOperand(3)); EVT VT = Src0.getValueType(); - unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue(); - if (!Alignment) - Alignment = DAG.getEVTAlignment(VT); + Align Alignment = cast<ConstantInt>(I.getArgOperand(2)) + ->getMaybeAlignValue() + .getValueOr(DAG.getEVTAlign(VT)); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); AAMDNodes AAInfo; @@ -4465,18 +4346,15 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDValue Index; ISD::MemIndexType IndexType; SDValue Scale; - const Value *BasePtr = Ptr; - bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale, - this); - - const Value *MemOpBasePtr = UniformBase ? 
BasePtr : nullptr; - MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MachinePointerInfo(MemOpBasePtr), - MachineMemOperand::MOStore, - // TODO: Make MachineMemOperands aware of scalable - // vectors. - VT.getStoreSize().getKnownMinSize(), - Alignment, AAInfo); + bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this, + I.getParent()); + + unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(AS), MachineMemOperand::MOStore, + // TODO: Make MachineMemOperands aware of scalable + // vectors. + MemoryLocation::UnknownSize, Alignment, AAInfo); if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); @@ -4493,25 +4371,25 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDLoc sdl = getCurSDLoc(); - auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, - unsigned& Alignment) { + auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, + MaybeAlign &Alignment) { // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) Ptr = I.getArgOperand(0); - Alignment = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); + Alignment = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue(); Mask = I.getArgOperand(2); Src0 = I.getArgOperand(3); }; - auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, - unsigned& Alignment) { + auto getExpandingLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, + MaybeAlign &Alignment) { // @llvm.masked.expandload.*(Ptr, Mask, Src0) Ptr = I.getArgOperand(0); - Alignment = 0; + Alignment = None; Mask = I.getArgOperand(1); Src0 = I.getArgOperand(2); }; Value *PtrOperand, *MaskOperand, *Src0Operand; - unsigned Alignment; + MaybeAlign Alignment; if (IsExpanding) getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment); else @@ -4524,7 +4402,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { EVT VT = Src0.getValueType(); if (!Alignment) - Alignment = DAG.getEVTAlignment(VT); + Alignment = DAG.getEVTAlign(VT); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); @@ -4542,14 +4420,11 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); - MachineMemOperand *MMO = - DAG.getMachineFunction(). - getMachineMemOperand(MachinePointerInfo(PtrOperand), - MachineMemOperand::MOLoad, - // TODO: Make MachineMemOperands aware of scalable - // vectors. - VT.getStoreSize().getKnownMinSize(), - Alignment, AAInfo, Ranges); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, + // TODO: Make MachineMemOperands aware of scalable + // vectors. 
+ VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges); SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO, @@ -4569,9 +4444,9 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); - if (!Alignment) - Alignment = DAG.getEVTAlignment(VT); + Align Alignment = cast<ConstantInt>(I.getArgOperand(1)) + ->getMaybeAlignValue() + .getValueOr(DAG.getEVTAlign(VT)); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); @@ -4582,29 +4457,14 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDValue Index; ISD::MemIndexType IndexType; SDValue Scale; - const Value *BasePtr = Ptr; - bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale, - this); - bool ConstantMemory = false; - if (UniformBase && AA && - AA->pointsToConstantMemory( - MemoryLocation(BasePtr, - LocationSize::precise( - DAG.getDataLayout().getTypeStoreSize(I.getType())), - AAInfo))) { - // Do not serialize (non-volatile) loads of constant memory with anything. - Root = DAG.getEntryNode(); - ConstantMemory = true; - } - - MachineMemOperand *MMO = - DAG.getMachineFunction(). - getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr), - MachineMemOperand::MOLoad, - // TODO: Make MachineMemOperands aware of scalable - // vectors. - VT.getStoreSize().getKnownMinSize(), - Alignment, AAInfo, Ranges); + bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this, + I.getParent()); + unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(AS), MachineMemOperand::MOLoad, + // TODO: Make MachineMemOperands aware of scalable + // vectors. 
+ MemoryLocation::UnknownSize, Alignment, AAInfo, Ranges); if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); @@ -4616,9 +4476,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, Ops, MMO, IndexType); - SDValue OutChain = Gather.getValue(1); - if (!ConstantMemory) - PendingLoads.push_back(OutChain); + PendingLoads.push_back(Gather.getValue(1)); setValue(&I, Gather); } @@ -4633,19 +4491,14 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType(); SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other); - auto Alignment = DAG.getEVTAlignment(MemVT); - - auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; - if (I.isVolatile()) - Flags |= MachineMemOperand::MOVolatile; - Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), - Flags, MemVT.getStoreSize(), Alignment, - AAMDNodes(), nullptr, SSID, SuccessOrdering, - FailureOrdering); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(), + DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, SuccessOrdering, + FailureOrdering); SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain, @@ -4684,18 +4537,13 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { SDValue InChain = getRoot(); auto MemVT = getValue(I.getValOperand()).getSimpleValueType(); - auto Alignment = DAG.getEVTAlignment(MemVT); - - auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; - if (I.isVolatile()) - Flags |= MachineMemOperand::MOVolatile; - Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags, - MemVT.getStoreSize(), Alignment, AAMDNodes(), - nullptr, SSID, Ordering); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(), + DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, Ordering); SDValue L = DAG.getAtomic(NT, dl, MemVT, InChain, @@ -4735,24 +4583,11 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { I.getAlignment() < MemVT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); - auto Flags = MachineMemOperand::MOLoad; - if (I.isVolatile()) - Flags |= MachineMemOperand::MOVolatile; - if (I.hasMetadata(LLVMContext::MD_invariant_load)) - Flags |= MachineMemOperand::MOInvariant; - if (isDereferenceablePointer(I.getPointerOperand(), I.getType(), - DAG.getDataLayout())) - Flags |= MachineMemOperand::MODereferenceable; - - Flags |= TLI.getMMOFlags(I); - - MachineMemOperand *MMO = - DAG.getMachineFunction(). - getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), - Flags, MemVT.getStoreSize(), - I.getAlignment() ? 
I.getAlignment() : - DAG.getEVTAlignment(MemVT), - AAMDNodes(), nullptr, SSID, Order); + auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout()); + + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(), + I.getAlign(), AAMDNodes(), nullptr, SSID, Order); InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); @@ -4773,7 +4608,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { PendingLoads.push_back(OutChain); return; } - + SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain, Ptr, MMO); @@ -4800,16 +4635,12 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { if (I.getAlignment() < MemVT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); - auto Flags = MachineMemOperand::MOStore; - if (I.isVolatile()) - Flags |= MachineMemOperand::MOVolatile; - Flags |= TLI.getMMOFlags(I); + auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags, - MemVT.getStoreSize(), I.getAlignment(), AAMDNodes(), - nullptr, SSID, Ordering); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(), + I.getAlign(), AAMDNodes(), nullptr, SSID, Ordering); SDValue Val = getValue(I.getValueOperand()); if (Val.getValueType() != MemVT) @@ -4899,10 +4730,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // This is target intrinsic that touches memory AAMDNodes AAInfo; I.getAAMetadata(AAInfo); - Result = DAG.getMemIntrinsicNode( - Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, - MachinePointerInfo(Info.ptrVal, Info.offset), - Info.align ? Info.align->value() : 0, Info.flags, Info.size, AAInfo); + Result = + DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, + MachinePointerInfo(Info.ptrVal, Info.offset), + Info.align, Info.flags, Info.size, AAInfo); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { @@ -4926,6 +4757,15 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } else Result = lowerRangeToAssertZExt(DAG, I, Result); + MaybeAlign Alignment = I.getRetAlign(); + if (!Alignment) + Alignment = F->getAttributes().getRetAlignment(); + // Insert `assertalign` node if there's an alignment. + if (InsertAssertAlign && Alignment) { + Result = + DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne()); + } + setValue(&I, Result); } } @@ -5465,7 +5305,8 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue Scale, SelectionDAG &DAG, const TargetLowering &TLI) { EVT VT = LHS.getValueType(); - bool Signed = Opcode == ISD::SDIVFIX; + bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT; + bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT; LLVMContext &Ctx = *DAG.getContext(); // If the type is legal but the operation isn't, this node might survive all @@ -5477,14 +5318,16 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL, // by bumping the size by one bit. This will force it to Promote, enabling the // early expansion and avoiding the need to expand later. - // We don't have to do this if Scale is 0; that can always be expanded. 
+ // We don't have to do this if Scale is 0; that can always be expanded, unless + // it's a saturating signed operation. Those can experience true integer + // division overflow, a case which we must avoid. // FIXME: We wouldn't have to do this (or any of the early // expansion/promotion) if it was possible to expand a libcall of an // illegal type during operation legalization. But it's not, so things // get a bit hacky. unsigned ScaleInt = cast<ConstantSDNode>(Scale)->getZExtValue(); - if (ScaleInt > 0 && + if ((ScaleInt > 0 || (Saturating && Signed)) && (TLI.isTypeLegal(VT) || (VT.isVector() && TLI.isTypeLegal(VT.getVectorElementType())))) { TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction( @@ -5506,8 +5349,16 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL, LHS = DAG.getZExtOrTrunc(LHS, DL, PromVT); RHS = DAG.getZExtOrTrunc(RHS, DL, PromVT); } - // TODO: Saturation. + EVT ShiftTy = TLI.getShiftAmountTy(PromVT, DAG.getDataLayout()); + // For saturating operations, we need to shift up the LHS to get the + // proper saturation width, and then shift down again afterwards. + if (Saturating) + LHS = DAG.getNode(ISD::SHL, DL, PromVT, LHS, + DAG.getConstant(1, DL, ShiftTy)); SDValue Res = DAG.getNode(Opcode, DL, PromVT, LHS, RHS, Scale); + if (Saturating) + Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, PromVT, Res, + DAG.getConstant(1, DL, ShiftTy)); return DAG.getZExtOrTrunc(Res, DL, VT); } } @@ -5699,7 +5550,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( }; // Check if ValueMap has reg number. - DenseMap<const Value *, unsigned>::const_iterator + DenseMap<const Value *, Register>::const_iterator VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) { const auto &TLI = DAG.getTargetLoweringInfo(); @@ -5771,6 +5622,10 @@ static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) { return ISD::SDIVFIX; case Intrinsic::udiv_fix: return ISD::UDIVFIX; + case Intrinsic::sdiv_fix_sat: + return ISD::SDIVFIXSAT; + case Intrinsic::udiv_fix_sat: + return ISD::UDIVFIXSAT; default: llvm_unreachable("Unhandled fixed point intrinsic"); } @@ -5782,7 +5637,24 @@ void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I, SDValue Callee = DAG.getExternalSymbol( FunctionName, DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); - LowerCallTo(&I, Callee, I.isTailCall()); + LowerCallTo(I, Callee, I.isTailCall()); +} + +/// Given a @llvm.call.preallocated.setup, return the corresponding +/// preallocated call. +static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) { + assert(cast<CallBase>(PreallocatedSetup) + ->getCalledFunction() + ->getIntrinsicID() == Intrinsic::call_preallocated_setup && + "expected call_preallocated_setup Value"); + for (auto *U : PreallocatedSetup->users()) { + auto *UseCall = cast<CallBase>(U); + const Function *Fn = UseCall->getCalledFunction(); + if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) { + return UseCall; + } + } + llvm_unreachable("expected corresponding call to preallocated setup/arg"); } /// Lower the call to the specified intrinsic function. @@ -5798,6 +5670,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // By default, turn this into a target intrinsic node. 
visitTargetIntrinsic(I, Intrinsic); return; + case Intrinsic::vscale: { + match(&I, m_VScale(DAG.getDataLayout())); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + setValue(&I, + DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1))); + return; + } case Intrinsic::vastart: visitVAStart(I); return; case Intrinsic::vaend: visitVAEnd(I); return; case Intrinsic::vacopy: visitVACopy(I); return; @@ -5819,6 +5698,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, TLI.getFrameIndexTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return; + case Intrinsic::read_volatile_register: case Intrinsic::read_register: { Value *Reg = I.getArgOperand(0); SDValue Chain = getRoot(); @@ -5847,16 +5727,37 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); // @llvm.memcpy defines 0 and 1 to both mean no alignment. - unsigned DstAlign = std::max<unsigned>(MCI.getDestAlignment(), 1); - unsigned SrcAlign = std::max<unsigned>(MCI.getSourceAlignment(), 1); - unsigned Align = MinAlign(DstAlign, SrcAlign); + Align DstAlign = MCI.getDestAlign().valueOrOne(); + Align SrcAlign = MCI.getSourceAlign().valueOrOne(); + Align Alignment = commonAlignment(DstAlign, SrcAlign); bool isVol = MCI.isVolatile(); - bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memcpy DAG // node. SDValue Root = isVol ? getRoot() : getMemoryRoot(); - SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Align, isVol, - false, isTC, + SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol, + /* AlwaysInline */ false, isTC, + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + updateDAGForMaybeTailCall(MC); + return; + } + case Intrinsic::memcpy_inline: { + const auto &MCI = cast<MemCpyInlineInst>(I); + SDValue Dst = getValue(I.getArgOperand(0)); + SDValue Src = getValue(I.getArgOperand(1)); + SDValue Size = getValue(I.getArgOperand(2)); + assert(isa<ConstantSDNode>(Size) && "memcpy_inline needs constant size"); + // @llvm.memcpy.inline defines 0 and 1 to both mean no alignment. + Align DstAlign = MCI.getDestAlign().valueOrOne(); + Align SrcAlign = MCI.getSourceAlign().valueOrOne(); + Align Alignment = commonAlignment(DstAlign, SrcAlign); + bool isVol = MCI.isVolatile(); + bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); + // FIXME: Support passing different dest/src alignments to the memcpy DAG + // node. + SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol, + /* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); updateDAGForMaybeTailCall(MC); @@ -5868,12 +5769,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); // @llvm.memset defines 0 and 1 to both mean no alignment. - unsigned Align = std::max<unsigned>(MSI.getDestAlignment(), 1); + Align Alignment = MSI.getDestAlign().valueOrOne(); bool isVol = MSI.isVolatile(); - bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); SDValue Root = isVol ? 
getRoot() : getMemoryRoot(); - SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Align, isVol, - isTC, MachinePointerInfo(I.getArgOperand(0))); + SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC, + MachinePointerInfo(I.getArgOperand(0))); updateDAGForMaybeTailCall(MS); return; } @@ -5883,15 +5784,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); // @llvm.memmove defines 0 and 1 to both mean no alignment. - unsigned DstAlign = std::max<unsigned>(MMI.getDestAlignment(), 1); - unsigned SrcAlign = std::max<unsigned>(MMI.getSourceAlignment(), 1); - unsigned Align = MinAlign(DstAlign, SrcAlign); + Align DstAlign = MMI.getDestAlign().valueOrOne(); + Align SrcAlign = MMI.getSourceAlign().valueOrOne(); + Align Alignment = commonAlignment(DstAlign, SrcAlign); bool isVol = MMI.isVolatile(); - bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memmove DAG // node. SDValue Root = isVol ? getRoot() : getMemoryRoot(); - SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Align, isVol, + SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); updateDAGForMaybeTailCall(MM); @@ -5907,7 +5808,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned SrcAlign = MI.getSourceAlignment(); Type *LengthTy = MI.getLength()->getType(); unsigned ElemSz = MI.getElementSizeInBytes(); - bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); SDValue MC = DAG.getAtomicMemcpy(getRoot(), sdl, Dst, DstAlign, Src, SrcAlign, Length, LengthTy, ElemSz, isTC, MachinePointerInfo(MI.getRawDest()), @@ -5925,7 +5826,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned SrcAlign = MI.getSourceAlignment(); Type *LengthTy = MI.getLength()->getType(); unsigned ElemSz = MI.getElementSizeInBytes(); - bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); SDValue MC = DAG.getAtomicMemmove(getRoot(), sdl, Dst, DstAlign, Src, SrcAlign, Length, LengthTy, ElemSz, isTC, MachinePointerInfo(MI.getRawDest()), @@ -5942,13 +5843,37 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned DstAlign = MI.getDestAlignment(); Type *LengthTy = MI.getLength()->getType(); unsigned ElemSz = MI.getElementSizeInBytes(); - bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); SDValue MC = DAG.getAtomicMemset(getRoot(), sdl, Dst, DstAlign, Val, Length, LengthTy, ElemSz, isTC, MachinePointerInfo(MI.getRawDest())); updateDAGForMaybeTailCall(MC); return; } + case Intrinsic::call_preallocated_setup: { + const CallBase *PreallocatedCall = FindPreallocatedCall(&I); + SDValue SrcValue = DAG.getSrcValue(PreallocatedCall); + SDValue Res = DAG.getNode(ISD::PREALLOCATED_SETUP, sdl, MVT::Other, + getRoot(), SrcValue); + setValue(&I, Res); + DAG.setRoot(Res); + return; + } + case Intrinsic::call_preallocated_arg: { + const CallBase *PreallocatedCall = FindPreallocatedCall(I.getOperand(0)); + SDValue SrcValue = DAG.getSrcValue(PreallocatedCall); + 
SDValue Ops[3]; + Ops[0] = getRoot(); + Ops[1] = SrcValue; + Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)), sdl, + MVT::i32); // arg index + SDValue Res = DAG.getNode( + ISD::PREALLOCATED_ARG, sdl, + DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Ops); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); + return; + } case Intrinsic::dbg_addr: case Intrinsic::dbg_declare: { const auto &DI = cast<DbgVariableIntrinsic>(I); @@ -5956,12 +5881,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DIExpression *Expression = DI.getExpression(); dropDanglingDebugInfo(Variable, Expression); assert(Variable && "Missing variable"); - + LLVM_DEBUG(dbgs() << "SelectionDAG visiting debug intrinsic: " << DI + << "\n"); // Check if address has undef value. const Value *Address = DI.getVariableLocation(); if (!Address || isa<UndefValue>(Address) || (Address->use_empty() && !isa<Argument>(Address))) { - LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI + << " (bad/undef/unused-arg address)\n"); return; } @@ -5990,6 +5917,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDDbgValue *SDV = DAG.getFrameIndexDbgValue( Variable, Expression, FI, /*IsIndirect*/ true, dl, SDNodeOrder); DAG.AddDbgValue(SDV, getRoot().getNode(), isParameter); + } else { + LLVM_DEBUG(dbgs() << "Skipping " << DI + << " (variable info stashed in MF side table)\n"); } return; } @@ -6024,7 +5954,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // virtual register info from the FuncInfo.ValueMap. if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N)) { - LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI + << " (could not emit func-arg dbg_value)\n"); } } return; @@ -6176,6 +6107,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::rint: case Intrinsic::nearbyint: case Intrinsic::round: + case Intrinsic::roundeven: case Intrinsic::canonicalize: { unsigned Opcode; switch (Intrinsic) { @@ -6190,6 +6122,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::rint: Opcode = ISD::FRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; case Intrinsic::round: Opcode = ISD::FROUND; break; + case Intrinsic::roundeven: Opcode = ISD::FROUNDEVEN; break; case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break; } @@ -6253,7 +6186,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); return; -#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ case Intrinsic::INTRINSIC: #include "llvm/IR/ConstrainedOps.def" visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I)); @@ -6440,7 +6373,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; } case Intrinsic::sdiv_fix: - case Intrinsic::udiv_fix: { + case Intrinsic::udiv_fix: + case Intrinsic::sdiv_fix_sat: + case Intrinsic::udiv_fix_sat: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); @@ -6450,9 +6385,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } case Intrinsic::stacksave: { SDValue Op = getRoot(); - Res = DAG.getNode( - ISD::STACKSAVE, sdl, - 
DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + Res = DAG.getNode(ISD::STACKSAVE, sdl, DAG.getVTList(VT, MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return; @@ -6463,7 +6397,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; case Intrinsic::get_dynamic_area_offset: { SDValue Op = getRoot(); - EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); + EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout()); EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); // Result type for @llvm.get.dynamic.area.offset should match PtrTy for // target. @@ -6477,13 +6411,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; } case Intrinsic::stackguard: { - EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); const Module &M = *MF.getFunction().getParent(); SDValue Chain = getRoot(); if (TLI.useLoadStackGuardNode()) { Res = getLoadStackGuard(DAG, sdl, Chain); } else { + EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); const Value *Global = TLI.getSDagStackGuard(M); unsigned Align = DL->getPrefTypeAlignment(Global->getType()); Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global), @@ -6500,7 +6434,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // Emit code into the DAG to store the stack guard onto the stack. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); - EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); SDValue Src, Chain = getRoot(); if (TLI.useLoadStackGuardNode()) @@ -6512,6 +6445,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, int FI = FuncInfo.StaticAllocaMap[Slot]; MFI.setStackProtectorIndex(FI); + EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout()); SDValue FIN = DAG.getFrameIndex(FI, PtrTy); @@ -6590,7 +6524,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); case Intrinsic::flt_rounds: - setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); + Res = DAG.getNode(ISD::FLT_ROUNDS_, sdl, {MVT::i32, MVT::Other}, getRoot()); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); return; case Intrinsic::expect: @@ -6662,12 +6598,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); Ops[4] = getValue(I.getArgOperand(3)); - SDValue Result = DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl, - DAG.getVTList(MVT::Other), Ops, - EVT::getIntegerVT(*Context, 8), - MachinePointerInfo(I.getArgOperand(0)), - 0, /* align */ - Flags); + SDValue Result = DAG.getMemIntrinsicNode( + ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops, + EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), + /* align */ None, Flags); // Chain the prefetch in parallell with any pending loads, to stay out of // the way of later optimizations. 
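The expandDivFix changes above extend the fixed-point division path to the new saturating intrinsics: signed saturating division can genuinely overflow (the comment's "true integer division overflow" case), so even Scale == 0 takes the promotion path, which shifts the LHS up one bit before the wider operation and back down afterwards. As a scalar model of what @llvm.sdiv.fix.sat.i32 computes, a minimal C++ sketch (assumes b != 0 and scale < 32, and uses truncating division; the LangRef is authoritative on rounding):

#include <algorithm>
#include <cstdint>

// Divide two signed fixed-point numbers with 'scale' fractional bits and
// clamp to the i32 range; e.g. sdiv_fix_sat_i32(INT32_MIN, -1, 0) yields
// INT32_MAX instead of overflowing.
int32_t sdiv_fix_sat_i32(int32_t a, int32_t b, unsigned scale) {
  int64_t q = (static_cast<int64_t>(a) << scale) / b;
  q = std::max<int64_t>(q, INT32_MIN);
  q = std::min<int64_t>(q, INT32_MAX);
  return static_cast<int32_t>(q);
}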
@@ -6734,10 +6668,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: - visitPatchpoint(&I); + visitPatchpoint(I); return; case Intrinsic::experimental_gc_statepoint: - LowerStatepoint(ImmutableStatepoint(&I)); + LowerStatepoint(cast<GCStatepointInst>(I)); return; case Intrinsic::experimental_gc_result: visitGCResult(cast<GCResultInst>(I)); @@ -6778,7 +6712,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::localrecover: { // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx) MachineFunction &MF = DAG.getMachineFunction(); - MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout(), 0); // Get the symbol that defines the frame offset. auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); @@ -6789,6 +6722,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, MF.getMMI().getContext().getOrCreateFrameAllocSymbol( GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal); + Value *FP = I.getArgOperand(1); + SDValue FPVal = getValue(FP); + EVT PtrVT = FPVal.getValueType(); + // Create a MCSymbol for the label to avoid any target lowering // that would make this PC relative. SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT); @@ -6796,8 +6733,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym); // Add the offset to the FP. - Value *FP = I.getArgOperand(1); - SDValue FPVal = getValue(FP); SDValue Add = DAG.getMemBasePlusOffset(FPVal, OffsetVal, sdl); setValue(&I, Add); @@ -6980,11 +6915,42 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Ptr = getValue(I.getOperand(0)); SDValue Const = getValue(I.getOperand(1)); - EVT DestVT = - EVT(DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); + EVT PtrVT = Ptr.getValueType(); + setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), PtrVT, Ptr, + DAG.getZExtOrTrunc(Const, getCurSDLoc(), PtrVT))); + return; + } + case Intrinsic::get_active_lane_mask: { + auto DL = getCurSDLoc(); + SDValue Index = getValue(I.getOperand(0)); + SDValue BTC = getValue(I.getOperand(1)); + Type *ElementTy = I.getOperand(0)->getType(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + unsigned VecWidth = VT.getVectorNumElements(); + + SmallVector<SDValue, 16> OpsBTC; + SmallVector<SDValue, 16> OpsIndex; + SmallVector<SDValue, 16> OpsStepConstants; + for (unsigned i = 0; i < VecWidth; i++) { + OpsBTC.push_back(BTC); + OpsIndex.push_back(Index); + OpsStepConstants.push_back(DAG.getConstant(i, DL, MVT::getVT(ElementTy))); + } + + EVT CCVT = MVT::i1; + CCVT = EVT::getVectorVT(I.getContext(), CCVT, VecWidth); - setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), DestVT, Ptr, - DAG.getZExtOrTrunc(Const, getCurSDLoc(), DestVT))); + auto VecTy = MVT::getVT(FixedVectorType::get(ElementTy, VecWidth)); + SDValue VectorIndex = DAG.getBuildVector(VecTy, DL, OpsIndex); + SDValue VectorStep = DAG.getBuildVector(VecTy, DL, OpsStepConstants); + SDValue VectorInduction = DAG.getNode( + ISD::UADDO, DL, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep); + SDValue VectorBTC = DAG.getBuildVector(VecTy, DL, OpsBTC); + SDValue SetCC = DAG.getSetCC(DL, CCVT, VectorInduction.getValue(0), + VectorBTC, ISD::CondCode::SETULE); + setValue(&I, DAG.getNode(ISD::AND, DL, CCVT, + DAG.getNOT(DL, VectorInduction.getValue(1), CCVT), + SetCC)); return; } } @@ -7016,14 +6982,67 @@ void 
SelectionDAGBuilder::visitConstrainedFPIntrinsic( Opers.push_back(getValue(FPI.getArgOperand(1))); } + auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) { + assert(Result.getNode()->getNumValues() == 2); + + // Push node to the appropriate list so that future instructions can be + // chained up correctly. + SDValue OutChain = Result.getValue(1); + switch (EB) { + case fp::ExceptionBehavior::ebIgnore: + // The only reason why ebIgnore nodes still need to be chained is that + // they might depend on the current rounding mode, and therefore must + // not be moved across instruction that may change that mode. + LLVM_FALLTHROUGH; + case fp::ExceptionBehavior::ebMayTrap: + // These must not be moved across calls or instructions that may change + // floating-point exception masks. + PendingConstrainedFP.push_back(OutChain); + break; + case fp::ExceptionBehavior::ebStrict: + // These must not be moved across calls or instructions that may change + // floating-point exception masks or read floating-point exception flags. + // In addition, they cannot be optimized out even if unused. + PendingConstrainedFPStrict.push_back(OutChain); + break; + } + }; + + SDVTList VTs = DAG.getVTList(ValueVTs); + fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue(); + + SDNodeFlags Flags; + if (EB == fp::ExceptionBehavior::ebIgnore) + Flags.setNoFPExcept(true); + + if (auto *FPOp = dyn_cast<FPMathOperator>(&FPI)) + Flags.copyFMF(*FPOp); + unsigned Opcode; switch (FPI.getIntrinsicID()) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. -#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ +#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case Intrinsic::INTRINSIC: \ Opcode = ISD::STRICT_##DAGN; \ break; #include "llvm/IR/ConstrainedOps.def" + case Intrinsic::experimental_constrained_fmuladd: { + Opcode = ISD::STRICT_FMA; + // Break fmuladd into fmul and fadd. + if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict || + !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), + ValueVTs[0])) { + Opers.pop_back(); + SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags); + pushOutChain(Mul, EB); + Opcode = ISD::STRICT_FADD; + Opers.clear(); + Opers.push_back(Mul.getValue(1)); + Opers.push_back(Mul.getValue(0)); + Opers.push_back(getValue(FPI.getArgOperand(2))); + } + break; + } } // A few strict DAG nodes carry additional operands that are not @@ -7042,32 +7061,8 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( } } - SDVTList VTs = DAG.getVTList(ValueVTs); - SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers); - - assert(Result.getNode()->getNumValues() == 2); - - // Push node to the appropriate list so that future instructions can be - // chained up correctly. - SDValue OutChain = Result.getValue(1); - switch (FPI.getExceptionBehavior().getValue()) { - case fp::ExceptionBehavior::ebIgnore: - // The only reason why ebIgnore nodes still need to be chained is that - // they might depend on the current rounding mode, and therefore must - // not be moved across instruction that may change that mode. - LLVM_FALLTHROUGH; - case fp::ExceptionBehavior::ebMayTrap: - // These must not be moved across calls or instructions that may change - // floating-point exception masks. - PendingConstrainedFP.push_back(OutChain); - break; - case fp::ExceptionBehavior::ebStrict: - // These must not be moved across calls or instructions that may change - // floating-point exception masks or read floating-point exception flags. 
- // In addition, they cannot be optimized out even if unused. - PendingConstrainedFPStrict.push_back(OutChain); - break; - } + SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers, Flags); + pushOutChain(Result, EB); SDValue FPResult = Result.getValue(0); setValue(&FPI, FPResult); @@ -7134,10 +7129,9 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, // There is a platform (e.g. wasm) that uses funclet style IR but does not // actually use outlined funclets and their LSDA info style. if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) { - assert(CLI.CS); + assert(CLI.CB); WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); - EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()), - BeginLabel, EndLabel); + EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CB), BeginLabel, EndLabel); } else if (!isScopedEHPersonality(Pers)) { MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); } @@ -7146,15 +7140,15 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, return Result; } -void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, +void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, bool isTailCall, const BasicBlock *EHPadBB) { auto &DL = DAG.getDataLayout(); - FunctionType *FTy = CS.getFunctionType(); - Type *RetTy = CS.getType(); + FunctionType *FTy = CB.getFunctionType(); + Type *RetTy = CB.getType(); TargetLowering::ArgListTy Args; - Args.reserve(CS.arg_size()); + Args.reserve(CB.arg_size()); const Value *SwiftErrorVal = nullptr; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -7162,7 +7156,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, if (isTailCall) { // Avoid emitting tail calls in functions with the disable-tail-calls // attribute. - auto *Caller = CS.getInstruction()->getParent()->getParent(); + auto *Caller = CB.getParent()->getParent(); if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() == "true") isTailCall = false; @@ -7175,10 +7169,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, isTailCall = false; } - for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); - i != e; ++i) { + for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) { TargetLowering::ArgListEntry Entry; - const Value *V = *i; + const Value *V = *I; // Skip empty types if (V->getType()->isEmptyTy()) @@ -7187,16 +7180,16 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue ArgNode = getValue(V); Entry.Node = ArgNode; Entry.Ty = V->getType(); - Entry.setAttributes(&CS, i - CS.arg_begin()); + Entry.setAttributes(&CB, I - CB.arg_begin()); // Use swifterror virtual register as input to the call. if (Entry.IsSwiftError && TLI.supportSwiftError()) { SwiftErrorVal = V; // We find the virtual register for the actual swifterror argument. // Instead of using the Value, we use the virtual register instead. - Entry.Node = DAG.getRegister( - SwiftError.getOrCreateVRegUseAt(CS.getInstruction(), FuncInfo.MBB, V), - EVT(TLI.getPointerTy(DL))); + Entry.Node = + DAG.getRegister(SwiftError.getOrCreateVRegUseAt(&CB, FuncInfo.MBB, V), + EVT(TLI.getPointerTy(DL))); } Args.push_back(Entry); @@ -7209,7 +7202,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // If call site has a cfguardtarget operand bundle, create and add an // additional ArgListEntry. 
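// For example, a Control Flow Guard dispatch such as
//   call void %guarded() [ "cfguardtarget"(void ()* @real_target) ]
// carries the real callee in the bundle; it is appended below as an extra
// argument entry (example IR, illustrative).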
- if (auto Bundle = CS.getOperandBundle(LLVMContext::OB_cfguardtarget)) { + if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_cfguardtarget)) { TargetLowering::ArgListEntry Entry; Value *V = Bundle->Inputs[0]; SDValue ArgNode = getValue(V); @@ -7221,7 +7214,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Check if target-independent constraints permit a tail call here. // Target-dependent constraints are checked within TLI->LowerCallTo. - if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) + if (isTailCall && !isInTailCallPosition(CB, DAG.getTarget())) isTailCall = false; // Disable tail calls if there is an swifterror argument. Targets have not @@ -7232,15 +7225,16 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()) .setChain(getRoot()) - .setCallee(RetTy, FTy, Callee, std::move(Args), CS) + .setCallee(RetTy, FTy, Callee, std::move(Args), CB) .setTailCall(isTailCall) - .setConvergent(CS.isConvergent()); + .setConvergent(CB.isConvergent()) + .setIsPreallocated( + CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0); std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); if (Result.first.getNode()) { - const Instruction *Inst = CS.getInstruction(); - Result.first = lowerRangeToAssertZExt(DAG, *Inst, Result.first); - setValue(Inst, Result.first); + Result.first = lowerRangeToAssertZExt(DAG, CB, Result.first); + setValue(&CB, Result.first); } // The last element of CLI.InVals has the SDValue for swifterror return. @@ -7249,8 +7243,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, if (SwiftErrorVal && TLI.supportSwiftError()) { // Get the last element of InVals. SDValue Src = CLI.InVals.back(); - Register VReg = SwiftError.getOrCreateVRegDefAt( - CS.getInstruction(), FuncInfo.MBB, SwiftErrorVal); + Register VReg = + SwiftError.getOrCreateVRegDefAt(&CB, FuncInfo.MBB, SwiftErrorVal); SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); DAG.setRoot(CopyNode); } @@ -7265,7 +7259,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, Type *LoadTy = Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits()); if (LoadVT.isVector()) - LoadTy = VectorType::get(LoadTy, LoadVT.getVectorNumElements()); + LoadTy = FixedVectorType::get(LoadTy, LoadVT.getVectorNumElements()); LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), PointerType::getUnqual(LoadTy)); @@ -7439,11 +7433,10 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { SDValue Src = getValue(I.getArgOperand(1)); SDValue Size = getValue(I.getArgOperand(2)); - unsigned DstAlign = DAG.InferPtrAlignment(Dst); - unsigned SrcAlign = DAG.InferPtrAlignment(Src); - unsigned Align = std::min(DstAlign, SrcAlign); - if (Align == 0) // Alignment of one or both could not be inferred. - Align = 1; // 0 and 1 both specify no alignment, but 0 is reserved. + Align DstAlign = DAG.InferPtrAlign(Dst).valueOrOne(); + Align SrcAlign = DAG.InferPtrAlign(Src).valueOrOne(); + // DAG::getMemcpy needs Alignment to be defined. + Align Alignment = std::min(DstAlign, SrcAlign); bool isVol = false; SDLoc sdl = getCurSDLoc(); @@ -7452,8 +7445,8 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { // because the return pointer needs to be adjusted by the size of // the copied memory. SDValue Root = isVol ? 
getRoot() : getMemoryRoot(); - SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Align, isVol, - false, /*isTailCall=*/false, + SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false, + /*isTailCall=*/false, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); assert(MC.getNode() != nullptr && @@ -7595,8 +7588,8 @@ bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, void SelectionDAGBuilder::visitCall(const CallInst &I) { // Handle inline assembly differently. - if (isa<InlineAsm>(I.getCalledValue())) { - visitInlineAsm(&I); + if (I.isInlineAsm()) { + visitInlineAsm(I); return; } @@ -7762,12 +7755,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't // have to do anything here to lower funclet bundles. // CFGuardTarget bundles are lowered in LowerCallTo. - assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt, - LLVMContext::OB_funclet, - LLVMContext::OB_cfguardtarget}) && + assert(!I.hasOperandBundlesOtherThan( + {LLVMContext::OB_deopt, LLVMContext::OB_funclet, + LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated}) && "Cannot lower calls with arbitrary operand bundles!"); - SDValue Callee = getValue(I.getCalledValue()); + SDValue Callee = getValue(I.getCalledOperand()); if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) LowerCallSiteWithDeoptBundle(&I, Callee, nullptr); @@ -7775,7 +7768,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // Check if we can potentially perform a tail call. More detailed checking // is be done within LowerCallTo, after more information about the call is // known. - LowerCallTo(&I, Callee, I.isTailCall()); + LowerCallTo(I, Callee, I.isTailCall()); } namespace { @@ -7818,7 +7811,7 @@ public: if (!CallOperandVal) return MVT::Other; if (isa<BasicBlock>(CallOperandVal)) - return TLI.getPointerTy(DL); + return TLI.getProgramPointerTy(DL); llvm::Type *OpTy = CallOperandVal->getType(); @@ -7858,7 +7851,6 @@ public: } }; -using SDISelAsmOperandInfoVector = SmallVector<SDISelAsmOperandInfo, 16>; } // end anonymous namespace @@ -7920,9 +7912,9 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, Type *Ty = OpVal->getType(); auto &DL = DAG.getDataLayout(); uint64_t TySize = DL.getTypeAllocSize(Ty); - unsigned Align = DL.getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false); + int SSFI = MF.getFrameInfo().CreateStackObject( + TySize, DL.getPrefTypeAlign(Ty), false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL)); Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot, MachinePointerInfo::getFixedStack(MF, SSFI), @@ -8067,13 +8059,13 @@ class ExtraFlags { unsigned Flags = 0; public: - explicit ExtraFlags(ImmutableCallSite CS) { - const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); + explicit ExtraFlags(const CallBase &Call) { + const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand()); if (IA->hasSideEffects()) Flags |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) Flags |= InlineAsm::Extra_IsAlignStack; - if (CS.isConvergent()) + if (Call.isConvergent()) Flags |= InlineAsm::Extra_IsConvergent; Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect; } @@ -8100,23 +8092,24 @@ public: } // end anonymous namespace /// visitInlineAsm - Handle a call to an InlineAsm object. 
-void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { - const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); +void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call) { + const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand()); /// ConstraintOperands - Information about all of the constraints. - SDISelAsmOperandInfoVector ConstraintOperands; + SmallVector<SDISelAsmOperandInfo, 16> ConstraintOperands; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints( - DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS); + DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), Call); // First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack, // AsmDialect, MayLoad, MayStore). bool HasSideEffect = IA->hasSideEffects(); - ExtraFlags ExtraInfo(CS); + ExtraFlags ExtraInfo(Call); unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. unsigned ResNo = 0; // ResNo - The result number of the next output. + unsigned NumMatchingOps = 0; for (auto &T : TargetConstraints) { ConstraintOperands.push_back(SDISelAsmOperandInfo(T)); SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); @@ -8124,14 +8117,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Compute the value type for each operand. if (OpInfo.Type == InlineAsm::isInput || (OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) { - OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++); // Process the call argument. BasicBlocks are labels, currently appearing // only in asm's. - const Instruction *I = CS.getInstruction(); - if (isa<CallBrInst>(I) && - (ArgNo - 1) >= (cast<CallBrInst>(I)->getNumArgOperands() - - cast<CallBrInst>(I)->getNumIndirectDests())) { + if (isa<CallBrInst>(Call) && + ArgNo - 1 >= (cast<CallBrInst>(&Call)->getNumArgOperands() - + cast<CallBrInst>(&Call)->getNumIndirectDests() - + NumMatchingOps) && + (NumMatchingOps == 0 || + ArgNo - 1 < (cast<CallBrInst>(&Call)->getNumArgOperands() - + NumMatchingOps))) { const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal); EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true); OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT); @@ -8148,20 +8144,23 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) { // The return value of the call is this value. As such, there is no // corresponding argument. 
- assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); - if (StructType *STy = dyn_cast<StructType>(CS.getType())) { + assert(!Call.getType()->isVoidTy() && "Bad inline asm!"); + if (StructType *STy = dyn_cast<StructType>(Call.getType())) { OpInfo.ConstraintVT = TLI.getSimpleValueType( DAG.getDataLayout(), STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); OpInfo.ConstraintVT = - TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType()); + TLI.getSimpleValueType(DAG.getDataLayout(), Call.getType()); } ++ResNo; } else { OpInfo.ConstraintVT = MVT::Other; } + if (OpInfo.hasMatchingInput()) + ++NumMatchingOps; + if (!HasSideEffect) HasSideEffect = OpInfo.hasMemory(TLI); @@ -8175,9 +8174,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand)) // We've delayed emitting a diagnostic like the "n" constraint because // inlining could cause an integer showing up. - return emitInlineAsmError( - CS, "constraint '" + Twine(T.ConstraintCode) + "' expects an " - "integer constant expression"); + return emitInlineAsmError(Call, "constraint '" + Twine(T.ConstraintCode) + + "' expects an integer constant " + "expression"); ExtraInfo.update(T); } @@ -8187,7 +8186,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // memory and is nonvolatile. SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot(); - bool IsCallBr = isa<CallBrInst>(CS.getInstruction()); + bool IsCallBr = isa<CallBrInst>(Call); if (IsCallBr) { // If this is a callbr we need to flush pending exports since inlineasm_br // is a terminator. We need to do this before nodes are glued to @@ -8237,12 +8236,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { std::vector<SDValue> AsmNodeOperands; AsmNodeOperands.push_back(SDValue()); // reserve space for input chain AsmNodeOperands.push_back(DAG.getTargetExternalSymbol( - IA->getAsmString().c_str(), TLI.getPointerTy(DAG.getDataLayout()))); + IA->getAsmString().c_str(), TLI.getProgramPointerTy(DAG.getDataLayout()))); // If we have a !srcloc metadata node associated with it, we want to attach // this to the ultimately generated inline asm machineinstr. To do this, we // pass in the third operand as this (potentially null) inline asm MDNode. - const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); + const MDNode *SrcLoc = Call.getMetadata("srcloc"); AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore @@ -8260,6 +8259,21 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { : OpInfo; GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo); + auto DetectWriteToReservedRegister = [&]() { + const MachineFunction &MF = DAG.getMachineFunction(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + for (unsigned Reg : OpInfo.AssignedRegs.Regs) { + if (Register::isPhysicalRegister(Reg) && + TRI.isInlineAsmReadOnlyReg(MF, Reg)) { + const char *RegName = TRI.getName(Reg); + emitInlineAsmError(Call, "write to reserved register '" + + Twine(RegName) + "'"); + return true; + } + } + return false; + }; + switch (OpInfo.Type) { case InlineAsm::isOutput: if (OpInfo.ConstraintType == TargetLowering::C_Memory) { @@ -8280,11 +8294,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // C_Immediate/C_Other). Find a register that we can use. 
if (OpInfo.AssignedRegs.Regs.empty()) { emitInlineAsmError( - CS, "couldn't allocate output register for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); + Call, "couldn't allocate output register for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); return; } + if (DetectWriteToReservedRegister()) + return; + // Add information to the INLINEASM node to know that this register is // set. OpInfo.AssignedRegs.AddInlineAsmOperands( @@ -8309,9 +8326,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. if (OpInfo.isIndirect) { // This happens on gcc/testsuite/gcc.dg/pr8788-1.c - emitInlineAsmError(CS, "inline asm not supported yet:" - " don't know how to handle tied " - "indirect register inputs"); + emitInlineAsmError(Call, "inline asm not supported yet: " + "don't know how to handle tied " + "indirect register inputs"); return; } @@ -8325,8 +8342,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(RegInfo.createVirtualRegister(RC)); } else { - emitInlineAsmError(CS, "inline asm error: This value type register " - "class is not natively supported!"); + emitInlineAsmError(Call, + "inline asm error: This value type register " + "class is not natively supported!"); return; } @@ -8334,8 +8352,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDLoc dl = getCurSDLoc(); // Use the produced MatchedRegs object to - MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, - CS.getInstruction()); + MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, &Call); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, true, OpInfo.getMatchedOperand(), dl, DAG, AsmNodeOperands); @@ -8369,13 +8386,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (Ops.empty()) { if (OpInfo.ConstraintType == TargetLowering::C_Immediate) if (isa<ConstantSDNode>(InOperandVal)) { - emitInlineAsmError(CS, "value out of range for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); + emitInlineAsmError(Call, "value out of range for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); return; } - emitInlineAsmError(CS, "invalid operand for inline asm constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); + emitInlineAsmError(Call, + "invalid operand for inline asm constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); return; } @@ -8416,23 +8434,27 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // TODO: Support this. if (OpInfo.isIndirect) { emitInlineAsmError( - CS, "Don't know how to handle indirect register inputs yet " - "for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); + Call, "Don't know how to handle indirect register inputs yet " + "for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); return; } // Copy the input into the appropriate registers. 
if (OpInfo.AssignedRegs.Regs.empty()) { - emitInlineAsmError(CS, "couldn't allocate input reg for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); + emitInlineAsmError(Call, + "couldn't allocate input reg for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); return; } + if (DetectWriteToReservedRegister()) + return; + SDLoc dl = getCurSDLoc(); - OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, - Chain, &Flag, CS.getInstruction()); + OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, + &Call); OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, dl, DAG, AsmNodeOperands); @@ -8464,12 +8486,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SmallVector<SDValue, 1> ResultValues; SmallVector<SDValue, 8> OutChains; - llvm::Type *CSResultType = CS.getType(); + llvm::Type *CallResultType = Call.getType(); ArrayRef<Type *> ResultTypes; - if (StructType *StructResult = dyn_cast<StructType>(CSResultType)) + if (StructType *StructResult = dyn_cast<StructType>(CallResultType)) ResultTypes = StructResult->elements(); - else if (!CSResultType->isVoidTy()) - ResultTypes = makeArrayRef(CSResultType); + else if (!CallResultType->isVoidTy()) + ResultTypes = makeArrayRef(CallResultType); auto CurResultType = ResultTypes.begin(); auto handleRegAssign = [&](SDValue V) { @@ -8513,8 +8535,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { switch (OpInfo.ConstraintType) { case TargetLowering::C_Register: case TargetLowering::C_RegisterClass: - Val = OpInfo.AssignedRegs.getCopyFromRegs( - DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction()); + Val = OpInfo.AssignedRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), + Chain, &Flag, &Call); break; case TargetLowering::C_Immediate: case TargetLowering::C_Other: @@ -8536,7 +8558,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OutChains.push_back(Store); } else { // generate CopyFromRegs to associated registers. - assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); + assert(!Call.getType()->isVoidTy() && "Bad inline asm!"); if (Val.getOpcode() == ISD::MERGE_VALUES) { for (const SDValue &V : Val->op_values()) handleRegAssign(V); @@ -8555,7 +8577,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(ResultVTs), ResultValues); - setValue(CS.getInstruction(), V); + setValue(&Call, V); } // Collect store chains. 
@@ -8567,15 +8589,15 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { DAG.setRoot(Chain); } -void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS, +void SelectionDAGBuilder::emitInlineAsmError(const CallBase &Call, const Twine &Message) { LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), Message); + Ctx.emitError(&Call, Message); // Make sure we leave the DAG in a valid state const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 1> ValueVTs; - ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), Call.getType(), ValueVTs); if (ValueVTs.empty()) return; @@ -8584,7 +8606,7 @@ void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS, for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i) Ops.push_back(DAG.getUNDEF(ValueVTs[i])); - setValue(CS.getInstruction(), DAG.getMergeValues(Ops, getCurSDLoc())); + setValue(&Call, DAG.getMergeValues(Ops, getCurSDLoc())); } void SelectionDAGBuilder::visitVAStart(const CallInst &I) { @@ -8600,7 +8622,7 @@ void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { SDValue V = DAG.getVAArg( TLI.getMemValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), - DL.getABITypeAlignment(I.getType())); + DL.getABITypeAlign(I.getType()).value()); DAG.setRoot(V.getValue(1)); if (I.getType()->isPointerTy()) @@ -8695,7 +8717,9 @@ void SelectionDAGBuilder::populateCallLoweringInfo( .setChain(getRoot()) .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args)) .setDiscardResult(Call->use_empty()) - .setIsPatchPoint(IsPatchPoint); + .setIsPatchPoint(IsPatchPoint) + .setIsPreallocated( + Call->countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0); } /// Add a stack map intrinsic call's live variable operands to a stackmap @@ -8715,11 +8739,11 @@ void SelectionDAGBuilder::populateCallLoweringInfo( /// assumption made by the llvm.gcroot intrinsic). If the alloca's location were /// only available in a register, then the runtime would need to trap when /// execution reaches the StackMap in order to read the alloca's location. -static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, +static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx, const SDLoc &DL, SmallVectorImpl<SDValue> &Ops, SelectionDAGBuilder &Builder) { - for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) { - SDValue OpVal = Builder.getValue(CS.getArgument(i)); + for (unsigned i = StartIdx, e = Call.arg_size(); i != e; ++i) { + SDValue OpVal = Builder.getValue(Call.getArgOperand(i)); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { Ops.push_back( Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); @@ -8745,7 +8769,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { SmallVector<SDValue, 32> Ops; SDLoc DL = getCurSDLoc(); - Callee = getValue(CI.getCalledValue()); + Callee = getValue(CI.getCalledOperand()); NullPtr = DAG.getIntPtrConstant(0, DL, true); // The stackmap intrinsic only records the live variables (the arguments @@ -8771,7 +8795,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { MVT::i32)); // Push live variables for the stack map. - addStackMapLiveVars(&CI, 2, DL, Ops, *this); + addStackMapLiveVars(CI, 2, DL, Ops, *this); // We are not pushing any register mask info here on the operands list, // because the stackmap doesn't clobber anything. 
@@ -8798,7 +8822,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { } /// Lower llvm.experimental.patchpoint directly to its target opcode. -void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, +void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, const BasicBlock *EHPadBB) { // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, // i32 <numBytes>, @@ -8807,11 +8831,11 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // [Args...], // [live variables...]) - CallingConv::ID CC = CS.getCallingConv(); + CallingConv::ID CC = CB.getCallingConv(); bool IsAnyRegCC = CC == CallingConv::AnyReg; - bool HasDef = !CS->getType()->isVoidTy(); + bool HasDef = !CB.getType()->isVoidTy(); SDLoc dl = getCurSDLoc(); - SDValue Callee = getValue(CS->getOperand(PatchPointOpers::TargetPos)); + SDValue Callee = getValue(CB.getArgOperand(PatchPointOpers::TargetPos)); // Handle immediate and symbolic callees. if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee)) @@ -8823,23 +8847,23 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, SymbolicCallee->getValueType(0)); // Get the real number of arguments participating in the call <numArgs> - SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos)); + SDValue NArgVal = getValue(CB.getArgOperand(PatchPointOpers::NArgPos)); unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue(); // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> // Intrinsics include all meta-operands up to but not including CC. unsigned NumMetaOpers = PatchPointOpers::CCPos; - assert(CS.arg_size() >= NumMetaOpers + NumArgs && + assert(CB.arg_size() >= NumMetaOpers + NumArgs && "Not enough arguments provided to the patchpoint intrinsic"); // For AnyRegCC the arguments are lowered later on manually. unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; Type *ReturnTy = - IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); + IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CB.getType(); TargetLowering::CallLoweringInfo CLI(DAG); - populateCallLoweringInfo(CLI, cast<CallBase>(CS.getInstruction()), - NumMetaOpers, NumCallArgs, Callee, ReturnTy, true); + populateCallLoweringInfo(CLI, &CB, NumMetaOpers, NumCallArgs, Callee, + ReturnTy, true); std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); SDNode *CallEnd = Result.second.getNode(); @@ -8857,10 +8881,10 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, SmallVector<SDValue, 8> Ops; // Add the <id> and <numBytes> constants. - SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos)); + SDValue IDVal = getValue(CB.getArgOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant( cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64)); - SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos)); + SDValue NBytesVal = getValue(CB.getArgOperand(PatchPointOpers::NBytesPos)); Ops.push_back(DAG.getTargetConstant( cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl, MVT::i32)); @@ -8882,14 +8906,14 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // place these in any free register. if (IsAnyRegCC) for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) - Ops.push_back(getValue(CS.getArgument(i))); + Ops.push_back(getValue(CB.getArgOperand(i))); // Push the arguments from the call instruction up to the register mask. SDNode::op_iterator e = HasGlue ? 
Call->op_end()-2 : Call->op_end()-1; Ops.append(Call->op_begin() + 2, e); // Push live variables for the stack map. - addStackMapLiveVars(CS, NumMetaOpers + NumArgs, dl, Ops, *this); + addStackMapLiveVars(CB, NumMetaOpers + NumArgs, dl, Ops, *this); // Push the register mask info. if (HasGlue) @@ -8910,7 +8934,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // Create the return types based on the intrinsic definition const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 3> ValueVTs; - ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), CB.getType(), ValueVTs); assert(ValueVTs.size() == 1 && "Expected only one return value type."); // There is always a chain and a glue type at the end @@ -8927,9 +8951,9 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // Update the NodeMap. if (HasDef) { if (IsAnyRegCC) - setValue(CS.getInstruction(), SDValue(MN, 0)); + setValue(&CB, SDValue(MN, 0)); else - setValue(CS.getInstruction(), Result.first); + setValue(&CB, Result.first); } // Fixup the consumers of the intrinsic. The chain and glue may be used in the @@ -9078,9 +9102,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // assert(!CS.hasInAllocaArgument() && // "sret demotion is incompatible with inalloca"); uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy); - unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy); + Align Alignment = DL.getPrefTypeAlign(CLI.RetTy); MachineFunction &MF = CLI.DAG.getMachineFunction(); - DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false); + DemoteStackIdx = + MF.getFrameInfo().CreateStackObject(TySize, Alignment, false); Type *StackSlotPtrType = PointerType::get(CLI.RetTy, DL.getAllocaAddrSpace()); @@ -9098,7 +9123,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Entry.IsSwiftSelf = false; Entry.IsSwiftError = false; Entry.IsCFGuardTarget = false; - Entry.Alignment = Align; + Entry.Alignment = Alignment; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); CLI.NumFixedArgs += 1; CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); @@ -9214,6 +9239,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setCFGuardTarget(); if (Args[i].IsByVal) Flags.setByVal(); + if (Args[i].IsPreallocated) { + Flags.setPreallocated(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // preallocated. This way we can know how many bytes we should've + // allocated and how many bytes a callee cleanup function will pop. If + // we port preallocated to more targets, we'll have to add custom + // preallocated handling in the various CC lowering callbacks. + Flags.setByVal(); + } if (Args[i].IsInAlloca) { Flags.setInAlloca(); // Set the byval flag for CCAssignFn callbacks that don't know about @@ -9223,7 +9257,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // in the various CC lowering callbacks. Flags.setByVal(); } - if (Args[i].IsByVal || Args[i].IsInAlloca) { + if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) { PointerType *Ty = cast<PointerType>(Args[i].Ty); Type *ElementTy = Ty->getElementType(); @@ -9232,12 +9266,12 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setByValSize(FrameSize); // info is not there but there are cases it cannot get right. 
- unsigned FrameAlign; - if (Args[i].Alignment) - FrameAlign = Args[i].Alignment; + Align FrameAlign; + if (auto MA = Args[i].Alignment) + FrameAlign = *MA; else - FrameAlign = getByValTypeAlignment(ElementTy, DL); - Flags.setByValAlign(Align(FrameAlign)); + FrameAlign = Align(getByValTypeAlignment(ElementTy, DL)); + Flags.setByValAlign(FrameAlign); } if (Args[i].IsNest) Flags.setNest(); @@ -9282,8 +9316,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setReturned(); } - getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, - CLI.CS.getInstruction(), CLI.CallConv, ExtendKind); + getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, CLI.CB, + CLI.CallConv, ExtendKind); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 @@ -9295,7 +9329,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) { - MyFlags.Flags.setOrigAlign(Align::None()); + MyFlags.Flags.setOrigAlign(Align(1)); if (j == NumParts - 1) MyFlags.Flags.setSplitEnd(); } @@ -9360,6 +9394,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); + MachineFunction &MF = CLI.DAG.getMachineFunction(); + Align HiddenSRetAlign = MF.getFrameInfo().getObjectAlign(DemoteStackIdx); for (unsigned i = 0; i < NumValues; ++i) { SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, CLI.DAG.getConstant(Offsets[i], CLI.DL, @@ -9368,7 +9404,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { RetTys[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), DemoteStackIdx, Offsets[i]), - /* Alignment = */ 1); + HiddenSRetAlign); ReturnValues[i] = L; Chains[i] = L.getValue(1); } @@ -9535,7 +9571,7 @@ findArgumentCopyElisionCandidates(const DataLayout &DL, // initializes the alloca. Don't elide copies from the same argument twice. const Value *Val = SI->getValueOperand()->stripPointerCasts(); const auto *Arg = dyn_cast<Argument>(Val); - if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() || + if (!Arg || Arg->hasPassPointeeByValueAttr() || Arg->getType()->isEmptyTy() || DL.getTypeStoreSize(Arg->getType()) != DL.getTypeAllocSize(AI->getAllocatedType()) || @@ -9591,16 +9627,12 @@ static void tryToElideArgumentCopy( "object size\n"); return; } - unsigned RequiredAlignment = AI->getAlignment(); - if (!RequiredAlignment) { - RequiredAlignment = FuncInfo.MF->getDataLayout().getABITypeAlignment( - AI->getAllocatedType()); - } - if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) { + Align RequiredAlignment = AI->getAlign(); + if (MFI.getObjectAlign(FixedIndex) < RequiredAlignment) { LLVM_DEBUG(dbgs() << " argument copy elision failed: alignment of alloca " "greater than stack argument alignment (" - << RequiredAlignment << " vs " - << MFI.getObjectAlignment(FixedIndex) << ")\n"); + << DebugStr(RequiredAlignment) << " vs " + << DebugStr(MFI.getObjectAlign(FixedIndex)) << ")\n"); return; } @@ -9637,6 +9669,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) { const DataLayout &DL = DAG.getDataLayout(); SmallVector<ISD::InputArg, 16> Ins; + // In Naked functions we aren't going to save any registers. + if (F.hasFnAttribute(Attribute::Naked)) + return; + if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. 
SmallVector<EVT, 1> ValueVTs; @@ -9725,12 +9761,21 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // in the various CC lowering callbacks. Flags.setByVal(); } + if (Arg.hasAttribute(Attribute::Preallocated)) { + Flags.setPreallocated(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // preallocated. This way we can know how many bytes we should've + // allocated and how many bytes a callee cleanup function will pop. If + // we port preallocated to more targets, we'll have to add custom + // preallocated handling in the various CC lowering callbacks. + Flags.setByVal(); + } if (F.getCallingConv() == CallingConv::X86_INTR) { // IA Interrupt passes frame (1st parameter) by value in the stack. if (ArgNo == 0) Flags.setByVal(); } - if (Flags.isByVal() || Flags.isInAlloca()) { + if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) { Type *ElementTy = Arg.getParamByValType(); // For ByVal, size and alignment should be passed from FE. BE will @@ -9770,7 +9815,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 else if (i > 0) { - MyFlags.Flags.setOrigAlign(Align::None()); + MyFlags.Flags.setOrigAlign(Align(1)); if (i == NumRegs - 1) MyFlags.Flags.setSplitEnd(); } @@ -9972,7 +10017,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { } // Finally, if the target has anything special to do, allow it to do so. - EmitFunctionEntryCode(); + emitFunctionEntryCode(); } /// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to @@ -10024,7 +10069,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { } Reg = RegOut; } else { - DenseMap<const Value *, unsigned>::iterator I = + DenseMap<const Value *, Register>::iterator I = FuncInfo.ValueMap.find(PHIOp); if (I != FuncInfo.ValueMap.end()) Reg = I->second; @@ -10638,6 +10683,19 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { } void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) { - SDValue N = getValue(I.getOperand(0)); - setValue(&I, N); + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(), + ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) return; + + SmallVector<SDValue, 4> Values(NumValues); + SDValue Op = getValue(I.getOperand(0)); + + for (unsigned i = 0; i != NumValues; ++i) + Values[i] = DAG.getNode(ISD::FREEZE, getCurSDLoc(), ValueVTs[i], + SDValue(Op.getNode(), Op.getResNo() + i)); + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), + DAG.getVTList(ValueVTs), Values)); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 18e0edf7fc04..f0b7fb0d5229 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -14,19 +14,16 @@ #define LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H #include "StatepointLowering.h" -#include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SwitchLoweringUtils.h" #include "llvm/CodeGen/TargetLowering.h" 
#include "llvm/CodeGen/ValueTypes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Statepoint.h" @@ -55,7 +52,6 @@ class CatchSwitchInst; class CleanupPadInst; class CleanupReturnInst; class Constant; -class ConstantInt; class ConstrainedFPIntrinsic; class DbgValueInst; class DataLayout; @@ -77,6 +73,7 @@ class PHINode; class ResumeInst; class ReturnInst; class SDDbgValue; +class SelectionDAG; class StoreInst; class SwiftErrorValueTracking; class SwitchInst; @@ -409,6 +406,8 @@ public: SelectionDAGBuilder *SDB; }; + // Data related to deferred switch lowerings. Used to construct additional + // Basic Blocks in SelectionDAGISel::FinishBasicBlock. std::unique_ptr<SDAGSwitchLowering> SL; /// A StackProtectorDescriptor structure used to communicate stack protector @@ -518,7 +517,6 @@ public: void resolveOrClearDbgInfo(); SDValue getValue(const Value *V); - bool findValue(const Value *V) const; /// Return the SDNode for the specified IR value if it exists. SDNode *getNodeForIRValue(const Value *V) { @@ -557,7 +555,7 @@ public: bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB); void CopyToExportRegsIfNeeded(const Value *V); void ExportFromCurrentBlock(const Value *V); - void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, + void LowerCallTo(const CallBase &CB, SDValue Callee, bool IsTailCall, const BasicBlock *EHPadBB = nullptr); // Lower range metadata from 0 to N to assert zext to an integer of nearest @@ -627,7 +625,7 @@ public: // This function is responsible for the whole statepoint lowering process. // It uniformly handles invoke and call statepoints. - void LowerStatepoint(ImmutableStatepoint ISP, + void LowerStatepoint(const GCStatepointInst &I, const BasicBlock *EHPadBB = nullptr); void LowerCallSiteWithDeoptBundle(const CallBase *Call, SDValue Callee, @@ -764,7 +762,7 @@ private: void visitStoreToSwiftError(const StoreInst &I); void visitFreeze(const FreezeInst &I); - void visitInlineAsm(ImmutableCallSite CS); + void visitInlineAsm(const CallBase &Call); void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI); @@ -774,8 +772,7 @@ private: void visitVAEnd(const CallInst &I); void visitVACopy(const CallInst &I); void visitStackmap(const CallInst &I); - void visitPatchpoint(ImmutableCallSite CS, - const BasicBlock *EHPadBB = nullptr); + void visitPatchpoint(const CallBase &CB, const BasicBlock *EHPadBB = nullptr); // These two are implemented in StatepointLowering.cpp void visitGCRelocate(const GCRelocateInst &Relocate); @@ -795,7 +792,7 @@ private: void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); - void emitInlineAsmError(ImmutableCallSite CS, const Twine &Message); + void emitInlineAsmError(const CallBase &Call, const Twine &Message); /// If V is an function argument then create corresponding DBG_VALUE machine /// instruction for it now. 
At the end of instruction selection, they will be diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 6fd71393bf38..42e3016e65b8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -65,7 +65,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { if (G) if (const TargetInstrInfo *TII = G->getSubtarget().getInstrInfo()) if (getMachineOpcode() < TII->getNumOpcodes()) - return TII->getName(getMachineOpcode()); + return std::string(TII->getName(getMachineOpcode())); return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>"; } if (G) { @@ -106,6 +106,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::TokenFactor: return "TokenFactor"; case ISD::AssertSext: return "AssertSext"; case ISD::AssertZext: return "AssertZext"; + case ISD::AssertAlign: return "AssertAlign"; case ISD::BasicBlock: return "BasicBlock"; case ISD::VALUETYPE: return "ValueType"; @@ -170,6 +171,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CopyToReg: return "CopyToReg"; case ISD::CopyFromReg: return "CopyFromReg"; case ISD::UNDEF: return "undef"; + case ISD::VSCALE: return "vscale"; case ISD::MERGE_VALUES: return "merge_values"; case ISD::INLINEASM: return "inlineasm"; case ISD::INLINEASM_BR: return "inlineasm_br"; @@ -210,6 +212,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STRICT_FNEARBYINT: return "strict_fnearbyint"; case ISD::FROUND: return "fround"; case ISD::STRICT_FROUND: return "strict_fround"; + case ISD::FROUNDEVEN: return "froundeven"; + case ISD::STRICT_FROUNDEVEN: return "strict_froundeven"; case ISD::FEXP: return "fexp"; case ISD::STRICT_FEXP: return "strict_fexp"; case ISD::FEXP2: return "fexp2"; @@ -313,7 +317,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::UMULFIXSAT: return "umulfixsat"; case ISD::SDIVFIX: return "sdivfix"; + case ISD::SDIVFIXSAT: return "sdivfixsat"; case ISD::UDIVFIX: return "udivfix"; + case ISD::UDIVFIXSAT: return "udivfixsat"; // Conversion operators. 
case ISD::SIGN_EXTEND: return "sign_extend"; @@ -341,7 +347,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::BITCAST: return "bitcast"; case ISD::ADDRSPACECAST: return "addrspacecast"; case ISD::FP16_TO_FP: return "fp16_to_fp"; + case ISD::STRICT_FP16_TO_FP: return "strict_fp16_to_fp"; case ISD::FP_TO_FP16: return "fp_to_fp16"; + case ISD::STRICT_FP_TO_FP16: return "strict_fp_to_fp16"; case ISD::LROUND: return "lround"; case ISD::STRICT_LROUND: return "strict_lround"; case ISD::LLROUND: return "llround"; @@ -387,6 +395,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::GC_TRANSITION_START: return "gc_transition.start"; case ISD::GC_TRANSITION_END: return "gc_transition.end"; case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset"; + case ISD::FREEZE: return "freeze"; + case ISD::PREALLOCATED_SETUP: + return "call_setup"; + case ISD::PREALLOCATED_ARG: + return "call_alloc"; // Bit manipulation case ISD::ABS: return "abs"; @@ -547,9 +560,6 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getFlags().hasAllowReassociation()) OS << " reassoc"; - if (getFlags().hasVectorReduction()) - OS << " vector-reduction"; - if (getFlags().hasNoFPExcept()) OS << " nofpexcept"; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 6c57c72d47a7..1f0432196a2d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -215,6 +215,7 @@ namespace llvm { OptLevelChanger(SelectionDAGISel &ISel, CodeGenOpt::Level NewOptLevel) : IS(ISel) { SavedOptLevel = IS.OptLevel; + SavedFastISel = IS.TM.Options.EnableFastISel; if (NewOptLevel == SavedOptLevel) return; IS.OptLevel = NewOptLevel; @@ -223,7 +224,6 @@ namespace llvm { << IS.MF->getFunction().getName() << "\n"); LLVM_DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O" << NewOptLevel << "\n"); - SavedFastISel = IS.TM.Options.EnableFastISel; if (NewOptLevel == CodeGenOpt::None) { IS.TM.setFastISel(IS.TM.getO0WantsFastISel()); LLVM_DEBUG( @@ -337,7 +337,8 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { if (UseMBPI && OptLevel != CodeGenOpt::None) AU.addRequired<BranchProbabilityInfoWrapperPass>(); AU.addRequired<ProfileSummaryInfoWrapperPass>(); - LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); + if (OptLevel != CodeGenOpt::None) + LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -441,9 +442,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); - auto *BFI = (PSI && PSI->hasProfileSummary()) ? - &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() : - nullptr; + BlockFrequencyInfo *BFI = nullptr; + if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOpt::None) + BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI(); LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); @@ -513,15 +514,15 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // registers. If we don't apply the reg fixups before, some registers may // appear as unused and will be skipped, resulting in bad MI. 
MachineRegisterInfo &MRI = MF->getRegInfo(); - for (DenseMap<unsigned, unsigned>::iterator I = FuncInfo->RegFixups.begin(), + for (DenseMap<Register, Register>::iterator I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end(); I != E; ++I) { - unsigned From = I->first; - unsigned To = I->second; + Register From = I->first; + Register To = I->second; // If To is also scheduled to be replaced, find what its ultimate // replacement is. while (true) { - DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To); + DenseMap<Register, Register>::iterator J = FuncInfo->RegFixups.find(To); if (J == E) break; To = J->second; @@ -622,7 +623,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Otherwise this is another use or second copy use. CopyUseMI = nullptr; break; } - if (CopyUseMI) { + if (CopyUseMI && + TRI.getRegSizeInBits(LDI->second, MRI) == + TRI.getRegSizeInBits(CopyUseMI->getOperand(0).getReg(), MRI)) { // Use MI's debug location, which describes where Variable was // declared, rather than whatever is attached to CopyUseMI. MachineInstr *NewMI = @@ -658,36 +661,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Determine if floating point is used for msvc computeUsesMSVCFloatingPoint(TM.getTargetTriple(), Fn, MF->getMMI()); - // Replace forward-declared registers with the registers containing - // the desired value. - for (DenseMap<unsigned, unsigned>::iterator - I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end(); - I != E; ++I) { - unsigned From = I->first; - unsigned To = I->second; - // If To is also scheduled to be replaced, find what its ultimate - // replacement is. - while (true) { - DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To); - if (J == E) break; - To = J->second; - } - // Make sure the new register has a sufficiently constrained register class. - if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To)) - MRI.constrainRegClass(To, MRI.getRegClass(From)); - // Replace it. - - - // Replacing one register with another won't touch the kill flags. - // We need to conservatively clear the kill flags as a kill on the old - // register might dominate existing uses of the new register. - if (!MRI.use_empty(To)) - MRI.clearKillFlags(From); - MRI.replaceRegWith(From, To); - } - - TLI->finalizeLowering(*MF); - // Release function-specific state. SDB and CurDAG are already cleared // at this point. FuncInfo->clear(); @@ -1321,8 +1294,11 @@ static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) { assert(DI->getVariable() && "Missing variable"); assert(DI->getDebugLoc() && "Missing location"); const Value *Address = DI->getAddress(); - if (!Address) + if (!Address) { + LLVM_DEBUG(dbgs() << "processDbgDeclares skipping " << *DI + << " (bad address)\n"); continue; + } // Look through casts and constant offset GEPs. These mostly come from // inalloca. @@ -1347,6 +1323,8 @@ static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) { if (Offset.getBoolValue()) Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, Offset.getZExtValue()); + LLVM_DEBUG(dbgs() << "processDbgDeclares: setVariableDbgInfo FI=" << FI + << ", " << *DI << "\n"); MF->setVariableDbgInfo(DI->getVariable(), Expr, FI, DI->getDebugLoc()); } } @@ -1513,8 +1491,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // to keep track of gc-relocates for a particular gc-statepoint. This is // done by SelectionDAGBuilder::LowerAsSTATEPOINT, called before // visitGCRelocate. 
- if (isa<CallInst>(Inst) && !isStatepoint(Inst) && !isGCRelocate(Inst) && - !isGCResult(Inst)) { + if (isa<CallInst>(Inst) && !isa<GCStatepointInst>(Inst) && + !isa<GCRelocateInst>(Inst) && !isa<GCResultInst>(Inst)) { OptimizationRemarkMissed R("sdagisel", "FastISelFailure", Inst->getDebugLoc(), LLVMBB); @@ -1532,7 +1510,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (!Inst->getType()->isVoidTy() && !Inst->getType()->isTokenTy() && !Inst->use_empty()) { - unsigned &R = FuncInfo->ValueMap[Inst]; + Register &R = FuncInfo->ValueMap[Inst]; if (!R) R = FuncInfo->CreateRegs(Inst); } @@ -2234,14 +2212,14 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, return !findNonImmUse(Root, N.getNode(), U, IgnoreChains); } -void SelectionDAGISel::Select_INLINEASM(SDNode *N, bool Branch) { +void SelectionDAGISel::Select_INLINEASM(SDNode *N) { SDLoc DL(N); std::vector<SDValue> Ops(N->op_begin(), N->op_end()); SelectInlineAsmMemoryOperands(Ops, DL); const EVT VTs[] = {MVT::Other, MVT::Glue}; - SDValue New = CurDAG->getNode(Branch ? ISD::INLINEASM_BR : ISD::INLINEASM, DL, VTs, Ops); + SDValue New = CurDAG->getNode(N->getOpcode(), DL, VTs, Ops); New->setNodeId(-1); ReplaceUses(N, New.getNode()); CurDAG->RemoveDeadNode(N); @@ -2285,6 +2263,14 @@ void SelectionDAGISel::Select_UNDEF(SDNode *N) { CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); } +void SelectionDAGISel::Select_FREEZE(SDNode *N) { + // TODO: We don't have FREEZE pseudo-instruction in MachineInstr-level now. + // If FREEZE instruction is added later, the code below must be changed as + // well. + CurDAG->SelectNodeTo(N, TargetOpcode::COPY, N->getValueType(0), + N->getOperand(0)); +} + /// GetVBR - decode a vbr encoding whose top bit is set. LLVM_ATTRIBUTE_ALWAYS_INLINE static inline uint64_t GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { @@ -2804,13 +2790,13 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, return; case ISD::AssertSext: case ISD::AssertZext: + case ISD::AssertAlign: ReplaceUses(SDValue(NodeToMatch, 0), NodeToMatch->getOperand(0)); CurDAG->RemoveDeadNode(NodeToMatch); return; case ISD::INLINEASM: case ISD::INLINEASM_BR: - Select_INLINEASM(NodeToMatch, - NodeToMatch->getOpcode() == ISD::INLINEASM_BR); + Select_INLINEASM(NodeToMatch); return; case ISD::READ_REGISTER: Select_READ_REGISTER(NodeToMatch); @@ -2821,6 +2807,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::UNDEF: Select_UNDEF(NodeToMatch); return; + case ISD::FREEZE: + Select_FREEZE(NodeToMatch); + return; } assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); @@ -3693,12 +3682,11 @@ bool SelectionDAGISel::isOrEquivalentToAdd(const SDNode *N) const { // Detect when "or" is used to add an offset to a stack object. if (auto *FN = dyn_cast<FrameIndexSDNode>(N->getOperand(0))) { MachineFrameInfo &MFI = MF->getFrameInfo(); - unsigned A = MFI.getObjectAlignment(FN->getIndex()); - assert(isPowerOf2_32(A) && "Unexpected alignment"); + Align A = MFI.getObjectAlign(FN->getIndex()); int32_t Off = C->getSExtValue(); // If the alleged offset fits in the zero bits guaranteed by // the alignment, then this or is really an add. 
- return (Off >= 0) && (((A - 1) & Off) == unsigned(Off)); + return (Off >= 0) && (((A.value() - 1) & Off) == unsigned(Off)); } return false; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index cdc09d59f6a4..059a6baf967a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -70,7 +70,7 @@ namespace llvm { } static std::string getGraphName(const SelectionDAG *G) { - return G->getMachineFunction().getName(); + return std::string(G->getMachineFunction().getName()); } static bool renderGraphFromBottomUp() { @@ -164,6 +164,20 @@ void SelectionDAG::viewGraph() { viewGraph(""); } +/// Just dump dot graph to a user-provided path and title. +/// This doesn't open the dot viewer program and +/// helps visualization when outside debugging session. +/// FileName expects absolute path. If provided +/// without any path separators then the file +/// will be created in the current directory. +/// Error will be emitted if the path is insane. +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void SelectionDAG::dumpDotGraph(const Twine &FileName, + const Twine &Title) { + dumpDotGraphToFile(this, FileName, Title); +} +#endif + /// clearGraphAttrs - Clear all previously defined node graph attributes. /// Intended to be used from a debugging tool (eg. gdb). void SelectionDAG::clearGraphAttrs() { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index c628f379e415..2cb57c1d1ccc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -41,6 +42,7 @@ #include "llvm/IR/Statepoint.h" #include "llvm/IR/Type.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -61,6 +63,10 @@ STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered"); STATISTIC(StatepointMaxSlotsRequired, "Maximum number of stack slots required for a singe statepoint"); +cl::opt<bool> UseRegistersForDeoptValues( + "use-registers-for-deopt-values", cl::Hidden, cl::init(false), + cl::desc("Allow using registers for non pointer deopt args")); + static void pushStackMapConstant(SmallVectorImpl<SDValue>& Ops, SelectionDAGBuilder &Builder, uint64_t Value) { SDLoc L = Builder.getCurSDLoc(); @@ -215,6 +221,28 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, return None; } + +/// Return true if-and-only-if the given SDValue can be lowered as either a +/// constant argument or a stack reference. The key point is that the value +/// doesn't need to be spilled or tracked as a vreg use. +static bool willLowerDirectly(SDValue Incoming) { + // We are making an unchecked assumption that the frame size <= 2^16 as that + // is the largest offset which can be encoded in the stackmap format. 
+ if (isa<FrameIndexSDNode>(Incoming)) + return true; + + // The largest constant describeable in the StackMap format is 64 bits. + // Potential Optimization: Constants values are sign extended by consumer, + // and thus there are many constants of static type > 64 bits whose value + // happens to be sext(Con64) and could thus be lowered directly. + if (Incoming.getValueType().getSizeInBits() > 64) + return false; + + return (isa<ConstantSDNode>(Incoming) || isa<ConstantFPSDNode>(Incoming) || + Incoming.isUndef()); +} + + /// Try to find existing copies of the incoming values in stack slots used for /// statepoint spilling. If we can find a spill slot for the incoming value, /// mark that slot as allocated, and reuse the same slot for this safepoint. @@ -224,11 +252,10 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue, SelectionDAGBuilder &Builder) { SDValue Incoming = Builder.getValue(IncomingValue); - if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) { - // We won't need to spill this, so no need to check for previously - // allocated stack slots + // If we won't spill this, we don't need to check for previously allocated + // stack slots. + if (willLowerDirectly(Incoming)) return; - } SDValue OldLocation = Builder.StatepointLowering.getLocation(Incoming); if (OldLocation.getNode()) @@ -268,45 +295,6 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue, Builder.StatepointLowering.setLocation(Incoming, Loc); } -/// Remove any duplicate (as SDValues) from the derived pointer pairs. This -/// is not required for correctness. It's purpose is to reduce the size of -/// StackMap section. It has no effect on the number of spill slots required -/// or the actual lowering. -static void -removeDuplicateGCPtrs(SmallVectorImpl<const Value *> &Bases, - SmallVectorImpl<const Value *> &Ptrs, - SmallVectorImpl<const GCRelocateInst *> &Relocs, - SelectionDAGBuilder &Builder, - FunctionLoweringInfo::StatepointSpillMap &SSM) { - DenseMap<SDValue, const Value *> Seen; - - SmallVector<const Value *, 64> NewBases, NewPtrs; - SmallVector<const GCRelocateInst *, 64> NewRelocs; - for (size_t i = 0, e = Ptrs.size(); i < e; i++) { - SDValue SD = Builder.getValue(Ptrs[i]); - auto SeenIt = Seen.find(SD); - - if (SeenIt == Seen.end()) { - // Only add non-duplicates - NewBases.push_back(Bases[i]); - NewPtrs.push_back(Ptrs[i]); - NewRelocs.push_back(Relocs[i]); - Seen[SD] = Ptrs[i]; - } else { - // Duplicate pointer found, note in SSM and move on: - SSM.DuplicateMap[Ptrs[i]] = SeenIt->second; - } - } - assert(Bases.size() >= NewBases.size()); - assert(Ptrs.size() >= NewPtrs.size()); - assert(Relocs.size() >= NewRelocs.size()); - Bases = NewBases; - Ptrs = NewPtrs; - Relocs = NewRelocs; - assert(Ptrs.size() == Bases.size()); - assert(Ptrs.size() == Relocs.size()); -} - /// Extract call from statepoint, lower it and return pointer to the /// call node. Also update NodeMap so that getValue(statepoint) will /// reference lowered call result @@ -353,9 +341,9 @@ static MachineMemOperand* getMachineMemOperand(MachineFunction &MF, auto MMOFlags = MachineMemOperand::MOStore | MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; auto &MFI = MF.getFrameInfo(); - return MF.getMachineMemOperand(PtrInfo, MMOFlags, + return MF.getMachineMemOperand(PtrInfo, MMOFlags, MFI.getObjectSize(FI.getIndex()), - MFI.getObjectAlignment(FI.getIndex())); + MFI.getObjectAlign(FI.getIndex())); } /// Spill a value incoming to the statepoint. 
It might be either part of @@ -393,10 +381,9 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, // slots with preferred alignments larger than frame alignment.. auto &MF = Builder.DAG.getMachineFunction(); auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index); - auto *StoreMMO = - MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, - MFI.getObjectSize(Index), - MFI.getObjectAlignment(Index)); + auto *StoreMMO = MF.getMachineMemOperand( + PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(Index), + MFI.getObjectAlign(Index)); Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc, StoreMMO); @@ -412,59 +399,81 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, /// Lower a single value incoming to a statepoint node. This value can be /// either a deopt value or a gc value, the handling is the same. We special /// case constants and allocas, then fall back to spilling if required. -static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly, - SmallVectorImpl<SDValue> &Ops, - SmallVectorImpl<MachineMemOperand*> &MemRefs, - SelectionDAGBuilder &Builder) { - // Note: We know all of these spills are independent, but don't bother to - // exploit that chain wise. DAGCombine will happily do so as needed, so - // doing it here would be a small compile time win at most. - SDValue Chain = Builder.getRoot(); - - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Incoming)) { +static void +lowerIncomingStatepointValue(SDValue Incoming, bool RequireSpillSlot, + SmallVectorImpl<SDValue> &Ops, + SmallVectorImpl<MachineMemOperand *> &MemRefs, + SelectionDAGBuilder &Builder) { + + if (willLowerDirectly(Incoming)) { + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { + // This handles allocas as arguments to the statepoint (this is only + // really meaningful for a deopt value. For GC, we'd be trying to + // relocate the address of the alloca itself?) + assert(Incoming.getValueType() == Builder.getFrameIndexTy() && + "Incoming value is a frame index!"); + Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), + Builder.getFrameIndexTy())); + + auto &MF = Builder.DAG.getMachineFunction(); + auto *MMO = getMachineMemOperand(MF, *FI); + MemRefs.push_back(MMO); + return; + } + + assert(Incoming.getValueType().getSizeInBits() <= 64); + + if (Incoming.isUndef()) { + // Put an easily recognized constant that's unlikely to be a valid + // value so that uses of undef by the consumer of the stackmap is + // easily recognized. This is legal since the compiler is always + // allowed to chose an arbitrary value for undef. + pushStackMapConstant(Ops, Builder, 0xFEFEFEFE); + return; + } + // If the original value was a constant, make sure it gets recorded as // such in the stackmap. This is required so that the consumer can // parse any internal format to the deopt state. It also handles null - // pointers and other constant pointers in GC states. Note the constant - // vectors do not appear to actually hit this path and that anything larger - // than an i64 value (not type!) will fail asserts here. - pushStackMapConstant(Ops, Builder, C->getSExtValue()); - } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { - // This handles allocas as arguments to the statepoint (this is only - // really meaningful for a deopt value. For GC, we'd be trying to - // relocate the address of the alloca itself?) 
- assert(Incoming.getValueType() == Builder.getFrameIndexTy() && - "Incoming value is a frame index!"); - Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), - Builder.getFrameIndexTy())); + // pointers and other constant pointers in GC states. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Incoming)) { + pushStackMapConstant(Ops, Builder, C->getSExtValue()); + return; + } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Incoming)) { + pushStackMapConstant(Ops, Builder, + C->getValueAPF().bitcastToAPInt().getZExtValue()); + return; + } - auto &MF = Builder.DAG.getMachineFunction(); - auto *MMO = getMachineMemOperand(MF, *FI); - MemRefs.push_back(MMO); - - } else if (LiveInOnly) { + llvm_unreachable("unhandled direct lowering case"); + } + + + + if (!RequireSpillSlot) { // If this value is live in (not live-on-return, or live-through), we can // treat it the same way patchpoint treats it's "live in" values. We'll // end up folding some of these into stack references, but they'll be // handled by the register allocator. Note that we do not have the notion // of a late use so these values might be placed in registers which are - // clobbered by the call. This is fine for live-in. + // clobbered by the call. This is fine for live-in. For live-through + // fix-up pass should be executed to force spilling of such registers. Ops.push_back(Incoming); } else { - // Otherwise, locate a spill slot and explicitly spill it so it - // can be found by the runtime later. We currently do not support - // tracking values through callee saved registers to their eventual - // spill location. This would be a useful optimization, but would - // need to be optional since it requires a lot of complexity on the - // runtime side which not all would support. + // Otherwise, locate a spill slot and explicitly spill it so it can be + // found by the runtime later. Note: We know all of these spills are + // independent, but don't bother to exploit that chain wise. DAGCombine + // will happily do so as needed, so doing it here would be a small compile + // time win at most. + SDValue Chain = Builder.getRoot(); auto Res = spillIncomingStatepointValue(Incoming, Chain, Builder); Ops.push_back(std::get<0>(Res)); if (auto *MMO = std::get<2>(Res)) MemRefs.push_back(MMO); Chain = std::get<1>(Res);; + Builder.DAG.setRoot(Chain); } - Builder.DAG.setRoot(Chain); } /// Lower deopt state and gc pointer arguments of the statepoint. The actual @@ -522,8 +531,18 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, const bool LiveInDeopt = SI.StatepointFlags & (uint64_t)StatepointFlags::DeoptLiveIn; - auto isGCValue =[&](const Value *V) { - return is_contained(SI.Ptrs, V) || is_contained(SI.Bases, V); + auto isGCValue = [&](const Value *V) { + auto *Ty = V->getType(); + if (!Ty->isPtrOrPtrVectorTy()) + return false; + if (auto *GFI = Builder.GFI) + if (auto IsManaged = GFI->getStrategy().isGCManagedPointer(Ty)) + return *IsManaged; + return true; // conservative + }; + + auto requireSpillSlot = [&](const Value *V) { + return !(LiveInDeopt || UseRegistersForDeoptValues) || isGCValue(V); }; // Before we actually start lowering (and allocating spill slots for values), @@ -532,7 +551,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // doesn't change semantics at all. It is important for performance that we // reserve slots for both deopt and gc values before lowering either. 
for (const Value *V : SI.DeoptState) { - if (!LiveInDeopt || isGCValue(V)) + if (requireSpillSlot(V)) reservePreviousStackSlotForValue(V, Builder); } for (unsigned i = 0; i < SI.Bases.size(); ++i) { @@ -559,8 +578,8 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, } if (!Incoming.getNode()) Incoming = Builder.getValue(V); - const bool LiveInValue = LiveInDeopt && !isGCValue(V); - lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, MemRefs, Builder); + lowerIncomingStatepointValue(Incoming, requireSpillSlot(V), Ops, MemRefs, + Builder); } // Finally, go ahead and lower all the gc arguments. There's no prefixed @@ -570,12 +589,14 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // (base[0], ptr[0], base[1], ptr[1], ...) for (unsigned i = 0; i < SI.Bases.size(); ++i) { const Value *Base = SI.Bases[i]; - lowerIncomingStatepointValue(Builder.getValue(Base), /*LiveInOnly*/ false, - Ops, MemRefs, Builder); + lowerIncomingStatepointValue(Builder.getValue(Base), + /*RequireSpillSlot*/ true, Ops, MemRefs, + Builder); const Value *Ptr = SI.Ptrs[i]; - lowerIncomingStatepointValue(Builder.getValue(Ptr), /*LiveInOnly*/ false, - Ops, MemRefs, Builder); + lowerIncomingStatepointValue(Builder.getValue(Ptr), + /*RequireSpillSlot*/ true, Ops, MemRefs, + Builder); } // If there are any explicit spill slots passed to the statepoint, record @@ -610,7 +631,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, SDValue Loc = Builder.StatepointLowering.getLocation(SDV); if (Loc.getNode()) { - SpillMap.SlotMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex(); + SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex(); } else { // Record value as visited, but not spilled. This is case for allocas // and constants. For this values we can avoid emitting spill load while @@ -618,7 +639,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // Actually we do not need to record them in this map at all. // We do this only to check that we are not relocating any unvisited // value. - SpillMap.SlotMap[V] = None; + SpillMap[V] = None; // Default llvm mechanisms for exporting values which are used in // different basic blocks does not work for gc relocates. @@ -641,24 +662,15 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( NumOfStatepoints++; // Clear state StatepointLowering.startNewStatepoint(*this); + assert(SI.Bases.size() == SI.Ptrs.size() && + SI.Ptrs.size() <= SI.GCRelocates.size()); #ifndef NDEBUG - // We schedule gc relocates before removeDuplicateGCPtrs since we _will_ - // encounter the duplicate gc relocates we elide in removeDuplicateGCPtrs. for (auto *Reloc : SI.GCRelocates) if (Reloc->getParent() == SI.StatepointInstr->getParent()) StatepointLowering.scheduleRelocCall(*Reloc); #endif - // Remove any redundant llvm::Values which map to the same SDValue as another - // input. Also has the effect of removing duplicates in the original - // llvm::Value input list as well. This is a useful optimization for - // reducing the size of the StackMap section. It has no other impact. 
- removeDuplicateGCPtrs(SI.Bases, SI.Ptrs, SI.GCRelocates, *this, - FuncInfo.StatepointSpillMaps[SI.StatepointInstr]); - assert(SI.Bases.size() == SI.Ptrs.size() && - SI.Ptrs.size() == SI.GCRelocates.size()); - // Lower statepoint vmstate and gcstate arguments SmallVector<SDValue, 10> LoweredMetaArgs; SmallVector<MachineMemOperand*, 16> MemRefs; @@ -830,97 +842,109 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( } void -SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, +SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I, const BasicBlock *EHPadBB /*= nullptr*/) { - assert(ISP.getCall()->getCallingConv() != CallingConv::AnyReg && + assert(I.getCallingConv() != CallingConv::AnyReg && "anyregcc is not supported on statepoints!"); #ifndef NDEBUG - // If this is a malformed statepoint, report it early to simplify debugging. - // This should catch any IR level mistake that's made when constructing or - // transforming statepoints. - ISP.verify(); - // Check that the associated GCStrategy expects to encounter statepoints. assert(GFI->getStrategy().useStatepoints() && "GCStrategy does not expect to encounter statepoints"); #endif SDValue ActualCallee; + SDValue Callee = getValue(I.getActualCalledOperand()); - if (ISP.getNumPatchBytes() > 0) { + if (I.getNumPatchBytes() > 0) { // If we've been asked to emit a nop sequence instead of a call instruction // for this statepoint then don't lower the call target, but use a constant - // `null` instead. Not lowering the call target lets statepoint clients get - // away without providing a physical address for the symbolic call target at - // link time. - - const auto &TLI = DAG.getTargetLoweringInfo(); - const auto &DL = DAG.getDataLayout(); - - unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace(); - ActualCallee = DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(DL, AS)); + // `undef` instead. Not lowering the call target lets statepoint clients + // get away without providing a physical address for the symbolic call + // target at link time. + ActualCallee = DAG.getUNDEF(Callee.getValueType()); } else { - ActualCallee = getValue(ISP.getCalledValue()); + ActualCallee = Callee; } StatepointLoweringInfo SI(DAG); - populateCallLoweringInfo(SI.CLI, ISP.getCall(), - ImmutableStatepoint::CallArgsBeginPos, - ISP.getNumCallArgs(), ActualCallee, - ISP.getActualReturnType(), false /* IsPatchPoint */); - - for (const GCRelocateInst *Relocate : ISP.getRelocates()) { + populateCallLoweringInfo(SI.CLI, &I, GCStatepointInst::CallArgsBeginPos, + I.getNumCallArgs(), ActualCallee, + I.getActualReturnType(), false /* IsPatchPoint */); + + // There may be duplication in the gc.relocate list; such as two copies of + // each relocation on normal and exceptional path for an invoke. We only + // need to spill once and record one copy in the stackmap, but we need to + // reload once per gc.relocate. (Dedupping gc.relocates is trickier and best + // handled as a CSE problem elsewhere.) + // TODO: There a couple of major stackmap size optimizations we could do + // here if we wished. + // 1) If we've encountered a derived pair {B, D}, we don't need to actually + // record {B,B} if it's seen later. + // 2) Due to rematerialization, actual derived pointers are somewhat rare; + // given that, we could change the format to record base pointer relocations + // separately with half the space. This would require a format rev and a + // fairly major rework of the STATEPOINT node though. 
+ SmallSet<SDValue, 8> Seen; + for (const GCRelocateInst *Relocate : I.getGCRelocates()) { SI.GCRelocates.push_back(Relocate); - SI.Bases.push_back(Relocate->getBasePtr()); - SI.Ptrs.push_back(Relocate->getDerivedPtr()); + + SDValue DerivedSD = getValue(Relocate->getDerivedPtr()); + if (Seen.insert(DerivedSD).second) { + SI.Bases.push_back(Relocate->getBasePtr()); + SI.Ptrs.push_back(Relocate->getDerivedPtr()); + } } - SI.GCArgs = ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end()); - SI.StatepointInstr = ISP.getInstruction(); - SI.GCTransitionArgs = - ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end()); - SI.ID = ISP.getID(); - SI.DeoptState = ArrayRef<const Use>(ISP.deopt_begin(), ISP.deopt_end()); - SI.StatepointFlags = ISP.getFlags(); - SI.NumPatchBytes = ISP.getNumPatchBytes(); + SI.GCArgs = ArrayRef<const Use>(I.gc_args_begin(), I.gc_args_end()); + SI.StatepointInstr = &I; + SI.ID = I.getID(); + + SI.DeoptState = ArrayRef<const Use>(I.deopt_begin(), I.deopt_end()); + SI.GCTransitionArgs = ArrayRef<const Use>(I.gc_transition_args_begin(), + I.gc_transition_args_end()); + + SI.StatepointFlags = I.getFlags(); + SI.NumPatchBytes = I.getNumPatchBytes(); SI.EHPadBB = EHPadBB; SDValue ReturnValue = LowerAsSTATEPOINT(SI); // Export the result value if needed - const GCResultInst *GCResult = ISP.getGCResult(); - Type *RetTy = ISP.getActualReturnType(); - if (!RetTy->isVoidTy() && GCResult) { - if (GCResult->getParent() != ISP.getCall()->getParent()) { - // Result value will be used in a different basic block so we need to - // export it now. Default exporting mechanism will not work here because - // statepoint call has a different type than the actual call. It means - // that by default llvm will create export register of the wrong type - // (always i32 in our case). So instead we need to create export register - // with correct type manually. - // TODO: To eliminate this problem we can remove gc.result intrinsics - // completely and make statepoint call to return a tuple. - unsigned Reg = FuncInfo.CreateRegs(RetTy); - RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), - DAG.getDataLayout(), Reg, RetTy, - ISP.getCall()->getCallingConv()); - SDValue Chain = DAG.getEntryNode(); - - RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr); - PendingExports.push_back(Chain); - FuncInfo.ValueMap[ISP.getInstruction()] = Reg; - } else { - // Result value will be used in a same basic block. Don't export it or - // perform any explicit register copies. - // We'll replace the actuall call node shortly. gc_result will grab - // this value. - setValue(ISP.getInstruction(), ReturnValue); - } - } else { - // The token value is never used from here on, just generate a poison value - setValue(ISP.getInstruction(), DAG.getIntPtrConstant(-1, getCurSDLoc())); + const GCResultInst *GCResult = I.getGCResult(); + Type *RetTy = I.getActualReturnType(); + + if (RetTy->isVoidTy() || !GCResult) { + // The return value is not needed, just generate a poison value. + setValue(&I, DAG.getIntPtrConstant(-1, getCurSDLoc())); + return; + } + + if (GCResult->getParent() == I.getParent()) { + // Result value will be used in a same basic block. Don't export it or + // perform any explicit register copies. The gc_result will simply grab + // this value. + setValue(&I, ReturnValue); + return; } + + // Result value will be used in a different basic block so we need to export + // it now. 
Default exporting mechanism will not work here because statepoint + // call has a different type than the actual call. It means that by default + // llvm will create export register of the wrong type (always i32 in our + // case). So instead we need to create export register with correct type + // manually. + // TODO: To eliminate this problem we can remove gc.result intrinsics + // completely and make statepoint call to return a tuple. + unsigned Reg = FuncInfo.CreateRegs(RetTy); + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), + DAG.getDataLayout(), Reg, RetTy, + I.getCallingConv()); + SDValue Chain = DAG.getEntryNode(); + + RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr); + PendingExports.push_back(Chain); + FuncInfo.ValueMap[&I] = Reg; } void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl( @@ -966,26 +990,23 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundle( void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) { // The result value of the gc_result is simply the result of the actual // call. We've already emitted this, so just grab the value. - const Instruction *I = CI.getStatepoint(); - - if (I->getParent() != CI.getParent()) { - // Statepoint is in different basic block so we should have stored call - // result in a virtual register. - // We can not use default getValue() functionality to copy value from this - // register because statepoint and actual call return types can be - // different, and getValue() will use CopyFromReg of the wrong type, - // which is always i32 in our case. - PointerType *CalleeType = cast<PointerType>( - ImmutableStatepoint(I).getCalledValue()->getType()); - Type *RetTy = - cast<FunctionType>(CalleeType->getElementType())->getReturnType(); - SDValue CopyFromReg = getCopyFromRegs(I, RetTy); - - assert(CopyFromReg.getNode()); - setValue(&CI, CopyFromReg); - } else { - setValue(&CI, getValue(I)); + const GCStatepointInst *SI = CI.getStatepoint(); + + if (SI->getParent() == CI.getParent()) { + setValue(&CI, getValue(SI)); + return; } + // Statepoint is in different basic block so we should have stored call + // result in a virtual register. + // We can not use default getValue() functionality to copy value from this + // register because statepoint and actual call return types can be + // different, and getValue() will use CopyFromReg of the wrong type, + // which is always i32 in our case. + Type *RetTy = SI->getActualReturnType(); + SDValue CopyFromReg = getCopyFromRegs(SI, RetTy); + + assert(CopyFromReg.getNode()); + setValue(&CI, CopyFromReg); } void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { @@ -1005,6 +1026,13 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { const Value *DerivedPtr = Relocate.getDerivedPtr(); SDValue SD = getValue(DerivedPtr); + if (SD.isUndef() && SD.getValueType().getSizeInBits() <= 64) { + // Lowering relocate(undef) as arbitrary constant. Current constant value + // is chosen such that it's unlikely to be a valid pointer. 
+ setValue(&Relocate, DAG.getTargetConstant(0xFEFEFEFE, SDLoc(SD), MVT::i64)); + return; + } + auto &SpillMap = FuncInfo.StatepointSpillMaps[Relocate.getStatepoint()]; auto SlotIt = SpillMap.find(DerivedPtr); assert(SlotIt != SpillMap.end() && "Relocating not lowered gc value"); @@ -1020,26 +1048,27 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { unsigned Index = *DerivedPtrLocation; SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy()); - // Note: We know all of these reloads are independent, but don't bother to - // exploit that chain wise. DAGCombine will happily do so as needed, so - // doing it here would be a small compile time win at most. - SDValue Chain = getRoot(); + // All the reloads are independent and are reading memory only modified by + // statepoints (i.e. no other aliasing stores); informing SelectionDAG of + // this this let's CSE kick in for free and allows reordering of instructions + // if possible. The lowering for statepoint sets the root, so this is + // ordering all reloads with the either a) the statepoint node itself, or b) + // the entry of the current block for an invoke statepoint. + const SDValue Chain = DAG.getRoot(); // != Builder.getRoot() auto &MF = DAG.getMachineFunction(); auto &MFI = MF.getFrameInfo(); auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index); - auto *LoadMMO = - MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, - MFI.getObjectSize(Index), - MFI.getObjectAlignment(Index)); + auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, + MFI.getObjectSize(Index), + MFI.getObjectAlign(Index)); auto LoadVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), Relocate.getType()); SDValue SpillLoad = DAG.getLoad(LoadVT, getCurSDLoc(), Chain, SpillSlot, LoadMMO); - - DAG.setRoot(SpillLoad.getValue(1)); + PendingLoads.push_back(SpillLoad.getValue(1)); assert(SpillLoad.getNode()); setValue(&Relocate, SpillLoad); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h index 70507932681d..634ef87f3840 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h @@ -15,11 +15,9 @@ #define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/CodeGen/ValueTypes.h" #include <cassert> namespace llvm { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 368e2100031f..96df20039b15 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -83,7 +83,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, const CCValAssign &ArgLoc = ArgLocs[I]; if (!ArgLoc.isRegLoc()) continue; - Register Reg = ArgLoc.getLocReg(); + MCRegister Reg = ArgLoc.getLocReg(); // Only look at callee saved registers. 
if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg)) continue; @@ -93,7 +93,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, SDValue Value = OutVals[I]; if (Value->getOpcode() != ISD::CopyFromReg) return false; - unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg(); + MCRegister ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg(); if (MRI.getLiveInPhysReg(ArgReg) != Reg) return false; } @@ -110,14 +110,18 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call, IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet); IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest); IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal); + IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated); IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca); IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned); IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf); IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError); - Alignment = Call->getParamAlignment(ArgIdx); + Alignment = Call->getParamAlign(ArgIdx); ByValType = nullptr; - if (Call->paramHasAttr(ArgIdx, Attribute::ByVal)) + if (IsByVal) ByValType = Call->getParamByValType(ArgIdx); + PreallocatedType = nullptr; + if (IsPreallocated) + PreallocatedType = Call->getParamPreallocatedType(ArgIdx); } /// Generate a libcall taking the given operands as arguments and returning a @@ -176,38 +180,24 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, return LowerCallTo(CLI); } -bool -TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps, - unsigned Limit, uint64_t Size, - unsigned DstAlign, unsigned SrcAlign, - bool IsMemset, - bool ZeroMemset, - bool MemcpyStrSrc, - bool AllowOverlap, - unsigned DstAS, unsigned SrcAS, - const AttributeList &FuncAttributes) const { - // If 'SrcAlign' is zero, that means the memory operation does not need to - // load the value, i.e. memset or memcpy from constant string. Otherwise, - // it's the inferred alignment of the source. 'DstAlign', on the other hand, - // is the specified alignment of the memory operation. If it is zero, that - // means it's possible to change the alignment of the destination. - // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does - // not need to be loaded. - if (!(SrcAlign == 0 || SrcAlign >= DstAlign)) +bool TargetLowering::findOptimalMemOpLowering( + std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, + unsigned SrcAS, const AttributeList &FuncAttributes) const { + if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign()) return false; - EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign, - IsMemset, ZeroMemset, MemcpyStrSrc, - FuncAttributes); + EVT VT = getOptimalMemOpType(Op, FuncAttributes); if (VT == MVT::Other) { // Use the largest integer type whose alignment constraints are satisfied. // We only need to check DstAlign here as SrcAlign is always greater or // equal to DstAlign (or zero). VT = MVT::i64; - while (DstAlign && DstAlign < VT.getSizeInBits() / 8 && - !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) - VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); + if (Op.isFixedDstAlign()) + while ( + Op.getDstAlign() < (VT.getSizeInBits() / 8) && + !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign().value())) + VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); assert(VT.isInteger()); // Find the largest legal integer type. 
@@ -223,7 +213,8 @@ TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps, } unsigned NumMemOps = 0; - while (Size != 0) { + uint64_t Size = Op.size(); + while (Size) { unsigned VTSize = VT.getSizeInBits() / 8; while (VTSize > Size) { // For now, only use non-vector load / store's for the left-over pieces. @@ -257,9 +248,10 @@ TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps, // If the new VT cannot cover all of the remaining bits, then consider // issuing a (or a pair of) unaligned and overlapping load / store. bool Fast; - if (NumMemOps && AllowOverlap && NewVTSize < Size && - allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, - MachineMemOperand::MONone, &Fast) && + if (NumMemOps && Op.allowOverlap() && NewVTSize < Size && + allowsMisalignedMemoryAccesses( + VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0, + MachineMemOperand::MONone, &Fast) && Fast) VTSize = Size; else { @@ -491,13 +483,15 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// If the specified instruction has a constant integer operand and there are /// bits set in that constant that are not demanded, then clear those bits and /// return true. -bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, +bool TargetLowering::ShrinkDemandedConstant(SDValue Op, + const APInt &DemandedBits, + const APInt &DemandedElts, TargetLoweringOpt &TLO) const { SDLoc DL(Op); unsigned Opcode = Op.getOpcode(); // Do target-specific constant optimization. - if (targetShrinkDemandedConstant(Op, Demanded, TLO)) + if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) return TLO.New.getNode(); // FIXME: ISD::SELECT, ISD::SELECT_CC @@ -513,12 +507,12 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, // If this is a 'not' op, don't touch it because that's a canonical form. const APInt &C = Op1C->getAPIntValue(); - if (Opcode == ISD::XOR && Demanded.isSubsetOf(C)) + if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C)) return false; - if (!C.isSubsetOf(Demanded)) { + if (!C.isSubsetOf(DemandedBits)) { EVT VT = Op.getValueType(); - SDValue NewC = TLO.DAG.getConstant(Demanded & C, DL, VT); + SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT); SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC); return TLO.CombineTo(Op, NewOp); } @@ -530,6 +524,16 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, return false; } +bool TargetLowering::ShrinkDemandedConstant(SDValue Op, + const APInt &DemandedBits, + TargetLoweringOpt &TLO) const { + EVT VT = Op.getValueType(); + APInt DemandedElts = VT.isVector() + ? APInt::getAllOnesValue(VT.getVectorNumElements()) + : APInt(1, 1); + return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO); +} + /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. /// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be /// generalized for targets with other types of implicit widening casts. @@ -598,6 +602,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, unsigned Depth, bool AssumeSingleUse) const { EVT VT = Op.getValueType(); + + // TODO: We can probably do more work on calculating the known bits and + // simplifying the operations for scalable vectors, but for now we just + // bail out. + if (VT.isScalableVector()) { + // Pretend we don't know anything for now. 
+ Known = KnownBits(DemandedBits.getBitWidth()); + return false; + } + APInt DemandedElts = VT.isVector() ? APInt::getAllOnesValue(VT.getVectorNumElements()) : APInt(1, 1); @@ -623,15 +637,18 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( return DAG.getUNDEF(Op.getValueType()); unsigned NumElts = DemandedElts.getBitWidth(); + unsigned BitWidth = DemandedBits.getBitWidth(); KnownBits LHSKnown, RHSKnown; switch (Op.getOpcode()) { case ISD::BITCAST: { SDValue Src = peekThroughBitcasts(Op.getOperand(0)); EVT SrcVT = Src.getValueType(); EVT DstVT = Op.getValueType(); + if (SrcVT == DstVT) + return Src; + unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits(); unsigned NumDstEltBits = DstVT.getScalarSizeInBits(); - if (NumSrcEltBits == NumDstEltBits) if (SDValue V = SimplifyMultipleUseDemandedBits( Src, DemandedBits, DemandedElts, DAG, Depth + 1)) @@ -719,6 +736,21 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( return Op.getOperand(1); break; } + case ISD::SHL: { + // If we are only demanding sign bits then we can use the shift source + // directly. + if (const APInt *MaxSA = + DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) { + SDValue Op0 = Op.getOperand(0); + unsigned ShAmt = MaxSA->getZExtValue(); + unsigned NumSignBits = + DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1); + unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros(); + if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits)) + return Op0; + } + break; + } case ISD::SETCC: { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -727,7 +759,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( // width as the setcc result, and (3) the result of a setcc conforms to 0 or // -1, we may be able to bypass the setcc. if (DemandedBits.isSignMask() && - Op0.getScalarValueSizeInBits() == DemandedBits.getBitWidth() && + Op0.getScalarValueSizeInBits() == BitWidth && getBooleanContents(Op0.getValueType()) == BooleanContent::ZeroOrNegativeOneBooleanContent) { // If we're testing X < 0, then this compare isn't needed - just use X! @@ -742,9 +774,30 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( } case ISD::SIGN_EXTEND_INREG: { // If none of the extended bits are demanded, eliminate the sextinreg. + SDValue Op0 = Op.getOperand(0); EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); - if (DemandedBits.getActiveBits() <= ExVT.getScalarSizeInBits()) - return Op.getOperand(0); + unsigned ExBits = ExVT.getScalarSizeInBits(); + if (DemandedBits.getActiveBits() <= ExBits) + return Op0; + // If the input is already sign extended, just drop the extension. + unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1); + if (NumSignBits >= (BitWidth - ExBits + 1)) + return Op0; + break; + } + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: { + // If we only want the lowest element and none of extended bits, then we can + // return the bitcasted source vector. 
+ SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + EVT DstVT = Op.getValueType(); + if (DemandedElts == 1 && DstVT.getSizeInBits() == SrcVT.getSizeInBits() && + DAG.getDataLayout().isLittleEndian() && + DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) { + return DAG.getBitcast(DstVT, Src); + } break; } case ISD::INSERT_VECTOR_ELT: { @@ -757,6 +810,16 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( return Vec; break; } + case ISD::INSERT_SUBVECTOR: { + // If we don't demand the inserted subvector, return the base vector. + SDValue Vec = Op.getOperand(0); + SDValue Sub = Op.getOperand(1); + uint64_t Idx = Op.getConstantOperandVal(2); + unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); + if (DemandedElts.extractBits(NumSubElts, Idx) == 0) + return Vec; + break; + } case ISD::VECTOR_SHUFFLE: { ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask(); @@ -790,6 +853,25 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( return SDValue(); } +SDValue TargetLowering::SimplifyMultipleUseDemandedBits( + SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG, + unsigned Depth) const { + EVT VT = Op.getValueType(); + APInt DemandedElts = VT.isVector() + ? APInt::getAllOnesValue(VT.getVectorNumElements()) + : APInt(1, 1); + return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG, + Depth); +} + +SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts( + SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, + unsigned Depth) const { + APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits()); + return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG, + Depth); +} + /// Look at Op. At this point, we know that only the OriginalDemandedBits of the /// result of Op are ever used downstream. If we can use this information to /// simplify Op, create a new simplified DAG node and return true, returning the @@ -805,6 +887,15 @@ bool TargetLowering::SimplifyDemandedBits( assert(Op.getScalarValueSizeInBits() == BitWidth && "Mask size mismatches value type size!"); + // Don't know anything. + Known = KnownBits(BitWidth); + + // TODO: We can probably do more work on calculating the known bits and + // simplifying the operations for scalable vectors, but for now we just + // bail out. + if (Op.getValueType().isScalableVector()) + return false; + unsigned NumElts = OriginalDemandedElts.getBitWidth(); assert((!Op.getValueType().isVector() || NumElts == Op.getValueType().getVectorNumElements()) && @@ -815,9 +906,6 @@ bool TargetLowering::SimplifyDemandedBits( SDLoc dl(Op); auto &DL = TLO.DAG.getDataLayout(); - // Don't know anything. - Known = KnownBits(BitWidth); - // Undef operand. if (Op.isUndef()) return false; @@ -850,7 +938,7 @@ bool TargetLowering::SimplifyDemandedBits( return false; } - KnownBits Known2, KnownOut; + KnownBits Known2; switch (Op.getOpcode()) { case ISD::TargetConstant: llvm_unreachable("Can't simplify this node"); @@ -864,7 +952,11 @@ bool TargetLowering::SimplifyDemandedBits( APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth); if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1)) return true; - Known = SrcKnown.zextOrTrunc(BitWidth, false); + + // Upper elements are undef, so only get the knownbits if we just demand + // the bottom element. 
+ if (DemandedElts == 1) + Known = SrcKnown.anyextOrTrunc(BitWidth); break; } case ISD::BUILD_VECTOR: @@ -877,6 +969,12 @@ bool TargetLowering::SimplifyDemandedBits( if (getTargetConstantFromLoad(LD)) { Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); return false; // Don't fall through, will infinitely loop. + } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { + // If this is a ZEXTLoad and we are looking at the loaded value. + EVT MemVT = LD->getMemoryVT(); + unsigned MemBits = MemVT.getScalarSizeInBits(); + Known.Zero.setBitsFrom(MemBits); + return false; // Don't fall through, will infinitely loop. } break; } @@ -904,7 +1002,7 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1)) return true; - Known = KnownScl.zextOrTrunc(BitWidth, false); + Known = KnownScl.anyextOrTrunc(BitWidth); KnownBits KnownVec; if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO, @@ -919,57 +1017,75 @@ bool TargetLowering::SimplifyDemandedBits( return false; } case ISD::INSERT_SUBVECTOR: { - SDValue Base = Op.getOperand(0); + // Demand any elements from the subvector and the remainder from the src its + // inserted into. + SDValue Src = Op.getOperand(0); SDValue Sub = Op.getOperand(1); - EVT SubVT = Sub.getValueType(); - unsigned NumSubElts = SubVT.getVectorNumElements(); - - // If index isn't constant, assume we need the original demanded base - // elements and ALL the inserted subvector elements. - APInt BaseElts = DemandedElts; - APInt SubElts = APInt::getAllOnesValue(NumSubElts); - if (isa<ConstantSDNode>(Op.getOperand(2))) { - const APInt &Idx = Op.getConstantOperandAPInt(2); - if (Idx.ule(NumElts - NumSubElts)) { - unsigned SubIdx = Idx.getZExtValue(); - SubElts = DemandedElts.extractBits(NumSubElts, SubIdx); - BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx); - } - } - - KnownBits KnownSub, KnownBase; - if (SimplifyDemandedBits(Sub, DemandedBits, SubElts, KnownSub, TLO, + uint64_t Idx = Op.getConstantOperandVal(2); + unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); + APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); + APInt DemandedSrcElts = DemandedElts; + DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); + + KnownBits KnownSub, KnownSrc; + if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO, Depth + 1)) return true; - if (SimplifyDemandedBits(Base, DemandedBits, BaseElts, KnownBase, TLO, + if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO, Depth + 1)) return true; Known.Zero.setAllBits(); Known.One.setAllBits(); - if (!!SubElts) { - Known.One &= KnownSub.One; - Known.Zero &= KnownSub.Zero; + if (!!DemandedSubElts) { + Known.One &= KnownSub.One; + Known.Zero &= KnownSub.Zero; } - if (!!BaseElts) { - Known.One &= KnownBase.One; - Known.Zero &= KnownBase.Zero; + if (!!DemandedSrcElts) { + Known.One &= KnownSrc.One; + Known.Zero &= KnownSrc.Zero; + } + + // Attempt to avoid multi-use src if we don't need anything from it. + if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() || + !DemandedSrcElts.isAllOnesValue()) { + SDValue NewSub = SimplifyMultipleUseDemandedBits( + Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1); + SDValue NewSrc = SimplifyMultipleUseDemandedBits( + Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1); + if (NewSub || NewSrc) { + NewSub = NewSub ? NewSub : Sub; + NewSrc = NewSrc ? 
NewSrc : Src; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub, + Op.getOperand(2)); + return TLO.CombineTo(Op, NewOp); + } } break; } case ISD::EXTRACT_SUBVECTOR: { - // If index isn't constant, assume we need all the source vector elements. + // Offset the demanded elts by the subvector index. SDValue Src = Op.getOperand(0); - ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + if (Src.getValueType().isScalableVector()) + break; + uint64_t Idx = Op.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - APInt SrcElts = APInt::getAllOnesValue(NumSrcElts); - if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { - // Offset the demanded elts by the subvector index. - uint64_t Idx = SubIdx->getZExtValue(); - SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - } - if (SimplifyDemandedBits(Src, DemandedBits, SrcElts, Known, TLO, Depth + 1)) + APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + + if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO, + Depth + 1)) return true; + + // Attempt to avoid multi-use src if we don't need anything from it. + if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) { + SDValue DemandedSrc = SimplifyMultipleUseDemandedBits( + Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1); + if (DemandedSrc) { + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, + Op.getOperand(1)); + return TLO.CombineTo(Op, NewOp); + } + } break; } case ISD::CONCAT_VECTORS: { @@ -1069,7 +1185,8 @@ bool TargetLowering::SimplifyDemandedBits( // If any of the set bits in the RHS are known zero on the LHS, shrink // the constant. - if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, TLO)) + if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, + DemandedElts, TLO)) return true; // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its @@ -1117,16 +1234,14 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero)) return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT)); // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, TLO)) + if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts, + TLO)) return true; // If the operation can be done in a smaller type, do so. if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) return true; - // Output known-1 bits are only known if set in both the LHS & RHS. - Known.One &= Known2.One; - // Output known-0 are known to be clear if zero in either the LHS | RHS. - Known.Zero |= Known2.Zero; + Known &= Known2; break; } case ISD::OR: { @@ -1163,16 +1278,13 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.isSubsetOf(Known.One | Known2.Zero)) return TLO.CombineTo(Op, Op1); // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(Op, DemandedBits, TLO)) + if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) return true; // If the operation can be done in a smaller type, do so. if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) return true; - // Output known-0 bits are only known if clear in both the LHS & RHS. - Known.Zero &= Known2.Zero; - // Output known-1 are known to be set if set in either the LHS | RHS. 
- Known.One |= Known2.One; + Known |= Known2; break; } case ISD::XOR: { @@ -1218,12 +1330,8 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero)) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1)); - // Output known-0 bits are known if clear or set in both the LHS & RHS. - KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); - // Output known-1 are known to be set if set in only one of the LHS, RHS. - KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); - - if (ConstantSDNode *C = isConstOrConstSplat(Op1)) { + ConstantSDNode* C = isConstOrConstSplat(Op1, DemandedElts); + if (C) { // If one side is a constant, and all of the known set bits on the other // side are also set in the constant, turn this into an AND, as we know // the bits will be cleared. @@ -1238,19 +1346,20 @@ bool TargetLowering::SimplifyDemandedBits( // If the RHS is a constant, see if we can change it. Don't alter a -1 // constant because that's a 'not' op, and that is better for combining // and codegen. - if (!C->isAllOnesValue()) { - if (DemandedBits.isSubsetOf(C->getAPIntValue())) { - // We're flipping all demanded bits. Flip the undemanded bits too. - SDValue New = TLO.DAG.getNOT(dl, Op0, VT); - return TLO.CombineTo(Op, New); - } - // If we can't turn this into a 'not', try to shrink the constant. - if (ShrinkDemandedConstant(Op, DemandedBits, TLO)) - return true; + if (!C->isAllOnesValue() && + DemandedBits.isSubsetOf(C->getAPIntValue())) { + // We're flipping all demanded bits. Flip the undemanded bits too. + SDValue New = TLO.DAG.getNOT(dl, Op0, VT); + return TLO.CombineTo(Op, New); } } - Known = std::move(KnownOut); + // If we can't turn this into a 'not', try to shrink the constant. + if (!C || !C->isAllOnesValue()) + if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) + return true; + + Known ^= Known2; break; } case ISD::SELECT: @@ -1264,7 +1373,7 @@ bool TargetLowering::SimplifyDemandedBits( assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. - if (ShrinkDemandedConstant(Op, DemandedBits, TLO)) + if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) return true; // Only known if known in both the LHS and RHS. @@ -1282,7 +1391,7 @@ bool TargetLowering::SimplifyDemandedBits( assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. - if (ShrinkDemandedConstant(Op, DemandedBits, TLO)) + if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) return true; // Only known if known in both the LHS and RHS. @@ -1320,12 +1429,10 @@ bool TargetLowering::SimplifyDemandedBits( case ISD::SHL: { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); + EVT ShiftVT = Op1.getValueType(); - if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) { - // If the shift count is an invalid immediate, don't do anything. - if (SA->getAPIntValue().uge(BitWidth)) - break; - + if (const APInt *SA = + TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) { unsigned ShAmt = SA->getZExtValue(); if (ShAmt == 0) return TLO.CombineTo(Op, Op0); @@ -1336,37 +1443,25 @@ bool TargetLowering::SimplifyDemandedBits( // TODO - support non-uniform vector amounts. 
if (Op0.getOpcode() == ISD::SRL) { if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) { - if (ConstantSDNode *SA2 = - isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) { - if (SA2->getAPIntValue().ult(BitWidth)) { - unsigned C1 = SA2->getZExtValue(); - unsigned Opc = ISD::SHL; - int Diff = ShAmt - C1; - if (Diff < 0) { - Diff = -Diff; - Opc = ISD::SRL; - } - - SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType()); - return TLO.CombineTo( - Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA)); + if (const APInt *SA2 = + TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) { + unsigned C1 = SA2->getZExtValue(); + unsigned Opc = ISD::SHL; + int Diff = ShAmt - C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SRL; } + SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT); + return TLO.CombineTo( + Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA)); } } } - if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts, - Known, TLO, Depth + 1)) - return true; - - // Try shrinking the operation as long as the shift amount will still be - // in range. - if ((ShAmt < DemandedBits.getActiveBits()) && - ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) - return true; - // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits // are not demanded. This will likely allow the anyext to be folded away. + // TODO - support non-uniform vector amounts. if (Op0.getOpcode() == ISD::ANY_EXTEND) { SDValue InnerOp = Op0.getOperand(0); EVT InnerVT = InnerOp.getValueType(); @@ -1382,22 +1477,24 @@ bool TargetLowering::SimplifyDemandedBits( return TLO.CombineTo( Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl)); } + // Repeat the SHL optimization above in cases where an extension // intervenes: (shl (anyext (shr x, c1)), c2) to // (shl (anyext x), c2-c1). This requires that the bottom c1 bits // aren't demanded (as above) and that the shifted upper c1 bits of // x aren't demanded. + // TODO - support non-uniform vector amounts. if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL && InnerOp.hasOneUse()) { - if (ConstantSDNode *SA2 = - isConstOrConstSplat(InnerOp.getOperand(1))) { - unsigned InnerShAmt = SA2->getLimitedValue(InnerBits); + if (const APInt *SA2 = + TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) { + unsigned InnerShAmt = SA2->getZExtValue(); if (InnerShAmt < ShAmt && InnerShAmt < InnerBits && DemandedBits.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) && DemandedBits.countTrailingZeros() >= ShAmt) { - SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, - Op1.getValueType()); + SDValue NewSA = + TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT); SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, InnerOp.getOperand(0)); return TLO.CombineTo( @@ -1407,60 +1504,76 @@ bool TargetLowering::SimplifyDemandedBits( } } + APInt InDemandedMask = DemandedBits.lshr(ShAmt); + if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, + Depth + 1)) + return true; + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero <<= ShAmt; Known.One <<= ShAmt; // low bits known zero. Known.Zero.setLowBits(ShAmt); + + // Try shrinking the operation as long as the shift amount will still be + // in range. + if ((ShAmt < DemandedBits.getActiveBits()) && + ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) + return true; + } + + // If we are only demanding sign bits then we can use the shift source + // directly. 
+ if (const APInt *MaxSA = + TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) { + unsigned ShAmt = MaxSA->getZExtValue(); + unsigned NumSignBits = + TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1); + unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros(); + if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits)) + return TLO.CombineTo(Op, Op0); } break; } case ISD::SRL: { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); + EVT ShiftVT = Op1.getValueType(); - if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) { - // If the shift count is an invalid immediate, don't do anything. - if (SA->getAPIntValue().uge(BitWidth)) - break; - + if (const APInt *SA = + TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) { unsigned ShAmt = SA->getZExtValue(); if (ShAmt == 0) return TLO.CombineTo(Op, Op0); - EVT ShiftVT = Op1.getValueType(); - APInt InDemandedMask = (DemandedBits << ShAmt); - - // If the shift is exact, then it does demand the low bits (and knows that - // they are zero). - if (Op->getFlags().hasExact()) - InDemandedMask.setLowBits(ShAmt); - // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a // single shift. We can do this if the top bits (which are shifted out) // are never demanded. // TODO - support non-uniform vector amounts. if (Op0.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SA2 = - isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) { - if (!DemandedBits.intersects( - APInt::getHighBitsSet(BitWidth, ShAmt))) { - if (SA2->getAPIntValue().ult(BitWidth)) { - unsigned C1 = SA2->getZExtValue(); - unsigned Opc = ISD::SRL; - int Diff = ShAmt - C1; - if (Diff < 0) { - Diff = -Diff; - Opc = ISD::SHL; - } - - SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT); - return TLO.CombineTo( - Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA)); + if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) { + if (const APInt *SA2 = + TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) { + unsigned C1 = SA2->getZExtValue(); + unsigned Opc = ISD::SRL; + int Diff = ShAmt - C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SHL; } + SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT); + return TLO.CombineTo( + Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA)); } } } + APInt InDemandedMask = (DemandedBits << ShAmt); + + // If the shift is exact, then it does demand the low bits (and knows that + // they are zero). + if (Op->getFlags().hasExact()) + InDemandedMask.setLowBits(ShAmt); + // Compute the new bits that are at the top now. if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1)) @@ -1468,14 +1581,22 @@ bool TargetLowering::SimplifyDemandedBits( assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero.lshrInPlace(ShAmt); Known.One.lshrInPlace(ShAmt); - - Known.Zero.setHighBits(ShAmt); // High bits known zero. + // High bits known zero. + Known.Zero.setHighBits(ShAmt); } break; } case ISD::SRA: { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); + EVT ShiftVT = Op1.getValueType(); + + // If we only want bits that already match the signbit then we don't need + // to shift. 
+ unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros(); + if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >= + NumHiDemandedBits) + return TLO.CombineTo(Op, Op0); // If this is an arithmetic shift right and only the low-bit is set, we can // always convert this into a logical shr, even if the shift amount is @@ -1484,11 +1605,8 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.isOneValue()) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1)); - if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) { - // If the shift count is an invalid immediate, don't do anything. - if (SA->getAPIntValue().uge(BitWidth)) - break; - + if (const APInt *SA = + TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) { unsigned ShAmt = SA->getZExtValue(); if (ShAmt == 0) return TLO.CombineTo(Op, Op0); @@ -1525,14 +1643,23 @@ bool TargetLowering::SimplifyDemandedBits( int Log2 = DemandedBits.exactLogBase2(); if (Log2 >= 0) { // The bit must come from the sign. - SDValue NewSA = - TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op1.getValueType()); + SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA)); } if (Known.One[BitWidth - ShAmt - 1]) // New bits are known one. Known.One.setHighBits(ShAmt); + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0) { + SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } } break; } @@ -1573,6 +1700,32 @@ bool TargetLowering::SimplifyDemandedBits( Known.One |= Known2.One; Known.Zero |= Known2.Zero; } + + // For pow-2 bitwidths we only demand the bottom modulo amt bits. + if (isPowerOf2_32(BitWidth)) { + APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1); + if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts, + Known2, TLO, Depth + 1)) + return true; + } + break; + } + case ISD::ROTL: + case ISD::ROTR: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + + // If we're rotating an 0/-1 value, then it stays an 0/-1 value. + if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1)) + return TLO.CombineTo(Op, Op0); + + // For pow-2 bitwidths we only demand the bottom modulo amt bits. + if (isPowerOf2_32(BitWidth)) { + APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1); + if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO, + Depth + 1)) + return true; + } break; } case ISD::BITREVERSE: { @@ -1602,7 +1755,8 @@ bool TargetLowering::SimplifyDemandedBits( // If we only care about the highest bit, don't bother shifting right. if (DemandedBits.isSignMask()) { - unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0); + unsigned NumSignBits = + TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1); bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1; // However if the input is already sign extended we expect the sign // extension to be dropped altogether later and do not simplify. @@ -1639,8 +1793,7 @@ bool TargetLowering::SimplifyDemandedBits( // If the input sign bit is known zero, convert this into a zero extension. 
if (Known.Zero[ExVTBits - 1]) - return TLO.CombineTo( - Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT.getScalarType())); + return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT)); APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits); if (Known.One[ExVTBits - 1]) { // Input sign bit known set @@ -1704,7 +1857,7 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); assert(Known.getBitWidth() == InBits && "Src width has changed?"); - Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */); + Known = Known.zext(BitWidth); break; } case ISD::SIGN_EXTEND: @@ -1777,7 +1930,12 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); assert(Known.getBitWidth() == InBits && "Src width has changed?"); - Known = Known.zext(BitWidth, false /* => any extend */); + Known = Known.anyext(BitWidth); + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (SDValue NewSrc = SimplifyMultipleUseDemandedBits( + Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1)) + return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc)); break; } case ISD::TRUNCATE: { @@ -1886,7 +2044,7 @@ bool TargetLowering::SimplifyDemandedBits( Known = Known2; if (BitWidth > EltBitWidth) - Known = Known.zext(BitWidth, false /* => any extend */); + Known = Known.anyext(BitWidth); break; } case ISD::BITCAST: { @@ -2151,14 +2309,20 @@ bool TargetLowering::SimplifyDemandedVectorElts( APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth, bool AssumeSingleUse) const { EVT VT = Op.getValueType(); + unsigned Opcode = Op.getOpcode(); APInt DemandedElts = OriginalDemandedElts; unsigned NumElts = DemandedElts.getBitWidth(); assert(VT.isVector() && "Expected vector op"); - assert(VT.getVectorNumElements() == NumElts && - "Mask size mismatches value type element count!"); KnownUndef = KnownZero = APInt::getNullValue(NumElts); + // TODO: For now we assume we know nothing about scalable vectors. + if (VT.isScalableVector()) + return false; + + assert(VT.getVectorNumElements() == NumElts && + "Mask size mismatches value type element count!"); + // Undef operand. if (Op.isUndef()) { KnownUndef.setAllBits(); @@ -2182,7 +2346,22 @@ bool TargetLowering::SimplifyDemandedVectorElts( SDLoc DL(Op); unsigned EltSizeInBits = VT.getScalarSizeInBits(); - switch (Op.getOpcode()) { + // Helper for demanding the specified elements and all the bits of both binary + // operands. + auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) { + SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts, + TLO.DAG, Depth + 1); + SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts, + TLO.DAG, Depth + 1); + if (NewOp0 || NewOp1) { + SDValue NewOp = TLO.DAG.getNode( + Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? 
NewOp1 : Op1); + return TLO.CombineTo(Op, NewOp); + } + return false; + }; + + switch (Opcode) { case ISD::SCALAR_TO_VECTOR: { if (!DemandedElts[0]) { KnownUndef.setAllBits(); @@ -2234,7 +2413,8 @@ bool TargetLowering::SimplifyDemandedVectorElts( } KnownBits Known; - if (SimplifyDemandedBits(Src, SrcDemandedBits, Known, TLO, Depth + 1)) + if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known, + TLO, Depth + 1)) return true; } @@ -2323,53 +2503,75 @@ bool TargetLowering::SimplifyDemandedVectorElts( break; } case ISD::INSERT_SUBVECTOR: { - if (!isa<ConstantSDNode>(Op.getOperand(2))) - break; - SDValue Base = Op.getOperand(0); + // Demand any elements from the subvector and the remainder from the src its + // inserted into. + SDValue Src = Op.getOperand(0); SDValue Sub = Op.getOperand(1); - EVT SubVT = Sub.getValueType(); - unsigned NumSubElts = SubVT.getVectorNumElements(); - const APInt &Idx = Op.getConstantOperandAPInt(2); - if (Idx.ugt(NumElts - NumSubElts)) - break; - unsigned SubIdx = Idx.getZExtValue(); - APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx); + uint64_t Idx = Op.getConstantOperandVal(2); + unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); + APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); + APInt DemandedSrcElts = DemandedElts; + DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); + APInt SubUndef, SubZero; - if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO, + if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO, Depth + 1)) return true; - APInt BaseElts = DemandedElts; - BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx); - - // If none of the base operand elements are demanded, replace it with undef. - if (!BaseElts && !Base.isUndef()) - return TLO.CombineTo(Op, - TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, - TLO.DAG.getUNDEF(VT), - Op.getOperand(1), - Op.getOperand(2))); - - if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO, - Depth + 1)) + + // If none of the src operand elements are demanded, replace it with undef. + if (!DemandedSrcElts && !Src.isUndef()) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, + TLO.DAG.getUNDEF(VT), Sub, + Op.getOperand(2))); + + if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero, + TLO, Depth + 1)) return true; - KnownUndef.insertBits(SubUndef, SubIdx); - KnownZero.insertBits(SubZero, SubIdx); + KnownUndef.insertBits(SubUndef, Idx); + KnownZero.insertBits(SubZero, Idx); + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedSrcElts.isAllOnesValue() || + !DemandedSubElts.isAllOnesValue()) { + SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts( + Src, DemandedSrcElts, TLO.DAG, Depth + 1); + SDValue NewSub = SimplifyMultipleUseDemandedVectorElts( + Sub, DemandedSubElts, TLO.DAG, Depth + 1); + if (NewSrc || NewSub) { + NewSrc = NewSrc ? NewSrc : Src; + NewSub = NewSub ? NewSub : Sub; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc, + NewSub, Op.getOperand(2)); + return TLO.CombineTo(Op, NewOp); + } + } break; } case ISD::EXTRACT_SUBVECTOR: { + // Offset the demanded elts by the subvector index. 
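// [Editor's note] The INSERT_SUBVECTOR bookkeeping above, restated as a
// standalone bitmask sketch (not part of this patch): the subvector is
// asked for DemandedElts[Idx, Idx + NumSub), and the source is asked for
// DemandedElts with that window cleared.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t DemandedElts = 0b11011010u; // 8-element vector: elts 1,3,4,6,7
  unsigned Idx = 3, NumSub = 2;        // subvector inserted at element 3
  uint32_t Window = ((1u << NumSub) - 1) << Idx;
  uint32_t DemandedSubElts = (DemandedElts & Window) >> Idx;
  uint32_t DemandedSrcElts = DemandedElts & ~Window;
  assert(DemandedSubElts == 0b11u);
  assert(DemandedSrcElts == 0b11000010u);
  return 0;
}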
SDValue Src = Op.getOperand(0); - ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + if (Src.getValueType().isScalableVector()) + break; + uint64_t Idx = Op.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { - // Offset the demanded elts by the subvector index. - uint64_t Idx = SubIdx->getZExtValue(); - APInt SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - APInt SrcUndef, SrcZero; - if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO, - Depth + 1)) - return true; - KnownUndef = SrcUndef.extractBits(NumElts, Idx); - KnownZero = SrcZero.extractBits(NumElts, Idx); + APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + + APInt SrcUndef, SrcZero; + if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO, + Depth + 1)) + return true; + KnownUndef = SrcUndef.extractBits(NumElts, Idx); + KnownZero = SrcZero.extractBits(NumElts, Idx); + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedElts.isAllOnesValue()) { + SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts( + Src, DemandedSrcElts, TLO.DAG, Depth + 1); + if (NewSrc) { + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc, + Op.getOperand(1)); + return TLO.CombineTo(Op, NewOp); + } } break; } @@ -2538,7 +2740,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( break; } - // TODO: There are more binop opcodes that could be handled here - MUL, MIN, + // TODO: There are more binop opcodes that could be handled here - MIN, // MAX, saturated math, etc. case ISD::OR: case ISD::XOR: @@ -2549,17 +2751,26 @@ bool TargetLowering::SimplifyDemandedVectorElts( case ISD::FMUL: case ISD::FDIV: case ISD::FREM: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + APInt UndefRHS, ZeroRHS; - if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS, - ZeroRHS, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO, + Depth + 1)) return true; APInt UndefLHS, ZeroLHS; - if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS, - ZeroLHS, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO, + Depth + 1)) return true; KnownZero = ZeroLHS & ZeroRHS; KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS); + + // Attempt to avoid multi-use ops if we don't need anything from them. + // TODO - use KnownUndef to relax the demandedelts? + if (!DemandedElts.isAllOnesValue()) + if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) + return true; break; } case ISD::SHL: @@ -2567,27 +2778,39 @@ bool TargetLowering::SimplifyDemandedVectorElts( case ISD::SRA: case ISD::ROTL: case ISD::ROTR: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + APInt UndefRHS, ZeroRHS; - if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS, - ZeroRHS, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO, + Depth + 1)) return true; APInt UndefLHS, ZeroLHS; - if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS, - ZeroLHS, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO, + Depth + 1)) return true; KnownZero = ZeroLHS; KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop? + + // Attempt to avoid multi-use ops if we don't need anything from them. 
+ // TODO - use KnownUndef to relax the demandedelts? + if (!DemandedElts.isAllOnesValue()) + if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) + return true; break; } case ISD::MUL: case ISD::AND: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + APInt SrcUndef, SrcZero; - if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef, - SrcZero, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO, + Depth + 1)) return true; - if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef, - KnownZero, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero, + TLO, Depth + 1)) return true; // If either side has a zero element, then the result element is zero, even @@ -2597,6 +2820,12 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownZero |= SrcZero; KnownUndef &= SrcUndef; KnownUndef &= ~KnownZero; + + // Attempt to avoid multi-use ops if we don't need anything from them. + // TODO - use KnownUndef to relax the demandedelts? + if (!DemandedElts.isAllOnesValue()) + if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) + return true; break; } case ISD::TRUNCATE: @@ -2661,17 +2890,16 @@ void TargetLowering::computeKnownBitsForTargetInstr( Known.resetAll(); } -void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op, - KnownBits &Known, - const APInt &DemandedElts, - const SelectionDAG &DAG, - unsigned Depth) const { - assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex"); +void TargetLowering::computeKnownBitsForFrameIndex( + const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const { + // The low bits are known zero if the pointer is aligned. + Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx))); +} - if (unsigned Align = DAG.InferPtrAlignment(Op)) { - // The low bits are known zero if the pointer is aligned. - Known.Zero.setLowBits(Log2_32(Align)); - } +Align TargetLowering::computeKnownAlignForTargetInstr( + GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI, + unsigned Depth) const { + return Align(1); } /// This method can be implemented by targets that want to expose additional @@ -2689,6 +2917,12 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, return 1; } +unsigned TargetLowering::computeNumSignBitsForTargetInstr( + GISelKnownBits &Analysis, Register R, const APInt &DemandedElts, + const MachineRegisterInfo &MRI, unsigned Depth) const { + return 1; +} + bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode( SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth) const { @@ -3788,33 +4022,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // the comparison operands is infinity or negative infinity, convert the // condition to a less-awkward <= or >=. 
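// [Editor's note] Why the rewrite below is sound, as a standalone sketch
// (not part of this patch): +inf is the largest ordered value, so for any
// non-NaN X, X == +inf holds exactly when X >= +inf, and X == -inf exactly
// when X <= -inf. That lets an awkward equality against an infinity become
// an ordinary <= / >= whenever that condition code is legal.
#include <cassert>
#include <limits>

int main() {
  const double Inf = std::numeric_limits<double>::infinity();
  const double Vals[] = {-Inf, -1.0, 0.0, 1.0, Inf};
  for (double X : Vals) {
    assert((X == Inf) == (X >= Inf));
    assert((X == -Inf) == (X <= -Inf));
  }
  return 0;
}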
if (CFP->getValueAPF().isInfinity()) { - if (CFP->getValueAPF().isNegative()) { - if (Cond == ISD::SETOEQ && - isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE); - if (Cond == ISD::SETUEQ && - isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE); - if (Cond == ISD::SETUNE && - isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT); - if (Cond == ISD::SETONE && - isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT); - } else { - if (Cond == ISD::SETOEQ && - isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE); - if (Cond == ISD::SETUEQ && - isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE); - if (Cond == ISD::SETUNE && - isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT); - if (Cond == ISD::SETONE && - isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT); + bool IsNegInf = CFP->getValueAPF().isNegative(); + ISD::CondCode NewCond = ISD::SETCC_INVALID; + switch (Cond) { + case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break; + case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break; + case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break; + case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break; + default: break; } + if (NewCond != ISD::SETCC_INVALID && + isCondCodeLegal(NewCond, N0.getSimpleValueType())) + return DAG.getSetCC(dl, VT, N0, N1, NewCond); } } } @@ -4245,10 +4464,10 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const { TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, - ImmutableCallSite CS) const { + const CallBase &Call) const { /// Information about all of the constraints. AsmOperandInfoVector ConstraintOperands; - const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); + const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand()); unsigned maCount = 0; // Largest number of multiple alternative constraints. // Do a prepass over the constraints, canonicalizing them, and building up the @@ -4271,25 +4490,24 @@ TargetLowering::ParseConstraints(const DataLayout &DL, case InlineAsm::isOutput: // Indirect outputs just consume an argument. if (OpInfo.isIndirect) { - OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++); break; } // The return value of the call is this value. As such, there is no // corresponding argument. 
- assert(!CS.getType()->isVoidTy() && - "Bad inline asm!"); - if (StructType *STy = dyn_cast<StructType>(CS.getType())) { + assert(!Call.getType()->isVoidTy() && "Bad inline asm!"); + if (StructType *STy = dyn_cast<StructType>(Call.getType())) { OpInfo.ConstraintVT = getSimpleValueType(DL, STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType()); + OpInfo.ConstraintVT = getSimpleValueType(DL, Call.getType()); } ++ResNo; break; case InlineAsm::isInput: - OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++); break; case InlineAsm::isClobber: // Nothing to do. @@ -5479,152 +5697,79 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { return false; } -char TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG, - bool LegalOperations, bool ForCodeSize, - unsigned Depth) const { +SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOps, bool OptForSize, + NegatibleCost &Cost, + unsigned Depth) const { // fneg is removable even if it has multiple uses. - if (Op.getOpcode() == ISD::FNEG) - return 2; + if (Op.getOpcode() == ISD::FNEG) { + Cost = NegatibleCost::Cheaper; + return Op.getOperand(0); + } - // Don't allow anything with multiple uses unless we know it is free. - EVT VT = Op.getValueType(); + // Don't recurse exponentially. + if (Depth > SelectionDAG::MaxRecursionDepth) + return SDValue(); + + // Pre-increment recursion depth for use in recursive calls. + ++Depth; const SDNodeFlags Flags = Op->getFlags(); const TargetOptions &Options = DAG.getTarget().Options; - if (!Op.hasOneUse()) { - bool IsFreeExtend = Op.getOpcode() == ISD::FP_EXTEND && - isFPExtFree(VT, Op.getOperand(0).getValueType()); - - // If we already have the use of the negated floating constant, it is free - // to negate it even it has multiple uses. - bool IsFreeConstant = - Op.getOpcode() == ISD::ConstantFP && - !getNegatedExpression(Op, DAG, LegalOperations, ForCodeSize) - .use_empty(); + EVT VT = Op.getValueType(); + unsigned Opcode = Op.getOpcode(); - if (!IsFreeExtend && !IsFreeConstant) - return 0; + // Don't allow anything with multiple uses unless we know it is free. + if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) { + bool IsFreeExtend = Opcode == ISD::FP_EXTEND && + isFPExtFree(VT, Op.getOperand(0).getValueType()); + if (!IsFreeExtend) + return SDValue(); } - // Don't recurse exponentially. - if (Depth > SelectionDAG::MaxRecursionDepth) - return 0; + SDLoc DL(Op); - switch (Op.getOpcode()) { + switch (Opcode) { case ISD::ConstantFP: { - if (!LegalOperations) - return 1; - // Don't invert constant FP values after legalization unless the target says // the negated constant is legal. - return isOperationLegal(ISD::ConstantFP, VT) || - isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT, - ForCodeSize); + bool IsOpLegal = + isOperationLegal(ISD::ConstantFP, VT) || + isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT, + OptForSize); + + if (LegalOps && !IsOpLegal) + break; + + APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); + V.changeSign(); + SDValue CFP = DAG.getConstantFP(V, DL, VT); + + // If we already have the use of the negated floating constant, it is free + // to negate it even it has multiple uses. 
+ if (!Op.hasOneUse() && CFP.use_empty()) + break; + Cost = NegatibleCost::Neutral; + return CFP; } case ISD::BUILD_VECTOR: { // Only permit BUILD_VECTOR of constants. if (llvm::any_of(Op->op_values(), [&](SDValue N) { return !N.isUndef() && !isa<ConstantFPSDNode>(N); })) - return 0; - if (!LegalOperations) - return 1; - if (isOperationLegal(ISD::ConstantFP, VT) && - isOperationLegal(ISD::BUILD_VECTOR, VT)) - return 1; - return llvm::all_of(Op->op_values(), [&](SDValue N) { - return N.isUndef() || - isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT, - ForCodeSize); - }); - } - case ISD::FADD: - if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) - return 0; - - // After operation legalization, it might not be legal to create new FSUBs. - if (LegalOperations && !isOperationLegalOrCustom(ISD::FSUB, VT)) - return 0; - - // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) - if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, - ForCodeSize, Depth + 1)) - return V; - // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, - ForCodeSize, Depth + 1); - case ISD::FSUB: - // We can't turn -(A-B) into B-A when we honor signed zeros. - if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) - return 0; - - // fold (fneg (fsub A, B)) -> (fsub B, A) - return 1; - - case ISD::FMUL: - case ISD::FDIV: - // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) - if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, - ForCodeSize, Depth + 1)) - return V; - - // Ignore X * 2.0 because that is expected to be canonicalized to X + X. - if (auto *C = isConstOrConstSplatFP(Op.getOperand(1))) - if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL) - return 0; - - return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, - ForCodeSize, Depth + 1); - - case ISD::FMA: - case ISD::FMAD: { - if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) - return 0; - - // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) - // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) - char V2 = isNegatibleForFree(Op.getOperand(2), DAG, LegalOperations, - ForCodeSize, Depth + 1); - if (!V2) - return 0; - - // One of Op0/Op1 must be cheaply negatible, then select the cheapest. - char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, - ForCodeSize, Depth + 1); - char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, - ForCodeSize, Depth + 1); - char V01 = std::max(V0, V1); - return V01 ? std::max(V01, V2) : 0; - } - - case ISD::FP_EXTEND: - case ISD::FP_ROUND: - case ISD::FSIN: - return isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, - ForCodeSize, Depth + 1); - } - - return 0; -} + break; -SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, - bool LegalOperations, - bool ForCodeSize, - unsigned Depth) const { - // fneg is removable even if it has multiple uses. 
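// [Editor's note] Standalone sketch (not part of this patch) of why the
// comment above holds: IEEE negation only flips the sign bit and never
// rounds, so -(-X) == X bit-for-bit, including for -0.0, which is what
// makes stripping an FNEG always safe and cheap.
#include <cassert>
#include <cstring>

int main() {
  const double Vals[] = {0.0, -0.0, 1.5, -1e308};
  for (double X : Vals) {
    double Y = -(-X);
    assert(std::memcmp(&X, &Y, sizeof X) == 0);
  }
  return 0;
}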
- if (Op.getOpcode() == ISD::FNEG) - return Op.getOperand(0); + bool IsOpLegal = + (isOperationLegal(ISD::ConstantFP, VT) && + isOperationLegal(ISD::BUILD_VECTOR, VT)) || + llvm::all_of(Op->op_values(), [&](SDValue N) { + return N.isUndef() || + isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT, + OptForSize); + }); - assert(Depth <= SelectionDAG::MaxRecursionDepth && - "getNegatedExpression doesn't match isNegatibleForFree"); - const SDNodeFlags Flags = Op->getFlags(); + if (LegalOps && !IsOpLegal) + break; - switch (Op.getOpcode()) { - case ISD::ConstantFP: { - APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); - V.changeSign(); - return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); - } - case ISD::BUILD_VECTOR: { SmallVector<SDValue, 4> Ops; for (SDValue C : Op->op_values()) { if (C.isUndef()) { @@ -5633,101 +5778,140 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, } APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF(); V.changeSign(); - Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType())); + Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType())); } - return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops); + Cost = NegatibleCost::Neutral; + return DAG.getBuildVector(VT, DL, Ops); } - case ISD::FADD: - assert((DAG.getTarget().Options.NoSignedZerosFPMath || - Flags.hasNoSignedZeros()) && - "Expected NSZ fp-flag"); - - // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) - if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, - Depth + 1)) - return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - getNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(1), Flags); - // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - getNegatedExpression(Op.getOperand(1), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(0), Flags); - case ISD::FSUB: - // fold (fneg (fsub 0, B)) -> B - if (ConstantFPSDNode *N0CFP = - isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true)) - if (N0CFP->isZero()) - return Op.getOperand(1); + case ISD::FADD: { + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + break; + + // After operation legalization, it might not be legal to create new FSUBs. + if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT)) + break; + SDValue X = Op.getOperand(0), Y = Op.getOperand(1); + + // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y) + NegatibleCost CostX = NegatibleCost::Expensive; + SDValue NegX = + getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth); + // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X) + NegatibleCost CostY = NegatibleCost::Expensive; + SDValue NegY = + getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth); + + // Negate the X if its cost is less or equal than Y. + if (NegX && (CostX <= CostY)) { + Cost = CostX; + return DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags); + } + + // Negate the Y if it is not expensive. + if (NegY) { + Cost = CostY; + return DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags); + } + break; + } + case ISD::FSUB: { + // We can't turn -(A-B) into B-A when we honor signed zeros. 
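// [Editor's note] The signed-zero caveat in concrete terms, as a
// standalone sketch (not part of this patch): with X == Y == 1.0,
// X - Y is +0.0, so -(X - Y) is -0.0, while the rewritten form Y - X
// is +0.0. Hence the fold below requires the no-signed-zeros flag.
#include <cassert>
#include <cmath>

int main() {
  double X = 1.0, Y = 1.0;
  assert(std::signbit(-(X - Y)));  // -(1.0 - 1.0) is -0.0
  assert(!std::signbit(Y - X));    //   1.0 - 1.0  is +0.0
  return 0;
}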
+ if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + break; - // fold (fneg (fsub A, B)) -> (fsub B, A) - return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - Op.getOperand(1), Op.getOperand(0), Flags); + SDValue X = Op.getOperand(0), Y = Op.getOperand(1); + // fold (fneg (fsub 0, Y)) -> Y + if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true)) + if (C->isZero()) { + Cost = NegatibleCost::Cheaper; + return Y; + } + // fold (fneg (fsub X, Y)) -> (fsub Y, X) + Cost = NegatibleCost::Neutral; + return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags); + } case ISD::FMUL: - case ISD::FDIV: - // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) - if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, - Depth + 1)) - return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), - getNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(1), Flags); + case ISD::FDIV: { + SDValue X = Op.getOperand(0), Y = Op.getOperand(1); + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) + NegatibleCost CostX = NegatibleCost::Expensive; + SDValue NegX = + getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth); // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) - return DAG.getNode( - Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0), - getNegatedExpression(Op.getOperand(1), DAG, LegalOperations, - ForCodeSize, Depth + 1), - Flags); + NegatibleCost CostY = NegatibleCost::Expensive; + SDValue NegY = + getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth); + + // Negate the X if its cost is less or equal than Y. + if (NegX && (CostX <= CostY)) { + Cost = CostX; + return DAG.getNode(Opcode, DL, VT, NegX, Y, Flags); + } + // Ignore X * 2.0 because that is expected to be canonicalized to X + X. + if (auto *C = isConstOrConstSplatFP(Op.getOperand(1))) + if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL) + break; + + // Negate the Y if it is not expensive. + if (NegY) { + Cost = CostY; + return DAG.getNode(Opcode, DL, VT, X, NegY, Flags); + } + break; + } case ISD::FMA: case ISD::FMAD: { - assert((DAG.getTarget().Options.NoSignedZerosFPMath || - Flags.hasNoSignedZeros()) && - "Expected NSZ fp-flag"); + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + break; - SDValue Neg2 = getNegatedExpression(Op.getOperand(2), DAG, LegalOperations, - ForCodeSize, Depth + 1); + SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2); + NegatibleCost CostZ = NegatibleCost::Expensive; + SDValue NegZ = + getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth); + // Give up if fail to negate the Z. 
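// [Editor's note] Standalone sketch (not part of this patch) of why the
// FMUL/FDIV case above needs no fast-math flags: IEEE multiplication and
// division are sign-symmetric, so -(X * Y), (-X) * Y and X * (-Y) are
// identical results, signed zeros included.
#include <cassert>
#include <cmath>

int main() {
  const double Vals[] = {-2.5, -0.0, 0.0, 3.0};
  for (double X : Vals)
    for (double Y : Vals) {
      double A = -(X * Y), B = (-X) * Y, C = X * (-Y);
      assert(A == B && A == C);
      assert(std::signbit(A) == std::signbit(B) &&
             std::signbit(B) == std::signbit(C));
    }
  return 0;
}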
+ if (!NegZ) + break; + + // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) + NegatibleCost CostX = NegatibleCost::Expensive; + SDValue NegX = + getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth); + // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) + NegatibleCost CostY = NegatibleCost::Expensive; + SDValue NegY = + getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth); - char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, - ForCodeSize, Depth + 1); - char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, - ForCodeSize, Depth + 1); - if (V0 > V1) { - // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) - SDValue Neg0 = getNegatedExpression( - Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1); - return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0, - Op.getOperand(1), Neg2, Flags); + // Negate the X if its cost is less or equal than Y. + if (NegX && (CostX <= CostY)) { + Cost = std::min(CostX, CostZ); + return DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags); } - // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) - SDValue Neg1 = getNegatedExpression(Op.getOperand(1), DAG, LegalOperations, - ForCodeSize, Depth + 1); - return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), - Op.getOperand(0), Neg1, Neg2, Flags); + // Negate the Y if it is not expensive. + if (NegY) { + Cost = std::min(CostY, CostZ); + return DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags); + } + break; } case ISD::FP_EXTEND: case ISD::FSIN: - return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), - getNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1)); + if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps, + OptForSize, Cost, Depth)) + return DAG.getNode(Opcode, DL, VT, NegV); + break; case ISD::FP_ROUND: - return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), - getNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(1)); + if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps, + OptForSize, Cost, Depth)) + return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1)); + break; } - llvm_unreachable("Unknown code"); + return SDValue(); } //===----------------------------------------------------------------------===// @@ -5933,6 +6117,14 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, return Ok; } +// Check that (every element of) Z is undef or not an exact multiple of BW. 
+static bool isNonZeroModBitWidth(SDValue Z, unsigned BW) { + return ISD::matchUnaryPredicate( + Z, + [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; }, + true); +} + bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const { EVT VT = Node->getValueType(0); @@ -5943,41 +6135,54 @@ bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result, !isOperationLegalOrCustomOrPromote(ISD::OR, VT))) return false; - // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) - // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) SDValue X = Node->getOperand(0); SDValue Y = Node->getOperand(1); SDValue Z = Node->getOperand(2); - unsigned EltSizeInBits = VT.getScalarSizeInBits(); + unsigned BW = VT.getScalarSizeInBits(); bool IsFSHL = Node->getOpcode() == ISD::FSHL; SDLoc DL(SDValue(Node, 0)); EVT ShVT = Z.getValueType(); - SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT); - SDValue Zero = DAG.getConstant(0, DL, ShVT); - SDValue ShAmt; - if (isPowerOf2_32(EltSizeInBits)) { - SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, ShVT); - ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask); - } else { + SDValue ShX, ShY; + SDValue ShAmt, InvShAmt; + if (isNonZeroModBitWidth(Z, BW)) { + // fshl: X << C | Y >> (BW - C) + // fshr: X << (BW - C) | Y >> C + // where C = Z % BW is not zero + SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT); ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC); - } - - SDValue InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt); - SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt); - SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt); - SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShX, ShY); - - // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth, - // and that is undefined. We must compare and select to avoid UB. - EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShVT); + InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt); + ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt); + ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt); + } else { + // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW)) + // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW) + SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT); + if (isPowerOf2_32(BW)) { + // Z % BW -> Z & (BW - 1) + ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask); + // (BW - 1) - (Z % BW) -> ~Z & (BW - 1) + InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask); + } else { + SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT); + ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC); + InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt); + } - // For fshl, 0-shift returns the 1st arg (X). - // For fshr, 0-shift returns the 2nd arg (Y). - SDValue IsZeroShift = DAG.getSetCC(DL, CCVT, ShAmt, Zero, ISD::SETEQ); - Result = DAG.getSelect(DL, VT, IsZeroShift, IsFSHL ? 
X : Y, Or); + SDValue One = DAG.getConstant(1, DL, ShVT); + if (IsFSHL) { + ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt); + SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One); + ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt); + } else { + SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One); + ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt); + ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt); + } + } + Result = DAG.getNode(ISD::OR, DL, VT, ShX, ShY); return true; } @@ -5992,12 +6197,15 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result, SDLoc DL(SDValue(Node, 0)); EVT ShVT = Op1.getValueType(); - SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT); + SDValue Zero = DAG.getConstant(0, DL, ShVT); + + assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 && + "Expecting the type bitwidth to be a power of 2"); - // If a rotate in the other direction is legal, use it. + // If a rotate in the other direction is supported, use it. unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL; - if (isOperationLegal(RevRot, VT)) { - SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1); + if (isOperationLegalOrCustom(RevRot, VT)) { + SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1); Result = DAG.getNode(RevRot, DL, VT, Op0, Sub); return true; } @@ -6010,15 +6218,13 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result, return false; // Otherwise, - // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1))) - // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1))) + // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and -c, w-1))) + // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and -c, w-1))) // - assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 && - "Expecting the type bitwidth to be a power of 2"); unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL; unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL; SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT); - SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1); + SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1); SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC); SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC); Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0), @@ -6202,114 +6408,50 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); - if (SrcVT.getScalarType() != MVT::i64) + if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64) + return false; + + // Only expand vector types if we have the appropriate vector bit operations. + if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) || + !isOperationLegalOrCustom(ISD::FADD, DstVT) || + !isOperationLegalOrCustom(ISD::FSUB, DstVT) || + !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) || + !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT))) return false; SDLoc dl(SDValue(Node, 0)); EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout()); - if (DstVT.getScalarType() == MVT::f32) { - // Only expand vector types if we have the appropriate vector bit - // operations. 
- if (SrcVT.isVector() && - (!isOperationLegalOrCustom(ISD::SRL, SrcVT) || - !isOperationLegalOrCustom(ISD::FADD, DstVT) || - !isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT) || - !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) || - !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT))) - return false; - - // For unsigned conversions, convert them to signed conversions using the - // algorithm from the x86_64 __floatundisf in compiler_rt. - - // TODO: This really should be implemented using a branch rather than a - // select. We happen to get lucky and machinesink does the right - // thing most of the time. This would be a good candidate for a - // pseudo-op, or, even better, for whole-function isel. - EVT SetCCVT = - getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); - - SDValue SignBitTest = DAG.getSetCC( - dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT); - - SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT); - SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst); - SDValue AndConst = DAG.getConstant(1, dl, SrcVT); - SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst); - SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr); - - SDValue Slow, Fast; - if (Node->isStrictFPOpcode()) { - // In strict mode, we must avoid spurious exceptions, and therefore - // must make sure to only emit a single STRICT_SINT_TO_FP. - SDValue InCvt = DAG.getSelect(dl, SrcVT, SignBitTest, Or, Src); - Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DstVT, MVT::Other }, - { Node->getOperand(0), InCvt }); - Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DstVT, MVT::Other }, - { Fast.getValue(1), Fast, Fast }); - Chain = Slow.getValue(1); - // The STRICT_SINT_TO_FP inherits the exception mode from the - // incoming STRICT_UINT_TO_FP node; the STRICT_FADD node can - // never raise any exception. - SDNodeFlags Flags; - Flags.setNoFPExcept(Node->getFlags().hasNoFPExcept()); - Fast->setFlags(Flags); - Flags.setNoFPExcept(true); - Slow->setFlags(Flags); - } else { - SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or); - Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt); - Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src); - } - - Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast); - return true; - } - - if (DstVT.getScalarType() == MVT::f64) { - // Only expand vector types if we have the appropriate vector bit - // operations. - if (SrcVT.isVector() && - (!isOperationLegalOrCustom(ISD::SRL, SrcVT) || - !isOperationLegalOrCustom(ISD::FADD, DstVT) || - !isOperationLegalOrCustom(ISD::FSUB, DstVT) || - !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) || - !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT))) - return false; - - // Implementation of unsigned i64 to f64 following the algorithm in - // __floatundidf in compiler_rt. This implementation has the advantage - // of performing rounding correctly, both in the default rounding mode - // and in all alternate rounding modes. 
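// [Editor's note] The __floatundidf scheme described above, as a
// standalone C++ sketch (not part of this patch). Each 32-bit half is
// made into an exact double by ORing it into the mantissa of 2^52
// (low half) or 2^84 (high half); subtracting 2^84 + 2^52 cancels both
// biases exactly, and the final add performs the single rounding step.
// BitsToDouble/U64ToF64 are illustrative helpers.
#include <cassert>
#include <cstdint>
#include <cstring>

static double BitsToDouble(uint64_t Bits) {
  double D;
  std::memcpy(&D, &Bits, sizeof D);
  return D;
}

static double U64ToF64(uint64_t X) {
  double Lo = BitsToDouble((X & 0xFFFFFFFFu) | 0x4330000000000000u); // 2^52 + lo
  double Hi = BitsToDouble((X >> 32) | 0x4530000000000000u);         // 2^84 + hi*2^32
  double TwoP84PlusTwoP52 = BitsToDouble(0x4530000000100000u);
  return Lo + (Hi - TwoP84PlusTwoP52); // exact until the one final rounding
}

int main() {
  assert(U64ToF64(0) == 0.0);
  assert(U64ToF64(1) == 1.0);
  assert(U64ToF64(1ull << 52) == 4503599627370496.0);
  assert(U64ToF64(~0ull) == 18446744073709551616.0); // 2^64, correctly rounded
  return 0;
}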
- SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT); - SDValue TwoP84PlusTwoP52 = DAG.getConstantFP( - BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT); - SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT); - SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT); - SDValue HiShift = DAG.getConstant(32, dl, ShiftVT); - - SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask); - SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift); - SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52); - SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84); - SDValue LoFlt = DAG.getBitcast(DstVT, LoOr); - SDValue HiFlt = DAG.getBitcast(DstVT, HiOr); - if (Node->isStrictFPOpcode()) { - SDValue HiSub = - DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other}, - {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52}); - Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other}, - {HiSub.getValue(1), LoFlt, HiSub}); - Chain = Result.getValue(1); - } else { - SDValue HiSub = - DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52); - Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub); - } - return true; + // Implementation of unsigned i64 to f64 following the algorithm in + // __floatundidf in compiler_rt. This implementation has the advantage + // of performing rounding correctly, both in the default rounding mode + // and in all alternate rounding modes. + SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT); + SDValue TwoP84PlusTwoP52 = DAG.getConstantFP( + BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT); + SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT); + SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT); + SDValue HiShift = DAG.getConstant(32, dl, ShiftVT); + + SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask); + SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift); + SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52); + SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84); + SDValue LoFlt = DAG.getBitcast(DstVT, LoOr); + SDValue HiFlt = DAG.getBitcast(DstVT, HiOr); + if (Node->isStrictFPOpcode()) { + SDValue HiSub = + DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other}, + {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52}); + Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other}, + {HiSub.getValue(1), LoFlt, HiSub}); + Chain = Result.getValue(1); + } else { + SDValue HiSub = + DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52); + Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub); } - - return false; + return true; } SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, @@ -6568,12 +6710,61 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, SDValue Chain = LD->getChain(); SDValue BasePTR = LD->getBasePtr(); EVT SrcVT = LD->getMemoryVT(); + EVT DstVT = LD->getValueType(0); ISD::LoadExtType ExtType = LD->getExtensionType(); unsigned NumElem = SrcVT.getVectorNumElements(); EVT SrcEltVT = SrcVT.getScalarType(); - EVT DstEltVT = LD->getValueType(0).getScalarType(); + EVT DstEltVT = DstVT.getScalarType(); + + // A vector must always be stored in memory as-is, i.e. without any padding + // between the elements, since various code depend on it, e.g. in the + // handling of a bitcast of a vector type to int, which may be done with a + // vector store followed by an integer load. 
A vector that does not have + // elements that are byte-sized must therefore be stored as an integer + // built out of the extracted vector elements. + if (!SrcEltVT.isByteSized()) { + unsigned NumLoadBits = SrcVT.getStoreSizeInBits(); + EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits); + + unsigned NumSrcBits = SrcVT.getSizeInBits(); + EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits); + + unsigned SrcEltBits = SrcEltVT.getSizeInBits(); + SDValue SrcEltBitMask = DAG.getConstant( + APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT); + + // Load the whole vector and avoid masking off the top bits as it makes + // the codegen worse. + SDValue Load = + DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR, + LD->getPointerInfo(), SrcIntVT, LD->getAlignment(), + LD->getMemOperand()->getFlags(), LD->getAAInfo()); + + SmallVector<SDValue, 8> Vals; + for (unsigned Idx = 0; Idx < NumElem; ++Idx) { + unsigned ShiftIntoIdx = + (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx); + SDValue ShiftAmount = + DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(), + LoadVT, SL, /*LegalTypes=*/false); + SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount); + SDValue Elt = + DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask); + SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt); + + if (ExtType != ISD::NON_EXTLOAD) { + unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType); + Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar); + } + + Vals.push_back(Scalar); + } + + SDValue Value = DAG.getBuildVector(DstVT, SL, Vals); + return std::make_pair(Value, Load.getValue(1)); + } unsigned Stride = SrcEltVT.getSizeInBits() / 8; assert(SrcEltVT.isByteSized()); @@ -6595,7 +6786,7 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, } SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains); - SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals); + SDValue Value = DAG.getBuildVector(DstVT, SL, Vals); return std::make_pair(Value, NewChain); } @@ -6616,7 +6807,6 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, // The type of data as saved in memory. EVT MemSclVT = StVT.getScalarType(); - EVT IdxVT = getVectorIdxTy(DAG.getDataLayout()); unsigned NumElem = StVT.getVectorNumElements(); // A vector must always be stored in memory as-is, i.e. 
without any padding @@ -6633,7 +6823,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, for (unsigned Idx = 0; Idx < NumElem; ++Idx) { SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value, - DAG.getConstant(Idx, SL, IdxVT)); + DAG.getVectorIdxConstant(Idx, SL)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt); SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc); unsigned ShiftIntoIdx = @@ -6658,7 +6848,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, SmallVector<SDValue, 8> Stores; for (unsigned Idx = 0; Idx < NumElem; ++Idx) { SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value, - DAG.getConstant(Idx, SL, IdxVT)); + DAG.getVectorIdxConstant(Idx, SL)); SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride); @@ -7317,12 +7507,13 @@ SDValue TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const { - assert((Opcode == ISD::SDIVFIX || - Opcode == ISD::UDIVFIX) && + assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT || + Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) && "Expected a fixed point division opcode"); EVT VT = LHS.getValueType(); - bool Signed = Opcode == ISD::SDIVFIX; + bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT; + bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT; EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); // If there is enough room in the type to upscale the LHS or downscale the @@ -7334,7 +7525,15 @@ TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, : DAG.computeKnownBits(LHS).countMinLeadingZeros(); unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros(); - if (LHSLead + RHSTrail < Scale) + // For signed saturating operations, we need to be able to detect true integer + // division overflow; that is, when you have MIN / -EPS. However, this + // is undefined behavior and if we emit divisions that could take such + // values it may cause undesired behavior (arithmetic exceptions on x86, for + // example). + // Avoid this by requiring an extra bit so that we never get this case. + // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale + // signed saturating division, we need to emit a whopping 32-bit division. + if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed)) return SDValue(); unsigned LHSShift = std::min(LHSLead, Scale); @@ -7388,8 +7587,6 @@ TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, Quot = DAG.getNode(ISD::UDIV, dl, VT, LHS, RHS); - // TODO: Saturation. - return Quot; } @@ -7663,3 +7860,26 @@ SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const { Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res); return Res; } + +bool TargetLowering::expandREM(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + EVT VT = Node->getValueType(0); + SDLoc dl(Node); + bool isSigned = Node->getOpcode() == ISD::SREM; + unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV; + unsigned DivRemOpc = isSigned ? 
ISD::SDIVREM : ISD::UDIVREM; + SDValue Dividend = Node->getOperand(0); + SDValue Divisor = Node->getOperand(1); + if (isOperationLegalOrCustom(DivRemOpc, VT)) { + SDVTList VTs = DAG.getVTList(VT, VT); + Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1); + return true; + } else if (isOperationLegalOrCustom(DivOpc, VT)) { + // X % Y -> X-X/Y*Y + SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor); + SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor); + Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul); + return true; + } + return false; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp index 85dd4f59fa13..ce43fb1fbd4b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -494,17 +494,15 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { "EH Funclets are not supported yet.", MBB.front().getDebugLoc(), &MBB); - if (MBB.isEHPad()) { - // Push the prologue and epilogue outside of - // the region that may throw by making sure - // that all the landing pads are at least at the - // boundary of the save and restore points. - // The problem with exceptions is that the throw - // is not properly modeled and in particular, a - // basic block can jump out from the middle. + if (MBB.isEHPad() || MBB.isInlineAsmBrIndirectTarget()) { + // Push the prologue and epilogue outside of the region that may throw (or + // jump out via inlineasm_br), by making sure that all the landing pads + // are at least at the boundary of the save and restore points. The + // problem is that a basic block can jump out from the middle in these + // cases, which we do not handle. updateSaveRestorePoints(MBB, RS.get()); if (!ArePointsInteresting()) { - LLVM_DEBUG(dbgs() << "EHPad prevents shrink-wrapping\n"); + LLVM_DEBUG(dbgs() << "EHPad/inlineasm_br prevents shrink-wrapping\n"); return false; } continue; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp index 4abf9ea41b65..0683058f177e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -27,6 +27,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -37,6 +38,7 @@ STATISTIC(NumSpilled, "Number of registers live across unwind edges"); namespace { class SjLjEHPrepare : public FunctionPass { + IntegerType *DataTy; Type *doubleUnderDataTy; Type *doubleUnderJBufTy; Type *FunctionContextTy; @@ -50,10 +52,12 @@ class SjLjEHPrepare : public FunctionPass { Function *CallSiteFn; Function *FuncCtxFn; AllocaInst *FuncCtx; + const TargetMachine *TM; public: static char ID; // Pass identification, replacement for typeid - explicit SjLjEHPrepare() : FunctionPass(ID) {} + explicit SjLjEHPrepare(const TargetMachine *TM = nullptr) + : FunctionPass(ID), TM(TM) {} bool doInitialization(Module &M) override; bool runOnFunction(Function &F) override; @@ -77,23 +81,28 @@ INITIALIZE_PASS(SjLjEHPrepare, DEBUG_TYPE, "Prepare SjLj exceptions", false, false) // Public Interface To the SjLjEHPrepare pass. 
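// [Editor's note] Standalone sketch (not part of this patch) of the
// identity behind the TargetLowering::expandREM fallback added earlier in
// this patch: with C-style truncating division,
// X % Y == X - (X / Y) * Y for both signed and unsigned operands.
#include <cassert>

int main() {
  const int Cases[][2] = {{7, 3}, {-7, 3}, {7, -3}, {-7, -3}};
  for (const auto &C : Cases)
    assert(C[0] % C[1] == C[0] - (C[0] / C[1]) * C[1]);
  return 0;
}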
-FunctionPass *llvm::createSjLjEHPreparePass() { return new SjLjEHPrepare(); } +FunctionPass *llvm::createSjLjEHPreparePass(const TargetMachine *TM) { + return new SjLjEHPrepare(TM); +} + // doInitialization - Set up declarations and types needed to process // exceptions. bool SjLjEHPrepare::doInitialization(Module &M) { // Build the function context structure. // builtin_setjmp uses a five word jbuf Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); - Type *Int32Ty = Type::getInt32Ty(M.getContext()); - doubleUnderDataTy = ArrayType::get(Int32Ty, 4); + unsigned DataBits = + TM ? TM->getSjLjDataSize() : TargetMachine::DefaultSjLjDataSize; + DataTy = Type::getIntNTy(M.getContext(), DataBits); + doubleUnderDataTy = ArrayType::get(DataTy, 4); doubleUnderJBufTy = ArrayType::get(VoidPtrTy, 5); FunctionContextTy = StructType::get(VoidPtrTy, // __prev - Int32Ty, // call_site + DataTy, // call_site doubleUnderDataTy, // __data VoidPtrTy, // __personality VoidPtrTy, // __lsda doubleUnderJBufTy // __jbuf - ); + ); return true; } @@ -112,8 +121,7 @@ void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) { Builder.CreateGEP(FunctionContextTy, FuncCtx, Idxs, "call_site"); // Insert a store of the call-site number - ConstantInt *CallSiteNoC = - ConstantInt::get(Type::getInt32Ty(I->getContext()), Number); + ConstantInt *CallSiteNoC = ConstantInt::get(DataTy, Number); Builder.CreateStore(CallSiteNoC, CallSite, true /*volatile*/); } @@ -128,7 +136,6 @@ static void MarkBlocksLiveIn(BasicBlock *BB, for (BasicBlock *B : inverse_depth_first_ext(BB, Visited)) LiveBBs.insert(B); - } /// substituteLPadValues - Substitute the values returned by the landingpad @@ -190,16 +197,18 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 2, "__data"); // The exception values come back in context->__data[0]. 
- Type *Int32Ty = Type::getInt32Ty(F.getContext()); Value *ExceptionAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData, 0, 0, "exception_gep"); - Value *ExnVal = Builder.CreateLoad(Int32Ty, ExceptionAddr, true, "exn_val"); + Value *ExnVal = Builder.CreateLoad(DataTy, ExceptionAddr, true, "exn_val"); ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy()); Value *SelectorAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData, 0, 1, "exn_selector_gep"); Value *SelVal = - Builder.CreateLoad(Int32Ty, SelectorAddr, true, "exn_selector_val"); + Builder.CreateLoad(DataTy, SelectorAddr, true, "exn_selector_val"); + + // SelVal must be Int32Ty, so trunc it + SelVal = Builder.CreateTrunc(SelVal, Type::getInt32Ty(F.getContext())); substituteLPadValues(LPI, ExnVal, SelVal); } @@ -457,8 +466,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { } Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); StackAddr->insertAfter(&I); - Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); - StoreStackAddr->insertAfter(StackAddr); + new StoreInst(StackAddr, StackPtr, true, StackAddr->getNextNode()); } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp index 6664b58eccf8..d2bfdc663edb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp @@ -112,9 +112,10 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { return false; } -void SlotIndexes::removeMachineInstrFromMaps(MachineInstr &MI) { - assert(!MI.isBundledWithPred() && - "Use removeSingleMachineInstrFromMaps() instread"); +void SlotIndexes::removeMachineInstrFromMaps(MachineInstr &MI, + bool AllowBundled) { + assert((AllowBundled || !MI.isBundledWithPred()) && + "Use removeSingleMachineInstrFromMaps() instead"); Mi2IndexMap::iterator mi2iItr = mi2iMap.find(&MI); if (mi2iItr == mi2iMap.end()) return; @@ -141,7 +142,7 @@ void SlotIndexes::removeSingleMachineInstrFromMaps(MachineInstr &MI) { // instruction. if (MI.isBundledWithSucc()) { // Only the first instruction of a bundle should have an index assigned. - assert(!MI.isBundledWithPred() && "Should have first bundle isntruction"); + assert(!MI.isBundledWithPred() && "Should be first bundle instruction"); MachineBasicBlock::instr_iterator Next = std::next(MI.getIterator()); MachineInstr &NextMI = *Next; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/Spiller.h b/contrib/llvm-project/llvm/lib/CodeGen/Spiller.h deleted file mode 100644 index 66dabf78f873..000000000000 --- a/contrib/llvm-project/llvm/lib/CodeGen/Spiller.h +++ /dev/null @@ -1,43 +0,0 @@ -//===- llvm/CodeGen/Spiller.h - Spiller -------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_CODEGEN_SPILLER_H -#define LLVM_LIB_CODEGEN_SPILLER_H - -namespace llvm { - -class LiveRangeEdit; -class MachineFunction; -class MachineFunctionPass; -class VirtRegMap; - - /// Spiller interface. - /// - /// Implementations are utility classes which insert spill or remat code on - /// demand. - class Spiller { - virtual void anchor(); - - public: - virtual ~Spiller() = 0; - - /// spill - Spill the LRE.getParent() live interval. 
- virtual void spill(LiveRangeEdit &LRE) = 0; - - virtual void postOptimization() {} - }; - - /// Create and return a spiller that will insert spill code directly instead - /// of deferring though VirtRegMap. - Spiller *createInlineSpiller(MachineFunctionPass &pass, - MachineFunction &mf, - VirtRegMap &vrm); - -} // end namespace llvm - -#endif // LLVM_LIB_CODEGEN_SPILLER_H diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp index 0c1f1220c421..8dec620536a7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp @@ -19,9 +19,10 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervalCalc.h" #include "llvm/CodeGen/LiveIntervals.h" -#include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -79,10 +80,15 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI, std::pair<SlotIndex, SlotIndex> &LIP = LastInsertPoint[Num]; SlotIndex MBBEnd = LIS.getMBBEndIdx(&MBB); - SmallVector<const MachineBasicBlock *, 1> EHPadSuccessors; - for (const MachineBasicBlock *SMBB : MBB.successors()) - if (SMBB->isEHPad()) - EHPadSuccessors.push_back(SMBB); + SmallVector<const MachineBasicBlock *, 1> ExceptionalSuccessors; + bool EHPadSuccessor = false; + for (const MachineBasicBlock *SMBB : MBB.successors()) { + if (SMBB->isEHPad()) { + ExceptionalSuccessors.push_back(SMBB); + EHPadSuccessor = true; + } else if (SMBB->isInlineAsmBrIndirectTarget()) + ExceptionalSuccessors.push_back(SMBB); + } // Compute insert points on the first call. The pair is independent of the // current live interval. @@ -93,15 +99,17 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI, else LIP.first = LIS.getInstructionIndex(*FirstTerm); - // If there is a landing pad successor, also find the call instruction. - if (EHPadSuccessors.empty()) + // If there is a landing pad or inlineasm_br successor, also find the + // instruction. If there is no such instruction, we don't need to do + // anything special. We assume there cannot be multiple instructions that + // are Calls with EHPad successors or INLINEASM_BR in a block. Further, we + // assume that if there are any, they will be after any other call + // instructions in the block. + if (ExceptionalSuccessors.empty()) return LIP.first; - // There may not be a call instruction (?) in which case we ignore LPad. 
- LIP.second = LIP.first; - for (MachineBasicBlock::const_iterator I = MBB.end(), E = MBB.begin(); - I != E;) { - --I; - if (I->isCall()) { + for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I) { + if ((EHPadSuccessor && I->isCall()) || + I->getOpcode() == TargetOpcode::INLINEASM_BR) { LIP.second = LIS.getInstructionIndex(*I); break; } @@ -113,7 +121,7 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI, if (!LIP.second) return LIP.first; - if (none_of(EHPadSuccessors, [&](const MachineBasicBlock *EHPad) { + if (none_of(ExceptionalSuccessors, [&](const MachineBasicBlock *EHPad) { return LIS.isLiveInToMBB(CurLI, EHPad); })) return LIP.first; @@ -379,11 +387,11 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { RegAssign.clear(); Values.clear(); - // Reset the LiveRangeCalc instances needed for this spill mode. - LRCalc[0].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, + // Reset the LiveIntervalCalc instances needed for this spill mode. + LICalc[0].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, &LIS.getVNInfoAllocator()); if (SpillMode) - LRCalc[1].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, + LICalc[1].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, &LIS.getVNInfoAllocator()); // We don't need an AliasAnalysis since we will only be performing @@ -832,7 +840,7 @@ void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) && "Range cannot span basic blocks"); - // The complement interval will be extended as needed by LRCalc.extend(). + // The complement interval will be extended as needed by LICalc.extend(). if (ParentVNI) forceRecompute(0, *ParentVNI); LLVM_DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):"); @@ -1118,7 +1126,7 @@ void SplitEditor::hoistCopies() { } /// transferValues - Transfer all possible values to the new live ranges. -/// Values that were rematerialized are left alone, they need LRCalc.extend(). +/// Values that were rematerialized are left alone, they need LICalc.extend(). bool SplitEditor::transferValues() { bool Skipped = false; RegAssignMap::const_iterator AssignI = RegAssign.begin(); @@ -1166,7 +1174,7 @@ bool SplitEditor::transferValues() { continue; } - LiveRangeCalc &LRC = getLRCalc(RegIdx); + LiveIntervalCalc &LIC = getLICalc(RegIdx); // This value has multiple defs in RegIdx, but it wasn't rematerialized, // so the live range is accurate. Add live-in blocks in [Start;End) to the @@ -1182,7 +1190,7 @@ bool SplitEditor::transferValues() { LLVM_DEBUG(dbgs() << ':' << VNI->id << "*" << printMBBReference(*MBB)); // MBB has its own def. Is it also live-out? if (BlockEnd <= End) - LRC.setLiveOutValue(&*MBB, VNI); + LIC.setLiveOutValue(&*MBB, VNI); // Skip to the next block for live-in. ++MBB; @@ -1200,16 +1208,16 @@ bool SplitEditor::transferValues() { VNInfo *VNI = LI.extendInBlock(BlockStart, std::min(BlockEnd, End)); assert(VNI && "Missing def for complex mapped parent PHI"); if (End >= BlockEnd) - LRC.setLiveOutValue(&*MBB, VNI); // Live-out as well. + LIC.setLiveOutValue(&*MBB, VNI); // Live-out as well. } else { // This block needs a live-in value. The last block covered may not // be live-out. if (End < BlockEnd) - LRC.addLiveInBlock(LI, MDT[&*MBB], End); + LIC.addLiveInBlock(LI, MDT[&*MBB], End); else { // Live-through, and we don't know the value. 
- LRC.addLiveInBlock(LI, MDT[&*MBB]); - LRC.setLiveOutValue(&*MBB, nullptr); + LIC.addLiveInBlock(LI, MDT[&*MBB]); + LIC.setLiveOutValue(&*MBB, nullptr); } } BlockStart = BlockEnd; @@ -1220,9 +1228,9 @@ bool SplitEditor::transferValues() { LLVM_DEBUG(dbgs() << '\n'); } - LRCalc[0].calculateValues(); + LICalc[0].calculateValues(); if (SpillMode) - LRCalc[1].calculateValues(); + LICalc[1].calculateValues(); return Skipped; } @@ -1238,7 +1246,7 @@ static bool removeDeadSegment(SlotIndex Def, LiveRange &LR) { return true; } -void SplitEditor::extendPHIRange(MachineBasicBlock &B, LiveRangeCalc &LRC, +void SplitEditor::extendPHIRange(MachineBasicBlock &B, LiveIntervalCalc &LIC, LiveRange &LR, LaneBitmask LM, ArrayRef<SlotIndex> Undefs) { for (MachineBasicBlock *P : B.predecessors()) { @@ -1252,7 +1260,7 @@ void SplitEditor::extendPHIRange(MachineBasicBlock &B, LiveRangeCalc &LRC, LiveRange &PSR = !LM.all() ? getSubRangeForMask(LM, PLI) : static_cast<LiveRange&>(PLI); if (PSR.liveAt(LastUse)) - LRC.extend(LR, End, /*PhysReg=*/0, Undefs); + LIC.extend(LR, End, /*PhysReg=*/0, Undefs); } } @@ -1270,14 +1278,14 @@ void SplitEditor::extendPHIKillRanges() { unsigned RegIdx = RegAssign.lookup(V->def); LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx)); - LiveRangeCalc &LRC = getLRCalc(RegIdx); + LiveIntervalCalc &LIC = getLICalc(RegIdx); MachineBasicBlock &B = *LIS.getMBBFromIndex(V->def); if (!removeDeadSegment(V->def, LI)) - extendPHIRange(B, LRC, LI, LaneBitmask::getAll(), /*Undefs=*/{}); + extendPHIRange(B, LIC, LI, LaneBitmask::getAll(), /*Undefs=*/{}); } SmallVector<SlotIndex, 4> Undefs; - LiveRangeCalc SubLRC; + LiveIntervalCalc SubLIC; for (LiveInterval::SubRange &PS : ParentLI.subranges()) { for (const VNInfo *V : PS.valnos) { @@ -1290,11 +1298,11 @@ void SplitEditor::extendPHIKillRanges() { continue; MachineBasicBlock &B = *LIS.getMBBFromIndex(V->def); - SubLRC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, + SubLIC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, &LIS.getVNInfoAllocator()); Undefs.clear(); LI.computeSubRangeUndefs(Undefs, PS.LaneMask, MRI, *LIS.getSlotIndexes()); - extendPHIRange(B, SubLRC, S, PS.LaneMask, Undefs); + extendPHIRange(B, SubLIC, S, PS.LaneMask, Undefs); } } } @@ -1363,8 +1371,8 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { if (MO.isUse()) ExtPoints.push_back(ExtPoint(MO, RegIdx, Next)); } else { - LiveRangeCalc &LRC = getLRCalc(RegIdx); - LRC.extend(LI, Next, 0, ArrayRef<SlotIndex>()); + LiveIntervalCalc &LIC = getLICalc(RegIdx); + LIC.extend(LI, Next, 0, ArrayRef<SlotIndex>()); } } @@ -1372,7 +1380,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { LiveInterval &LI = LIS.getInterval(Edit->get(EP.RegIdx)); assert(LI.hasSubRanges()); - LiveRangeCalc SubLRC; + LiveIntervalCalc SubLIC; Register Reg = EP.MO.getReg(), Sub = EP.MO.getSubReg(); LaneBitmask LM = Sub != 0 ? 
TRI.getSubRegIndexLaneMask(Sub) : MRI.getMaxLaneMaskForVReg(Reg); @@ -1386,11 +1394,11 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { // %1 = COPY %0 if (S.empty()) continue; - SubLRC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, + SubLIC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, &LIS.getVNInfoAllocator()); SmallVector<SlotIndex, 4> Undefs; LI.computeSubRangeUndefs(Undefs, S.LaneMask, MRI, *LIS.getSlotIndexes()); - SubLRC.extend(S, EP.Next, 0, Undefs); + SubLIC.extend(S, EP.Next, 0, Undefs); } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h index 78f0bbd24db5..3ab5f2585f34 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h @@ -23,8 +23,8 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervalCalc.h" #include "llvm/CodeGen/LiveIntervals.h" -#include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SlotIndexes.h" @@ -34,6 +34,7 @@ namespace llvm { +class AAResults; class LiveIntervals; class LiveRangeEdit; class MachineBlockFrequencyInfo; @@ -53,7 +54,7 @@ private: /// Last legal insert point in each basic block in the current function. /// The first entry is the first terminator, the second entry is the /// last valid point to insert a split or spill for a variable that is - /// live into a landing pad successor. + /// live into a landing pad or inlineasm_br successor. SmallVector<std::pair<SlotIndex, SlotIndex>, 8> LastInsertPoint; SlotIndex computeLastInsertPoint(const LiveInterval &CurLI, @@ -256,7 +257,7 @@ public: /// class LLVM_LIBRARY_VISIBILITY SplitEditor { SplitAnalysis &SA; - AliasAnalysis &AA; + AAResults &AA; LiveIntervals &LIS; VirtRegMap &VRM; MachineRegisterInfo &MRI; @@ -327,21 +328,21 @@ private: /// its def. The full live range can be inferred exactly from the range /// of RegIdx in RegAssign. /// 3. (Null, true). As above, but the ranges in RegAssign are too large, and - /// the live range must be recomputed using LiveRangeCalc::extend(). + /// the live range must be recomputed using ::extend(). /// 4. (VNI, false) The value is mapped to a single new value. /// The new value has no live ranges anywhere. ValueMap Values; - /// LRCalc - Cache for computing live ranges and SSA update. Each instance + /// LICalc - Cache for computing live ranges and SSA update. Each instance /// can only handle non-overlapping live ranges, so use a separate - /// LiveRangeCalc instance for the complement interval when in spill mode. - LiveRangeCalc LRCalc[2]; + /// LiveIntervalCalc instance for the complement interval when in spill mode. + LiveIntervalCalc LICalc[2]; - /// getLRCalc - Return the LRCalc to use for RegIdx. In spill mode, the + /// getLICalc - Return the LICalc to use for RegIdx. In spill mode, the /// complement interval can overlap the other intervals, so it gets its own - /// LRCalc instance. When not in spill mode, all intervals can share one. - LiveRangeCalc &getLRCalc(unsigned RegIdx) { - return LRCalc[SpillMode != SM_Partition && RegIdx != 0]; + /// LICalc instance. When not in spill mode, all intervals can share one. 
+ LiveIntervalCalc &getLICalc(unsigned RegIdx) { + return LICalc[SpillMode != SM_Partition && RegIdx != 0]; } /// Find a subrange corresponding to the lane mask @p LM in the live @@ -414,7 +415,7 @@ private: /// all predecessor values that reach this def. If @p LR is a subrange, /// the array @p Undefs is the set of all locations where it is undefined /// via <def,read-undef> in other subranges for the same register. - void extendPHIRange(MachineBasicBlock &B, LiveRangeCalc &LRC, + void extendPHIRange(MachineBasicBlock &B, LiveIntervalCalc &LIC, LiveRange &LR, LaneBitmask LM, ArrayRef<SlotIndex> Undefs); @@ -442,7 +443,7 @@ private: public: /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. /// Newly created intervals will be appended to newIntervals. - SplitEditor(SplitAnalysis &sa, AliasAnalysis &aa, LiveIntervals &lis, + SplitEditor(SplitAnalysis &sa, AAResults &aa, LiveIntervals &lis, VirtRegMap &vrm, MachineDominatorTree &mdt, MachineBlockFrequencyInfo &mbfi); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp index 9d4fdc6b624c..d720d93c306d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp @@ -913,6 +913,11 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { assert(To && From && "Invalid allocation object"); Allocas[From] = To; + // If From is before wo, its possible that there is a use of From between + // them. + if (From->comesBefore(To)) + const_cast<AllocaInst*>(To)->moveBefore(const_cast<AllocaInst*>(From)); + // AA might be used later for instruction scheduling, and we need it to be // able to deduce the correct aliasing releationships between pointers // derived from the alloca being remapped and the target of that remapping. @@ -1290,8 +1295,8 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { SortedSlots[J] = -1; LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot << " and slots #" << SecondSlot << " together.\n"); - unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot), - MFI->getObjectAlignment(SecondSlot)); + Align MaxAlignment = std::max(MFI->getObjectAlign(FirstSlot), + MFI->getObjectAlign(SecondSlot)); assert(MFI->getObjectSize(FirstSlot) >= MFI->getObjectSize(SecondSlot) && diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp index e16587c44a55..1e060ecbeb43 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp @@ -300,7 +300,7 @@ void StackMaps::recordStackMapOpers(const MCSymbol &MILabel, MachineInstr::const_mop_iterator MOE, bool recordResult) { MCContext &OutContext = AP.OutStreamer->getContext(); - + LocationVec Locations; LiveOutVec LiveOuts; @@ -413,19 +413,19 @@ void StackMaps::recordStatepoint(const MCSymbol &L, const MachineInstr &MI) { /// uint32 : NumRecords void StackMaps::emitStackmapHeader(MCStreamer &OS) { // Header. - OS.EmitIntValue(StackMapVersion, 1); // Version. - OS.EmitIntValue(0, 1); // Reserved. - OS.EmitIntValue(0, 2); // Reserved. + OS.emitIntValue(StackMapVersion, 1); // Version. + OS.emitIntValue(0, 1); // Reserved. + OS.emitInt16(0); // Reserved. // Num functions. LLVM_DEBUG(dbgs() << WSMP << "#functions = " << FnInfos.size() << '\n'); - OS.EmitIntValue(FnInfos.size(), 4); + OS.emitInt32(FnInfos.size()); // Num constants. 
LLVM_DEBUG(dbgs() << WSMP << "#constants = " << ConstPool.size() << '\n'); - OS.EmitIntValue(ConstPool.size(), 4); + OS.emitInt32(ConstPool.size()); // Num callsites. LLVM_DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << '\n'); - OS.EmitIntValue(CSInfos.size(), 4); + OS.emitInt32(CSInfos.size()); } /// Emit the function frame record for each function. @@ -442,9 +442,9 @@ void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) { LLVM_DEBUG(dbgs() << WSMP << "function addr: " << FR.first << " frame size: " << FR.second.StackSize << " callsite count: " << FR.second.RecordCount << '\n'); - OS.EmitSymbolValue(FR.first, 8); - OS.EmitIntValue(FR.second.StackSize, 8); - OS.EmitIntValue(FR.second.RecordCount, 8); + OS.emitSymbolValue(FR.first, 8); + OS.emitIntValue(FR.second.StackSize, 8); + OS.emitIntValue(FR.second.RecordCount, 8); } } @@ -456,7 +456,7 @@ void StackMaps::emitConstantPoolEntries(MCStreamer &OS) { LLVM_DEBUG(dbgs() << WSMP << "constants:\n"); for (const auto &ConstEntry : ConstPool) { LLVM_DEBUG(dbgs() << WSMP << ConstEntry.second << '\n'); - OS.EmitIntValue(ConstEntry.second, 8); + OS.emitIntValue(ConstEntry.second, 8); } } @@ -501,46 +501,46 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS) { // simple overflow checks, but we may eventually communicate other // compilation errors this way. if (CSLocs.size() > UINT16_MAX || LiveOuts.size() > UINT16_MAX) { - OS.EmitIntValue(UINT64_MAX, 8); // Invalid ID. - OS.EmitValue(CSI.CSOffsetExpr, 4); - OS.EmitIntValue(0, 2); // Reserved. - OS.EmitIntValue(0, 2); // 0 locations. - OS.EmitIntValue(0, 2); // padding. - OS.EmitIntValue(0, 2); // 0 live-out registers. - OS.EmitIntValue(0, 4); // padding. + OS.emitIntValue(UINT64_MAX, 8); // Invalid ID. + OS.emitValue(CSI.CSOffsetExpr, 4); + OS.emitInt16(0); // Reserved. + OS.emitInt16(0); // 0 locations. + OS.emitInt16(0); // padding. + OS.emitInt16(0); // 0 live-out registers. + OS.emitInt32(0); // padding. continue; } - OS.EmitIntValue(CSI.ID, 8); - OS.EmitValue(CSI.CSOffsetExpr, 4); + OS.emitIntValue(CSI.ID, 8); + OS.emitValue(CSI.CSOffsetExpr, 4); // Reserved for flags. - OS.EmitIntValue(0, 2); - OS.EmitIntValue(CSLocs.size(), 2); + OS.emitInt16(0); + OS.emitInt16(CSLocs.size()); for (const auto &Loc : CSLocs) { - OS.EmitIntValue(Loc.Type, 1); - OS.EmitIntValue(0, 1); // Reserved - OS.EmitIntValue(Loc.Size, 2); - OS.EmitIntValue(Loc.Reg, 2); - OS.EmitIntValue(0, 2); // Reserved - OS.EmitIntValue(Loc.Offset, 4); + OS.emitIntValue(Loc.Type, 1); + OS.emitIntValue(0, 1); // Reserved + OS.emitInt16(Loc.Size); + OS.emitInt16(Loc.Reg); + OS.emitInt16(0); // Reserved + OS.emitInt32(Loc.Offset); } // Emit alignment to 8 byte. - OS.EmitValueToAlignment(8); + OS.emitValueToAlignment(8); // Num live-out registers and padding to align to 4 byte. - OS.EmitIntValue(0, 2); - OS.EmitIntValue(LiveOuts.size(), 2); + OS.emitInt16(0); + OS.emitInt16(LiveOuts.size()); for (const auto &LO : LiveOuts) { - OS.EmitIntValue(LO.DwarfRegNum, 2); - OS.EmitIntValue(0, 1); - OS.EmitIntValue(LO.Size, 1); + OS.emitInt16(LO.DwarfRegNum); + OS.emitIntValue(0, 1); + OS.emitIntValue(LO.Size, 1); } // Emit alignment to 8 byte. - OS.EmitValueToAlignment(8); + OS.emitValueToAlignment(8); } } @@ -564,7 +564,7 @@ void StackMaps::serializeToStackMapSection() { OS.SwitchSection(StackMapSection); // Emit a dummy symbol to force section inclusion. - OS.EmitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_StackMaps"))); + OS.emitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_StackMaps"))); // Serialize data. 
LLVM_DEBUG(dbgs() << "********** Stack Map Output **********\n"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp index 4e2189884bb1..a343791807e6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLowering.h" @@ -161,9 +162,16 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge, return NeedsProtector; } -bool StackProtector::HasAddressTaken(const Instruction *AI) { +bool StackProtector::HasAddressTaken(const Instruction *AI, + uint64_t AllocSize) { + const DataLayout &DL = M->getDataLayout(); for (const User *U : AI->users()) { const auto *I = cast<Instruction>(U); + // If this instruction accesses memory make sure it doesn't access beyond + // the bounds of the allocated object. + Optional<MemoryLocation> MemLoc = MemoryLocation::getOrNone(I); + if (MemLoc.hasValue() && MemLoc->Size.getValue() > AllocSize) + return true; switch (I->getOpcode()) { case Instruction::Store: if (AI == cast<StoreInst>(I)->getValueOperand()) @@ -189,11 +197,26 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) { } case Instruction::Invoke: return true; + case Instruction::GetElementPtr: { + // If the GEP offset is out-of-bounds, or is non-constant and so has to be + // assumed to be potentially out-of-bounds, then any memory access that + // would use it could also be out-of-bounds meaning stack protection is + // required. + const GetElementPtrInst *GEP = cast<GetElementPtrInst>(I); + unsigned TypeSize = DL.getIndexTypeSizeInBits(I->getType()); + APInt Offset(TypeSize, 0); + APInt MaxOffset(TypeSize, AllocSize); + if (!GEP->accumulateConstantOffset(DL, Offset) || Offset.ugt(MaxOffset)) + return true; + // Adjust AllocSize to be the space remaining after this offset. + if (HasAddressTaken(I, AllocSize - Offset.getLimitedValue())) + return true; + break; + } case Instruction::BitCast: - case Instruction::GetElementPtr: case Instruction::Select: case Instruction::AddrSpaceCast: - if (HasAddressTaken(I)) + if (HasAddressTaken(I, AllocSize)) return true; break; case Instruction::PHI: { @@ -201,7 +224,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) { // they are only visited once. const auto *PN = cast<PHINode>(I); if (VisitedPHIs.insert(PN).second) - if (HasAddressTaken(PN)) + if (HasAddressTaken(PN, AllocSize)) return true; break; } @@ -330,7 +353,8 @@ bool StackProtector::RequiresStackProtector() { continue; } - if (Strong && HasAddressTaken(AI)) { + if (Strong && HasAddressTaken(AI, M->getDataLayout().getTypeAllocSize( + AI->getAllocatedType()))) { ++NumAddrTaken; Layout.insert(std::make_pair(AI, MachineFrameInfo::SSPLK_AddrOf)); ORE.emit([&]() { @@ -342,6 +366,9 @@ bool StackProtector::RequiresStackProtector() { }); NeedsProtector = true; } + // Clear any PHIs that we visited, to make sure we examine all uses of + // any subsequent allocas that we look at. 
+ VisitedPHIs.clear(); } } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp index 7ae758323280..3cc5d30ebad7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -74,7 +74,7 @@ namespace { SmallVector<SmallVector<MachineMemOperand *, 8>, 16> SSRefs; // OrigAlignments - Alignments of stack objects before coloring. - SmallVector<unsigned, 16> OrigAlignments; + SmallVector<Align, 16> OrigAlignments; // OrigSizes - Sizess of stack objects before coloring. SmallVector<unsigned, 16> OrigSizes; @@ -227,7 +227,7 @@ void StackSlotColoring::InitializeSlots() { continue; SSIntervals.push_back(&li); - OrigAlignments[FI] = MFI->getObjectAlignment(FI); + OrigAlignments[FI] = MFI->getObjectAlign(FI); OrigSizes[FI] = MFI->getObjectSize(FI); auto StackID = MFI->getStackID(FI); @@ -309,9 +309,9 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) { // Change size and alignment of the allocated slot. If there are multiple // objects sharing the same slot, then make sure the size and alignment // are large enough for all. - unsigned Align = OrigAlignments[FI]; - if (!Share || Align > MFI->getObjectAlignment(Color)) - MFI->setObjectAlignment(Color, Align); + Align Alignment = OrigAlignments[FI]; + if (!Share || Alignment > MFI->getObjectAlign(Color)) + MFI->setObjectAlignment(Color, Alignment); int64_t Size = OrigSizes[FI]; if (!Share || Size > MFI->getObjectSize(Color)) MFI->setObjectSize(Color, Size); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp index c72a04276a4f..dd0b9d4c2e48 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp @@ -264,11 +264,10 @@ void SwiftErrorValueTracking::preassignVRegs( // Iterator over instructions and assign vregs to swifterror defs and uses. for (auto It = Begin; It != End; ++It) { - ImmutableCallSite CS(&*It); - if (CS) { + if (auto *CB = dyn_cast<CallBase>(&*It)) { // A call-site with a swifterror argument is both use and def. const Value *SwiftErrorAddr = nullptr; - for (auto &Arg : CS.args()) { + for (auto &Arg : CB->args()) { if (!Arg->isSwiftError()) continue; // Use of swifterror. 
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp index c2cd8fa0324e..078c9691f8dc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/SwitchLoweringUtils.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; using namespace SwitchCG; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp index 648bf48b7d17..20892a79d35f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp @@ -31,6 +31,7 @@ namespace { class TailDuplicateBase : public MachineFunctionPass { TailDuplicator Duplicator; + std::unique_ptr<MBFIWrapper> MBFIW; bool PreRegAlloc; public: TailDuplicateBase(char &PassID, bool PreRegAlloc) @@ -88,7 +89,10 @@ bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) { auto *MBFI = (PSI && PSI->hasProfileSummary()) ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() : nullptr; - Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI, PSI, /*LayoutMode=*/false); + if (MBFI) + MBFIW = std::make_unique<MBFIWrapper>(*MBFI); + Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI ? MBFIW.get() : nullptr, PSI, + /*LayoutMode=*/false); bool MadeChange = false; while (Duplicator.tailDuplicateBlocks()) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp index cd1278fd4d8d..bd554189f12b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp @@ -80,7 +80,7 @@ static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U), void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPIin, - const MachineBlockFrequencyInfo *MBFIin, + MBFIWrapper *MBFIin, ProfileSummaryInfo *PSIin, bool LayoutModeIn, unsigned TailDupSizeIn) { MF = &MFin; @@ -159,14 +159,16 @@ bool TailDuplicator::tailDuplicateAndUpdate( bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds, - function_ref<void(MachineBasicBlock *)> *RemovalCallback) { + function_ref<void(MachineBasicBlock *)> *RemovalCallback, + SmallVectorImpl<MachineBasicBlock *> *CandidatePtr) { // Save the successors list. SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(), MBB->succ_end()); SmallVector<MachineBasicBlock *, 8> TDBBs; SmallVector<MachineInstr *, 16> Copies; - if (!tailDuplicate(IsSimple, MBB, ForcedLayoutPred, TDBBs, Copies)) + if (!tailDuplicate(IsSimple, MBB, ForcedLayoutPred, + TDBBs, Copies, CandidatePtr)) return false; ++NumTails; @@ -204,11 +206,11 @@ bool TailDuplicator::tailDuplicateAndUpdate( } // Add the new vregs as available values. 
- DenseMap<unsigned, AvailableValsTy>::iterator LI = + DenseMap<Register, AvailableValsTy>::iterator LI = SSAUpdateVals.find(VReg); for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { MachineBasicBlock *SrcBB = LI->second[j].first; - unsigned SrcReg = LI->second[j].second; + Register SrcReg = LI->second[j].second; SSAUpdate.AddAvailableValue(SrcBB, SrcReg); } @@ -292,7 +294,7 @@ bool TailDuplicator::tailDuplicateBlocks() { return MadeChange; } -static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, +static bool isDefLiveOut(Register Reg, MachineBasicBlock *BB, const MachineRegisterInfo *MRI) { for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { if (UseMI.isDebugValue()) @@ -314,7 +316,7 @@ static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { // used to determine which registers are liveout while modifying the // block (which is why we need to copy the information). static void getRegsUsedByPHIs(const MachineBasicBlock &BB, - DenseSet<unsigned> *UsedByPhi) { + DenseSet<Register> *UsedByPhi) { for (const auto &MI : BB) { if (!MI.isPHI()) break; @@ -326,9 +328,9 @@ static void getRegsUsedByPHIs(const MachineBasicBlock &BB, } /// Add a definition and source virtual registers pair for SSA update. -void TailDuplicator::addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, +void TailDuplicator::addSSAUpdateEntry(Register OrigReg, Register NewReg, MachineBasicBlock *BB) { - DenseMap<unsigned, AvailableValsTy>::iterator LI = + DenseMap<Register, AvailableValsTy>::iterator LI = SSAUpdateVals.find(OrigReg); if (LI != SSAUpdateVals.end()) LI->second.push_back(std::make_pair(BB, NewReg)); @@ -344,9 +346,9 @@ void TailDuplicator::addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, /// source register that's contributed by PredBB and update SSA update map. void TailDuplicator::processPHI( MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, - DenseMap<unsigned, RegSubRegPair> &LocalVRMap, - SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies, - const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) { + DenseMap<Register, RegSubRegPair> &LocalVRMap, + SmallVectorImpl<std::pair<Register, RegSubRegPair>> &Copies, + const DenseSet<Register> &RegsUsedByPhi, bool Remove) { Register DefReg = MI->getOperand(0).getReg(); unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); assert(SrcOpIdx && "Unable to find matching PHI source?"); @@ -376,8 +378,8 @@ void TailDuplicator::processPHI( /// the source operands due to earlier PHI translation. void TailDuplicator::duplicateInstruction( MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, - DenseMap<unsigned, RegSubRegPair> &LocalVRMap, - const DenseSet<unsigned> &UsedByPhi) { + DenseMap<Register, RegSubRegPair> &LocalVRMap, + const DenseSet<Register> &UsedByPhi) { // Allow duplication of CFI instructions. if (MI->isCFIInstruction()) { BuildMI(*PredBB, PredBB->end(), PredBB->findDebugLoc(PredBB->begin()), @@ -502,7 +504,7 @@ void TailDuplicator::updateSuccessorsPHIs( // If Idx is set, the operands at Idx and Idx+1 must be removed. // We reuse the location to avoid expensive RemoveOperand calls. - DenseMap<unsigned, AvailableValsTy>::iterator LI = + DenseMap<Register, AvailableValsTy>::iterator LI = SSAUpdateVals.find(Reg); if (LI != SSAUpdateVals.end()) { // This register is defined in the tail block. 
@@ -515,7 +517,7 @@ void TailDuplicator::updateSuccessorsPHIs( if (!SrcBB->isSuccessor(SuccBB)) continue; - unsigned SrcReg = LI->second[j].second; + Register SrcReg = LI->second[j].second; if (Idx != 0) { MI.getOperand(Idx).setReg(SrcReg); MI.getOperand(Idx + 1).setMBB(SrcBB); @@ -625,7 +627,9 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, if (PreRegAlloc && MI.isCall()) return false; - if (!MI.isPHI() && !MI.isMetaInstruction()) + if (MI.isBundle()) + InstrCount += MI.getBundleSize(); + else if (!MI.isPHI() && !MI.isMetaInstruction()) InstrCount += 1; if (InstrCount > MaxDuplicateCount) @@ -704,7 +708,7 @@ bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) { bool TailDuplicator::duplicateSimpleBB( MachineBasicBlock *TailBB, SmallVectorImpl<MachineBasicBlock *> &TDBBs, - const DenseSet<unsigned> &UsedByPhi, + const DenseSet<Register> &UsedByPhi, SmallVectorImpl<MachineInstr *> &Copies) { SmallPtrSet<MachineBasicBlock *, 8> Succs(TailBB->succ_begin(), TailBB->succ_end()); @@ -712,7 +716,7 @@ bool TailDuplicator::duplicateSimpleBB( TailBB->pred_end()); bool Changed = false; for (MachineBasicBlock *PredBB : Preds) { - if (PredBB->hasEHPadSuccessor()) + if (PredBB->hasEHPadSuccessor() || PredBB->mayHaveInlineAsmBr()) continue; if (bothUsedInPHI(*PredBB, Succs)) @@ -802,13 +806,16 @@ bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB, /// \p Copies A vector of copy instructions inserted. Used later to /// walk all the inserted copies and remove redundant ones. bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, - MachineBasicBlock *ForcedLayoutPred, - SmallVectorImpl<MachineBasicBlock *> &TDBBs, - SmallVectorImpl<MachineInstr *> &Copies) { + MachineBasicBlock *ForcedLayoutPred, + SmallVectorImpl<MachineBasicBlock *> &TDBBs, + SmallVectorImpl<MachineInstr *> &Copies, + SmallVectorImpl<MachineBasicBlock *> *CandidatePtr) { LLVM_DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB) << '\n'); - DenseSet<unsigned> UsedByPhi; + bool ShouldUpdateTerminators = TailBB->canFallThrough(); + + DenseSet<Register> UsedByPhi; getRegsUsedByPHIs(*TailBB, &UsedByPhi); if (IsSimple) @@ -818,8 +825,12 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, // block into them, if possible. Copying the list ahead of time also // avoids trouble with the predecessor list reallocating. bool Changed = false; - SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), - TailBB->pred_end()); + SmallSetVector<MachineBasicBlock *, 8> Preds; + if (CandidatePtr) + Preds.insert(CandidatePtr->begin(), CandidatePtr->end()); + else + Preds.insert(TailBB->pred_begin(), TailBB->pred_end()); + for (MachineBasicBlock *PredBB : Preds) { assert(TailBB != PredBB && "Single-block loop should have been rejected earlier!"); @@ -828,13 +839,17 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, continue; // Don't duplicate into a fall-through predecessor (at least for now). - bool IsLayoutSuccessor = false; - if (ForcedLayoutPred) - IsLayoutSuccessor = (ForcedLayoutPred == PredBB); - else if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) - IsLayoutSuccessor = true; - if (IsLayoutSuccessor) - continue; + // If profile is available, findDuplicateCandidates can choose better + // fall-through predecessor. 
+ if (!(MF->getFunction().hasProfileData() && LayoutMode)) { + bool IsLayoutSuccessor = false; + if (ForcedLayoutPred) + IsLayoutSuccessor = (ForcedLayoutPred == PredBB); + else if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) + IsLayoutSuccessor = true; + if (IsLayoutSuccessor) + continue; + } LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB << "From Succ: " << *TailBB); @@ -845,8 +860,8 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, TII->removeBranch(*PredBB); // Clone the contents of TailBB into PredBB. - DenseMap<unsigned, RegSubRegPair> LocalVRMap; - SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; + DenseMap<Register, RegSubRegPair> LocalVRMap; + SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos; for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end(); I != E; /* empty */) { MachineInstr *MI = &*I; @@ -872,6 +887,10 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, for (MachineBasicBlock *Succ : TailBB->successors()) PredBB->addSuccessor(Succ, MBPI->getEdgeProbability(TailBB, Succ)); + // Update branches in pred to jump to tail's layout successor if needed. + if (ShouldUpdateTerminators) + PredBB->updateTerminator(TailBB->getNextNode()); + Changed = true; ++NumTailDups; } @@ -901,8 +920,8 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, // duplicating the instructions in all cases. TII->removeBranch(*PrevBB); if (PreRegAlloc) { - DenseMap<unsigned, RegSubRegPair> LocalVRMap; - SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; + DenseMap<Register, RegSubRegPair> LocalVRMap; + SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); // Process PHI instructions first. while (I != TailBB->end() && I->isPHI()) { @@ -930,6 +949,11 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, PrevBB->removeSuccessor(PrevBB->succ_begin()); assert(PrevBB->succ_empty()); PrevBB->transferSuccessors(TailBB); + + // Update branches in PrevBB based on Tail's layout successor. + if (ShouldUpdateTerminators) + PrevBB->updateTerminator(TailBB->getNextNode()); + TDBBs.push_back(PrevBB); Changed = true; } @@ -964,8 +988,8 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, if (PredBB->succ_size() != 1) continue; - DenseMap<unsigned, RegSubRegPair> LocalVRMap; - SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; + DenseMap<Register, RegSubRegPair> LocalVRMap; + SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); // Process PHI instructions first. while (I != TailBB->end() && I->isPHI()) { @@ -983,7 +1007,7 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, /// At the end of the block \p MBB generate COPY instructions between registers /// described by \p CopyInfos. Append resulting instructions to \p Copies. 
void TailDuplicator::appendCopies(MachineBasicBlock *MBB, - SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos, + SmallVectorImpl<std::pair<Register, RegSubRegPair>> &CopyInfos, SmallVectorImpl<MachineInstr*> &Copies) { MachineBasicBlock::iterator Loc = MBB->getFirstTerminator(); const MCInstrDesc &CopyD = TII->get(TargetOpcode::COPY); @@ -1002,6 +1026,13 @@ void TailDuplicator::removeDeadBlock( assert(MBB->pred_empty() && "MBB must be dead!"); LLVM_DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); + MachineFunction *MF = MBB->getParent(); + // Update the call site info. + std::for_each(MBB->begin(), MBB->end(), [MF](const MachineInstr &MI) { + if (MI.shouldUpdateCallSiteInfo()) + MF->eraseCallSiteInfo(&MI); + }); + if (RemovalCallback) (*RemovalCallback)(MBB); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index bc59be890c97..f8b482c04a58 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -10,17 +10,17 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Target/TargetMachine.h" @@ -42,7 +42,8 @@ bool TargetFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const /// (in output arg FrameReg). This is the default implementation which /// is overridden for some targets. int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, - int FI, unsigned &FrameReg) const { + int FI, + Register &FrameReg) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); @@ -140,8 +141,8 @@ bool TargetFrameLowering::isSafeForNoCSROpt(const Function &F) { return false; // Function should not be optimized as tail call. 
for (const User *U : F.users()) - if (auto CS = ImmutableCallSite(U)) - if (CS.isTailCall()) + if (auto *CB = dyn_cast<CallBase>(U)) + if (CB->isTailCall()) return false; return true; } @@ -150,7 +151,13 @@ int TargetFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const { llvm_unreachable("getInitialCFAOffset() not implemented!"); } -unsigned TargetFrameLowering::getInitialCFARegister(const MachineFunction &MF) - const { +Register +TargetFrameLowering::getInitialCFARegister(const MachineFunction &MF) const { llvm_unreachable("getInitialCFARegister() not implemented!"); } + +TargetFrameLowering::DwarfFrameBase +TargetFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const { + const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); + return DwarfFrameBase{DwarfFrameBase::Register, {RI->getFrameRegister(MF)}}; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp index a98c627dab09..24f3f96d0b1d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -104,14 +105,14 @@ unsigned TargetInstrInfo::getInlineAsmLength( AtInsnStart = false; } - if (AtInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) { + if (AtInsnStart && !isSpace(static_cast<unsigned char>(*Str))) { unsigned AddLength = MaxInstLength; if (strncmp(Str, ".space", 6) == 0) { char *EStr; int SpaceSize; SpaceSize = strtol(Str + 6, &EStr, 10); SpaceSize = SpaceSize < 0 ? 0 : SpaceSize; - while (*EStr != '\n' && std::isspace(static_cast<unsigned char>(*EStr))) + while (*EStr != '\n' && isSpace(static_cast<unsigned char>(*EStr))) ++EStr; if (*EStr == '\0' || *EStr == '\n' || isAsmComment(EStr, MAI)) // Successfully parsed .space argument @@ -143,7 +144,7 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, // from the end of MBB. while (Tail != MBB->end()) { auto MI = Tail++; - if (MI->isCall()) + if (MI->shouldUpdateCallSiteInfo()) MBB->getParent()->eraseCallSiteInfo(&*MI); MBB->erase(MI); } @@ -408,7 +409,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SubIdx, + Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const { MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig); @@ -591,11 +592,15 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, NewMI->mayLoad()) && "Folded a use to a non-load!"); assert(MFI.getObjectOffset(FI) != -1); - MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo::getFixedStack(MF, FI), Flags, MemSize, - MFI.getObjectAlignment(FI)); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI), + Flags, MemSize, MFI.getObjectAlign(FI)); NewMI->addMemOperand(MF, MMO); + // The pass "x86 speculative load hardening" always attaches symbols to + // call instructions. We need copy it form old instruction. 
+ NewMI->cloneInstrSymbols(MF, MI); + return NewMI; } @@ -699,10 +704,13 @@ bool TargetInstrInfo::hasReassociableSibling(const MachineInstr &Inst, std::swap(MI1, MI2); // 1. The previous instruction must be the same type as Inst. - // 2. The previous instruction must have virtual register definitions for its + // 2. The previous instruction must also be associative/commutative (this can + // be different even for instructions with the same opcode if traits like + // fast-math-flags are included). + // 3. The previous instruction must have virtual register definitions for its // operands in the same basic block as Inst. - // 3. The previous instruction's result must only be used by Inst. - return MI1->getOpcode() == AssocOpcode && + // 4. The previous instruction's result must only be used by Inst. + return MI1->getOpcode() == AssocOpcode && isAssociativeAndCommutative(*MI1) && hasReassociableOperands(*MI1, MBB) && MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()); } @@ -991,6 +999,10 @@ bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr &MI, if (MI.isTerminator() || MI.isPosition()) return true; + // INLINEASM_BR can jump to another block + if (MI.getOpcode() == TargetOpcode::INLINEASM_BR) + return true; + // Don't attempt to schedule around any instruction that defines // a stack-oriented pointer, as it's unlikely to be profitable. This // saves compile time, because it doesn't require every single @@ -1028,6 +1040,20 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, return new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched"); } +// Default implementation of getMemOperandWithOffset. +bool TargetInstrInfo::getMemOperandWithOffset( + const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, + bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const { + SmallVector<const MachineOperand *, 4> BaseOps; + unsigned Width; + if (!getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, OffsetIsScalable, + Width, TRI) || + BaseOps.size() != 1) + return false; + BaseOp = BaseOps.front(); + return true; +} + //===----------------------------------------------------------------------===// // SelectionDAG latency interface. //===----------------------------------------------------------------------===// @@ -1125,6 +1151,7 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI, const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); DIExpression *Expr = DIExpression::get(MF->getFunction().getContext(), {}); int64_t Offset; + bool OffsetIsScalable; // To simplify the sub-register handling, verify that we only need to // consider physical registers. @@ -1134,6 +1161,11 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI, if (auto DestSrc = isCopyInstr(MI)) { Register DestReg = DestSrc->Destination->getReg(); + // If the copy destination is the forwarding reg, describe the forwarding + // reg using the copy source as the backup location. 
Example: + // + // x0 = MOV x7 + // call callee(x0) ; x0 described as x7 if (Reg == DestReg) return ParamLoadedValue(*DestSrc->Source, Expr); @@ -1163,11 +1195,22 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI, return None; const MachineOperand *BaseOp; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) + if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, + TRI)) return None; - assert(MI.getNumExplicitDefs() == 1 && - "Can currently only handle mem instructions with a single define"); + // FIXME: Scalable offsets are not yet handled in the offset code below. + if (OffsetIsScalable) + return None; + + // TODO: Can currently only handle mem instructions with a single define. + // An example from the x86 target: + // ... + // DIV64m $rsp, 1, $noreg, 24, $noreg, implicit-def dead $rax, implicit-def $rdx + // ... + // + if (MI.getNumExplicitDefs() != 1) + return None; // TODO: In what way do we need to take Reg into consideration here? @@ -1290,4 +1333,60 @@ bool TargetInstrInfo::getInsertSubregInputs( return true; } +// Returns a MIRPrinter comment for this machine operand. +std::string TargetInstrInfo::createMIROperandComment( + const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, + const TargetRegisterInfo *TRI) const { + + if (!MI.isInlineAsm()) + return ""; + + std::string Flags; + raw_string_ostream OS(Flags); + + if (OpIdx == InlineAsm::MIOp_ExtraInfo) { + // Print HasSideEffects, MayLoad, MayStore, IsAlignStack + unsigned ExtraInfo = Op.getImm(); + bool First = true; + for (StringRef Info : InlineAsm::getExtraInfoNames(ExtraInfo)) { + if (!First) + OS << " "; + First = false; + OS << Info; + } + + return OS.str(); + } + + int FlagIdx = MI.findInlineAsmFlagIdx(OpIdx); + if (FlagIdx < 0 || (unsigned)FlagIdx != OpIdx) + return ""; + + assert(Op.isImm() && "Expected flag operand to be an immediate"); + // Pretty print the inline asm operand descriptor. 
+ unsigned Flag = Op.getImm(); + unsigned Kind = InlineAsm::getKind(Flag); + OS << InlineAsm::getKindName(Kind); + + unsigned RCID = 0; + if (!InlineAsm::isImmKind(Flag) && !InlineAsm::isMemKind(Flag) && + InlineAsm::hasRegClassConstraint(Flag, RCID)) { + if (TRI) { + OS << ':' << TRI->getRegClassName(TRI->getRegClass(RCID)); + } else + OS << ":RC" << RCID; + } + + if (InlineAsm::isMemKind(Flag)) { + unsigned MCID = InlineAsm::getMemoryConstraintID(Flag); + OS << ":" << InlineAsm::getMemConstraintName(MCID); + } + + unsigned TiedTo = 0; + if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo)) + OS << " tiedto:$" << TiedTo; + + return OS.str(); +} + TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp index e5a7b70d82c8..2c94c2c62e5f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -17,6 +17,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -51,6 +53,7 @@ #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/SizeOpts.h" #include <algorithm> #include <cassert> #include <cstddef> @@ -612,7 +615,7 @@ void TargetLoweringBase::initActions() { std::end(TargetDAGCombineArray), 0); for (MVT VT : MVT::fp_valuetypes()) { - MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits()); + MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits().getFixedSize()); if (IntVT.isValid()) { setOperationAction(ISD::ATOMIC_SWAP, VT, Promote); AddPromotedToType(ISD::ATOMIC_SWAP, VT, IntVT); @@ -659,7 +662,9 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::UMULFIX, VT, Expand); setOperationAction(ISD::UMULFIXSAT, VT, Expand); setOperationAction(ISD::SDIVFIX, VT, Expand); + setOperationAction(ISD::SDIVFIXSAT, VT, Expand); setOperationAction(ISD::UDIVFIX, VT, Expand); + setOperationAction(ISD::UDIVFIXSAT, VT, Expand); // Overflow operations default to expand setOperationAction(ISD::SADDO, VT, Expand); @@ -688,6 +693,7 @@ void TargetLoweringBase::initActions() { // These library functions default to expand. setOperationAction(ISD::FROUND, VT, Expand); + setOperationAction(ISD::FROUNDEVEN, VT, Expand); setOperationAction(ISD::FPOWI, VT, Expand); // These operations default to expand for vector types. @@ -701,7 +707,7 @@ void TargetLoweringBase::initActions() { } // Constrained floating-point operations default to expand. 
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ +#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ setOperationAction(ISD::STRICT_##DAGN, VT, Expand); #include "llvm/IR/ConstrainedOps.def" @@ -753,6 +759,7 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::FRINT, VT, Expand); setOperationAction(ISD::FTRUNC, VT, Expand); setOperationAction(ISD::FROUND, VT, Expand); + setOperationAction(ISD::FROUNDEVEN, VT, Expand); setOperationAction(ISD::LROUND, VT, Expand); setOperationAction(ISD::LLROUND, VT, Expand); setOperationAction(ISD::LRINT, VT, Expand); @@ -810,6 +817,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT); assert((LA == TypeLegal || LA == TypeSoftenFloat || + LA == TypeSoftPromoteHalf || (NVT.isVector() || ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger)) && "Promote may not follow Expand or Promote"); @@ -817,7 +825,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { if (LA == TypeSplitVector) return LegalizeKind(LA, EVT::getVectorVT(Context, SVT.getVectorElementType(), - SVT.getVectorNumElements() / 2)); + SVT.getVectorElementCount() / 2)); if (LA == TypeScalarizeVector) return LegalizeKind(LA, SVT.getVectorElementType()); return LegalizeKind(LA, NVT); @@ -844,13 +852,16 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { } // Handle vector types. - unsigned NumElts = VT.getVectorNumElements(); + ElementCount NumElts = VT.getVectorElementCount(); EVT EltVT = VT.getVectorElementType(); // Vectors with only one element are always scalarized. if (NumElts == 1) return LegalizeKind(TypeScalarizeVector, EltVT); + if (VT.getVectorElementCount() == ElementCount(1, true)) + report_fatal_error("Cannot legalize this vector"); + // Try to widen vector elements until the element type is a power of two and // promote it to a legal type later on, for example: // <3 x i8> -> <4 x i8> -> <4 x i32> @@ -858,7 +869,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { // Vectors with a number of elements that is not a power of two are always // widened, for example <3 x i8> -> <4 x i8>. if (!VT.isPow2VectorType()) { - NumElts = (unsigned)NextPowerOf2(NumElts); + NumElts = NumElts.NextPowerOf2(); EVT NVT = EVT::getVectorVT(Context, EltVT, NumElts); return LegalizeKind(TypeWidenVector, NVT); } @@ -907,7 +918,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { // If there is no wider legal type, split the vector. while (true) { // Round up to the next power of 2. - NumElts = (unsigned)NextPowerOf2(NumElts); + NumElts = NumElts.NextPowerOf2(); // If there is no simple vector type with this many elements then there // cannot be a larger legal vector type. Note that this assumes that @@ -930,7 +941,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { } // Vectors with illegal element types are expanded. - EVT NVT = EVT::getVectorVT(Context, EltVT, VT.getVectorNumElements() / 2); + EVT NVT = EVT::getVectorVT(Context, EltVT, VT.getVectorElementCount() / 2); return LegalizeKind(TypeSplitVector, NVT); } @@ -939,42 +950,51 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, MVT &RegisterVT, TargetLoweringBase *TLI) { // Figure out the right, legal destination reg to copy into. 
- unsigned NumElts = VT.getVectorNumElements(); + ElementCount EC = VT.getVectorElementCount(); MVT EltTy = VT.getVectorElementType(); unsigned NumVectorRegs = 1; - // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we - // could break down into LHS/RHS like LegalizeDAG does. - if (!isPowerOf2_32(NumElts)) { - NumVectorRegs = NumElts; - NumElts = 1; + // Scalable vectors cannot be scalarized, so splitting or widening is + // required. + if (VT.isScalableVector() && !isPowerOf2_32(EC.Min)) + llvm_unreachable( + "Splitting or widening of non-power-of-2 MVTs is not implemented."); + + // FIXME: We don't support non-power-of-2-sized vectors for now. + // Ideally we could break down into LHS/RHS like LegalizeDAG does. + if (!isPowerOf2_32(EC.Min)) { + // Split EC to unit size (scalable property is preserved). + NumVectorRegs = EC.Min; + EC = EC / NumVectorRegs; } - // Divide the input until we get to a supported size. This will always - // end with a scalar if the target doesn't support vectors. - while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { - NumElts >>= 1; + // Divide the input until we get to a supported size. This will + // always end up with an EC that represent a scalar or a scalable + // scalar. + while (EC.Min > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, EC))) { + EC.Min >>= 1; NumVectorRegs <<= 1; } NumIntermediates = NumVectorRegs; - MVT NewVT = MVT::getVectorVT(EltTy, NumElts); + MVT NewVT = MVT::getVectorVT(EltTy, EC); if (!TLI->isTypeLegal(NewVT)) NewVT = EltTy; IntermediateVT = NewVT; - unsigned NewVTSize = NewVT.getSizeInBits(); + unsigned LaneSizeInBits = NewVT.getScalarSizeInBits().getFixedSize(); // Convert sizes such as i33 to i64. - if (!isPowerOf2_32(NewVTSize)) - NewVTSize = NextPowerOf2(NewVTSize); + if (!isPowerOf2_32(LaneSizeInBits)) + LaneSizeInBits = NextPowerOf2(LaneSizeInBits); MVT DestVT = TLI->getRegisterType(NewVT); RegisterVT = DestVT; if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. - return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); + return NumVectorRegs * + (LaneSizeInBits / DestVT.getScalarSizeInBits().getFixedSize()); // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. @@ -1012,20 +1032,25 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI, // all stack slots), but we need to handle the different type of stackmap // operands and memory effects here. - // MI changes inside this loop as we grow operands. - for(unsigned OperIdx = 0; OperIdx != MI->getNumOperands(); ++OperIdx) { - MachineOperand &MO = MI->getOperand(OperIdx); - if (!MO.isFI()) + if (!llvm::any_of(MI->operands(), + [](MachineOperand &Operand) { return Operand.isFI(); })) + return MBB; + + MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), MI->getDesc()); + + // Inherit previous memory operands. + MIB.cloneMemRefs(*MI); + + for (auto &MO : MI->operands()) { + if (!MO.isFI()) { + MIB.add(MO); continue; + } // foldMemoryOperand builds a new MI after replacing a single FI operand // with the canonical set of five x86 addressing-mode operands. int FI = MO.getIndex(); - MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), MI->getDesc()); - // Copy operands before the frame-index. - for (unsigned i = 0; i < OperIdx; ++i) - MIB.add(MI->getOperand(i)); // Add frame index operands recognized by stackmaps.cpp if (MFI.isStatepointSpillSlotObjectIndex(FI)) { // indirect-mem-ref tag, size, #FI, offset. 
@@ -1035,21 +1060,16 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI, assert(MI->getOpcode() == TargetOpcode::STATEPOINT && "sanity"); MIB.addImm(StackMaps::IndirectMemRefOp); MIB.addImm(MFI.getObjectSize(FI)); - MIB.add(MI->getOperand(OperIdx)); + MIB.add(MO); MIB.addImm(0); } else { // direct-mem-ref tag, #FI, offset. // Used by patchpoint, and direct alloca arguments to statepoints MIB.addImm(StackMaps::DirectMemRefOp); - MIB.add(MI->getOperand(OperIdx)); + MIB.add(MO); MIB.addImm(0); } - // Copy the operands after the frame index. - for (unsigned i = OperIdx + 1; i != MI->getNumOperands(); ++i) - MIB.add(MI->getOperand(i)); - // Inherit previous memory operands. - MIB.cloneMemRefs(*MI); assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!"); // Add a new memory operand for this FI. @@ -1061,16 +1081,12 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI, auto Flags = MachineMemOperand::MOLoad; MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FI), Flags, - MF.getDataLayout().getPointerSize(), MFI.getObjectAlignment(FI)); + MF.getDataLayout().getPointerSize(), MFI.getObjectAlign(FI)); MIB->addMemOperand(MF, MMO); } - - // Replace the instruction and update the operand index. - MBB->insert(MachineBasicBlock::iterator(MI), MIB); - OperIdx += (MIB->getNumOperands() - MI->getNumOperands()) - 1; - MI->eraseFromParent(); - MI = MIB; } + MBB->insert(MachineBasicBlock::iterator(MI), MIB); + MI->eraseFromParent(); return MBB; } @@ -1228,10 +1244,18 @@ void TargetLoweringBase::computeRegisterProperties( // promote it to f32, because there are no f16 library calls (except for // conversions). if (!isTypeLegal(MVT::f16)) { - NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32]; - RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32]; - TransformToType[MVT::f16] = MVT::f32; - ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat); + // Allow targets to control how we legalize half. + if (softPromoteHalfType()) { + NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16]; + RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16]; + TransformToType[MVT::f16] = MVT::f32; + ValueTypeActions.setTypeAction(MVT::f16, TypeSoftPromoteHalf); + } else { + NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32]; + RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32]; + TransformToType[MVT::f16] = MVT::f32; + ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat); + } } // Loop over all of the vector value types to see which need transformations. @@ -1242,7 +1266,7 @@ void TargetLoweringBase::computeRegisterProperties( continue; MVT EltVT = VT.getVectorElementType(); - unsigned NElts = VT.getVectorNumElements(); + ElementCount EC = VT.getVectorElementCount(); bool IsLegalWiderType = false; bool IsScalable = VT.isScalableVector(); LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT); @@ -1259,8 +1283,7 @@ void TargetLoweringBase::computeRegisterProperties( // Promote vectors of integers to vectors with the same number // of elements, with a wider element type. 
if (SVT.getScalarSizeInBits() > EltVT.getSizeInBits() && - SVT.getVectorNumElements() == NElts && - SVT.isScalableVector() == IsScalable && isTypeLegal(SVT)) { + SVT.getVectorElementCount() == EC && isTypeLegal(SVT)) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; @@ -1275,13 +1298,13 @@ void TargetLoweringBase::computeRegisterProperties( } case TypeWidenVector: - if (isPowerOf2_32(NElts)) { + if (isPowerOf2_32(EC.Min)) { // Try to widen the vector. for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { MVT SVT = (MVT::SimpleValueType) nVT; - if (SVT.getVectorElementType() == EltVT - && SVT.getVectorNumElements() > NElts - && SVT.isScalableVector() == IsScalable && isTypeLegal(SVT)) { + if (SVT.getVectorElementType() == EltVT && + SVT.isScalableVector() == IsScalable && + SVT.getVectorElementCount().Min > EC.Min && isTypeLegal(SVT)) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; @@ -1325,10 +1348,12 @@ void TargetLoweringBase::computeRegisterProperties( ValueTypeActions.setTypeAction(VT, TypeScalarizeVector); else if (PreferredAction == TypeSplitVector) ValueTypeActions.setTypeAction(VT, TypeSplitVector); + else if (EC.Min > 1) + ValueTypeActions.setTypeAction(VT, TypeSplitVector); else - // Set type action according to the number of elements. - ValueTypeActions.setTypeAction(VT, NElts == 1 ? TypeScalarizeVector - : TypeSplitVector); + ValueTypeActions.setTypeAction(VT, EC.Scalable + ? TypeScalarizeScalableVector + : TypeScalarizeVector); } else { TransformToType[i] = NVT; ValueTypeActions.setTypeAction(VT, TypeWidenVector); @@ -1376,7 +1401,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const { - unsigned NumElts = VT.getVectorNumElements(); + ElementCount EltCnt = VT.getVectorElementCount(); // If there is a wider vector type with the same element type as this one, // or a promoted vector type that has the same number of elements which @@ -1384,7 +1409,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT // This handles things like <2 x float> -> <4 x float> and // <4 x i1> -> <4 x i32>. LegalizeTypeAction TA = getTypeAction(Context, VT); - if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) { + if (EltCnt.Min != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) { EVT RegisterEVT = getTypeToTransformTo(Context, VT); if (isTypeLegal(RegisterEVT)) { IntermediateVT = RegisterEVT; @@ -1399,38 +1424,64 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT unsigned NumVectorRegs = 1; - // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we - // could break down into LHS/RHS like LegalizeDAG does. - if (!isPowerOf2_32(NumElts)) { - NumVectorRegs = NumElts; - NumElts = 1; + // Scalable vectors cannot be scalarized, so handle the legalisation of the + // types like done elsewhere in SelectionDAG. + if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.Min)) { + LegalizeKind LK; + EVT PartVT = VT; + do { + // Iterate until we've found a legal (part) type to hold VT. + LK = getTypeConversion(Context, PartVT); + PartVT = LK.second; + } while (LK.first != TypeLegal); + + NumIntermediates = + VT.getVectorElementCount().Min / PartVT.getVectorElementCount().Min; + + // FIXME: This code needs to be extended to handle more complex vector + // breakdowns, like nxv7i64 -> nxv8i64 -> 4 x nxv2i64. 
Currently the only + // supported cases are vectors that are broken down into equal parts + // such as nxv6i64 -> 3 x nxv2i64. + assert(NumIntermediates * PartVT.getVectorElementCount().Min == + VT.getVectorElementCount().Min && + "Expected an integer multiple of PartVT"); + IntermediateVT = PartVT; + RegisterVT = getRegisterType(Context, IntermediateVT); + return NumIntermediates; + } + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally + // we could break down into LHS/RHS like LegalizeDAG does. + if (!isPowerOf2_32(EltCnt.Min)) { + NumVectorRegs = EltCnt.Min; + EltCnt.Min = 1; } // Divide the input until we get to a supported size. This will always // end with a scalar if the target doesn't support vectors. - while (NumElts > 1 && !isTypeLegal( - EVT::getVectorVT(Context, EltTy, NumElts))) { - NumElts >>= 1; + while (EltCnt.Min > 1 && + !isTypeLegal(EVT::getVectorVT(Context, EltTy, EltCnt))) { + EltCnt.Min >>= 1; NumVectorRegs <<= 1; } NumIntermediates = NumVectorRegs; - EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts); + EVT NewVT = EVT::getVectorVT(Context, EltTy, EltCnt); if (!isTypeLegal(NewVT)) NewVT = EltTy; IntermediateVT = NewVT; MVT DestVT = getRegisterType(Context, NewVT); RegisterVT = DestVT; - unsigned NewVTSize = NewVT.getSizeInBits(); - // Convert sizes such as i33 to i64. - if (!isPowerOf2_32(NewVTSize)) - NewVTSize = NextPowerOf2(NewVTSize); - - if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. + if (EVT(DestVT).bitsLT(NewVT)) { // Value is expanded, e.g. i64 -> i16. + TypeSize NewVTSize = NewVT.getSizeInBits(); + // Convert sizes such as i33 to i64. + if (!isPowerOf2_32(NewVTSize.getKnownMinSize())) + NewVTSize = NewVTSize.NextPowerOf2(); return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); + } // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. @@ -1517,19 +1568,19 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType, /// alignment, not its logarithm. unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty, const DataLayout &DL) const { - return DL.getABITypeAlignment(Ty); + return DL.getABITypeAlign(Ty).value(); } bool TargetLoweringBase::allowsMemoryAccessForAlignment( LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, - unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast) const { + Align Alignment, MachineMemOperand::Flags Flags, bool *Fast) const { // Check if the specified alignment is sufficient based on the data layout. // TODO: While using the data layout works in practice, a better solution // would be to implement this check directly (make this a virtual function). // For example, the ABI alignment may change based on software platform while // this function should only be affected by hardware implementation. Type *Ty = VT.getTypeForEVT(Context); - if (Alignment >= DL.getABITypeAlignment(Ty)) { + if (Alignment >= DL.getABITypeAlign(Ty)) { // Assume that an access that meets the ABI-specified alignment is fast. if (Fast != nullptr) *Fast = true; @@ -1537,20 +1588,22 @@ bool TargetLoweringBase::allowsMemoryAccessForAlignment( } // This is a misaligned access. 
- return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast); + return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment.value(), Flags, + Fast); } bool TargetLoweringBase::allowsMemoryAccessForAlignment( LLVMContext &Context, const DataLayout &DL, EVT VT, const MachineMemOperand &MMO, bool *Fast) const { return allowsMemoryAccessForAlignment(Context, DL, VT, MMO.getAddrSpace(), - MMO.getAlignment(), MMO.getFlags(), - Fast); + MMO.getAlign(), MMO.getFlags(), Fast); } -bool TargetLoweringBase::allowsMemoryAccess( - LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, - unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast) const { +bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, + const DataLayout &DL, EVT VT, + unsigned AddrSpace, Align Alignment, + MachineMemOperand::Flags Flags, + bool *Fast) const { return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace, Alignment, Flags, Fast); } @@ -1559,8 +1612,8 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, const MachineMemOperand &MMO, bool *Fast) const { - return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), - MMO.getAlignment(), MMO.getFlags(), Fast); + return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(), + MMO.getFlags(), Fast); } BranchProbability TargetLoweringBase::getPredictableBranchThreshold() const { @@ -1644,7 +1697,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { case ExtractValue: return ISD::MERGE_VALUES; case InsertValue: return ISD::MERGE_VALUES; case LandingPad: return 0; - case Freeze: return 0; + case Freeze: return ISD::FREEZE; } llvm_unreachable("Unknown instruction type encountered!"); @@ -1818,6 +1871,10 @@ void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) { MaximumJumpTableSize = Val; } +bool TargetLoweringBase::isJumpTableRelative() const { + return getTargetMachine().isPositionIndependent(); +} + //===----------------------------------------------------------------------===// // Reciprocal Estimates //===----------------------------------------------------------------------===// @@ -2005,3 +2062,119 @@ int TargetLoweringBase::getDivRefinementSteps(EVT VT, void TargetLoweringBase::finalizeLowering(MachineFunction &MF) const { MF.getRegInfo().freezeReservedRegs(MF); } + +MachineMemOperand::Flags +TargetLoweringBase::getLoadMemOperandFlags(const LoadInst &LI, + const DataLayout &DL) const { + MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad; + if (LI.isVolatile()) + Flags |= MachineMemOperand::MOVolatile; + + if (LI.hasMetadata(LLVMContext::MD_nontemporal)) + Flags |= MachineMemOperand::MONonTemporal; + + if (LI.hasMetadata(LLVMContext::MD_invariant_load)) + Flags |= MachineMemOperand::MOInvariant; + + if (isDereferenceablePointer(LI.getPointerOperand(), LI.getType(), DL)) + Flags |= MachineMemOperand::MODereferenceable; + + Flags |= getTargetMMOFlags(LI); + return Flags; +} + +MachineMemOperand::Flags +TargetLoweringBase::getStoreMemOperandFlags(const StoreInst &SI, + const DataLayout &DL) const { + MachineMemOperand::Flags Flags = MachineMemOperand::MOStore; + + if (SI.isVolatile()) + Flags |= MachineMemOperand::MOVolatile; + + if (SI.hasMetadata(LLVMContext::MD_nontemporal)) + Flags |= MachineMemOperand::MONonTemporal; + + // FIXME: Not preserving dereferenceable + Flags |= getTargetMMOFlags(SI); + return Flags; +} + +MachineMemOperand::Flags +TargetLoweringBase::getAtomicMemOperandFlags(const Instruction 
&AI, + const DataLayout &DL) const { + auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + + if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(&AI)) { + if (RMW->isVolatile()) + Flags |= MachineMemOperand::MOVolatile; + } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(&AI)) { + if (CmpX->isVolatile()) + Flags |= MachineMemOperand::MOVolatile; + } else + llvm_unreachable("not an atomic instruction"); + + // FIXME: Not preserving dereferenceable + Flags |= getTargetMMOFlags(AI); + return Flags; +} + +//===----------------------------------------------------------------------===// +// GlobalISel Hooks +//===----------------------------------------------------------------------===// + +bool TargetLoweringBase::shouldLocalize(const MachineInstr &MI, + const TargetTransformInfo *TTI) const { + auto &MF = *MI.getMF(); + auto &MRI = MF.getRegInfo(); + // Assuming a spill and reload of a value has a cost of 1 instruction each, + // this helper function computes the maximum number of uses we should consider + // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We + // break even in terms of code size when the original MI has 2 users vs + // choosing to potentially spill. Any more than 2 users we we have a net code + // size increase. This doesn't take into account register pressure though. + auto maxUses = [](unsigned RematCost) { + // A cost of 1 means remats are basically free. + if (RematCost == 1) + return UINT_MAX; + if (RematCost == 2) + return 2U; + + // Remat is too expensive, only sink if there's one user. + if (RematCost > 2) + return 1U; + llvm_unreachable("Unexpected remat cost"); + }; + + // Helper to walk through uses and terminate if we've reached a limit. Saves + // us spending time traversing uses if all we want to know is if it's >= min. + auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) { + unsigned NumUses = 0; + auto UI = MRI.use_instr_nodbg_begin(Reg), UE = MRI.use_instr_nodbg_end(); + for (; UI != UE && NumUses < MaxUses; ++UI) { + NumUses++; + } + // If we haven't reached the end yet then there are more than MaxUses users. + return UI == UE; + }; + + switch (MI.getOpcode()) { + default: + return false; + // Constants-like instructions should be close to their users. + // We don't want long live-ranges for them. + case TargetOpcode::G_CONSTANT: + case TargetOpcode::G_FCONSTANT: + case TargetOpcode::G_FRAME_INDEX: + case TargetOpcode::G_INTTOPTR: + return true; + case TargetOpcode::G_GLOBAL_VALUE: { + unsigned RematCost = TTI->getGISelRematGlobalCost(); + Register Reg = MI.getOperand(0).getReg(); + unsigned MaxUses = maxUses(RematCost); + if (MaxUses == UINT_MAX) + return true; // Remats are "free" so always localize. 
+ bool B = isUsesAtMost(Reg, MaxUses); + return B; + } + } +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 8cb9814300d1..27bebe503ce6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -21,12 +21,16 @@ #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalObject.h" @@ -52,8 +56,8 @@ #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" -#include "llvm/Support/Format.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include <cassert> @@ -84,6 +88,15 @@ static void GetObjCImageInfo(Module &M, unsigned &Version, unsigned &Flags, } else if (Key == "Objective-C Image Info Section") { Section = cast<MDString>(MFE.Val)->getString(); } + // Backend generates L_OBJC_IMAGE_INFO from Swift ABI version + major + minor + + // "Objective-C Garbage Collection". + else if (Key == "Swift ABI Version") { + Flags |= (mdconst::extract<ConstantInt>(MFE.Val)->getZExtValue()) << 8; + } else if (Key == "Swift Major Version") { + Flags |= (mdconst::extract<ConstantInt>(MFE.Val)->getZExtValue()) << 24; + } else if (Key == "Swift Minor Version") { + Flags |= (mdconst::extract<ConstantInt>(MFE.Val)->getZExtValue()) << 16; + } } } @@ -97,6 +110,7 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, TM = &TgtM; CodeModel::Model CM = TgtM.getCodeModel(); + InitializeELF(TgtM.Options.UseInitArray); switch (TgtM.getTargetTriple().getArch()) { case Triple::arm: @@ -277,8 +291,8 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, if (cast<MDNode>(Operand)->getNumOperands() != 2) report_fatal_error("invalid llvm.linker.options"); for (const auto &Option : cast<MDNode>(Operand)->operands()) { - Streamer.EmitBytes(cast<MDString>(Option)->getString()); - Streamer.EmitIntValue(0, 1); + Streamer.emitBytes(cast<MDString>(Option)->getString()); + Streamer.emitInt8(0); } } } @@ -290,9 +304,9 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, Streamer.SwitchSection(S); for (const auto *Operand : DependentLibraries->operands()) { - Streamer.EmitBytes( + Streamer.emitBytes( cast<MDString>(cast<MDNode>(Operand)->getOperand(0))->getString()); - Streamer.EmitIntValue(0, 1); + Streamer.emitInt8(0); } } @@ -304,9 +318,9 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, if (!Section.empty()) { auto *S = C.getELFSection(Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC); Streamer.SwitchSection(S); - Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); - Streamer.EmitIntValue(Version, 4); - Streamer.EmitIntValue(Flags, 4); + Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); + Streamer.emitInt32(Version); + 
Streamer.emitInt32(Flags); Streamer.AddBlankLine(); } @@ -370,20 +384,20 @@ void TargetLoweringObjectFileELF::emitPersonalityValue( NameData += Sym->getName(); MCSymbolELF *Label = cast<MCSymbolELF>(getContext().getOrCreateSymbol(NameData)); - Streamer.EmitSymbolAttribute(Label, MCSA_Hidden); - Streamer.EmitSymbolAttribute(Label, MCSA_Weak); + Streamer.emitSymbolAttribute(Label, MCSA_Hidden); + Streamer.emitSymbolAttribute(Label, MCSA_Weak); unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP; MCSection *Sec = getContext().getELFNamedSection(".data", Label->getName(), ELF::SHT_PROGBITS, Flags, 0); unsigned Size = DL.getPointerSize(); Streamer.SwitchSection(Sec); - Streamer.EmitValueToAlignment(DL.getPointerABIAlignment(0).value()); - Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); + Streamer.emitValueToAlignment(DL.getPointerABIAlignment(0).value()); + Streamer.emitSymbolAttribute(Label, MCSA_ELF_TypeObject); const MCExpr *E = MCConstantExpr::create(Size, getContext()); Streamer.emitELFSize(Label, E); - Streamer.EmitLabel(Label); + Streamer.emitLabel(Label); - Streamer.EmitSymbolValue(Sym, Size); + Streamer.emitSymbolValue(Sym, Size); } const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference( @@ -420,6 +434,8 @@ static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { // .section .eh_frame,"a",@progbits if (Name == getInstrProfSectionName(IPSK_covmap, Triple::ELF, + /*AddSegmentInfo=*/false) || + Name == getInstrProfSectionName(IPSK_covfun, Triple::ELF, /*AddSegmentInfo=*/false)) return SectionKind::getMetadata(); @@ -512,8 +528,8 @@ static const Comdat *getELFComdat(const GlobalValue *GV) { return C; } -static const MCSymbolELF *getAssociatedSymbol(const GlobalObject *GO, - const TargetMachine &TM) { +static const MCSymbolELF *getLinkedToSymbol(const GlobalObject *GO, + const TargetMachine &TM) { MDNode *MD = GO->getMetadata(LLVMContext::MD_associated); if (!MD) return nullptr; @@ -554,6 +570,75 @@ static unsigned getEntrySizeForKind(SectionKind Kind) { } } +/// Return the section prefix name used by options FunctionsSections and +/// DataSections. +static StringRef getSectionPrefixForGlobal(SectionKind Kind) { + if (Kind.isText()) + return ".text"; + if (Kind.isReadOnly()) + return ".rodata"; + if (Kind.isBSS()) + return ".bss"; + if (Kind.isThreadData()) + return ".tdata"; + if (Kind.isThreadBSS()) + return ".tbss"; + if (Kind.isData()) + return ".data"; + if (Kind.isReadOnlyWithRel()) + return ".data.rel.ro"; + llvm_unreachable("Unknown section kind"); +} + +static SmallString<128> +getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind, + Mangler &Mang, const TargetMachine &TM, + unsigned EntrySize, bool UniqueSectionName) { + SmallString<128> Name; + if (Kind.isMergeableCString()) { + // We also need alignment here. + // FIXME: this is getting the alignment of the character, not the + // alignment of the global! 
+ Align Alignment = GO->getParent()->getDataLayout().getPreferredAlign( + cast<GlobalVariable>(GO)); + + std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + "."; + Name = SizeSpec + utostr(Alignment.value()); + } else if (Kind.isMergeableConst()) { + Name = ".rodata.cst"; + Name += utostr(EntrySize); + } else { + Name = getSectionPrefixForGlobal(Kind); + } + + bool HasPrefix = false; + if (const auto *F = dyn_cast<Function>(GO)) { + if (Optional<StringRef> Prefix = F->getSectionPrefix()) { + Name += *Prefix; + HasPrefix = true; + } + } + + if (UniqueSectionName) { + Name.push_back('.'); + TM.getNameWithPrefix(Name, GO, Mang, /*MayAlwaysUsePrivate*/true); + } else if (HasPrefix) + Name.push_back('.'); + return Name; +} + +namespace { +class LoweringDiagnosticInfo : public DiagnosticInfo { + const Twine &Msg; + +public: + LoweringDiagnosticInfo(const Twine &DiagMsg, + DiagnosticSeverity Severity = DS_Error) + : DiagnosticInfo(DK_Lowering, Severity), Msg(DiagMsg) {} + void print(DiagnosticPrinter &DP) const override { DP << Msg; } +}; +} + MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { StringRef SectionName = GO->getSection(); @@ -589,42 +674,84 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( Flags |= ELF::SHF_GROUP; } + unsigned EntrySize = getEntrySizeForKind(Kind); + // A section can have at most one associated section. Put each global with // MD_associated in a unique section. unsigned UniqueID = MCContext::GenericSectionID; - const MCSymbolELF *AssociatedSymbol = getAssociatedSymbol(GO, TM); - if (AssociatedSymbol) { + const MCSymbolELF *LinkedToSym = getLinkedToSymbol(GO, TM); + if (LinkedToSym) { UniqueID = NextUniqueID++; Flags |= ELF::SHF_LINK_ORDER; + } else { + if (getContext().getAsmInfo()->useIntegratedAssembler()) { + // Symbols must be placed into sections with compatible entry + // sizes. Generate unique sections for symbols that have not + // been assigned to compatible sections. + if (Flags & ELF::SHF_MERGE) { + auto maybeID = getContext().getELFUniqueIDForEntsize(SectionName, Flags, + EntrySize); + if (maybeID) + UniqueID = *maybeID; + else { + // If the user has specified the same section name as would be created + // implicitly for this symbol e.g. .rodata.str1.1, then we don't need + // to unique the section as the entry size for this symbol will be + // compatible with implicitly created sections. + SmallString<128> ImplicitSectionNameStem = getELFSectionNameForGlobal( + GO, Kind, getMangler(), TM, EntrySize, false); + if (!(getContext().isELFImplicitMergeableSectionNamePrefix( + SectionName) && + SectionName.startswith(ImplicitSectionNameStem))) + UniqueID = NextUniqueID++; + } + } else { + // We need to unique the section if the user has explicity + // assigned a non-mergeable symbol to a section name for + // a generic mergeable section. + if (getContext().isELFGenericMergeableSection(SectionName)) { + auto maybeID = getContext().getELFUniqueIDForEntsize( + SectionName, Flags, EntrySize); + UniqueID = maybeID ? *maybeID : NextUniqueID++; + } + } + } else { + // If two symbols with differing sizes end up in the same mergeable + // section that section can be assigned an incorrect entry size. To avoid + // this we usually put symbols of the same size into distinct mergeable + // sections with the same name. Doing so relies on the ",unique ," + // assembly feature. 
This feature is not avalible until bintuils + // version 2.35 (https://sourceware.org/bugzilla/show_bug.cgi?id=25380). + Flags &= ~ELF::SHF_MERGE; + EntrySize = 0; + } } MCSectionELF *Section = getContext().getELFSection( SectionName, getELFSectionType(SectionName, Kind), Flags, - getEntrySizeForKind(Kind), Group, UniqueID, AssociatedSymbol); + EntrySize, Group, UniqueID, LinkedToSym); // Make sure that we did not get some other section with incompatible sh_link. // This should not be possible due to UniqueID code above. - assert(Section->getAssociatedSymbol() == AssociatedSymbol && + assert(Section->getLinkedToSymbol() == LinkedToSym && "Associated symbol mismatch between sections"); - return Section; -} -/// Return the section prefix name used by options FunctionsSections and -/// DataSections. -static StringRef getSectionPrefixForGlobal(SectionKind Kind) { - if (Kind.isText()) - return ".text"; - if (Kind.isReadOnly()) - return ".rodata"; - if (Kind.isBSS()) - return ".bss"; - if (Kind.isThreadData()) - return ".tdata"; - if (Kind.isThreadBSS()) - return ".tbss"; - if (Kind.isData()) - return ".data"; - assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return ".data.rel.ro"; + if (!getContext().getAsmInfo()->useIntegratedAssembler()) { + // If we are not using the integrated assembler then this symbol might have + // been placed in an incompatible mergeable section. Emit an error if this + // is the case to avoid creating broken output. + if ((Section->getFlags() & ELF::SHF_MERGE) && + (Section->getEntrySize() != getEntrySizeForKind(Kind))) + GO->getContext().diagnose(LoweringDiagnosticInfo( + "Symbol '" + GO->getName() + "' from module '" + + (GO->getParent() ? GO->getParent()->getSourceFileName() : "unknown") + + "' required a section with entry-size=" + + Twine(getEntrySizeForKind(Kind)) + " but was placed in section '" + + SectionName + "' with entry-size=" + Twine(Section->getEntrySize()) + + ": Explicit assignment by pragma or attribute of an incompatible " + "symbol to this section?")); + } + + return Section; } static MCSectionELF *selectELFSectionForGlobal( @@ -641,39 +768,19 @@ static MCSectionELF *selectELFSectionForGlobal( // Get the section entry size based on the kind. unsigned EntrySize = getEntrySizeForKind(Kind); - SmallString<128> Name; - if (Kind.isMergeableCString()) { - // We also need alignment here. - // FIXME: this is getting the alignment of the character, not the - // alignment of the global! - unsigned Align = GO->getParent()->getDataLayout().getPreferredAlignment( - cast<GlobalVariable>(GO)); - - std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + "."; - Name = SizeSpec + utostr(Align); - } else if (Kind.isMergeableConst()) { - Name = ".rodata.cst"; - Name += utostr(EntrySize); - } else { - Name = getSectionPrefixForGlobal(Kind); - } - - if (const auto *F = dyn_cast<Function>(GO)) { - const auto &OptionalPrefix = F->getSectionPrefix(); - if (OptionalPrefix) - Name += *OptionalPrefix; - } - + bool UniqueSectionName = false; unsigned UniqueID = MCContext::GenericSectionID; if (EmitUniqueSection) { if (TM.getUniqueSectionNames()) { - Name.push_back('.'); - TM.getNameWithPrefix(Name, GO, Mang, true /*MayAlwaysUsePrivate*/); + UniqueSectionName = true; } else { UniqueID = *NextUniqueID; (*NextUniqueID)++; } } + SmallString<128> Name = getELFSectionNameForGlobal( + GO, Kind, Mang, TM, EntrySize, UniqueSectionName); + // Use 0 as the unique ID for execute-only text. 
if (Kind.isExecuteOnly()) UniqueID = 0; @@ -696,16 +803,16 @@ MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal( } EmitUniqueSection |= GO->hasComdat(); - const MCSymbolELF *AssociatedSymbol = getAssociatedSymbol(GO, TM); - if (AssociatedSymbol) { + const MCSymbolELF *LinkedToSym = getLinkedToSymbol(GO, TM); + if (LinkedToSym) { EmitUniqueSection = true; Flags |= ELF::SHF_LINK_ORDER; } MCSectionELF *Section = selectELFSectionForGlobal( getContext(), GO, Kind, getMangler(), TM, EmitUniqueSection, Flags, - &NextUniqueID, AssociatedSymbol); - assert(Section->getAssociatedSymbol() == AssociatedSymbol); + &NextUniqueID, LinkedToSym); + assert(Section->getLinkedToSymbol() == LinkedToSym); return Section; } @@ -735,7 +842,7 @@ bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection( /// information, return a section that it should be placed in. MCSection *TargetLoweringObjectFileELF::getSectionForConstant( const DataLayout &DL, SectionKind Kind, const Constant *C, - unsigned &Align) const { + Align &Alignment) const { if (Kind.isMergeableConst4() && MergeableConst4Section) return MergeableConst4Section; if (Kind.isMergeableConst8() && MergeableConst8Section) @@ -751,6 +858,46 @@ MCSection *TargetLoweringObjectFileELF::getSectionForConstant( return DataRelROSection; } +/// Returns a unique section for the given machine basic block. +MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock( + const Function &F, const MachineBasicBlock &MBB, + const TargetMachine &TM) const { + assert(MBB.isBeginSection() && "Basic block does not start a section!"); + unsigned UniqueID = MCContext::GenericSectionID; + + // For cold sections use the .text.unlikely prefix along with the parent + // function name. All cold blocks for the same function go to the same + // section. Similarly all exception blocks are grouped by symbol name + // under the .text.eh prefix. For regular sections, we either use a unique + // name, or a unique ID for the section. 
+ SmallString<128> Name; + if (MBB.getSectionID() == MBBSectionID::ColdSectionID) { + Name += ".text.unlikely."; + Name += MBB.getParent()->getName(); + } else if (MBB.getSectionID() == MBBSectionID::ExceptionSectionID) { + Name += ".text.eh."; + Name += MBB.getParent()->getName(); + } else { + Name += MBB.getParent()->getSection()->getName(); + if (TM.getUniqueBasicBlockSectionNames()) { + Name += "."; + Name += MBB.getSymbol()->getName(); + } else { + UniqueID = NextUniqueID++; + } + } + + unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_EXECINSTR; + std::string GroupName = ""; + if (F.hasComdat()) { + Flags |= ELF::SHF_GROUP; + GroupName = F.getComdat()->getName().str(); + } + return getContext().getELFSection(Name, ELF::SHT_PROGBITS, Flags, + 0 /* Entry Size */, GroupName, UniqueID, + nullptr); +} + static MCSectionELF *getStaticStructorSection(MCContext &Ctx, bool UseInitArray, bool IsCtor, unsigned Priority, const MCSymbol *KeySym) { @@ -888,8 +1035,8 @@ void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer, for (const auto *Option : LinkerOptions->operands()) { SmallVector<std::string, 4> StrOptions; for (const auto &Piece : cast<MDNode>(Option)->operands()) - StrOptions.push_back(cast<MDString>(Piece)->getString()); - Streamer.EmitLinkerOptions(StrOptions); + StrOptions.push_back(std::string(cast<MDString>(Piece)->getString())); + Streamer.emitLinkerOptions(StrOptions); } } @@ -918,10 +1065,10 @@ void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer, MCSectionMachO *S = getContext().getMachOSection( Segment, Section, TAA, StubSize, SectionKind::getData()); Streamer.SwitchSection(S); - Streamer.EmitLabel(getContext(). + Streamer.emitLabel(getContext(). getOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO"))); - Streamer.EmitIntValue(VersionVal, 4); - Streamer.EmitIntValue(ImageInfoFlags, 4); + Streamer.emitInt32(VersionVal); + Streamer.emitInt32(ImageInfoFlags); Streamer.AddBlankLine(); } @@ -998,16 +1145,16 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal( // FIXME: Alignment check should be handled by section classifier. if (Kind.isMergeable1ByteCString() && - GO->getParent()->getDataLayout().getPreferredAlignment( - cast<GlobalVariable>(GO)) < 32) + GO->getParent()->getDataLayout().getPreferredAlign( + cast<GlobalVariable>(GO)) < Align(32)) return CStringSection; // Do not put 16-bit arrays in the UString section if they have an // externally visible label, this runs into issues with certain linker // versions. if (Kind.isMergeable2ByteCString() && !GO->hasExternalLinkage() && - GO->getParent()->getDataLayout().getPreferredAlignment( - cast<GlobalVariable>(GO)) < 32) + GO->getParent()->getDataLayout().getPreferredAlign( + cast<GlobalVariable>(GO)) < Align(32)) return UStringSection; // With MachO only variables whose corresponding symbol starts with 'l' or @@ -1047,7 +1194,7 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal( MCSection *TargetLoweringObjectFileMachO::getSectionForConstant( const DataLayout &DL, SectionKind Kind, const Constant *C, - unsigned &Align) const { + Align &Alignment) const { // If this constant requires a relocation, we have to put it in the data // segment, not in the text segment. if (Kind.isData() || Kind.isReadOnlyWithRel()) @@ -1453,8 +1600,8 @@ void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer, for (const auto &Piece : cast<MDNode>(Option)->operands()) { // Lead with a space for consistency with our dllexport implementation. 
std::string Directive(" "); - Directive.append(cast<MDString>(Piece)->getString()); - Streamer.EmitBytes(Directive); + Directive.append(std::string(cast<MDString>(Piece)->getString())); + Streamer.emitBytes(Directive); } } } @@ -1472,9 +1619,9 @@ void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer, Section, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getReadOnly()); Streamer.SwitchSection(S); - Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); - Streamer.EmitIntValue(Version, 4); - Streamer.EmitIntValue(Flags, 4); + Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); + Streamer.emitInt32(Version); + Streamer.emitInt32(Flags); Streamer.AddBlankLine(); } @@ -1599,7 +1746,7 @@ const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference( static std::string APIntToHexString(const APInt &AI) { unsigned Width = (AI.getBitWidth() / 8) * 2; std::string HexString = AI.toString(16, /*Signed=*/false); - transform(HexString.begin(), HexString.end(), HexString.begin(), tolower); + llvm::transform(HexString, HexString.begin(), tolower); unsigned Size = HexString.size(); assert(Width >= Size && "hex string is too large!"); HexString.insert(HexString.begin(), Width - Size, '0'); @@ -1617,8 +1764,8 @@ static std::string scalarConstantToHexString(const Constant *C) { return APIntToHexString(CI->getValue()); } else { unsigned NumElements; - if (isa<VectorType>(Ty)) - NumElements = Ty->getVectorNumElements(); + if (auto *VTy = dyn_cast<VectorType>(Ty)) + NumElements = cast<FixedVectorType>(VTy)->getNumElements(); else NumElements = Ty->getArrayNumElements(); std::string HexString; @@ -1630,7 +1777,7 @@ static std::string scalarConstantToHexString(const Constant *C) { MCSection *TargetLoweringObjectFileCOFF::getSectionForConstant( const DataLayout &DL, SectionKind Kind, const Constant *C, - unsigned &Align) const { + Align &Alignment) const { if (Kind.isMergeableConst() && C && getContext().getAsmInfo()->hasCOFFComdatConstants()) { // This creates comdat sections with the given symbol name, but unless @@ -1642,25 +1789,25 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForConstant( COFF::IMAGE_SCN_LNK_COMDAT; std::string COMDATSymName; if (Kind.isMergeableConst4()) { - if (Align <= 4) { + if (Alignment <= 4) { COMDATSymName = "__real@" + scalarConstantToHexString(C); - Align = 4; + Alignment = Align(4); } } else if (Kind.isMergeableConst8()) { - if (Align <= 8) { + if (Alignment <= 8) { COMDATSymName = "__real@" + scalarConstantToHexString(C); - Align = 8; + Alignment = Align(8); } } else if (Kind.isMergeableConst16()) { // FIXME: These may not be appropriate for non-x86 architectures. 
- if (Align <= 16) { + if (Alignment <= 16) { COMDATSymName = "__xmm@" + scalarConstantToHexString(C); - Align = 16; + Alignment = Align(16); } } else if (Kind.isMergeableConst32()) { - if (Align <= 32) { + if (Alignment <= 32) { COMDATSymName = "__ymm@" + scalarConstantToHexString(C); - Align = 32; + Alignment = Align(32); } } @@ -1670,10 +1817,10 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForConstant( COFF::IMAGE_COMDAT_SELECT_ANY); } - return TargetLoweringObjectFile::getSectionForConstant(DL, Kind, C, Align); + return TargetLoweringObjectFile::getSectionForConstant(DL, Kind, C, + Alignment); } - //===----------------------------------------------------------------------===// // Wasm //===----------------------------------------------------------------------===// @@ -1691,16 +1838,6 @@ static const Comdat *getWasmComdat(const GlobalValue *GV) { return C; } -static SectionKind getWasmKindForNamedSection(StringRef Name, SectionKind K) { - // If we're told we have function data, then use that. - if (K.isText()) - return SectionKind::getText(); - - // Otherwise, ignore whatever section type the generic impl detected and use - // a plain data section. - return SectionKind::getData(); -} - MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { // We don't support explict section names for functions in the wasm object @@ -1711,7 +1848,13 @@ MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal( StringRef Name = GO->getSection(); - Kind = getWasmKindForNamedSection(Name, Kind); + // Certain data sections we treat as named custom sections rather than + // segments within the data section. + // This could be avoided if all data segements (the wasm sense) were + // represented as their own sections (in the llvm sense). + // TODO(sbc): https://github.com/WebAssembly/tool-conventions/issues/138 + if (Name == ".llvmcmd" || Name == ".llvmbc") + Kind = SectionKind::getMetadata(); StringRef Group = ""; if (const Comdat *C = getWasmComdat(GO)) { @@ -1827,11 +1970,61 @@ MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection( //===----------------------------------------------------------------------===// // XCOFF //===----------------------------------------------------------------------===// +MCSymbol * +TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV, + const TargetMachine &TM) const { + if (TM.getDataSections()) + report_fatal_error("XCOFF unique data sections not yet implemented"); + + // We always use a qualname symbol for a GV that represents + // a declaration, a function descriptor, or a common symbol. + // It is inherently ambiguous when the GO represents the address of a + // function, as the GO could either represent a function descriptor or a + // function entry point. We choose to always return a function descriptor + // here. + if (const GlobalObject *GO = dyn_cast<GlobalObject>(GV)) { + if (GO->isDeclarationForLinker()) + return cast<MCSectionXCOFF>(getSectionForExternalReference(GO, TM)) + ->getQualNameSymbol(); + + SectionKind GOKind = getKindForGlobal(GO, TM); + if (GOKind.isText()) + return cast<MCSectionXCOFF>( + getSectionForFunctionDescriptor(cast<Function>(GO), TM)) + ->getQualNameSymbol(); + if (GOKind.isCommon() || GOKind.isBSSLocal()) + return cast<MCSectionXCOFF>(SectionForGlobal(GO, GOKind, TM)) + ->getQualNameSymbol(); + } + + // For all other cases, fall back to getSymbol to return the unqualified name. 
+ // This could change for a GV that is a GlobalVariable when we decide to + // support -fdata-sections since we could avoid having label symbols if the + // linkage name is applied to the csect symbol. + return nullptr; +} + MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { report_fatal_error("XCOFF explicit sections not yet implemented."); } +MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference( + const GlobalObject *GO, const TargetMachine &TM) const { + assert(GO->isDeclarationForLinker() && + "Tried to get ER section for a defined global."); + + SmallString<128> Name; + getNameWithPrefix(Name, GO, TM); + XCOFF::StorageClass SC = + TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO); + + // Externals go into a csect of type ER. + return getContext().getXCOFFSection( + Name, isa<Function>(GO) ? XCOFF::XMC_DS : XCOFF::XMC_UA, XCOFF::XTY_ER, + SC, SectionKind::getMetadata()); +} + MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { assert(!TM.getFunctionSections() && !TM.getDataSections() && @@ -1850,16 +2043,13 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal( } if (Kind.isMergeableCString()) { - if (!Kind.isMergeable1ByteCString()) - report_fatal_error("Unhandled multi-byte mergeable string kind."); - - unsigned Align = GO->getParent()->getDataLayout().getPreferredAlignment( + Align Alignment = GO->getParent()->getDataLayout().getPreferredAlign( cast<GlobalVariable>(GO)); unsigned EntrySize = getEntrySizeForKind(Kind); std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + "."; SmallString<128> Name; - Name = SizeSpec + utostr(Align); + Name = SizeSpec + utostr(Alignment.value()); return getContext().getXCOFFSection( Name, XCOFF::XMC_RO, XCOFF::XTY_SD, @@ -1906,7 +2096,7 @@ bool TargetLoweringObjectFileXCOFF::shouldPutJumpTableInFunctionSection( /// information, return a section that it should be placed in. MCSection *TargetLoweringObjectFileXCOFF::getSectionForConstant( const DataLayout &DL, SectionKind Kind, const Constant *C, - unsigned &Align) const { + Align &Alignment) const { //TODO: Enable emiting constant pool to unique sections when we support it. 
return ReadOnlySection; } @@ -1943,11 +2133,41 @@ XCOFF::StorageClass TargetLoweringObjectFileXCOFF::getStorageClassForGlobal( return XCOFF::C_HIDEXT; case GlobalValue::ExternalLinkage: case GlobalValue::CommonLinkage: + case GlobalValue::AvailableExternallyLinkage: return XCOFF::C_EXT; case GlobalValue::ExternalWeakLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: return XCOFF::C_WEAKEXT; - default: + case GlobalValue::AppendingLinkage: report_fatal_error( - "Unhandled linkage when mapping linkage to StorageClass."); + "There is no mapping that implements AppendingLinkage for XCOFF."); } + llvm_unreachable("Unknown linkage type!"); +} + +MCSymbol *TargetLoweringObjectFileXCOFF::getFunctionEntryPointSymbol( + const Function *F, const TargetMachine &TM) const { + SmallString<128> NameStr; + NameStr.push_back('.'); + getNameWithPrefix(NameStr, F, TM); + return getContext().getOrCreateSymbol(NameStr); +} + +MCSection *TargetLoweringObjectFileXCOFF::getSectionForFunctionDescriptor( + const Function *F, const TargetMachine &TM) const { + SmallString<128> NameStr; + getNameWithPrefix(NameStr, F, TM); + return getContext().getXCOFFSection(NameStr, XCOFF::XMC_DS, XCOFF::XTY_SD, + getStorageClassForGlobal(F), + SectionKind::getData()); +} + +MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry( + const MCSymbol *Sym) const { + return getContext().getXCOFFSection( + cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), XCOFF::XMC_TC, + XCOFF::XTY_SD, XCOFF::C_HIDEXT, SectionKind::getData()); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp index d794a261ecb2..4866d4c171c0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp @@ -45,3 +45,9 @@ bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const { bool TargetOptions::HonorSignDependentRoundingFPMath() const { return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption; } + +/// NOTE: There are targets that still do not support the debug entry values +/// production. +bool TargetOptions::ShouldEmitDebugEntryValues() const { + return SupportsDebugEntryValues || EnableDebugEntryValues; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp index d08d05d4b2ed..e0fdb0cefcb8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -114,6 +114,12 @@ static cl::opt<cl::boolOrDefault> VerifyMachineCode("verify-machineinstrs", cl::Hidden, cl::desc("Verify generated machine code"), cl::ZeroOrMore); +static cl::opt<cl::boolOrDefault> DebugifyAndStripAll( + "debugify-and-strip-all-safe", cl::Hidden, + cl::desc( + "Debugify MIR before and Strip debug after " + "each pass except those known to be unsafe when debug info is present"), + cl::ZeroOrMore); enum RunOutliner { AlwaysOutline, NeverOutline, TargetDefault }; // Enable or disable the MachineOutliner. 
static cl::opt<RunOutliner> EnableMachineOutliner( @@ -466,7 +472,7 @@ bool TargetPassConfig::hasLimitedCodeGenPipeline() { } std::string -TargetPassConfig::getLimitedCodeGenPipelineReason(const char *Separator) const { +TargetPassConfig::getLimitedCodeGenPipelineReason(const char *Separator) { if (!hasLimitedCodeGenPipeline()) return std::string(); std::string Res; @@ -530,17 +536,16 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) { if (StopBefore == PassID && StopBeforeCount++ == StopBeforeInstanceNum) Stopped = true; if (Started && !Stopped) { + if (AddingMachinePasses) + addMachinePrePasses(); std::string Banner; // Construct banner message before PM->add() as that may delete the pass. if (AddingMachinePasses && (printAfter || verifyAfter)) Banner = std::string("After ") + std::string(P->getPassName()); PM->add(P); - if (AddingMachinePasses) { - if (printAfter) - addPrintPass(Banner); - if (verifyAfter) - addVerifyPass(Banner); - } + if (AddingMachinePasses) + addMachinePostPasses(Banner, /*AllowPrint*/ printAfter, + /*AllowVerify*/ verifyAfter); // Add the passes after the pass P if there is any. for (auto IP : Impl->InsertedPasses) { @@ -606,45 +611,71 @@ void TargetPassConfig::addVerifyPass(const std::string &Banner) { PM->add(createMachineVerifierPass(Banner)); } +void TargetPassConfig::addDebugifyPass() { + PM->add(createDebugifyMachineModulePass()); +} + +void TargetPassConfig::addStripDebugPass() { + PM->add(createStripDebugMachineModulePass(/*OnlyDebugified=*/true)); +} + +void TargetPassConfig::addMachinePrePasses(bool AllowDebugify) { + if (AllowDebugify && DebugifyAndStripAll == cl::BOU_TRUE && DebugifyIsSafe) + addDebugifyPass(); +} + +void TargetPassConfig::addMachinePostPasses(const std::string &Banner, + bool AllowPrint, bool AllowVerify, + bool AllowStrip) { + if (DebugifyAndStripAll == cl::BOU_TRUE && DebugifyIsSafe) + addStripDebugPass(); + if (AllowPrint) + addPrintPass(Banner); + if (AllowVerify) + addVerifyPass(Banner); +} + /// Add common target configurable passes that perform LLVM IR to IR transforms /// following machine independent optimization. void TargetPassConfig::addIRPasses() { - switch (UseCFLAA) { - case CFLAAType::Steensgaard: - addPass(createCFLSteensAAWrapperPass()); - break; - case CFLAAType::Andersen: - addPass(createCFLAndersAAWrapperPass()); - break; - case CFLAAType::Both: - addPass(createCFLAndersAAWrapperPass()); - addPass(createCFLSteensAAWrapperPass()); - break; - default: - break; - } - - // Basic AliasAnalysis support. - // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that - // BasicAliasAnalysis wins if they disagree. This is intended to help - // support "obvious" type-punning idioms. - addPass(createTypeBasedAAWrapperPass()); - addPass(createScopedNoAliasAAWrapperPass()); - addPass(createBasicAAWrapperPass()); - // Before running any passes, run the verifier to determine if the input // coming from the front-end and/or optimizer is valid. if (!DisableVerify) addPass(createVerifierPass()); - // Run loop strength reduction before anything else. 
- if (getOptLevel() != CodeGenOpt::None && !DisableLSR) { - addPass(createLoopStrengthReducePass()); - if (PrintLSR) - addPass(createPrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n")); - } - if (getOptLevel() != CodeGenOpt::None) { + switch (UseCFLAA) { + case CFLAAType::Steensgaard: + addPass(createCFLSteensAAWrapperPass()); + break; + case CFLAAType::Andersen: + addPass(createCFLAndersAAWrapperPass()); + break; + case CFLAAType::Both: + addPass(createCFLAndersAAWrapperPass()); + addPass(createCFLSteensAAWrapperPass()); + break; + default: + break; + } + + // Basic AliasAnalysis support. + // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that + // BasicAliasAnalysis wins if they disagree. This is intended to help + // support "obvious" type-punning idioms. + addPass(createTypeBasedAAWrapperPass()); + addPass(createScopedNoAliasAAWrapperPass()); + addPass(createBasicAAWrapperPass()); + + // Run loop strength reduction before anything else. + if (!DisableLSR) { + addPass(createCanonicalizeFreezeInLoopsPass()); + addPass(createLoopStrengthReducePass()); + if (PrintLSR) + addPass(createPrintFunctionPass(dbgs(), + "\n\n*** Code after LSR ***\n")); + } + // The MergeICmpsPass tries to create memcmp calls by grouping sequences of // loads and compares. ExpandMemCmpPass then tries to expand those calls // into optimally-sized loads and compares. The transforms are enabled by a @@ -695,18 +726,18 @@ void TargetPassConfig::addPassesToHandleExceptions() { // removed from the parent invoke(s). This could happen when a landing // pad is shared by multiple invokes and is also a target of a normal // edge from elsewhere. - addPass(createSjLjEHPreparePass()); + addPass(createSjLjEHPreparePass(TM)); LLVM_FALLTHROUGH; case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: - addPass(createDwarfEHPass()); + addPass(createDwarfEHPass(getOptLevel())); break; case ExceptionHandling::WinEH: // We support using both GCC-style and MSVC-style exceptions on Windows, so // add both preparation passes. Each pass will only actually run if it // recognizes the personality function. addPass(createWinEHPass()); - addPass(createDwarfEHPass()); + addPass(createDwarfEHPass(getOptLevel())); break; case ExceptionHandling::Wasm: // Wasm EH uses Windows EH instructions, but it does not need to demote PHIs @@ -785,6 +816,19 @@ bool TargetPassConfig::addCoreISelPasses() { TM->setGlobalISel(true); } + // FIXME: Injecting into the DAGISel pipeline seems to cause issues with + // analyses needing to be re-run. This can result in being unable to + // schedule passes (particularly with 'Function Alias Analysis + // Results'). It's not entirely clear why but AFAICT this seems to be + // due to one FunctionPassManager not being able to use analyses from a + // previous one. As we're injecting a ModulePass we break the usual + // pass manager into two. GlobalISel with the fallback path disabled + // and -run-pass seem to be unaffected. The majority of GlobalISel + // testing uses -run-pass so this probably isn't too bad. + SaveAndRestore<bool> SavedDebugifyIsSafe(DebugifyIsSafe); + if (Selector != SelectorType::GlobalISel || !isGlobalISelAbortEnabled()) + DebugifyIsSafe = false; + // Add instruction selector passes. 
if (Selector == SelectorType::GlobalISel) { SaveAndRestore<bool> SavedAddingMachinePasses(AddingMachinePasses, true); @@ -892,7 +936,7 @@ void TargetPassConfig::addMachinePasses() { } else { // If the target requests it, assign local variables to stack slots relative // to one another and simplify frame index references where possible. - addPass(&LocalStackSlotAllocationID, false); + addPass(&LocalStackSlotAllocationID); } if (TM->Options.EnableIPRA) @@ -901,6 +945,11 @@ void TargetPassConfig::addMachinePasses() { // Run pre-ra passes. addPreRegAlloc(); + // Debugifying the register allocator passes seems to provoke some + // non-determinism that affects CodeGen and there doesn't seem to be a point + // where it becomes safe again so stop debugifying here. + DebugifyIsSafe = false; + // Run register allocation and passes that are tightly coupled with it, // including phi elimination and scheduling. if (getOptimizeRegAlloc()) @@ -911,6 +960,8 @@ void TargetPassConfig::addMachinePasses() { // Run post-ra passes. addPostRegAlloc(); + addPass(&FixupStatepointCallerSavedID); + // Insert prolog/epilog code. Eliminate abstract frame index references... if (getOptLevel() != CodeGenOpt::None) { addPass(&PostRAMachineSinkingID); @@ -957,10 +1008,10 @@ void TargetPassConfig::addMachinePasses() { addBlockPlacement(); // Insert before XRay Instrumentation. - addPass(&FEntryInserterID, false); + addPass(&FEntryInserterID); - addPass(&XRayInstrumentationID, false); - addPass(&PatchableFunctionID, false); + addPass(&XRayInstrumentationID); + addPass(&PatchableFunctionID); addPreEmitPass(); @@ -969,6 +1020,8 @@ void TargetPassConfig::addMachinePasses() { // clobbered registers, to be used to optimize call sites. addPass(createRegUsageInfoCollector()); + // FIXME: Some backends are incompatible with running the verifier after + // addPreEmitPass. Maybe only pass "false" here for those targets? addPass(&FuncletLayoutID, false); addPass(&StackMapLivenessID, false); @@ -983,6 +1036,9 @@ void TargetPassConfig::addMachinePasses() { addPass(createMachineOutlinerPass(RunOnAllFunctions)); } + if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) + addPass(llvm::createBBSectionsPreparePass(TM->getBBSectionsFuncListBuf())); + // Add passes that directly emit MI after all other MI passes. addPreEmitPass2(); @@ -996,15 +1052,15 @@ void TargetPassConfig::addMachineSSAOptimization() { // Optimize PHIs before DCE: removing dead PHI cycles may make more // instructions dead. - addPass(&OptimizePHIsID, false); + addPass(&OptimizePHIsID); // This pass merges large allocas. StackSlotColoring is a different pass // which merges spill slots. - addPass(&StackColoringID, false); + addPass(&StackColoringID); // If the target requests it, assign local variables to stack slots relative // to one another and simplify frame index references where possible. - addPass(&LocalStackSlotAllocationID, false); + addPass(&LocalStackSlotAllocationID); // With optimization, dead code should already be eliminated. However // there is one known exception: lowered code for arguments that are only @@ -1017,8 +1073,8 @@ void TargetPassConfig::addMachineSSAOptimization() { // loop info, just like LICM and CSE below. addILPOpts(); - addPass(&EarlyMachineLICMID, false); - addPass(&MachineCSEID, false); + addPass(&EarlyMachineLICMID); + addPass(&MachineCSEID); addPass(&MachineSinkingID); @@ -1110,6 +1166,7 @@ bool TargetPassConfig::addRegAssignmentOptimized() { // Finally rewrite virtual registers. 
addPass(&VirtRegRewriterID); + // Perform stack slot coloring and post-ra machine LICM. // // FIXME: Re-enable coloring with register when it's capable of adding diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp index e5592c31098a..e2ef12d8ac77 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -13,19 +13,22 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MachineValueType.h" @@ -39,6 +42,12 @@ using namespace llvm; +static cl::opt<unsigned> + HugeSizeForSplit("huge-size-for-split", cl::Hidden, + cl::desc("A threshold of live range size which may cause " + "high compile time cost in global splitting."), + cl::init(5000)); + TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, regclass_iterator RCB, regclass_iterator RCE, const char *const *SRINames, @@ -55,8 +64,19 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, TargetRegisterInfo::~TargetRegisterInfo() = default; -void TargetRegisterInfo::markSuperRegs(BitVector &RegisterSet, unsigned Reg) - const { +bool TargetRegisterInfo::shouldRegionSplitForVirtReg( + const MachineFunction &MF, const LiveInterval &VirtReg) const { + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + MachineInstr *MI = MRI.getUniqueVRegDef(VirtReg.reg); + if (MI && TII->isTriviallyReMaterializable(*MI) && + VirtReg.size() > HugeSizeForSplit) + return false; + return true; +} + +void TargetRegisterInfo::markSuperRegs(BitVector &RegisterSet, + MCRegister Reg) const { for (MCSuperRegIterator AI(Reg, this, true); AI.isValid(); ++AI) RegisterSet.set(*AI); } @@ -150,7 +170,7 @@ Printable printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { }); } -Printable printRegClassOrBank(unsigned Reg, const MachineRegisterInfo &RegInfo, +Printable printRegClassOrBank(Register Reg, const MachineRegisterInfo &RegInfo, const TargetRegisterInfo *TRI) { return Printable([Reg, &RegInfo, TRI](raw_ostream &OS) { if (RegInfo.getRegClassOrNull(Reg)) @@ -187,7 +207,7 @@ TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const { /// register of the given type, picking the most sub register class of /// the right type that contains this physreg. 
const TargetRegisterClass * -TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, MVT VT) const { +TargetRegisterInfo::getMinimalPhysRegClass(MCRegister reg, MVT VT) const { assert(Register::isPhysicalRegister(reg) && "reg must be a physical register"); @@ -379,18 +399,15 @@ bool TargetRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC, } // Compute target-independent register allocator hints to help eliminate copies. -bool -TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, - ArrayRef<MCPhysReg> Order, - SmallVectorImpl<MCPhysReg> &Hints, - const MachineFunction &MF, - const VirtRegMap *VRM, - const LiveRegMatrix *Matrix) const { +bool TargetRegisterInfo::getRegAllocationHints( + Register VirtReg, ArrayRef<MCPhysReg> Order, + SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF, + const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const { const MachineRegisterInfo &MRI = MF.getRegInfo(); - const std::pair<unsigned, SmallVector<unsigned, 4>> &Hints_MRI = + const std::pair<Register, SmallVector<Register, 4>> &Hints_MRI = MRI.getRegAllocationHints(VirtReg); - SmallSet<unsigned, 32> HintedRegs; + SmallSet<Register, 32> HintedRegs; // First hint may be a target hint. bool Skip = (Hints_MRI.first != 0); for (auto Reg : Hints_MRI.second) { @@ -400,8 +417,8 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, } // Target-independent hints are either a physical or a virtual register. - unsigned Phys = Reg; - if (VRM && Register::isVirtualRegister(Phys)) + Register Phys = Reg; + if (VRM && Phys.isVirtual()) Phys = VRM->getPhys(Phys); // Don't add the same reg twice (Hints_MRI may contain multiple virtual @@ -409,7 +426,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, if (!HintedRegs.insert(Phys).second) continue; // Check that Phys is a valid hint in VirtReg's register class. - if (!Register::isPhysicalRegister(Phys)) + if (!Phys.isPhysical()) continue; if (MRI.isReserved(Phys)) continue; @@ -426,7 +443,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, } bool TargetRegisterInfo::isCalleeSavedPhysReg( - unsigned PhysReg, const MachineFunction &MF) const { + MCRegister PhysReg, const MachineFunction &MF) const { if (PhysReg == 0) return false; const uint32_t *callerPreservedRegs = @@ -448,8 +465,8 @@ bool TargetRegisterInfo::needsStackRealignment( const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); const Function &F = MF.getFunction(); - unsigned StackAlign = TFI->getStackAlignment(); - bool requiresRealignment = ((MFI.getMaxAlignment() > StackAlign) || + Align StackAlign = TFI->getStackAlign(); + bool requiresRealignment = ((MFI.getMaxAlign() > StackAlign) || F.hasFnAttribute(Attribute::StackAlignment)); if (F.hasFnAttribute("stackrealign") || requiresRealignment) { if (canRealignStack(MF)) @@ -469,10 +486,11 @@ bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0, return true; } -unsigned TargetRegisterInfo::getRegSizeInBits(unsigned Reg, - const MachineRegisterInfo &MRI) const { +unsigned +TargetRegisterInfo::getRegSizeInBits(Register Reg, + const MachineRegisterInfo &MRI) const { const TargetRegisterClass *RC{}; - if (Register::isPhysicalRegister(Reg)) { + if (Reg.isPhysical()) { // The size is not directly available for physical registers. // Instead, we need to access a register class that contains Reg and // get the size of that register class. 
@@ -491,15 +509,15 @@ unsigned TargetRegisterInfo::getRegSizeInBits(unsigned Reg, return getRegSizeInBits(*RC); } -unsigned -TargetRegisterInfo::lookThruCopyLike(unsigned SrcReg, +Register +TargetRegisterInfo::lookThruCopyLike(Register SrcReg, const MachineRegisterInfo *MRI) const { while (true) { const MachineInstr *MI = MRI->getVRegDef(SrcReg); if (!MI->isCopyLike()) return SrcReg; - unsigned CopySrcReg; + Register CopySrcReg; if (MI->isCopy()) CopySrcReg = MI->getOperand(1).getReg(); else { @@ -507,7 +525,7 @@ TargetRegisterInfo::lookThruCopyLike(unsigned SrcReg, CopySrcReg = MI->getOperand(2).getReg(); } - if (!Register::isVirtualRegister(CopySrcReg)) + if (!CopySrcReg.isVirtual()) return CopySrcReg; SrcReg = CopySrcReg; @@ -516,7 +534,7 @@ TargetRegisterInfo::lookThruCopyLike(unsigned SrcReg, #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD -void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex, +void TargetRegisterInfo::dumpReg(Register Reg, unsigned SubRegIndex, const TargetRegisterInfo *TRI) { dbgs() << printReg(Reg, TRI, SubRegIndex) << "\n"; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 2b1ffab74b6f..de336abe607a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1238,21 +1238,18 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, Dist)) { MadeChange = true; ++NumCommuted; - if (AggressiveCommute) { + if (AggressiveCommute) ++NumAggrCommuted; - // There might be more than two commutable operands, update BaseOp and - // continue scanning. - // FIXME: This assumes that the new instruction's operands are in the - // same positions and were simply swapped. - BaseOpReg = OtherOpReg; - BaseOpKilled = OtherOpKilled; - // Resamples OpsNum in case the number of operands was reduced. This - // happens with X86. - OpsNum = MI->getDesc().getNumOperands(); - continue; - } - // If this was a commute based on kill, we won't do better continuing. - return MadeChange; + + // There might be more than two commutable operands, update BaseOp and + // continue scanning. + // FIXME: This assumes that the new instruction's operands are in the + // same positions and were simply swapped. + BaseOpReg = OtherOpReg; + BaseOpKilled = OtherOpKilled; + // Resamples OpsNum in case the number of operands was reduced. This + // happens with X86. + OpsNum = MI->getDesc().getNumOperands(); } } return MadeChange; @@ -1422,7 +1419,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, LV->addVirtualRegisterKilled(Reg, *NewMIs[1]); } - SmallVector<unsigned, 4> OrigRegs; + SmallVector<Register, 4> OrigRegs; if (LIS) { for (const MachineOperand &MO : MI.operands()) { if (MO.isReg()) @@ -1690,6 +1687,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { // This pass takes the function out of SSA form. MRI->leaveSSA(); + // This pass will rewrite the tied-def to meet the RegConstraint. 
+ MF->getProperties() + .set(MachineFunctionProperties::Property::TiedOpsRewritten); + TiedOperandMap TiedOperands; for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { @@ -1805,7 +1806,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { llvm_unreachable(nullptr); } - SmallVector<unsigned, 4> OrigRegs; + SmallVector<Register, 4> OrigRegs; if (LIS) { OrigRegs.push_back(MI.getOperand(0).getReg()); for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp index e8b39c037693..807babdcaf25 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp @@ -40,6 +40,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" #define DEBUG_TYPE "type-promotion" #define PASS_NAME "Type Promotion" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp index b770e1d94488..f5dc589a98cb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp @@ -81,7 +81,7 @@ namespace { class UnreachableMachineBlockElim : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override; - MachineModuleInfo *MMI; + public: static char ID; // Pass identification, replacement for typeid UnreachableMachineBlockElim() : MachineFunctionPass(ID) {} @@ -104,8 +104,6 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { df_iterator_default_set<MachineBasicBlock*> Reachable; bool ModifiedPHI = false; - auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); - MMI = MMIWP ? &MMIWP->getMMI() : nullptr; MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>(); MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); @@ -151,7 +149,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) { // Remove any call site information for calls in the block. 
for (auto &I : DeadBlocks[i]->instrs()) - if (I.isCall(MachineInstr::IgnoreBundle)) + if (I.shouldUpdateCallSiteInfo()) DeadBlocks[i]->getParent()->eraseCallSiteInfo(&I); DeadBlocks[i]->eraseFromParent(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp index 41cbdf035558..66bcdd9b2c4a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp @@ -22,7 +22,13 @@ EVT EVT::changeExtendedTypeToInteger() const { EVT EVT::changeExtendedVectorElementTypeToInteger() const { LLVMContext &Context = LLVMTy->getContext(); EVT IntTy = getIntegerVT(Context, getScalarSizeInBits()); - return getVectorVT(Context, IntTy, getVectorNumElements()); + return getVectorVT(Context, IntTy, getVectorNumElements(), + isScalableVector()); +} + +EVT EVT::changeExtendedVectorElementType(EVT EltVT) const { + LLVMContext &Context = LLVMTy->getContext(); + return getVectorVT(Context, EltVT, getVectorElementCount()); } EVT EVT::getExtendedIntegerVT(LLVMContext &Context, unsigned BitWidth) { @@ -32,10 +38,19 @@ EVT EVT::getExtendedIntegerVT(LLVMContext &Context, unsigned BitWidth) { return VT; } -EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT, - unsigned NumElements) { +EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, + bool IsScalable) { + EVT ResultVT; + ResultVT.LLVMTy = + VectorType::get(VT.getTypeForEVT(Context), NumElements, IsScalable); + assert(ResultVT.isExtended() && "Type is not extended!"); + return ResultVT; +} + +EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT, ElementCount EC) { EVT ResultVT; - ResultVT.LLVMTy = VectorType::get(VT.getTypeForEVT(Context), NumElements); + ResultVT.LLVMTy = + VectorType::get(VT.getTypeForEVT(Context), {EC.Min, EC.Scalable}); assert(ResultVT.isExtended() && "Type is not extended!"); return ResultVT; } @@ -92,6 +107,14 @@ bool EVT::isExtended2048BitVector() const { return isExtendedVector() && getExtendedSizeInBits() == 2048; } +bool EVT::isExtendedFixedLengthVector() const { + return isExtendedVector() && isa<FixedVectorType>(LLVMTy); +} + +bool EVT::isExtendedScalableVector() const { + return isExtendedVector() && isa<ScalableVectorType>(LLVMTy); +} + EVT EVT::getExtendedVectorElementType() const { assert(isExtended() && "Type is not extended!"); return EVT::getEVT(cast<VectorType>(LLVMTy)->getElementType()); @@ -99,7 +122,19 @@ EVT EVT::getExtendedVectorElementType() const { unsigned EVT::getExtendedVectorNumElements() const { assert(isExtended() && "Type is not extended!"); - return cast<VectorType>(LLVMTy)->getNumElements(); + ElementCount EC = cast<VectorType>(LLVMTy)->getElementCount(); + if (EC.Scalable) { + WithColor::warning() + << "The code that requested the fixed number of elements has made the " + "assumption that this vector is not scalable. This assumption was " + "not correct, and this may lead to broken code\n"; + } + return EC.Min; +} + +ElementCount EVT::getExtendedVectorElementCount() const { + assert(isExtended() && "Type is not extended!"); + return cast<VectorType>(LLVMTy)->getElementCount(); } TypeSize EVT::getExtendedSizeInBits() const { @@ -116,13 +151,15 @@ std::string EVT::getEVTString() const { switch (V.SimpleTy) { default: if (isVector()) - return (isScalableVector() ? "nxv" : "v") + utostr(getVectorNumElements()) + return (isScalableVector() ? 
"nxv" : "v") + + utostr(getVectorElementCount().Min) + getVectorElementType().getEVTString(); if (isInteger()) return "i" + utostr(getSizeInBits()); if (isFloatingPoint()) return "f" + utostr(getSizeInBits()); llvm_unreachable("Invalid EVT!"); + case MVT::bf16: return "bf16"; case MVT::ppcf128: return "ppcf128"; case MVT::isVoid: return "isVoid"; case MVT::Other: return "ch"; @@ -150,170 +187,285 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::i64: return Type::getInt64Ty(Context); case MVT::i128: return IntegerType::get(Context, 128); case MVT::f16: return Type::getHalfTy(Context); + case MVT::bf16: return Type::getBFloatTy(Context); case MVT::f32: return Type::getFloatTy(Context); case MVT::f64: return Type::getDoubleTy(Context); case MVT::f80: return Type::getX86_FP80Ty(Context); case MVT::f128: return Type::getFP128Ty(Context); case MVT::ppcf128: return Type::getPPC_FP128Ty(Context); case MVT::x86mmx: return Type::getX86_MMXTy(Context); - case MVT::v1i1: return VectorType::get(Type::getInt1Ty(Context), 1); - case MVT::v2i1: return VectorType::get(Type::getInt1Ty(Context), 2); - case MVT::v4i1: return VectorType::get(Type::getInt1Ty(Context), 4); - case MVT::v8i1: return VectorType::get(Type::getInt1Ty(Context), 8); - case MVT::v16i1: return VectorType::get(Type::getInt1Ty(Context), 16); - case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32); - case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64); - case MVT::v128i1: return VectorType::get(Type::getInt1Ty(Context), 128); - case MVT::v256i1: return VectorType::get(Type::getInt1Ty(Context), 256); - case MVT::v512i1: return VectorType::get(Type::getInt1Ty(Context), 512); - case MVT::v1024i1: return VectorType::get(Type::getInt1Ty(Context), 1024); - case MVT::v1i8: return VectorType::get(Type::getInt8Ty(Context), 1); - case MVT::v2i8: return VectorType::get(Type::getInt8Ty(Context), 2); - case MVT::v4i8: return VectorType::get(Type::getInt8Ty(Context), 4); - case MVT::v8i8: return VectorType::get(Type::getInt8Ty(Context), 8); - case MVT::v16i8: return VectorType::get(Type::getInt8Ty(Context), 16); - case MVT::v32i8: return VectorType::get(Type::getInt8Ty(Context), 32); - case MVT::v64i8: return VectorType::get(Type::getInt8Ty(Context), 64); - case MVT::v128i8: return VectorType::get(Type::getInt8Ty(Context), 128); - case MVT::v256i8: return VectorType::get(Type::getInt8Ty(Context), 256); - case MVT::v1i16: return VectorType::get(Type::getInt16Ty(Context), 1); - case MVT::v2i16: return VectorType::get(Type::getInt16Ty(Context), 2); - case MVT::v3i16: return VectorType::get(Type::getInt16Ty(Context), 3); - case MVT::v4i16: return VectorType::get(Type::getInt16Ty(Context), 4); - case MVT::v8i16: return VectorType::get(Type::getInt16Ty(Context), 8); - case MVT::v16i16: return VectorType::get(Type::getInt16Ty(Context), 16); - case MVT::v32i16: return VectorType::get(Type::getInt16Ty(Context), 32); - case MVT::v64i16: return VectorType::get(Type::getInt16Ty(Context), 64); - case MVT::v128i16: return VectorType::get(Type::getInt16Ty(Context), 128); - case MVT::v1i32: return VectorType::get(Type::getInt32Ty(Context), 1); - case MVT::v2i32: return VectorType::get(Type::getInt32Ty(Context), 2); - case MVT::v3i32: return VectorType::get(Type::getInt32Ty(Context), 3); - case MVT::v4i32: return VectorType::get(Type::getInt32Ty(Context), 4); - case MVT::v5i32: return VectorType::get(Type::getInt32Ty(Context), 5); - case MVT::v8i32: return VectorType::get(Type::getInt32Ty(Context), 8); - case 
MVT::v16i32: return VectorType::get(Type::getInt32Ty(Context), 16); - case MVT::v32i32: return VectorType::get(Type::getInt32Ty(Context), 32); - case MVT::v64i32: return VectorType::get(Type::getInt32Ty(Context), 64); - case MVT::v128i32: return VectorType::get(Type::getInt32Ty(Context), 128); - case MVT::v256i32: return VectorType::get(Type::getInt32Ty(Context), 256); - case MVT::v512i32: return VectorType::get(Type::getInt32Ty(Context), 512); - case MVT::v1024i32:return VectorType::get(Type::getInt32Ty(Context), 1024); - case MVT::v2048i32:return VectorType::get(Type::getInt32Ty(Context), 2048); - case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1); - case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2); - case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4); - case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8); - case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16); - case MVT::v32i64: return VectorType::get(Type::getInt64Ty(Context), 32); - case MVT::v1i128: return VectorType::get(Type::getInt128Ty(Context), 1); - case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2); - case MVT::v3f16: return VectorType::get(Type::getHalfTy(Context), 3); - case MVT::v4f16: return VectorType::get(Type::getHalfTy(Context), 4); - case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8); - case MVT::v16f16: return VectorType::get(Type::getHalfTy(Context), 16); - case MVT::v32f16: return VectorType::get(Type::getHalfTy(Context), 32); - case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1); - case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2); - case MVT::v3f32: return VectorType::get(Type::getFloatTy(Context), 3); - case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4); - case MVT::v5f32: return VectorType::get(Type::getFloatTy(Context), 5); - case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8); - case MVT::v16f32: return VectorType::get(Type::getFloatTy(Context), 16); - case MVT::v32f32: return VectorType::get(Type::getFloatTy(Context), 32); - case MVT::v64f32: return VectorType::get(Type::getFloatTy(Context), 64); - case MVT::v128f32: return VectorType::get(Type::getFloatTy(Context), 128); - case MVT::v256f32: return VectorType::get(Type::getFloatTy(Context), 256); - case MVT::v512f32: return VectorType::get(Type::getFloatTy(Context), 512); - case MVT::v1024f32:return VectorType::get(Type::getFloatTy(Context), 1024); - case MVT::v2048f32:return VectorType::get(Type::getFloatTy(Context), 2048); - case MVT::v1f64: return VectorType::get(Type::getDoubleTy(Context), 1); - case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2); - case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4); - case MVT::v8f64: return VectorType::get(Type::getDoubleTy(Context), 8); - case MVT::nxv1i1: - return VectorType::get(Type::getInt1Ty(Context), 1, /*Scalable=*/ true); - case MVT::nxv2i1: - return VectorType::get(Type::getInt1Ty(Context), 2, /*Scalable=*/ true); - case MVT::nxv4i1: - return VectorType::get(Type::getInt1Ty(Context), 4, /*Scalable=*/ true); - case MVT::nxv8i1: - return VectorType::get(Type::getInt1Ty(Context), 8, /*Scalable=*/ true); - case MVT::nxv16i1: - return VectorType::get(Type::getInt1Ty(Context), 16, /*Scalable=*/ true); - case MVT::nxv32i1: - return VectorType::get(Type::getInt1Ty(Context), 32, /*Scalable=*/ true); - case MVT::nxv1i8: - return VectorType::get(Type::getInt8Ty(Context), 1, 
/*Scalable=*/ true); - case MVT::nxv2i8: - return VectorType::get(Type::getInt8Ty(Context), 2, /*Scalable=*/ true); - case MVT::nxv4i8: - return VectorType::get(Type::getInt8Ty(Context), 4, /*Scalable=*/ true); - case MVT::nxv8i8: - return VectorType::get(Type::getInt8Ty(Context), 8, /*Scalable=*/ true); - case MVT::nxv16i8: - return VectorType::get(Type::getInt8Ty(Context), 16, /*Scalable=*/ true); - case MVT::nxv32i8: - return VectorType::get(Type::getInt8Ty(Context), 32, /*Scalable=*/ true); - case MVT::nxv1i16: - return VectorType::get(Type::getInt16Ty(Context), 1, /*Scalable=*/ true); - case MVT::nxv2i16: - return VectorType::get(Type::getInt16Ty(Context), 2, /*Scalable=*/ true); - case MVT::nxv4i16: - return VectorType::get(Type::getInt16Ty(Context), 4, /*Scalable=*/ true); - case MVT::nxv8i16: - return VectorType::get(Type::getInt16Ty(Context), 8, /*Scalable=*/ true); + case MVT::v1i1: + return FixedVectorType::get(Type::getInt1Ty(Context), 1); + case MVT::v2i1: + return FixedVectorType::get(Type::getInt1Ty(Context), 2); + case MVT::v4i1: + return FixedVectorType::get(Type::getInt1Ty(Context), 4); + case MVT::v8i1: + return FixedVectorType::get(Type::getInt1Ty(Context), 8); + case MVT::v16i1: + return FixedVectorType::get(Type::getInt1Ty(Context), 16); + case MVT::v32i1: + return FixedVectorType::get(Type::getInt1Ty(Context), 32); + case MVT::v64i1: + return FixedVectorType::get(Type::getInt1Ty(Context), 64); + case MVT::v128i1: + return FixedVectorType::get(Type::getInt1Ty(Context), 128); + case MVT::v256i1: + return FixedVectorType::get(Type::getInt1Ty(Context), 256); + case MVT::v512i1: + return FixedVectorType::get(Type::getInt1Ty(Context), 512); + case MVT::v1024i1: + return FixedVectorType::get(Type::getInt1Ty(Context), 1024); + case MVT::v1i8: + return FixedVectorType::get(Type::getInt8Ty(Context), 1); + case MVT::v2i8: + return FixedVectorType::get(Type::getInt8Ty(Context), 2); + case MVT::v4i8: + return FixedVectorType::get(Type::getInt8Ty(Context), 4); + case MVT::v8i8: + return FixedVectorType::get(Type::getInt8Ty(Context), 8); + case MVT::v16i8: + return FixedVectorType::get(Type::getInt8Ty(Context), 16); + case MVT::v32i8: + return FixedVectorType::get(Type::getInt8Ty(Context), 32); + case MVT::v64i8: + return FixedVectorType::get(Type::getInt8Ty(Context), 64); + case MVT::v128i8: + return FixedVectorType::get(Type::getInt8Ty(Context), 128); + case MVT::v256i8: + return FixedVectorType::get(Type::getInt8Ty(Context), 256); + case MVT::v1i16: + return FixedVectorType::get(Type::getInt16Ty(Context), 1); + case MVT::v2i16: + return FixedVectorType::get(Type::getInt16Ty(Context), 2); + case MVT::v3i16: + return FixedVectorType::get(Type::getInt16Ty(Context), 3); + case MVT::v4i16: + return FixedVectorType::get(Type::getInt16Ty(Context), 4); + case MVT::v8i16: + return FixedVectorType::get(Type::getInt16Ty(Context), 8); + case MVT::v16i16: + return FixedVectorType::get(Type::getInt16Ty(Context), 16); + case MVT::v32i16: + return FixedVectorType::get(Type::getInt16Ty(Context), 32); + case MVT::v64i16: + return FixedVectorType::get(Type::getInt16Ty(Context), 64); + case MVT::v128i16: + return FixedVectorType::get(Type::getInt16Ty(Context), 128); + case MVT::v1i32: + return FixedVectorType::get(Type::getInt32Ty(Context), 1); + case MVT::v2i32: + return FixedVectorType::get(Type::getInt32Ty(Context), 2); + case MVT::v3i32: + return FixedVectorType::get(Type::getInt32Ty(Context), 3); + case MVT::v4i32: + return FixedVectorType::get(Type::getInt32Ty(Context), 4); + case MVT::v5i32: 
+ return FixedVectorType::get(Type::getInt32Ty(Context), 5); + case MVT::v8i32: + return FixedVectorType::get(Type::getInt32Ty(Context), 8); + case MVT::v16i32: + return FixedVectorType::get(Type::getInt32Ty(Context), 16); + case MVT::v32i32: + return FixedVectorType::get(Type::getInt32Ty(Context), 32); + case MVT::v64i32: + return FixedVectorType::get(Type::getInt32Ty(Context), 64); + case MVT::v128i32: + return FixedVectorType::get(Type::getInt32Ty(Context), 128); + case MVT::v256i32: + return FixedVectorType::get(Type::getInt32Ty(Context), 256); + case MVT::v512i32: + return FixedVectorType::get(Type::getInt32Ty(Context), 512); + case MVT::v1024i32: + return FixedVectorType::get(Type::getInt32Ty(Context), 1024); + case MVT::v2048i32: + return FixedVectorType::get(Type::getInt32Ty(Context), 2048); + case MVT::v1i64: + return FixedVectorType::get(Type::getInt64Ty(Context), 1); + case MVT::v2i64: + return FixedVectorType::get(Type::getInt64Ty(Context), 2); + case MVT::v4i64: + return FixedVectorType::get(Type::getInt64Ty(Context), 4); + case MVT::v8i64: + return FixedVectorType::get(Type::getInt64Ty(Context), 8); + case MVT::v16i64: + return FixedVectorType::get(Type::getInt64Ty(Context), 16); + case MVT::v32i64: + return FixedVectorType::get(Type::getInt64Ty(Context), 32); + case MVT::v1i128: + return FixedVectorType::get(Type::getInt128Ty(Context), 1); + case MVT::v2f16: + return FixedVectorType::get(Type::getHalfTy(Context), 2); + case MVT::v3f16: + return FixedVectorType::get(Type::getHalfTy(Context), 3); + case MVT::v4f16: + return FixedVectorType::get(Type::getHalfTy(Context), 4); + case MVT::v8f16: + return FixedVectorType::get(Type::getHalfTy(Context), 8); + case MVT::v16f16: + return FixedVectorType::get(Type::getHalfTy(Context), 16); + case MVT::v32f16: + return FixedVectorType::get(Type::getHalfTy(Context), 32); + case MVT::v64f16: + return FixedVectorType::get(Type::getBFloatTy(Context), 64); + case MVT::v128f16: + return FixedVectorType::get(Type::getBFloatTy(Context), 128); + case MVT::v2bf16: + return FixedVectorType::get(Type::getBFloatTy(Context), 2); + case MVT::v3bf16: + return FixedVectorType::get(Type::getBFloatTy(Context), 3); + case MVT::v4bf16: + return FixedVectorType::get(Type::getBFloatTy(Context), 4); + case MVT::v8bf16: + return FixedVectorType::get(Type::getBFloatTy(Context), 8); + case MVT::v16bf16: + return FixedVectorType::get(Type::getBFloatTy(Context), 16); + case MVT::v32bf16: + return FixedVectorType::get(Type::getBFloatTy(Context), 32); + case MVT::v64bf16: + return FixedVectorType::get(Type::getBFloatTy(Context), 64); + case MVT::v128bf16: + return FixedVectorType::get(Type::getBFloatTy(Context), 128); + case MVT::v1f32: + return FixedVectorType::get(Type::getFloatTy(Context), 1); + case MVT::v2f32: + return FixedVectorType::get(Type::getFloatTy(Context), 2); + case MVT::v3f32: + return FixedVectorType::get(Type::getFloatTy(Context), 3); + case MVT::v4f32: + return FixedVectorType::get(Type::getFloatTy(Context), 4); + case MVT::v5f32: + return FixedVectorType::get(Type::getFloatTy(Context), 5); + case MVT::v8f32: + return FixedVectorType::get(Type::getFloatTy(Context), 8); + case MVT::v16f32: + return FixedVectorType::get(Type::getFloatTy(Context), 16); + case MVT::v32f32: + return FixedVectorType::get(Type::getFloatTy(Context), 32); + case MVT::v64f32: + return FixedVectorType::get(Type::getFloatTy(Context), 64); + case MVT::v128f32: + return FixedVectorType::get(Type::getFloatTy(Context), 128); + case MVT::v256f32: + return 
FixedVectorType::get(Type::getFloatTy(Context), 256); + case MVT::v512f32: + return FixedVectorType::get(Type::getFloatTy(Context), 512); + case MVT::v1024f32: + return FixedVectorType::get(Type::getFloatTy(Context), 1024); + case MVT::v2048f32: + return FixedVectorType::get(Type::getFloatTy(Context), 2048); + case MVT::v1f64: + return FixedVectorType::get(Type::getDoubleTy(Context), 1); + case MVT::v2f64: + return FixedVectorType::get(Type::getDoubleTy(Context), 2); + case MVT::v4f64: + return FixedVectorType::get(Type::getDoubleTy(Context), 4); + case MVT::v8f64: + return FixedVectorType::get(Type::getDoubleTy(Context), 8); + case MVT::v16f64: + return FixedVectorType::get(Type::getDoubleTy(Context), 16); + case MVT::v32f64: + return FixedVectorType::get(Type::getDoubleTy(Context), 32); + case MVT::nxv1i1: + return ScalableVectorType::get(Type::getInt1Ty(Context), 1); + case MVT::nxv2i1: + return ScalableVectorType::get(Type::getInt1Ty(Context), 2); + case MVT::nxv4i1: + return ScalableVectorType::get(Type::getInt1Ty(Context), 4); + case MVT::nxv8i1: + return ScalableVectorType::get(Type::getInt1Ty(Context), 8); + case MVT::nxv16i1: + return ScalableVectorType::get(Type::getInt1Ty(Context), 16); + case MVT::nxv32i1: + return ScalableVectorType::get(Type::getInt1Ty(Context), 32); + case MVT::nxv64i1: + return ScalableVectorType::get(Type::getInt1Ty(Context), 64); + case MVT::nxv1i8: + return ScalableVectorType::get(Type::getInt8Ty(Context), 1); + case MVT::nxv2i8: + return ScalableVectorType::get(Type::getInt8Ty(Context), 2); + case MVT::nxv4i8: + return ScalableVectorType::get(Type::getInt8Ty(Context), 4); + case MVT::nxv8i8: + return ScalableVectorType::get(Type::getInt8Ty(Context), 8); + case MVT::nxv16i8: + return ScalableVectorType::get(Type::getInt8Ty(Context), 16); + case MVT::nxv32i8: + return ScalableVectorType::get(Type::getInt8Ty(Context), 32); + case MVT::nxv64i8: + return ScalableVectorType::get(Type::getInt8Ty(Context), 64); + case MVT::nxv1i16: + return ScalableVectorType::get(Type::getInt16Ty(Context), 1); + case MVT::nxv2i16: + return ScalableVectorType::get(Type::getInt16Ty(Context), 2); + case MVT::nxv4i16: + return ScalableVectorType::get(Type::getInt16Ty(Context), 4); + case MVT::nxv8i16: + return ScalableVectorType::get(Type::getInt16Ty(Context), 8); case MVT::nxv16i16: - return VectorType::get(Type::getInt16Ty(Context), 16, /*Scalable=*/ true); + return ScalableVectorType::get(Type::getInt16Ty(Context), 16); case MVT::nxv32i16: - return VectorType::get(Type::getInt16Ty(Context), 32, /*Scalable=*/ true); - case MVT::nxv1i32: - return VectorType::get(Type::getInt32Ty(Context), 1, /*Scalable=*/ true); - case MVT::nxv2i32: - return VectorType::get(Type::getInt32Ty(Context), 2, /*Scalable=*/ true); - case MVT::nxv4i32: - return VectorType::get(Type::getInt32Ty(Context), 4, /*Scalable=*/ true); - case MVT::nxv8i32: - return VectorType::get(Type::getInt32Ty(Context), 8, /*Scalable=*/ true); + return ScalableVectorType::get(Type::getInt16Ty(Context), 32); + case MVT::nxv1i32: + return ScalableVectorType::get(Type::getInt32Ty(Context), 1); + case MVT::nxv2i32: + return ScalableVectorType::get(Type::getInt32Ty(Context), 2); + case MVT::nxv4i32: + return ScalableVectorType::get(Type::getInt32Ty(Context), 4); + case MVT::nxv8i32: + return ScalableVectorType::get(Type::getInt32Ty(Context), 8); case MVT::nxv16i32: - return VectorType::get(Type::getInt32Ty(Context), 16,/*Scalable=*/ true); + return ScalableVectorType::get(Type::getInt32Ty(Context), 16); case MVT::nxv32i32: - 
return VectorType::get(Type::getInt32Ty(Context), 32,/*Scalable=*/ true); - case MVT::nxv1i64: - return VectorType::get(Type::getInt64Ty(Context), 1, /*Scalable=*/ true); - case MVT::nxv2i64: - return VectorType::get(Type::getInt64Ty(Context), 2, /*Scalable=*/ true); - case MVT::nxv4i64: - return VectorType::get(Type::getInt64Ty(Context), 4, /*Scalable=*/ true); - case MVT::nxv8i64: - return VectorType::get(Type::getInt64Ty(Context), 8, /*Scalable=*/ true); + return ScalableVectorType::get(Type::getInt32Ty(Context), 32); + case MVT::nxv1i64: + return ScalableVectorType::get(Type::getInt64Ty(Context), 1); + case MVT::nxv2i64: + return ScalableVectorType::get(Type::getInt64Ty(Context), 2); + case MVT::nxv4i64: + return ScalableVectorType::get(Type::getInt64Ty(Context), 4); + case MVT::nxv8i64: + return ScalableVectorType::get(Type::getInt64Ty(Context), 8); case MVT::nxv16i64: - return VectorType::get(Type::getInt64Ty(Context), 16, /*Scalable=*/ true); + return ScalableVectorType::get(Type::getInt64Ty(Context), 16); case MVT::nxv32i64: - return VectorType::get(Type::getInt64Ty(Context), 32, /*Scalable=*/ true); - case MVT::nxv2f16: - return VectorType::get(Type::getHalfTy(Context), 2, /*Scalable=*/ true); - case MVT::nxv4f16: - return VectorType::get(Type::getHalfTy(Context), 4, /*Scalable=*/ true); - case MVT::nxv8f16: - return VectorType::get(Type::getHalfTy(Context), 8, /*Scalable=*/ true); - case MVT::nxv1f32: - return VectorType::get(Type::getFloatTy(Context), 1, /*Scalable=*/ true); - case MVT::nxv2f32: - return VectorType::get(Type::getFloatTy(Context), 2, /*Scalable=*/ true); - case MVT::nxv4f32: - return VectorType::get(Type::getFloatTy(Context), 4, /*Scalable=*/ true); - case MVT::nxv8f32: - return VectorType::get(Type::getFloatTy(Context), 8, /*Scalable=*/ true); + return ScalableVectorType::get(Type::getInt64Ty(Context), 32); + case MVT::nxv1f16: + return ScalableVectorType::get(Type::getHalfTy(Context), 1); + case MVT::nxv2f16: + return ScalableVectorType::get(Type::getHalfTy(Context), 2); + case MVT::nxv4f16: + return ScalableVectorType::get(Type::getHalfTy(Context), 4); + case MVT::nxv8f16: + return ScalableVectorType::get(Type::getHalfTy(Context), 8); + case MVT::nxv16f16: + return ScalableVectorType::get(Type::getHalfTy(Context), 16); + case MVT::nxv32f16: + return ScalableVectorType::get(Type::getHalfTy(Context), 32); + case MVT::nxv2bf16: + return ScalableVectorType::get(Type::getBFloatTy(Context), 2); + case MVT::nxv4bf16: + return ScalableVectorType::get(Type::getBFloatTy(Context), 4); + case MVT::nxv8bf16: + return ScalableVectorType::get(Type::getBFloatTy(Context), 8); + case MVT::nxv1f32: + return ScalableVectorType::get(Type::getFloatTy(Context), 1); + case MVT::nxv2f32: + return ScalableVectorType::get(Type::getFloatTy(Context), 2); + case MVT::nxv4f32: + return ScalableVectorType::get(Type::getFloatTy(Context), 4); + case MVT::nxv8f32: + return ScalableVectorType::get(Type::getFloatTy(Context), 8); case MVT::nxv16f32: - return VectorType::get(Type::getFloatTy(Context), 16, /*Scalable=*/ true); - case MVT::nxv1f64: - return VectorType::get(Type::getDoubleTy(Context), 1, /*Scalable=*/ true); - case MVT::nxv2f64: - return VectorType::get(Type::getDoubleTy(Context), 2, /*Scalable=*/ true); - case MVT::nxv4f64: - return VectorType::get(Type::getDoubleTy(Context), 4, /*Scalable=*/ true); - case MVT::nxv8f64: - return VectorType::get(Type::getDoubleTy(Context), 8, /*Scalable=*/ true); + return ScalableVectorType::get(Type::getFloatTy(Context), 16); + case MVT::nxv1f64: + 
return ScalableVectorType::get(Type::getDoubleTy(Context), 1); + case MVT::nxv2f64: + return ScalableVectorType::get(Type::getDoubleTy(Context), 2); + case MVT::nxv4f64: + return ScalableVectorType::get(Type::getDoubleTy(Context), 4); + case MVT::nxv8f64: + return ScalableVectorType::get(Type::getDoubleTy(Context), 8); case MVT::Metadata: return Type::getMetadataTy(Context); } } @@ -331,6 +483,7 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){ case Type::IntegerTyID: return getIntegerVT(cast<IntegerType>(Ty)->getBitWidth()); case Type::HalfTyID: return MVT(MVT::f16); + case Type::BFloatTyID: return MVT(MVT::bf16); case Type::FloatTyID: return MVT(MVT::f32); case Type::DoubleTyID: return MVT(MVT::f64); case Type::X86_FP80TyID: return MVT(MVT::f80); @@ -338,7 +491,8 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){ case Type::FP128TyID: return MVT(MVT::f128); case Type::PPC_FP128TyID: return MVT(MVT::ppcf128); case Type::PointerTyID: return MVT(MVT::iPTR); - case Type::VectorTyID: { + case Type::FixedVectorTyID: + case Type::ScalableVectorTyID: { VectorType *VTy = cast<VectorType>(Ty); return getVectorVT( getVT(VTy->getElementType(), /*HandleUnknown=*/ false), @@ -356,7 +510,8 @@ EVT EVT::getEVT(Type *Ty, bool HandleUnknown){ return MVT::getVT(Ty, HandleUnknown); case Type::IntegerTyID: return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth()); - case Type::VectorTyID: { + case Type::FixedVectorTyID: + case Type::ScalableVectorTyID: { VectorType *VTy = cast<VectorType>(Ty); return getVectorVT(Ty->getContext(), getEVT(VTy->getElementType(), /*HandleUnknown=*/ false), diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp index 5312e2eea96b..2c83f13b651b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp @@ -92,8 +92,8 @@ void VirtRegMap::assignVirt2Phys(Register virtReg, MCPhysReg physReg) { unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { unsigned Size = TRI->getSpillSize(*RC); - unsigned Align = TRI->getSpillAlignment(*RC); - int SS = MF->getFrameInfo().CreateSpillStackObject(Size, Align); + Align Alignment = TRI->getSpillAlign(*RC); + int SS = MF->getFrameInfo().CreateSpillStackObject(Size, Alignment); ++NumSpillSlots; return SS; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp index 1582f12ad580..44f4fe2ff9b1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp @@ -77,9 +77,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/BreadthFirstIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -118,14 +120,17 @@ class WasmEHPrepare : public FunctionPass { bool prepareEHPads(Function &F); bool prepareThrows(Function &F); - void prepareEHPad(BasicBlock *BB, bool NeedLSDA, unsigned Index = 0); + bool IsEHPadFunctionsSetUp = false; + void setupEHPadFunctions(Function &F); + void prepareEHPad(BasicBlock *BB, bool NeedPersonality, bool NeedLSDA = false, + unsigned Index = 0); void prepareTerminateCleanupPad(BasicBlock *BB); public: static char ID; // Pass identification, replacement for typeid 
WasmEHPrepare() : FunctionPass(ID) {} - + void getAnalysisUsage(AnalysisUsage &AU) const override; bool doInitialization(Module &M) override; bool runOnFunction(Function &F) override; @@ -136,11 +141,18 @@ public: } // end anonymous namespace char WasmEHPrepare::ID = 0; -INITIALIZE_PASS(WasmEHPrepare, DEBUG_TYPE, "Prepare WebAssembly exceptions", - false, false) +INITIALIZE_PASS_BEGIN(WasmEHPrepare, DEBUG_TYPE, + "Prepare WebAssembly exceptions", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(WasmEHPrepare, DEBUG_TYPE, "Prepare WebAssembly exceptions", + false, false) FunctionPass *llvm::createWasmEHPass() { return new WasmEHPrepare(); } +void WasmEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTreeWrapperPass>(); +} + bool WasmEHPrepare::doInitialization(Module &M) { IRBuilder<> IRB(M.getContext()); LPadContextTy = StructType::get(IRB.getInt32Ty(), // lpad_index @@ -153,18 +165,19 @@ bool WasmEHPrepare::doInitialization(Module &M) { // Erase the specified BBs if the BB does not have any remaining predecessors, // and also all its dead children. template <typename Container> -static void eraseDeadBBsAndChildren(const Container &BBs) { +static void eraseDeadBBsAndChildren(const Container &BBs, DomTreeUpdater *DTU) { SmallVector<BasicBlock *, 8> WL(BBs.begin(), BBs.end()); while (!WL.empty()) { auto *BB = WL.pop_back_val(); if (pred_begin(BB) != pred_end(BB)) continue; WL.append(succ_begin(BB), succ_end(BB)); - DeleteDeadBlock(BB); + DeleteDeadBlock(BB, DTU); } } bool WasmEHPrepare::runOnFunction(Function &F) { + IsEHPadFunctionsSetUp = false; bool Changed = false; Changed |= prepareThrows(F); Changed |= prepareEHPads(F); @@ -172,6 +185,9 @@ bool WasmEHPrepare::runOnFunction(Function &F) { } bool WasmEHPrepare::prepareThrows(Function &F) { + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + DomTreeUpdater DTU(&DT, /*PostDominatorTree*/ nullptr, + DomTreeUpdater::UpdateStrategy::Eager); Module &M = *F.getParent(); IRBuilder<> IRB(F.getContext()); bool Changed = false; @@ -194,30 +210,102 @@ bool WasmEHPrepare::prepareThrows(Function &F) { InstList.erase(std::next(BasicBlock::iterator(ThrowI)), InstList.end()); IRB.SetInsertPoint(BB); IRB.CreateUnreachable(); - eraseDeadBBsAndChildren(Succs); + eraseDeadBBsAndChildren(Succs, &DTU); } return Changed; } bool WasmEHPrepare::prepareEHPads(Function &F) { - Module &M = *F.getParent(); - IRBuilder<> IRB(F.getContext()); + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + bool Changed = false; - SmallVector<BasicBlock *, 16> CatchPads; - SmallVector<BasicBlock *, 16> CleanupPads; - for (BasicBlock &BB : F) { - if (!BB.isEHPad()) + // There are two things to decide: whether we need a personality function call + // and whether we need a `wasm.lsda()` call and its store. + // + // For the personality function call, catchpads with `catch (...)` and + // cleanuppads don't need it, because exceptions are always caught. Others all + // need it. + // + // For `wasm.lsda()` and its store, in order to minimize the number of them, + // we need a way to figure out whether we have encountered `wasm.lsda()` call + // in any of EH pads that dominates the current EH pad. To figure that out, we + // now visit EH pads in BFS order in the dominator tree so that we visit + // parent BBs first before visiting its child BBs in the domtree. 
+ // + // We keep a set named `ExecutedLSDA`, which basically means "Do we have + // `wasm.lsda() either in the current EH pad or any of its parent EH pads in + // the dominator tree?". This is to prevent scanning the domtree up to the + // root every time we examine an EH pad, in the worst case: each EH pad only + // needs to check its immediate parent EH pad. + // + // - If any of its parent EH pads in the domtree has `wasm.lsda`, this means + // we don't need `wasm.lsda()` in the current EH pad. We also insert the + // current EH pad in `ExecutedLSDA` set. + // - If none of its parent EH pad has `wasm.lsda()`, + // - If the current EH pad is a `catch (...)` or a cleanuppad, done. + // - If the current EH pad is neither a `catch (...)` nor a cleanuppad, + // add `wasm.lsda()` and the store in the current EH pad, and add the + // current EH pad to `ExecutedLSDA` set. + // + // TODO Can we not store LSDA address in user function but make libcxxabi + // compute it? + DenseSet<Value *> ExecutedLSDA; + unsigned Index = 0; + for (auto DomNode : breadth_first(&DT)) { + auto *BB = DomNode->getBlock(); + auto *Pad = BB->getFirstNonPHI(); + if (!Pad || (!isa<CatchPadInst>(Pad) && !isa<CleanupPadInst>(Pad))) continue; - auto *Pad = BB.getFirstNonPHI(); - if (isa<CatchPadInst>(Pad)) - CatchPads.push_back(&BB); - else if (isa<CleanupPadInst>(Pad)) - CleanupPads.push_back(&BB); + Changed = true; + + Value *ParentPad = nullptr; + if (CatchPadInst *CPI = dyn_cast<CatchPadInst>(Pad)) { + ParentPad = CPI->getCatchSwitch()->getParentPad(); + if (ExecutedLSDA.count(ParentPad)) { + ExecutedLSDA.insert(CPI); + // We insert its associated catchswitch too, because + // FuncletPadInst::getParentPad() returns a CatchSwitchInst if the child + // FuncletPadInst is a CleanupPadInst. + ExecutedLSDA.insert(CPI->getCatchSwitch()); + } + } else { // CleanupPadInst + ParentPad = cast<CleanupPadInst>(Pad)->getParentPad(); + if (ExecutedLSDA.count(ParentPad)) + ExecutedLSDA.insert(Pad); + } + + if (CatchPadInst *CPI = dyn_cast<CatchPadInst>(Pad)) { + if (CPI->getNumArgOperands() == 1 && + cast<Constant>(CPI->getArgOperand(0))->isNullValue()) + // In case of a single catch (...), we need neither personality call nor + // wasm.lsda() call + prepareEHPad(BB, false); + else { + if (ExecutedLSDA.count(CPI)) + // catch (type), but one of parents already has wasm.lsda() call + prepareEHPad(BB, true, false, Index++); + else { + // catch (type), and none of parents has wasm.lsda() call. We have to + // add the call in this EH pad, and record this EH pad in + // ExecutedLSDA. 
+ ExecutedLSDA.insert(CPI); + ExecutedLSDA.insert(CPI->getCatchSwitch()); + prepareEHPad(BB, true, true, Index++); + } + } + } else if (isa<CleanupPadInst>(Pad)) { + // Cleanup pads need neither personality call nor wasm.lsda() call + prepareEHPad(BB, false); + } } - if (CatchPads.empty() && CleanupPads.empty()) - return false; + return Changed; +} + +void WasmEHPrepare::setupEHPadFunctions(Function &F) { + Module &M = *F.getParent(); + IRBuilder<> IRB(F.getContext()); assert(F.hasPersonalityFn() && "Personality function not found"); // __wasm_lpad_context global variable @@ -252,29 +340,16 @@ bool WasmEHPrepare::prepareEHPads(Function &F) { "_Unwind_CallPersonality", IRB.getInt32Ty(), IRB.getInt8PtrTy()); if (Function *F = dyn_cast<Function>(CallPersonalityF.getCallee())) F->setDoesNotThrow(); - - unsigned Index = 0; - for (auto *BB : CatchPads) { - auto *CPI = cast<CatchPadInst>(BB->getFirstNonPHI()); - // In case of a single catch (...), we don't need to emit LSDA - if (CPI->getNumArgOperands() == 1 && - cast<Constant>(CPI->getArgOperand(0))->isNullValue()) - prepareEHPad(BB, false); - else - prepareEHPad(BB, true, Index++); - } - - // Cleanup pads don't need LSDA. - for (auto *BB : CleanupPads) - prepareEHPad(BB, false); - - return true; } -// Prepare an EH pad for Wasm EH handling. If NeedLSDA is false, Index is +// Prepare an EH pad for Wasm EH handling. If NeedPersonality is false, Index is // ignored. -void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedLSDA, - unsigned Index) { +void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality, + bool NeedLSDA, unsigned Index) { + if (!IsEHPadFunctionsSetUp) { + IsEHPadFunctionsSetUp = true; + setupEHPadFunctions(*BB->getParent()); + } assert(BB->isEHPad() && "BB is not an EHPad!"); IRBuilder<> IRB(BB->getContext()); IRB.SetInsertPoint(&*BB->getFirstInsertionPt()); @@ -283,9 +358,9 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedLSDA, Instruction *GetExnCI = nullptr, *GetSelectorCI = nullptr; for (auto &U : FPI->uses()) { if (auto *CI = dyn_cast<CallInst>(U.getUser())) { - if (CI->getCalledValue() == GetExnF) + if (CI->getCalledOperand() == GetExnF) GetExnCI = CI; - if (CI->getCalledValue() == GetSelectorF) + if (CI->getCalledOperand() == GetSelectorF) GetSelectorCI = CI; } } @@ -304,7 +379,7 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedLSDA, // In case it is a catchpad with single catch (...) or a cleanuppad, we don't // need to call personality function because we don't need a selector. - if (!NeedLSDA) { + if (!NeedPersonality) { if (GetSelectorCI) { assert(GetSelectorCI->use_empty() && "wasm.get.ehselector() still has uses!"); @@ -322,14 +397,8 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedLSDA, // Pseudocode: __wasm_lpad_context.lpad_index = index; IRB.CreateStore(IRB.getInt32(Index), LPadIndexField); - // Store LSDA address only if this catchpad belongs to a top-level - // catchswitch. If there is another catchpad that dominates this pad, we don't - // need to store LSDA address again, because they are the same throughout the - // function and have been already stored before. - // TODO Can we not store LSDA address in user function but make libcxxabi - // compute it? 
auto *CPI = cast<CatchPadInst>(FPI); - if (isa<ConstantTokenNone>(CPI->getCatchSwitch()->getParentPad())) + if (NeedLSDA) // Pseudocode: __wasm_lpad_context.lsda = wasm.lsda(); IRB.CreateStore(IRB.CreateCall(LSDAF), LSDAField); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp index 87958a738c67..5a25234ba850 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Triple.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -234,6 +235,9 @@ static const BasicBlock *getEHPadFromPredecessor(const BasicBlock *BB, return CleanupPad->getParent(); } +// Starting from a EHPad, Backward walk through control-flow graph +// to produce two primary outputs: +// FuncInfo.EHPadStateMap[] and FuncInfo.CxxUnwindMap[] static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo, const Instruction *FirstNonPHI, int ParentState) { @@ -260,6 +264,16 @@ static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo, // catchpads are separate funclets in C++ EH due to the way rethrow works. int TryHigh = CatchLow - 1; + + // MSVC FrameHandler3/4 on x64&Arm64 expect Catch Handlers in $tryMap$ + // stored in pre-order (outer first, inner next), not post-order + // Add to map here. Fix the CatchHigh after children are processed + const Module *Mod = BB->getParent()->getParent(); + bool IsPreOrder = Triple(Mod->getTargetTriple()).isArch64Bit(); + if (IsPreOrder) + addTryBlockMapEntry(FuncInfo, TryLow, TryHigh, CatchLow, Handlers); + unsigned TBMEIdx = FuncInfo.TryBlockMap.size() - 1; + for (const auto *CatchPad : Handlers) { FuncInfo.FuncletBaseStateMap[CatchPad] = CatchLow; for (const User *U : CatchPad->users()) { @@ -280,7 +294,12 @@ static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo, } } int CatchHigh = FuncInfo.getLastStateNumber(); - addTryBlockMapEntry(FuncInfo, TryLow, TryHigh, CatchHigh, Handlers); + // Now child Catches are processed, update CatchHigh + if (IsPreOrder) + FuncInfo.TryBlockMap[TBMEIdx].CatchHigh = CatchHigh; + else // PostOrder + addTryBlockMapEntry(FuncInfo, TryLow, TryHigh, CatchHigh, Handlers); + LLVM_DEBUG(dbgs() << "TryLow[" << BB->getName() << "]: " << TryLow << '\n'); LLVM_DEBUG(dbgs() << "TryHigh[" << BB->getName() << "]: " << TryHigh << '\n'); @@ -336,6 +355,9 @@ static int addSEHFinally(WinEHFuncInfo &FuncInfo, int ParentState, return FuncInfo.SEHUnwindMap.size() - 1; } +// Starting from a EHPad, Backward walk through control-flow graph +// to produce two primary outputs: +// FuncInfo.EHPadStateMap[] and FuncInfo.SEHUnwindMap[] static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo, const Instruction *FirstNonPHI, int ParentState) { @@ -942,12 +964,12 @@ void WinEHPrepare::removeImplausibleInstructions(Function &F) { for (BasicBlock *BB : BlocksInFunclet) { for (Instruction &I : *BB) { - CallSite CS(&I); - if (!CS) + auto *CB = dyn_cast<CallBase>(&I); + if (!CB) continue; Value *FuncletBundleOperand = nullptr; - if (auto BU = CS.getOperandBundle(LLVMContext::OB_funclet)) + if (auto BU = CB->getOperandBundle(LLVMContext::OB_funclet)) FuncletBundleOperand = BU->Inputs.front(); if (FuncletBundleOperand == FuncletPad) @@ -955,13 +977,13 @@ void WinEHPrepare::removeImplausibleInstructions(Function &F) { // Skip call sites which 
are nounwind intrinsics or inline asm. auto *CalledFn = - dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); - if (CalledFn && ((CalledFn->isIntrinsic() && CS.doesNotThrow()) || - CS.isInlineAsm())) + dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts()); + if (CalledFn && ((CalledFn->isIntrinsic() && CB->doesNotThrow()) || + CB->isInlineAsm())) continue; // This call site was not part of this funclet, remove it. - if (CS.isInvoke()) { + if (isa<InvokeInst>(CB)) { // Remove the unwind edge if it was an invoke. removeUnwindEdge(BB); // Get a pointer to the new call. @@ -1050,10 +1072,10 @@ bool WinEHPrepare::prepareExplicitEH(Function &F) { DemoteCatchSwitchPHIOnlyOpt); if (!DisableCleanups) { - LLVM_DEBUG(verifyFunction(F)); + assert(!verifyFunction(F, &dbgs())); removeImplausibleInstructions(F); - LLVM_DEBUG(verifyFunction(F)); + assert(!verifyFunction(F, &dbgs())); cleanupPreparedFunclets(F); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp index 4847a0c3e842..ab9c0e81ebdc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp @@ -111,7 +111,7 @@ void XRayInstrumentation::replaceRetWithPatchableRet( for (auto &MO : T.operands()) MIB.add(MO); Terminators.push_back(&T); - if (T.isCall()) + if (T.shouldUpdateCallSiteInfo()) MF.eraseCallSiteInfo(&T); } } @@ -148,40 +148,51 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { bool AlwaysInstrument = !InstrAttr.hasAttribute(Attribute::None) && InstrAttr.isStringAttribute() && InstrAttr.getValueAsString() == "xray-always"; - Attribute Attr = F.getFnAttribute("xray-instruction-threshold"); - unsigned XRayThreshold = 0; + auto ThresholdAttr = F.getFnAttribute("xray-instruction-threshold"); + auto IgnoreLoopsAttr = F.getFnAttribute("xray-ignore-loops"); + unsigned int XRayThreshold = 0; if (!AlwaysInstrument) { - if (Attr.hasAttribute(Attribute::None) || !Attr.isStringAttribute()) + if (ThresholdAttr.hasAttribute(Attribute::None) || + !ThresholdAttr.isStringAttribute()) return false; // XRay threshold attribute not found. - if (Attr.getValueAsString().getAsInteger(10, XRayThreshold)) + if (ThresholdAttr.getValueAsString().getAsInteger(10, XRayThreshold)) return false; // Invalid value for threshold. + bool IgnoreLoops = !IgnoreLoopsAttr.hasAttribute(Attribute::None); + // Count the number of MachineInstr`s in MachineFunction int64_t MICount = 0; for (const auto &MBB : MF) MICount += MBB.size(); - // Get MachineDominatorTree or compute it on the fly if it's unavailable - auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>(); - MachineDominatorTree ComputedMDT; - if (!MDT) { - ComputedMDT.getBase().recalculate(MF); - MDT = &ComputedMDT; - } + bool TooFewInstrs = MICount < XRayThreshold; - // Get MachineLoopInfo or compute it on the fly if it's unavailable - auto *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); - MachineLoopInfo ComputedMLI; - if (!MLI) { - ComputedMLI.getBase().analyze(MDT->getBase()); - MLI = &ComputedMLI; - } + if (!IgnoreLoops) { + // Get MachineDominatorTree or compute it on the fly if it's unavailable + auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>(); + MachineDominatorTree ComputedMDT; + if (!MDT) { + ComputedMDT.getBase().recalculate(MF); + MDT = &ComputedMDT; + } - // Check if we have a loop. 
- // FIXME: Maybe make this smarter, and see whether the loops are dependent - // on inputs or side-effects? - if (MLI->empty() && MICount < XRayThreshold) - return false; // Function is too small and has no loops. + // Get MachineLoopInfo or compute it on the fly if it's unavailable + auto *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); + MachineLoopInfo ComputedMLI; + if (!MLI) { + ComputedMLI.getBase().analyze(MDT->getBase()); + MLI = &ComputedMLI; + } + + // Check if we have a loop. + // FIXME: Maybe make this smarter, and see whether the loops are dependent + // on inputs or side-effects? + if (MLI->empty() && TooFewInstrs) + return false; // Function is too small and has no loops. + } else if (TooFewInstrs) { + // Function is too small + return false; + } } // We look for the first non-empty MachineBasicBlock, so that we can insert @@ -201,43 +212,47 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { return false; } - // First, insert an PATCHABLE_FUNCTION_ENTER as the first instruction of the - // MachineFunction. - BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(), - TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER)); - - switch (MF.getTarget().getTargetTriple().getArch()) { - case Triple::ArchType::arm: - case Triple::ArchType::thumb: - case Triple::ArchType::aarch64: - case Triple::ArchType::mips: - case Triple::ArchType::mipsel: - case Triple::ArchType::mips64: - case Triple::ArchType::mips64el: { - // For the architectures which don't have a single return instruction - InstrumentationOptions op; - op.HandleTailcall = false; - op.HandleAllReturns = true; - prependRetWithPatchableExit(MF, TII, op); - break; - } - case Triple::ArchType::ppc64le: { - // PPC has conditional returns. Turn them into branch and plain returns. - InstrumentationOptions op; - op.HandleTailcall = false; - op.HandleAllReturns = true; - replaceRetWithPatchableRet(MF, TII, op); - break; - } - default: { - // For the architectures that have a single return instruction (such as - // RETQ on x86_64). - InstrumentationOptions op; - op.HandleTailcall = true; - op.HandleAllReturns = false; - replaceRetWithPatchableRet(MF, TII, op); - break; + if (!F.hasFnAttribute("xray-skip-entry")) { + // First, insert an PATCHABLE_FUNCTION_ENTER as the first instruction of the + // MachineFunction. + BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(), + TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER)); } + + if (!F.hasFnAttribute("xray-skip-exit")) { + switch (MF.getTarget().getTargetTriple().getArch()) { + case Triple::ArchType::arm: + case Triple::ArchType::thumb: + case Triple::ArchType::aarch64: + case Triple::ArchType::mips: + case Triple::ArchType::mipsel: + case Triple::ArchType::mips64: + case Triple::ArchType::mips64el: { + // For the architectures which don't have a single return instruction + InstrumentationOptions op; + op.HandleTailcall = false; + op.HandleAllReturns = true; + prependRetWithPatchableExit(MF, TII, op); + break; + } + case Triple::ArchType::ppc64le: { + // PPC has conditional returns. Turn them into branch and plain returns. + InstrumentationOptions op; + op.HandleTailcall = false; + op.HandleAllReturns = true; + replaceRetWithPatchableRet(MF, TII, op); + break; + } + default: { + // For the architectures that have a single return instruction (such as + // RETQ on x86_64). + InstrumentationOptions op; + op.HandleTailcall = true; + op.HandleAllReturns = false; + replaceRetWithPatchableRet(MF, TII, op); + break; + } + } } return true; } |