author | Dimitry Andric <dim@FreeBSD.org> | 2019-10-23 17:51:42 +0000
---|---|---
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-10-23 17:51:42 +0000
commit | 1d5ae1026e831016fc29fd927877c86af904481f (patch) |
tree | 2cdfd12620fcfa5d9e4a0389f85368e8e36f63f9 /lib/CodeGen |
parent | e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff) |
Vendor import of stripped llvm trunk r375505, the last commit before the
upstream Subversion repository was made read-only, and the LLVM project
migrated to GitHub:
https://llvm.org/svn/llvm-project/llvm/trunk@375505
Notes:
svn path=/vendor/llvm/dist/; revision=353940
svn path=/vendor/llvm/llvm-r375505/; revision=353941; tag=vendor/llvm/llvm-trunk-r375505
Diffstat (limited to 'lib/CodeGen')
186 files changed, 13421 insertions, 6660 deletions
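
Most of the mechanical churn in the hunks below comes from three upstream API migrations that repeat file after file: plain `unsigned` register numbers become the `Register` wrapper class, log2-encoded alignment values become the `Align` type, and `llvm::make_unique` becomes C++14's `std::make_unique`. As a reading aid, here is a standalone sketch of what the `Register` wrapper buys over a raw `unsigned` (simplified stand-ins, not the real `llvm/CodeGen/Register.h`):

```cpp
// Standalone sketch, NOT the real LLVM header: in LLVM, virtual register
// numbers have their most significant bit set, and the Register class gives
// that convention a type-safe home while staying convertible to unsigned.
#include <cassert>
#include <iostream>

class Register {
  unsigned Reg = 0;

public:
  Register() = default;
  Register(unsigned R) : Reg(R) {}          // implicit, like LLVM's wrapper
  operator unsigned() const { return Reg; } // old call sites keep compiling

  // Mirrors the static helpers the diff switches callers to
  // (previously TargetRegisterInfo::isVirtualRegister and friends).
  static bool isVirtualRegister(unsigned R) { return R & 0x80000000u; }
  static bool isPhysicalRegister(unsigned R) {
    return R != 0 && !(R & 0x80000000u);
  }

  bool isVirtual() const { return isVirtualRegister(Reg); }
  bool isPhysical() const { return isPhysicalRegister(Reg); }
};

int main() {
  Register Phys = 5u;          // small numbers: physical registers
  Register Virt = 0x80000000u; // MSB set: a virtual register
  assert(Phys.isPhysical() && !Phys.isVirtual());
  assert(Virt.isVirtual());
  unsigned Raw = Phys; // implicit conversion keeps `Reg == 0` tests working
  std::cout << "raw physreg id: " << Raw << "\n";
}
```

The implicit conversions in both directions are what let hunks like `unsigned Reg = MO.getReg();` becoming `Register Reg = MO.getReg();` land as one-line changes without touching the surrounding comparisons against `0`.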
```diff
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 444f618d8b8c..f64b775a8b77 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -232,7 +232,7 @@ bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr &MI,
   if (!MO.isReg() || !MO.isImplicit())
     return false;
 
-  unsigned Reg = MO.getReg();
+  Register Reg = MO.getReg();
   if (Reg == 0)
     return false;
 
@@ -252,7 +252,7 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(
     if (!MO.isReg()) continue;
     if ((MO.isDef() && MI.isRegTiedToUseOperand(i)) ||
         IsImplicitDefUse(MI, MO)) {
-      const unsigned Reg = MO.getReg();
+      const Register Reg = MO.getReg();
       for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
            SubRegs.isValid(); ++SubRegs)
         PassthruRegs.insert(*SubRegs);
@@ -365,7 +365,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     MachineOperand &MO = MI.getOperand(i);
     if (!MO.isReg() || !MO.isDef()) continue;
-    unsigned Reg = MO.getReg();
+    Register Reg = MO.getReg();
     if (Reg == 0) continue;
 
     HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n");
@@ -375,7 +375,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     MachineOperand &MO = MI.getOperand(i);
     if (!MO.isReg() || !MO.isDef()) continue;
-    unsigned Reg = MO.getReg();
+    Register Reg = MO.getReg();
     if (Reg == 0) continue;
 
     LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g"
@@ -418,7 +418,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     MachineOperand &MO = MI.getOperand(i);
     if (!MO.isReg() || !MO.isDef()) continue;
-    unsigned Reg = MO.getReg();
+    Register Reg = MO.getReg();
     if (Reg == 0) continue;
     // Ignore KILLs and passthru registers for liveness...
     if (MI.isKill() || (PassthruRegs.count(Reg) != 0))
@@ -471,7 +471,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     MachineOperand &MO = MI.getOperand(i);
     if (!MO.isReg() || !MO.isUse()) continue;
-    unsigned Reg = MO.getReg();
+    Register Reg = MO.getReg();
     if (Reg == 0) continue;
 
     LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g"
@@ -506,7 +506,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     MachineOperand &MO = MI.getOperand(i);
     if (!MO.isReg()) continue;
-    unsigned Reg = MO.getReg();
+    Register Reg = MO.getReg();
     if (Reg == 0) continue;
 
     if (FirstReg != 0) {
@@ -790,7 +790,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
         CriticalPathSU = SU;
       }
     }
-
+    assert(CriticalPathSU && "Failed to find SUnit critical path");
     CriticalPathMI = CriticalPathSU->getInstr();
   }
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index d158e70b86ac..4f24f077d120 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -309,7 +309,8 @@ static const Value *getNoopInput(const Value *V,
       NoopInput = Op;
     } else if (isa<TruncInst>(I) &&
                TLI.allowTruncateForTailCall(Op->getType(), I->getType())) {
-      DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits());
+      DataBits = std::min((uint64_t)DataBits,
+                          I->getType()->getPrimitiveSizeInBits().getFixedSize());
       NoopInput = Op;
     } else if (auto CS = ImmutableCallSite(I)) {
       const Value *ReturnedOp = CS.getReturnedArgOperand();
@@ -523,7 +524,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
   // longjmp on x86), it can end up causing miscompilation that has not
   // been fully understood.
   if (!Ret &&
-      (!TM.Options.GuaranteedTailCallOpt || !isa<UnreachableInst>(Term)))
+      ((!TM.Options.GuaranteedTailCallOpt &&
+        CS.getCallingConv() != CallingConv::Tail) || !isa<UnreachableInst>(Term)))
     return false;
 
   // If I will have a chain, make sure no other instruction that will have a
@@ -536,9 +538,11 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
     // Debug info intrinsics do not get in the way of tail call optimization.
     if (isa<DbgInfoIntrinsic>(BBI))
       continue;
-    // A lifetime end intrinsic should not stop tail call optimization.
+    // A lifetime end or assume intrinsic should not stop tail call
+    // optimization.
     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI))
-      if (II->getIntrinsicID() == Intrinsic::lifetime_end)
+      if (II->getIntrinsicID() == Intrinsic::lifetime_end ||
+          II->getIntrinsicID() == Intrinsic::assume)
         continue;
     if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
         !isSafeToSpeculativelyExecute(&*BBI))
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 54f6cc2d5571..73c53d6c4af5 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -91,10 +91,12 @@
 #include "llvm/MC/MCSectionCOFF.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionXCOFF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSymbolXCOFF.h"
 #include "llvm/MC/MCTargetOptions.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/MC/SectionKind.h"
@@ -159,30 +161,30 @@ static gcp_map_type &getGCMap(void *&P) {
   return *(gcp_map_type*)P;
 }
 
-/// getGVAlignmentLog2 - Return the alignment to use for the specified global
-/// value in log2 form. This rounds up to the preferred alignment if possible
-/// and legal.
-static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL,
-                                   unsigned InBits = 0) {
-  unsigned NumBits = 0;
+/// getGVAlignment - Return the alignment to use for the specified global
+/// value. This rounds up to the preferred alignment if possible and legal.
+Align AsmPrinter::getGVAlignment(const GlobalValue *GV, const DataLayout &DL,
+                                 Align InAlign) {
+  Align Alignment;
   if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
-    NumBits = DL.getPreferredAlignmentLog(GVar);
+    Alignment = Align(DL.getPreferredAlignment(GVar));
 
-  // If InBits is specified, round it to it.
-  if (InBits > NumBits)
-    NumBits = InBits;
+  // If InAlign is specified, round it to it.
+  if (InAlign > Alignment)
+    Alignment = InAlign;
 
   // If the GV has a specified alignment, take it into account.
-  if (GV->getAlignment() == 0)
-    return NumBits;
+  const MaybeAlign GVAlign(GV->getAlignment());
+  if (!GVAlign)
+    return Alignment;
 
-  unsigned GVAlign = Log2_32(GV->getAlignment());
+  assert(GVAlign && "GVAlign must be set");
 
   // If the GVAlign is larger than NumBits, or if we are required to obey
   // NumBits because the GV has an assigned section, obey it.
-  if (GVAlign > NumBits || GV->hasSection())
-    NumBits = GVAlign;
-  return NumBits;
+  if (*GVAlign > Alignment || GV->hasSection())
+    Alignment = *GVAlign;
+  return Alignment;
 }
 
 AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer)
@@ -248,13 +250,14 @@ const MCSection *AsmPrinter::getCurrentSection() const {
 void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
   MachineFunctionPass::getAnalysisUsage(AU);
-  AU.addRequired<MachineModuleInfo>();
+  AU.addRequired<MachineModuleInfoWrapperPass>();
   AU.addRequired<MachineOptimizationRemarkEmitterPass>();
   AU.addRequired<GCModuleInfo>();
 }
 
 bool AsmPrinter::doInitialization(Module &M) {
-  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+  auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
+  MMI = MMIWP ? &MMIWP->getMMI() : nullptr;
 
   // Initialize TargetLoweringObjectFile.
   const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
@@ -311,7 +314,7 @@ bool AsmPrinter::doInitialization(Module &M) {
   if (MAI->doesSupportDebugInformation()) {
     bool EmitCodeView = MMI->getModule()->getCodeViewFlag();
     if (EmitCodeView && TM.getTargetTriple().isOSWindows()) {
-      Handlers.emplace_back(llvm::make_unique<CodeViewDebug>(this),
+      Handlers.emplace_back(std::make_unique<CodeViewDebug>(this),
                             DbgTimerName, DbgTimerDescription,
                             CodeViewLineTablesGroupName,
                             CodeViewLineTablesGroupDescription);
@@ -380,7 +383,7 @@ bool AsmPrinter::doInitialization(Module &M) {
   if (mdconst::extract_or_null<ConstantInt>(
           MMI->getModule()->getModuleFlag("cfguardtable")))
-    Handlers.emplace_back(llvm::make_unique<WinCFGuard>(this), CFGuardName,
+    Handlers.emplace_back(std::make_unique<WinCFGuard>(this), CFGuardName,
                           CFGuardDescription, DWARFGroupName,
                           DWARFGroupDescription);
 
@@ -425,7 +428,10 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
     OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global);
     return;
   case GlobalValue::PrivateLinkage:
+    return;
   case GlobalValue::InternalLinkage:
+    if (MAI->hasDotLGloblDirective())
+      OutStreamer->EmitSymbolAttribute(GVSym, MCSA_LGlobal);
     return;
   case GlobalValue::AppendingLinkage:
   case GlobalValue::AvailableExternallyLinkage:
@@ -501,7 +507,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
   // If the alignment is specified, we *must* obey it. Overaligning a global
   // with a specified alignment is a prompt way to break globals emitted to
   // sections and expected to be contiguous (e.g. ObjC metadata).
-  unsigned AlignLog = getGVAlignmentLog2(GV, DL);
+  const Align Alignment = getGVAlignment(GV, DL);
 
   for (const HandlerInfo &HI : Handlers) {
     NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
@@ -513,12 +519,11 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
   // Handle common symbols
   if (GVKind.isCommon()) {
     if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
-    unsigned Align = 1 << AlignLog;
-    if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
-      Align = 0;
-
     // .comm _foo, 42, 4
-    OutStreamer->EmitCommonSymbol(GVSym, Size, Align);
+    const bool SupportsAlignment =
+        getObjFileLowering().getCommDirectiveSupportsAlignment();
+    OutStreamer->EmitCommonSymbol(GVSym, Size,
+                                  SupportsAlignment ? Alignment.value() : 0);
     return;
   }
 
@@ -531,10 +536,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
       TheSection->isVirtualSection()) {
     if (Size == 0)
       Size = 1; // zerofill of 0 bytes is undefined.
-    unsigned Align = 1 << AlignLog;
     EmitLinkage(GV, GVSym);
     // .zerofill __DATA, __bss, _foo, 400, 5
-    OutStreamer->EmitZerofill(TheSection, GVSym, Size, Align);
+    OutStreamer->EmitZerofill(TheSection, GVSym, Size, Alignment.value());
     return;
   }
 
@@ -544,7 +548,6 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
       getObjFileLowering().getBSSSection() == TheSection) {
     if (Size == 0)
       Size = 1; // .comm Foo, 0 is undefined, avoid it.
-    unsigned Align = 1 << AlignLog;
 
     // Use .lcomm only if it supports user-specified alignment.
     // Otherwise, while it would still be correct to use .lcomm in some
     // cases (e.g. when Align == 1), the external assembler might enfore
     // some -unknown- default alignment behavior, which could cause
     // spurious differences between external and integrated assembler.
     // Prefer to simply fall back to .local / .comm in this case.
     if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) {
       // .lcomm _foo, 42
-      OutStreamer->EmitLocalCommonSymbol(GVSym, Size, Align);
+      OutStreamer->EmitLocalCommonSymbol(GVSym, Size, Alignment.value());
       return;
     }
 
-    if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
-      Align = 0;
-
     // .local _foo
     OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Local);
     // .comm _foo, 42, 4
-    OutStreamer->EmitCommonSymbol(GVSym, Size, Align);
+    const bool SupportsAlignment =
+        getObjFileLowering().getCommDirectiveSupportsAlignment();
+    OutStreamer->EmitCommonSymbol(GVSym, Size,
+                                  SupportsAlignment ? Alignment.value() : 0);
     return;
   }
 
@@ -585,11 +588,11 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
 
     if (GVKind.isThreadBSS()) {
       TheSection = getObjFileLowering().getTLSBSSSection();
-      OutStreamer->EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog);
+      OutStreamer->EmitTBSSSymbol(TheSection, MangSym, Size, Alignment.value());
     } else if (GVKind.isThreadData()) {
       OutStreamer->SwitchSection(TheSection);
 
-      EmitAlignment(AlignLog, GV);
+      EmitAlignment(Alignment, GV);
       OutStreamer->EmitLabel(MangSym);
 
       EmitGlobalConstant(GV->getParent()->getDataLayout(),
@@ -625,7 +628,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
   OutStreamer->SwitchSection(TheSection);
 
   EmitLinkage(GV, EmittedInitSym);
-  EmitAlignment(AlignLog, GV);
+  EmitAlignment(Alignment, GV);
 
   OutStreamer->EmitLabel(EmittedInitSym);
 
@@ -664,6 +667,10 @@ void AsmPrinter::EmitFunctionHeader() {
   OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(&F, TM));
   EmitVisibility(CurrentFnSym, F.getVisibility());
 
+  if (MAI->needsFunctionDescriptors() &&
+      F.getLinkage() != GlobalValue::InternalLinkage)
+    EmitLinkage(&F, CurrentFnDescSym);
+
   EmitLinkage(&F, CurrentFnSym);
   if (MAI->hasFunctionAlignment())
     EmitAlignment(MF->getAlignment(), &F);
@@ -699,8 +706,13 @@ void AsmPrinter::EmitFunctionHeader() {
     }
   }
 
-  // Emit the CurrentFnSym. This is a virtual function to allow targets to
-  // do their wild and crazy things as required.
+  // Emit the function descriptor. This is a virtual function to allow targets
+  // to emit their specific function descriptor.
+  if (MAI->needsFunctionDescriptors())
+    EmitFunctionDescriptor();
+
+  // Emit the CurrentFnSym. This is a virtual function to allow targets to do
+  // their wild and crazy things as required.
   EmitFunctionEntryLabel();
 
   // If the function had address-taken blocks that got deleted, then we have
@@ -783,7 +795,7 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
 
 /// emitImplicitDef - This method emits the specified machine instruction
 /// that is an implicit def.
 void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
-  unsigned RegNo = MI->getOperand(0).getReg();
+  Register RegNo = MI->getOperand(0).getReg();
 
   SmallString<128> Str;
   raw_svector_ostream OS(Str);
@@ -910,7 +922,8 @@ static bool emitDebugLabelComment(const MachineInstr *MI, AsmPrinter &AP) {
   OS << "DEBUG_LABEL: ";
 
   const DILabel *V = MI->getDebugLabel();
-  if (auto *SP = dyn_cast<DISubprogram>(V->getScope())) {
+  if (auto *SP = dyn_cast<DISubprogram>(
+          V->getScope()->getNonLexicalBlockFileScope())) {
     StringRef Name = SP->getName();
     if (!Name.empty())
       OS << Name << ":";
@@ -1024,7 +1037,7 @@ void AsmPrinter::EmitFunctionBody() {
   // Get MachineDominatorTree or compute it on the fly if it's unavailable
   MDT = getAnalysisIfAvailable<MachineDominatorTree>();
   if (!MDT) {
-    OwnedMDT = make_unique<MachineDominatorTree>();
+    OwnedMDT = std::make_unique<MachineDominatorTree>();
     OwnedMDT->getBase().recalculate(*MF);
     MDT = OwnedMDT.get();
   }
@@ -1032,7 +1045,7 @@ void AsmPrinter::EmitFunctionBody() {
   // Get MachineLoopInfo or compute it on the fly if it's unavailable
   MLI = getAnalysisIfAvailable<MachineLoopInfo>();
   if (!MLI) {
-    OwnedMLI = make_unique<MachineLoopInfo>();
+    OwnedMLI = std::make_unique<MachineLoopInfo>();
     OwnedMLI->getBase().analyze(MDT->getBase());
     MLI = OwnedMLI.get();
   }
@@ -1052,9 +1065,13 @@ void AsmPrinter::EmitFunctionBody() {
         ++NumInstsInFunction;
     }
 
-    // If there is a pre-instruction symbol, emit a label for it here.
+    // If there is a pre-instruction symbol, emit a label for it here. If the
+    // instruction was duplicated and the label has already been emitted,
+    // don't re-emit the same label.
+    // FIXME: Consider strengthening that to an assertion.
     if (MCSymbol *S = MI.getPreInstrSymbol())
-      OutStreamer->EmitLabel(S);
+      if (S->isUndefined())
+        OutStreamer->EmitLabel(S);
 
     if (ShouldPrintDebugScopes) {
       for (const HandlerInfo &HI : Handlers) {
@@ -1107,9 +1124,13 @@ void AsmPrinter::EmitFunctionBody() {
       break;
     }
 
-    // If there is a post-instruction symbol, emit a label for it here.
+    // If there is a post-instruction symbol, emit a label for it here. If
+    // the instruction was duplicated and the label has already been emitted,
+    // don't re-emit the same label.
+    // FIXME: Consider strengthening that to an assertion.
     if (MCSymbol *S = MI.getPostInstrSymbol())
-      OutStreamer->EmitLabel(S);
+      if (S->isUndefined())
+        OutStreamer->EmitLabel(S);
 
     if (ShouldPrintDebugScopes) {
       for (const HandlerInfo &HI : Handlers) {
@@ -1313,11 +1334,10 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
 
   // Set the symbol type to function if the alias has a function type.
   // This affects codegen when the aliasee is not a function.
-  if (IsFunction) {
-    OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeFunction);
-    if (isa<GlobalIFunc>(GIS))
-      OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeIndFunction);
-  }
+  if (IsFunction)
+    OutStreamer->EmitSymbolAttribute(Name, isa<GlobalIFunc>(GIS)
+                                               ? MCSA_ELF_TypeIndFunction
+                                               : MCSA_ELF_TypeFunction);
 
   EmitVisibility(Name, GIS.getVisibility());
 
@@ -1349,60 +1369,28 @@ void AsmPrinter::emitRemarksSection(Module &M) {
   RemarkStreamer *RS = M.getContext().getRemarkStreamer();
   if (!RS)
     return;
-  const remarks::Serializer &Serializer = RS->getSerializer();
+  remarks::RemarkSerializer &RemarkSerializer = RS->getSerializer();
+
+  Optional<SmallString<128>> Filename;
+  if (Optional<StringRef> FilenameRef = RS->getFilename()) {
+    Filename = *FilenameRef;
+    sys::fs::make_absolute(*Filename);
+    assert(!Filename->empty() && "The filename can't be empty.");
+  }
+
+  std::string Buf;
+  raw_string_ostream OS(Buf);
+  std::unique_ptr<remarks::MetaSerializer> MetaSerializer =
+      Filename ? RemarkSerializer.metaSerializer(OS, StringRef(*Filename))
               : RemarkSerializer.metaSerializer(OS);
+  MetaSerializer->emit();
 
   // Switch to the right section: .remarks/__remarks.
   MCSection *RemarksSection =
       OutContext.getObjectFileInfo()->getRemarksSection();
   OutStreamer->SwitchSection(RemarksSection);
 
-  // Emit the magic number.
-  OutStreamer->EmitBytes(remarks::Magic);
-  // Explicitly emit a '\0'.
-  OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
-
-  // Emit the version number: little-endian uint64_t.
-  // The version number is located at the offset 0x0 in the section.
-  std::array<char, 8> Version;
-  support::endian::write64le(Version.data(), remarks::Version);
-  OutStreamer->EmitBinaryData(StringRef(Version.data(), Version.size()));
-
-  // Emit the string table in the section.
-  // Note: we need to use the streamer here to emit it in the section. We can't
-  // just use the serialize function with a raw_ostream because of the way
-  // MCStreamers work.
-  uint64_t StrTabSize =
-      Serializer.StrTab ? Serializer.StrTab->SerializedSize : 0;
-  // Emit the total size of the string table (the size itself excluded):
-  // little-endian uint64_t.
-  // The total size is located after the version number.
-  // Note: even if no string table is used, emit 0.
-  std::array<char, 8> StrTabSizeBuf;
-  support::endian::write64le(StrTabSizeBuf.data(), StrTabSize);
-  OutStreamer->EmitBinaryData(
-      StringRef(StrTabSizeBuf.data(), StrTabSizeBuf.size()));
-
-  if (const Optional<remarks::StringTable> &StrTab = Serializer.StrTab) {
-    std::vector<StringRef> StrTabStrings = StrTab->serialize();
-    // Emit a list of null-terminated strings.
-    // Note: the order is important here: the ID used in the remarks corresponds
-    // to the position of the string in the section.
-    for (StringRef Str : StrTabStrings) {
-      OutStreamer->EmitBytes(Str);
-      // Explicitly emit a '\0'.
-      OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
-    }
-  }
-
-  // Emit the null-terminated absolute path to the remark file.
-  // The path is located at the offset 0x4 in the section.
-  StringRef FilenameRef = RS->getFilename();
-  SmallString<128> Filename = FilenameRef;
-  sys::fs::make_absolute(Filename);
-  assert(!Filename.empty() && "The filename can't be empty.");
-  OutStreamer->EmitBytes(Filename);
-  // Explicitly emit a '\0'.
-  OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
+  OutStreamer->EmitBinaryData(OS.str());
 }
 
 bool AsmPrinter::doFinalization(Module &M) {
@@ -1455,7 +1443,7 @@ bool AsmPrinter::doFinalization(Module &M) {
       OutStreamer->SwitchSection(TLOF.getDataSection());
       const DataLayout &DL = M.getDataLayout();
 
-      EmitAlignment(Log2_32(DL.getPointerSize()));
+      EmitAlignment(Align(DL.getPointerSize()));
       for (const auto &Stub : Stubs) {
         OutStreamer->EmitLabel(Stub.first);
         OutStreamer->EmitSymbolValue(Stub.second.getPointer(),
@@ -1482,7 +1470,7 @@ bool AsmPrinter::doFinalization(Module &M) {
                 COFF::IMAGE_SCN_LNK_COMDAT,
             SectionKind::getReadOnly(), Stub.first->getName(),
             COFF::IMAGE_COMDAT_SELECT_ANY));
-        EmitAlignment(Log2_32(DL.getPointerSize()));
+        EmitAlignment(Align(DL.getPointerSize()));
         OutStreamer->EmitSymbolAttribute(Stub.first, MCSA_Global);
         OutStreamer->EmitLabel(Stub.first);
         OutStreamer->EmitSymbolValue(Stub.second.getPointer(),
@@ -1607,8 +1595,7 @@ bool AsmPrinter::doFinalization(Module &M) {
            "expected llvm.used to be an array type");
     if (const auto *A = cast<ConstantArray>(LU->getInitializer())) {
       for (const Value *Op : A->operands()) {
-        const auto *GV =
-            cast<GlobalValue>(Op->stripPointerCastsNoFollowAliases());
+        const auto *GV = cast<GlobalValue>(Op->stripPointerCasts());
         // Global symbols with internal or private linkage are not visible to
         // the linker, and thus would cause an error when the linker tried to
         // preserve the symbol due to the `/include:` directive.
@@ -1679,8 +1666,27 @@ MCSymbol *AsmPrinter::getCurExceptionSym() {
 
 void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
   this->MF = &MF;
+
   // Get the function symbol.
-  CurrentFnSym = getSymbol(&MF.getFunction());
+  if (MAI->needsFunctionDescriptors()) {
+    assert(TM.getTargetTriple().isOSAIX() && "Function descriptor is only"
+           " supported on AIX.");
+    assert(CurrentFnDescSym && "The function descriptor symbol needs to be"
+           " initalized first.");
+
+    // Get the function entry point symbol.
+    CurrentFnSym =
+        OutContext.getOrCreateSymbol("." + CurrentFnDescSym->getName());
+
+    const Function &F = MF.getFunction();
+    MCSectionXCOFF *FnEntryPointSec =
+        cast<MCSectionXCOFF>(getObjFileLowering().SectionForGlobal(&F, TM));
+    // Set the containing csect.
+    cast<MCSymbolXCOFF>(CurrentFnSym)->setContainingCsect(FnEntryPointSec);
+  } else {
+    CurrentFnSym = getSymbol(&MF.getFunction());
+  }
+
   CurrentFnSymForSize = CurrentFnSym;
   CurrentFnBegin = nullptr;
   CurExceptionSym = nullptr;
@@ -1765,7 +1771,7 @@ void AsmPrinter::EmitConstantPool() {
 
     if (CurSection != CPSections[i].S) {
       OutStreamer->SwitchSection(CPSections[i].S);
-      EmitAlignment(Log2_32(CPSections[i].Alignment));
+      EmitAlignment(Align(CPSections[i].Alignment));
       CurSection = CPSections[i].S;
       Offset = 0;
     }
@@ -1812,7 +1818,7 @@ void AsmPrinter::EmitJumpTableInfo() {
     OutStreamer->SwitchSection(ReadOnlySection);
   }
 
-  EmitAlignment(Log2_32(MJTI->getEntryAlignment(DL)));
+  EmitAlignment(Align(MJTI->getEntryAlignment(DL)));
 
   // Jump tables in code sections are marked with a data_region directive
   // where that's supported.
```
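
The `Log2_32(...)`/`1 << AlignLog` round trips that the preceding hunks delete are exactly what the new `Align` type encapsulates. A minimal sketch of the idea (simplified; the real class lives in `llvm/Support/Alignment.h` and this is not its full interface):

```cpp
// Sketch of an Align-like type: store the log2 internally so the value is a
// power of two by construction, and expose value() instead of raw shifts.
#include <cassert>
#include <cstdint>
#include <iostream>

class Align {
  uint8_t ShiftValue = 0; // log2 of the alignment; value() is always 2^n

public:
  Align() = default; // 1-byte alignment, playing the Align::None() role
  explicit Align(uint64_t Value) {
    assert(Value > 0 && (Value & (Value - 1)) == 0 &&
           "alignment must be a power of 2");
    while ((1ULL << ShiftValue) != Value)
      ++ShiftValue;
  }
  uint64_t value() const { return 1ULL << ShiftValue; }
  friend bool operator==(Align L, Align R) {
    return L.ShiftValue == R.ShiftValue;
  }
  friend bool operator>(Align L, Align R) {
    return L.ShiftValue > R.ShiftValue;
  }
};

int main() {
  Align Alignment(8); // e.g. Align(DL.getPointerSize()) in the hunks above
  Align Preferred(16);
  if (Preferred > Alignment) // "round up to the preferred alignment"
    Alignment = Preferred;
  std::cout << "emit .p2align for " << Alignment.value() << " bytes\n";
  return Alignment == Align() ? 1 : 0;
}
```

With the typed wrapper, callers can no longer confuse a byte count with its log2, which is the class of bug the deleted `unsigned Align = 1 << AlignLog;` dance invited.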
```diff
@@ -2025,10 +2031,10 @@ void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List,
   }
 
   // Emit the function pointers in the target-specific order
-  unsigned Align = Log2_32(DL.getPointerPrefAlignment());
   llvm::stable_sort(Structors, [](const Structor &L, const Structor &R) {
     return L.Priority < R.Priority;
   });
+  const Align Align = DL.getPointerPrefAlignment();
   for (Structor &S : Structors) {
     const TargetLoweringObjectFile &Obj = getObjFileLowering();
     const MCSymbol *KeySym = nullptr;
@@ -2149,23 +2155,20 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
 //===----------------------------------------------------------------------===//
 
 // EmitAlignment - Emit an alignment directive to the specified power of
-// two boundary. For example, if you pass in 3 here, you will get an 8
-// byte alignment. If a global value is specified, and if that global has
+// two boundary. If a global value is specified, and if that global has
 // an explicit alignment requested, it will override the alignment request
 // if required for correctness.
-void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const {
+void AsmPrinter::EmitAlignment(Align Alignment, const GlobalObject *GV) const {
   if (GV)
-    NumBits = getGVAlignmentLog2(GV, GV->getParent()->getDataLayout(), NumBits);
+    Alignment = getGVAlignment(GV, GV->getParent()->getDataLayout(), Alignment);
 
-  if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment.
+  if (Alignment == Align::None())
+    return; // 1-byte aligned: no need to emit alignment.
 
-  assert(NumBits <
-             static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
-         "undefined behavior");
   if (getCurrentSection()->getKind().isText())
-    OutStreamer->EmitCodeAlignment(1u << NumBits);
+    OutStreamer->EmitCodeAlignment(Alignment.value());
   else
-    OutStreamer->EmitValueToAlignment(1u << NumBits);
+    OutStreamer->EmitValueToAlignment(Alignment.value());
 }
 
 //===----------------------------------------------------------------------===//
@@ -2481,6 +2484,7 @@ static void emitGlobalConstantStruct(const DataLayout &DL,
 }
 
 static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP) {
+  assert(ET && "Unknown float type");
   APInt API = APF.bitcastToAPInt();
 
   // First print a comment with what we think the original floating-point value
@@ -2488,11 +2492,7 @@ static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP) {
   if (AP.isVerbose()) {
     SmallString<8> StrVal;
     APF.toString(StrVal);
-
-    if (ET)
-      ET->print(AP.OutStreamer->GetCommentOS());
-    else
-      AP.OutStreamer->GetCommentOS() << "Printing <null> Type";
+    ET->print(AP.OutStreamer->GetCommentOS());
     AP.OutStreamer->GetCommentOS() << ' ' << StrVal << '\n';
   }
 
@@ -2670,7 +2670,7 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
   const GlobalValue *FinalGV = dyn_cast<GlobalValue>(GV->getOperand(0));
   const MCSymbol *FinalSym = AP.getSymbol(FinalGV);
   *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel(
-      FinalSym, MV, Offset, AP.MMI, *AP.OutStreamer);
+      FinalGV, FinalSym, MV, Offset, AP.MMI, *AP.OutStreamer);
 
   // Update GOT equivalent usage information
   --NumUses;
@@ -2930,7 +2930,7 @@ void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB,
 /// EmitBasicBlockStart - This method prints the label for the specified
 /// MachineBasicBlock, an alignment (if present) and a comment describing
 /// it if appropriate.
-void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
+void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) {
   // End the previous funclet and start a new one.
   if (MBB.isEHFuncletEntry()) {
     for (const HandlerInfo &HI : Handlers) {
@@ -2940,8 +2940,9 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
   }
 
   // Emit an alignment directive for this block, if needed.
-  if (unsigned Align = MBB.getAlignment())
-    EmitAlignment(Align);
+  const Align Alignment = MBB.getAlignment();
+  if (Alignment != Align::None())
+    EmitAlignment(Alignment);
   MCCodePaddingContext Context;
   setupCodePaddingContext(MBB, Context);
   OutStreamer->EmitCodePaddingBasicBlockStart(Context);
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 7721e996aca5..420df26a2b8b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -72,7 +72,7 @@ static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
 unsigned AsmPrinter::addInlineAsmDiagBuffer(StringRef AsmStr,
                                             const MDNode *LocMDNode) const {
   if (!DiagInfo) {
-    DiagInfo = make_unique<SrcMgrDiagInfo>();
+    DiagInfo = std::make_unique<SrcMgrDiagInfo>();
 
     MCContext &Context = MMI->getContext();
     Context.setInlineSourceManager(&DiagInfo->SrcMgr);
@@ -432,6 +432,7 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
             const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
             MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
             Sym->print(OS, AP->MAI);
+            MMI->getContext().registerInlineAsmLabel(Sym);
           } else if (MI->getOperand(OpNo).isMBB()) {
             const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
             Sym->print(OS, AP->MAI);
diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h
index db2ff458eb2e..09f7496cd4ef 100644
--- a/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -73,18 +73,18 @@ class HashingByteStreamer final : public ByteStreamer {
 class BufferByteStreamer final : public ByteStreamer {
 private:
   SmallVectorImpl<char> &Buffer;
-  SmallVectorImpl<std::string> &Comments;
+  std::vector<std::string> &Comments;
 
+public:
   /// Only verbose textual output needs comments. This will be set to
   /// true for that case, and false otherwise. If false, comments passed in to
   /// the emit methods will be ignored.
-  bool GenerateComments;
+  const bool GenerateComments;
 
-public:
   BufferByteStreamer(SmallVectorImpl<char> &Buffer,
-                     SmallVectorImpl<std::string> &Comments,
-                     bool GenerateComments)
-    : Buffer(Buffer), Comments(Comments), GenerateComments(GenerateComments) {}
+                     std::vector<std::string> &Comments, bool GenerateComments)
+      : Buffer(Buffer), Comments(Comments), GenerateComments(GenerateComments) {
+  }
 
   void EmitInt8(uint8_t Byte, const Twine &Comment) override {
     Buffer.push_back(Byte);
     if (GenerateComments)
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 932959c311fa..c6457f3626d1 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -98,7 +98,8 @@ using namespace llvm::codeview;
 namespace {
 class CVMCAdapter : public CodeViewRecordStreamer {
 public:
-  CVMCAdapter(MCStreamer &OS) : OS(&OS) {}
+  CVMCAdapter(MCStreamer &OS, TypeCollection &TypeTable)
+      : OS(&OS), TypeTable(TypeTable) {}
 
   void EmitBytes(StringRef Data) { OS->EmitBytes(Data); }
@@ -110,8 +111,24 @@ public:
 
   void AddComment(const Twine &T) { OS->AddComment(T); }
 
+  void AddRawComment(const Twine &T) { OS->emitRawComment(T); }
+
+  bool isVerboseAsm() { return OS->isVerboseAsm(); }
+
+  std::string getTypeName(TypeIndex TI) {
+    std::string TypeName;
+    if (!TI.isNoneType()) {
+      if (TI.isSimple())
+        TypeName = TypeIndex::simpleTypeName(TI);
+      else
+        TypeName = TypeTable.getTypeName(TI);
+    }
+    return TypeName;
+  }
+
 private:
   MCStreamer *OS = nullptr;
+  TypeCollection &TypeTable;
 };
 } // namespace
 
@@ -617,13 +634,6 @@ emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S,
   OS.EmitBytes(NullTerminatedString);
 }
 
-static StringRef getTypeLeafName(TypeLeafKind TypeKind) {
-  for (const EnumEntry<TypeLeafKind> &EE : getTypeLeafNames())
-    if (EE.Value == TypeKind)
-      return EE.Name;
-  return "";
-}
-
 void CodeViewDebug::emitTypeInformation() {
   if (TypeTable.empty())
     return;
@@ -632,30 +642,11 @@ void CodeViewDebug::emitTypeInformation() {
   OS.SwitchSection(Asm->getObjFileLowering().getCOFFDebugTypesSection());
   emitCodeViewMagicVersion();
 
-  SmallString<8> CommentPrefix;
-  if (OS.isVerboseAsm()) {
-    CommentPrefix += '\t';
-    CommentPrefix += Asm->MAI->getCommentString();
-    CommentPrefix += ' ';
-  }
-
   TypeTableCollection Table(TypeTable.records());
-  SmallString<512> CommentBlock;
-  raw_svector_ostream CommentOS(CommentBlock);
-  std::unique_ptr<ScopedPrinter> SP;
-  std::unique_ptr<TypeDumpVisitor> TDV;
   TypeVisitorCallbackPipeline Pipeline;
 
-  if (OS.isVerboseAsm()) {
-    // To construct block comment describing the type record for readability.
-    SP = llvm::make_unique<ScopedPrinter>(CommentOS);
-    SP->setPrefix(CommentPrefix);
-    TDV = llvm::make_unique<TypeDumpVisitor>(Table, SP.get(), false);
-    Pipeline.addCallbackToPipeline(*TDV);
-  }
-
   // To emit type record using Codeview MCStreamer adapter
-  CVMCAdapter CVMCOS(OS);
+  CVMCAdapter CVMCOS(OS, Table);
   TypeRecordMapping typeMapping(CVMCOS);
   Pipeline.addCallbackToPipeline(typeMapping);
 
@@ -664,17 +655,6 @@ void CodeViewDebug::emitTypeInformation() {
     // This will fail if the record data is invalid.
     CVType Record = Table.getType(*B);
 
-    CommentBlock.clear();
-
-    auto RecordLen = Record.length();
-    auto RecordKind = Record.kind();
-    if (OS.isVerboseAsm())
-      CVMCOS.AddComment("Record length");
-    CVMCOS.EmitIntValue(RecordLen - 2, 2);
-    if (OS.isVerboseAsm())
-      CVMCOS.AddComment("Record kind: " + getTypeLeafName(RecordKind));
-    CVMCOS.EmitIntValue(RecordKind, sizeof(RecordKind));
-
     Error E = codeview::visitTypeRecord(Record, *B, Pipeline);
 
     if (E) {
@@ -682,13 +662,6 @@ void CodeViewDebug::emitTypeInformation() {
       llvm_unreachable("produced malformed type record");
     }
 
-    if (OS.isVerboseAsm()) {
-      // emitRawComment will insert its own tab and comment string before
-      // the first line, so strip off our first one. It also prints its own
-      // newline.
-      OS.emitRawComment(
-          CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim());
-    }
     B = Table.getNext(*B);
   }
 }
@@ -1135,7 +1108,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
     if (!BeginLabel->isDefined() || !EndLabel->isDefined())
       continue;
 
-    DIType *DITy = std::get<2>(HeapAllocSite);
+    const DIType *DITy = std::get<2>(HeapAllocSite);
     MCSymbol *HeapAllocEnd = beginSymbolRecord(SymbolKind::S_HEAPALLOCSITE);
     OS.AddComment("Call site offset");
     OS.EmitCOFFSecRel32(BeginLabel, /*Offset=*/0);
@@ -1363,7 +1336,7 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
   const TargetRegisterInfo *TRI = TSI.getRegisterInfo();
   const MachineFrameInfo &MFI = MF->getFrameInfo();
   const Function &GV = MF->getFunction();
-  auto Insertion = FnDebugInfo.insert({&GV, llvm::make_unique<FunctionInfo>()});
+  auto Insertion = FnDebugInfo.insert({&GV, std::make_unique<FunctionInfo>()});
   assert(Insertion.second && "function already has info");
   CurFn = Insertion.first->second.get();
   CurFn->FuncId = NextFuncId++;
@@ -2633,17 +2606,6 @@ void CodeViewDebug::emitLocalVariableList(const FunctionInfo &FI,
     emitLocalVariable(FI, L);
 }
 
-/// Only call this on endian-specific types like ulittle16_t and little32_t, or
-/// structs composed of them.
-template <typename T>
-static void copyBytesForDefRange(SmallString<20> &BytePrefix,
-                                 SymbolKind SymKind, const T &DefRangeHeader) {
-  BytePrefix.resize(2 + sizeof(T));
-  ulittle16_t SymKindLE = ulittle16_t(SymKind);
-  memcpy(&BytePrefix[0], &SymKindLE, 2);
-  memcpy(&BytePrefix[2], &DefRangeHeader, sizeof(T));
-}
-
 void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
                                       const LocalVariable &Var) {
   // LocalSym record, see SymbolRecord.h for more info.
@@ -2692,8 +2654,9 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
         (bool(Flags & LocalSymFlags::IsParameter)
             ? (EncFP == FI.EncodedParamFramePtrReg)
            : (EncFP == FI.EncodedLocalFramePtrReg))) {
-      little32_t FPOffset = little32_t(Offset);
-      copyBytesForDefRange(BytePrefix, S_DEFRANGE_FRAMEPOINTER_REL, FPOffset);
+      DefRangeFramePointerRelHeader DRHdr;
+      DRHdr.Offset = Offset;
+      OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr);
     } else {
       uint16_t RegRelFlags = 0;
       if (DefRange.IsSubfield) {
@@ -2701,28 +2664,27 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
             (DefRange.StructOffset << DefRangeRegisterRelSym::OffsetInParentShift);
       }
-      DefRangeRegisterRelSym::Header DRHdr;
+      DefRangeRegisterRelHeader DRHdr;
       DRHdr.Register = Reg;
       DRHdr.Flags = RegRelFlags;
       DRHdr.BasePointerOffset = Offset;
-      copyBytesForDefRange(BytePrefix, S_DEFRANGE_REGISTER_REL, DRHdr);
+      OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr);
     }
   } else {
     assert(DefRange.DataOffset == 0 && "unexpected offset into register");
     if (DefRange.IsSubfield) {
-      DefRangeSubfieldRegisterSym::Header DRHdr;
+      DefRangeSubfieldRegisterHeader DRHdr;
       DRHdr.Register = DefRange.CVRegister;
       DRHdr.MayHaveNoName = 0;
       DRHdr.OffsetInParent = DefRange.StructOffset;
-      copyBytesForDefRange(BytePrefix, S_DEFRANGE_SUBFIELD_REGISTER, DRHdr);
+      OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr);
     } else {
-      DefRangeRegisterSym::Header DRHdr;
+      DefRangeRegisterHeader DRHdr;
       DRHdr.Register = DefRange.CVRegister;
       DRHdr.MayHaveNoName = 0;
-      copyBytesForDefRange(BytePrefix, S_DEFRANGE_REGISTER, DRHdr);
+      OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr);
     }
-    OS.EmitCVDefRangeDirective(DefRange.Ranges, BytePrefix);
   }
 }
@@ -2896,6 +2858,14 @@ void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
   CurFn = nullptr;
 }
 
+// Usable locations are valid with non-zero line numbers. A line number of zero
+// corresponds to optimized code that doesn't have a distinct source location.
+// In this case, we try to use the previous or next source location depending on
+// the context.
+static bool isUsableDebugLoc(DebugLoc DL) {
+  return DL && DL.getLine() != 0;
+}
+
 void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
   DebugHandlerBase::beginInstruction(MI);
 
@@ -2907,19 +2877,21 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
   // If the first instruction of a new MBB has no location, find the first
   // instruction with a location and use that.
   DebugLoc DL = MI->getDebugLoc();
-  if (!DL && MI->getParent() != PrevInstBB) {
+  if (!isUsableDebugLoc(DL) && MI->getParent() != PrevInstBB) {
     for (const auto &NextMI : *MI->getParent()) {
       if (NextMI.isDebugInstr())
         continue;
       DL = NextMI.getDebugLoc();
-      if (DL)
+      if (isUsableDebugLoc(DL))
         break;
     }
+    // FIXME: Handle the case where the BB has no valid locations. This would
+    // probably require doing a real dataflow analysis.
   }
   PrevInstBB = MI->getParent();
 
   // If we still don't have a debug location, don't record a location.
-  if (!DL)
+  if (!isUsableDebugLoc(DL))
     return;
 
   maybeRecordLocation(DL, Asm->MF);
@@ -3026,7 +2998,7 @@ void CodeViewDebug::collectGlobalVariableInfo() {
       auto Insertion = ScopeGlobals.insert(
          {Scope, std::unique_ptr<GlobalVariableList>()});
       if (Insertion.second)
-        Insertion.first->second = llvm::make_unique<GlobalVariableList>();
+        Insertion.first->second = std::make_unique<GlobalVariableList>();
       VariableList = Insertion.first->second.get();
     } else if (GV->hasComdat())
       // Emit this global variable into a COMDAT section.
```
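
The `ScopeGlobals.insert` hunk just above uses a map idiom worth noting: insert a placeholder first, then allocate the payload only when the key turned out to be new, so a single hash lookup does both the existence check and the insertion. A standalone sketch of the pattern (using `std::map` rather than LLVM's `DenseMap`):

```cpp
// Insert-then-fill: one lookup reserves the slot; Insertion.second says
// whether the key was new, so the unique_ptr is only allocated on first use.
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

using GlobalVariableList = std::vector<std::string>; // stand-in payload

int main() {
  std::map<int, std::unique_ptr<GlobalVariableList>> ScopeGlobals;

  for (int Scope : {1, 2, 1}) {
    auto Insertion = ScopeGlobals.insert({Scope, nullptr});
    if (Insertion.second) // first time we see this scope: create its list
      Insertion.first->second = std::make_unique<GlobalVariableList>();
    Insertion.first->second->push_back("gv in scope " + std::to_string(Scope));
  }

  for (auto &[Scope, List] : ScopeGlobals)
    std::cout << "scope " << Scope << ": " << List->size() << " globals\n";
}
```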
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h index ce57b789d7fa..7ffd77926cf7 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -148,7 +148,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { SmallVector<LexicalBlock *, 1> ChildBlocks; std::vector<std::pair<MCSymbol *, MDNode *>> Annotations; - std::vector<std::tuple<MCSymbol *, MCSymbol *, DIType *>> HeapAllocSites; + std::vector<std::tuple<MCSymbol *, MCSymbol *, const DIType *>> + HeapAllocSites; const MCSymbol *Begin = nullptr; const MCSymbol *End = nullptr; diff --git a/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp index ddd60575b6c0..7f9d6c618ad3 100644 --- a/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp +++ b/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -41,7 +41,7 @@ using EntryIndex = DbgValueHistoryMap::EntryIndex; static Register isDescribedByReg(const MachineInstr &MI) { assert(MI.isDebugValue()); assert(MI.getNumOperands() == 4); - // If the location of variable is an entry value (DW_OP_entry_value) + // If the location of variable is an entry value (DW_OP_LLVM_entry_value) // do not consider it as a register location. if (MI.getDebugExpression()->isEntryValue()) return 0; @@ -177,13 +177,13 @@ static void handleNewDebugValue(InlinedEntity Var, const MachineInstr &DV, IndicesToErase.push_back(Index); Entry.endEntry(NewIndex); } - if (unsigned Reg = isDescribedByReg(DV)) + if (Register Reg = isDescribedByReg(DV)) TrackedRegs[Reg] |= !Overlaps; } // If the new debug value is described by a register, add tracking of // that register if it is not already tracked. - if (unsigned NewReg = isDescribedByReg(DV)) { + if (Register NewReg = isDescribedByReg(DV)) { if (!TrackedRegs.count(NewReg)) addRegDescribedVar(RegVars, NewReg, Var); LiveEntries[Var].insert(NewIndex); @@ -234,7 +234,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF, DbgLabelInstrMap &DbgLabels) { const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); - unsigned FrameReg = TRI->getFrameRegister(*MF); + Register FrameReg = TRI->getFrameRegister(*MF); RegDescribedVarsMap RegVars; DbgValueEntriesMap LiveEntries; for (const auto &MBB : *MF) { @@ -275,7 +275,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF, continue; // If this is a virtual register, only clobber it since it doesn't // have aliases. - if (TRI->isVirtualRegister(MO.getReg())) + if (Register::isVirtualRegister(MO.getReg())) clobberRegisterUses(RegVars, MO.getReg(), DbgValues, LiveEntries, MI); // If this is a register def operand, it may end a debug value @@ -296,7 +296,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF, // Don't consider SP to be clobbered by register masks. 
for (auto It : RegVars) { unsigned int Reg = It.first; - if (Reg != SP && TRI->isPhysicalRegister(Reg) && + if (Reg != SP && Register::isPhysicalRegister(Reg) && MO.clobbersPhysReg(Reg)) RegsToClobber.push_back(Reg); } diff --git a/lib/CodeGen/AsmPrinter/DebugLocStream.h b/lib/CodeGen/AsmPrinter/DebugLocStream.h index 789291771b5a..0db86b09d19a 100644 --- a/lib/CodeGen/AsmPrinter/DebugLocStream.h +++ b/lib/CodeGen/AsmPrinter/DebugLocStream.h @@ -38,21 +38,18 @@ public: : CU(CU), EntryOffset(EntryOffset) {} }; struct Entry { - const MCSymbol *BeginSym; - const MCSymbol *EndSym; + const MCSymbol *Begin; + const MCSymbol *End; size_t ByteOffset; size_t CommentOffset; - Entry(const MCSymbol *BeginSym, const MCSymbol *EndSym, size_t ByteOffset, - size_t CommentOffset) - : BeginSym(BeginSym), EndSym(EndSym), ByteOffset(ByteOffset), - CommentOffset(CommentOffset) {} }; private: SmallVector<List, 4> Lists; SmallVector<Entry, 32> Entries; SmallString<256> DWARFBytes; - SmallVector<std::string, 32> Comments; + std::vector<std::string> Comments; + MCSymbol *Sym; /// Only verbose textual output needs comments. This will be set to /// true for that case, and false otherwise. @@ -63,6 +60,12 @@ public: size_t getNumLists() const { return Lists.size(); } const List &getList(size_t LI) const { return Lists[LI]; } ArrayRef<List> getLists() const { return Lists; } + MCSymbol *getSym() const { + return Sym; + } + void setSym(MCSymbol *Sym) { + this->Sym = Sym; + } class ListBuilder; class EntryBuilder; @@ -93,7 +96,7 @@ private: /// Until the next call, bytes added to the stream will be added to this /// entry. void startEntry(const MCSymbol *BeginSym, const MCSymbol *EndSym) { - Entries.emplace_back(BeginSym, EndSym, DWARFBytes.size(), Comments.size()); + Entries.push_back({BeginSym, EndSym, DWARFBytes.size(), Comments.size()}); } /// Finalize a .debug_loc entry, deleting if it's empty. diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 9548ad9918c1..a61c98ec1c18 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -208,7 +208,7 @@ void DwarfCompileUnit::addLocationAttribute( if (!Loc) { addToAccelTable = true; Loc = new (DIEValueAllocator) DIELoc; - DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc); + DwarfExpr = std::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc); } if (Expr) { @@ -326,14 +326,13 @@ void DwarfCompileUnit::addRange(RangeSpan Range) { // emitted into and the subprogram was contained within. If these are the // same then extend our current range, otherwise add this as a new range. 
if (CURanges.empty() || !SameAsPrevCU || - (&CURanges.back().getEnd()->getSection() != - &Range.getEnd()->getSection())) { + (&CURanges.back().End->getSection() != + &Range.End->getSection())) { CURanges.push_back(Range); - DD->addSectionLabel(Range.getStart()); return; } - CURanges.back().setEnd(Range.getEnd()); + CURanges.back().End = Range.End; } void DwarfCompileUnit::initStmtList() { @@ -399,7 +398,7 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { } else { const TargetRegisterInfo *RI = Asm->MF->getSubtarget().getRegisterInfo(); MachineLocation Location(RI->getFrameRegister(*Asm->MF)); - if (RI->isPhysicalRegister(Location.getReg())) + if (Register::isPhysicalRegister(Location.getReg())) addAddress(*SPDie, dwarf::DW_AT_frame_base, Location); } } @@ -468,14 +467,6 @@ void DwarfCompileUnit::constructScopeDIE( void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, SmallVector<RangeSpan, 2> Range) { - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - - // Emit the offset into .debug_ranges or .debug_rnglists as a relocatable - // label. emitDIE() will handle emitting it appropriately. - const MCSymbol *RangeSectionSym = - DD->getDwarfVersion() >= 5 - ? TLOF.getDwarfRnglistsSection()->getBeginSymbol() - : TLOF.getDwarfRangesSection()->getBeginSymbol(); HasRangeLists = true; @@ -494,12 +485,17 @@ void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, // (DW_RLE_startx_endx etc.). if (DD->getDwarfVersion() >= 5) addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_rnglistx, Index); - else if (isDwoUnit()) - addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), - RangeSectionSym); - else - addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), - RangeSectionSym); + else { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + const MCSymbol *RangeSectionSym = + TLOF.getDwarfRangesSection()->getBeginSymbol(); + if (isDwoUnit()) + addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), + RangeSectionSym); + else + addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), + RangeSectionSym); + } } void DwarfCompileUnit::attachRangesOrLowHighPC( @@ -507,7 +503,7 @@ void DwarfCompileUnit::attachRangesOrLowHighPC( if (Ranges.size() == 1 || !DD->useRangesSection()) { const RangeSpan &Front = Ranges.front(); const RangeSpan &Back = Ranges.back(); - attachLowHighPC(Die, Front.getStart(), Back.getEnd()); + attachLowHighPC(Die, Front.Begin, Back.End); } else addScopeRangeList(Die, std::move(Ranges)); } @@ -517,8 +513,8 @@ void DwarfCompileUnit::attachRangesOrLowHighPC( SmallVector<RangeSpan, 2> List; List.reserve(Ranges.size()); for (const InsnRange &R : Ranges) - List.push_back(RangeSpan(DD->getLabelBeforeInsn(R.first), - DD->getLabelAfterInsn(R.second))); + List.push_back( + {DD->getLabelBeforeInsn(R.first), DD->getLabelAfterInsn(R.second)}); attachRangesOrLowHighPC(Die, std::move(List)); } @@ -647,8 +643,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg); DwarfExpr.addFragmentOffset(Expr); SmallVector<uint64_t, 8> Ops; - Ops.push_back(dwarf::DW_OP_plus_uconst); - Ops.push_back(Offset); + DIExpression::appendOffset(Ops, Offset); // According to // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf // cuda-gdb requires DW_AT_address_class for all variables to be able to @@ -892,32 +887,117 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( 
ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer); } -DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE, - const DISubprogram &CalleeSP, - bool IsTail, - const MCExpr *PCOffset) { +/// Whether to use the GNU analog for a DWARF5 tag, attribute, or location atom. +static bool useGNUAnalogForDwarf5Feature(DwarfDebug *DD) { + return DD->getDwarfVersion() == 4 && DD->tuneForGDB(); +} + +dwarf::Tag DwarfCompileUnit::getDwarf5OrGNUTag(dwarf::Tag Tag) const { + if (!useGNUAnalogForDwarf5Feature(DD)) + return Tag; + switch (Tag) { + case dwarf::DW_TAG_call_site: + return dwarf::DW_TAG_GNU_call_site; + case dwarf::DW_TAG_call_site_parameter: + return dwarf::DW_TAG_GNU_call_site_parameter; + default: + llvm_unreachable("DWARF5 tag with no GNU analog"); + } +} + +dwarf::Attribute +DwarfCompileUnit::getDwarf5OrGNUAttr(dwarf::Attribute Attr) const { + if (!useGNUAnalogForDwarf5Feature(DD)) + return Attr; + switch (Attr) { + case dwarf::DW_AT_call_all_calls: + return dwarf::DW_AT_GNU_all_call_sites; + case dwarf::DW_AT_call_target: + return dwarf::DW_AT_GNU_call_site_target; + case dwarf::DW_AT_call_origin: + return dwarf::DW_AT_abstract_origin; + case dwarf::DW_AT_call_pc: + return dwarf::DW_AT_low_pc; + case dwarf::DW_AT_call_value: + return dwarf::DW_AT_GNU_call_site_value; + case dwarf::DW_AT_call_tail_call: + return dwarf::DW_AT_GNU_tail_call; + default: + llvm_unreachable("DWARF5 attribute with no GNU analog"); + } +} + +dwarf::LocationAtom +DwarfCompileUnit::getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const { + if (!useGNUAnalogForDwarf5Feature(DD)) + return Loc; + switch (Loc) { + case dwarf::DW_OP_entry_value: + return dwarf::DW_OP_GNU_entry_value; + default: + llvm_unreachable("DWARF5 location atom with no GNU analog"); + } +} + +DIE &DwarfCompileUnit::constructCallSiteEntryDIE( + DIE &ScopeDIE, const DISubprogram *CalleeSP, bool IsTail, + const MCSymbol *PCAddr, const MCExpr *PCOffset, unsigned CallReg) { // Insert a call site entry DIE within ScopeDIE. - DIE &CallSiteDIE = - createAndAddDIE(dwarf::DW_TAG_call_site, ScopeDIE, nullptr); + DIE &CallSiteDIE = createAndAddDIE(getDwarf5OrGNUTag(dwarf::DW_TAG_call_site), + ScopeDIE, nullptr); - // For the purposes of showing tail call frames in backtraces, a key piece of - // information is DW_AT_call_origin, a pointer to the callee DIE. - DIE *CalleeDIE = getOrCreateSubprogramDIE(&CalleeSP); - assert(CalleeDIE && "Could not create DIE for call site entry origin"); - addDIEEntry(CallSiteDIE, dwarf::DW_AT_call_origin, *CalleeDIE); + if (CallReg) { + // Indirect call. + addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target), + MachineLocation(CallReg)); + } else { + DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP); + assert(CalleeDIE && "Could not create DIE for call site entry origin"); + addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin), + *CalleeDIE); + } - if (IsTail) { + if (IsTail) // Attach DW_AT_call_tail_call to tail calls for standards compliance. - addFlag(CallSiteDIE, dwarf::DW_AT_call_tail_call); - } else { - // Attach the return PC to allow the debugger to disambiguate call paths - // from one function to another. + addFlag(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_tail_call)); + + // Attach the return PC to allow the debugger to disambiguate call paths + // from one function to another. 
+ if (DD->getDwarfVersion() == 4 && DD->tuneForGDB()) { + assert(PCAddr && "Missing PC information for a call"); + addLabelAddress(CallSiteDIE, dwarf::DW_AT_low_pc, PCAddr); + } else if (!IsTail || DD->tuneForGDB()) { assert(PCOffset && "Missing return PC information for a call"); addAddressExpr(CallSiteDIE, dwarf::DW_AT_call_return_pc, PCOffset); } + return CallSiteDIE; } +void DwarfCompileUnit::constructCallSiteParmEntryDIEs( + DIE &CallSiteDIE, SmallVector<DbgCallSiteParam, 4> &Params) { + for (const auto &Param : Params) { + unsigned Register = Param.getRegister(); + auto CallSiteDieParam = + DIE::get(DIEValueAllocator, + getDwarf5OrGNUTag(dwarf::DW_TAG_call_site_parameter)); + insertDIE(CallSiteDieParam); + addAddress(*CallSiteDieParam, dwarf::DW_AT_location, + MachineLocation(Register)); + + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + DwarfExpr.setCallSiteParamValueFlag(); + + DwarfDebug::emitDebugLocValue(*Asm, nullptr, Param.getValue(), DwarfExpr); + + addBlock(*CallSiteDieParam, getDwarf5OrGNUAttr(dwarf::DW_AT_call_value), + DwarfExpr.finalize()); + + CallSiteDIE.addChild(CallSiteDieParam); + } +} + DIE *DwarfCompileUnit::constructImportedEntityDIE( const DIImportedEntity *Module) { DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag()); @@ -997,11 +1077,11 @@ void DwarfCompileUnit::createAbstractEntity(const DINode *Node, assert(Scope && Scope->isAbstractScope()); auto &Entity = getAbstractEntities()[Node]; if (isa<const DILocalVariable>(Node)) { - Entity = llvm::make_unique<DbgVariable>( + Entity = std::make_unique<DbgVariable>( cast<const DILocalVariable>(Node), nullptr /* IA */);; DU->addScopeVariable(Scope, cast<DbgVariable>(Entity.get())); } else if (isa<const DILabel>(Node)) { - Entity = llvm::make_unique<DbgLabel>( + Entity = std::make_unique<DbgLabel>( cast<const DILabel>(Node), nullptr /* IA */); DU->addScopeLabel(Scope, cast<DbgLabel>(Entity.get())); } @@ -1081,16 +1161,8 @@ void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty, GlobalTypes.insert(std::make_pair(std::move(FullName), &getUnitDie())); } -/// addVariableAddress - Add DW_AT_location attribute for a -/// DbgVariable based on provided MachineLocation. void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die, MachineLocation Location) { - // addBlockByrefAddress is obsolete and will be removed soon. - // The clang frontend always generates block byref variables with a - // complex expression that encodes exactly what addBlockByrefAddress - // would do. - assert((!DV.isBlockByrefVariable() || DV.hasComplexAddress()) && - "block byref variable without a complex expression"); if (DV.hasComplexAddress()) addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); else @@ -1133,7 +1205,7 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, if (DIExpr->isEntryValue()) { DwarfExpr.setEntryValueFlag(); - DwarfExpr.addEntryValueExpression(Cursor); + DwarfExpr.beginEntryValueExpression(Cursor); } const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo(); diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index ea980dfda17e..1b7ea2673ac0 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -227,12 +227,35 @@ public: void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); + /// This takes a DWARF 5 tag and returns it or a GNU analog. 
+ dwarf::Tag getDwarf5OrGNUTag(dwarf::Tag Tag) const; + + /// This takes a DWARF 5 attribute and returns it or a GNU analog. + dwarf::Attribute getDwarf5OrGNUAttr(dwarf::Attribute Attr) const; + + /// This takes a DWARF 5 location atom and either returns it or a GNU analog. + dwarf::LocationAtom getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const; + /// Construct a call site entry DIE describing a call within \p Scope to a - /// callee described by \p CalleeSP. \p IsTail specifies whether the call is - /// a tail call. \p PCOffset must be non-zero for non-tail calls or be the + /// callee described by \p CalleeSP. + /// \p IsTail specifies whether the call is a tail call. + /// \p PCAddr (used for GDB + DWARF 4 tuning) points to the PC value after + /// the call instruction. + /// \p PCOffset (used for cases other than GDB + DWARF 4 tuning) must be + /// non-zero for non-tail calls (in the case of non-gdb tuning, since for + /// GDB + DWARF 5 tuning we still generate PC info for tail calls) or be the /// function-local offset to PC value after the call instruction. - DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram &CalleeSP, - bool IsTail, const MCExpr *PCOffset); + /// \p CallReg is a register location for an indirect call. For direct calls + /// the \p CallReg is set to 0. + DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram *CalleeSP, + bool IsTail, const MCSymbol *PCAddr, + const MCExpr *PCOffset, unsigned CallReg); + /// Construct call site parameter DIEs for the \p CallSiteDIE. The \p Params + /// were collected by the \ref collectCallSiteParameters. + /// Note: The order of parameters does not matter, since debuggers recognize + /// call site parameters by the DW_AT_location attribute. + void constructCallSiteParmEntryDIEs(DIE &CallSiteDIE, + SmallVector<DbgCallSiteParam, 4> &Params); /// Construct import_module DIE. DIE *constructImportedEntityDIE(const DIImportedEntity *Module); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 71bb2b0858cc..c505e77e5acd 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" @@ -39,6 +40,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/DebugInfo/DWARF/DWARFExpression.h" @@ -83,6 +85,8 @@ using namespace llvm; #define DEBUG_TYPE "dwarfdebug" +STATISTIC(NumCSParams, "Number of dbg call site params created"); + static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, cl::desc("Disable debug info printing")); @@ -166,26 +170,26 @@ static const char *const DbgTimerDescription = "DWARF Debug Writer"; static constexpr unsigned ULEB128PadSize = 4; void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) { - BS.EmitInt8( + getActiveStreamer().EmitInt8( Op, Comment ? 
Twine(Comment) + " " + dwarf::OperationEncodingString(Op) : dwarf::OperationEncodingString(Op)); } void DebugLocDwarfExpression::emitSigned(int64_t Value) { - BS.EmitSLEB128(Value, Twine(Value)); + getActiveStreamer().EmitSLEB128(Value, Twine(Value)); } void DebugLocDwarfExpression::emitUnsigned(uint64_t Value) { - BS.EmitULEB128(Value, Twine(Value)); + getActiveStreamer().EmitULEB128(Value, Twine(Value)); } void DebugLocDwarfExpression::emitData1(uint8_t Value) { - BS.EmitInt8(Value, Twine(Value)); + getActiveStreamer().EmitInt8(Value, Twine(Value)); } void DebugLocDwarfExpression::emitBaseTypeRef(uint64_t Idx) { assert(Idx < (1ULL << (ULEB128PadSize * 7)) && "Idx wont fit"); - BS.EmitULEB128(Idx, Twine(Idx), ULEB128PadSize); + getActiveStreamer().EmitULEB128(Idx, Twine(Idx), ULEB128PadSize); } bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, @@ -194,54 +198,34 @@ bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, return false; } -bool DbgVariable::isBlockByrefVariable() const { - assert(getVariable() && "Invalid complex DbgVariable!"); - return getVariable()->getType()->isBlockByrefStruct(); +void DebugLocDwarfExpression::enableTemporaryBuffer() { + assert(!IsBuffering && "Already buffering?"); + if (!TmpBuf) + TmpBuf = std::make_unique<TempBuffer>(OutBS.GenerateComments); + IsBuffering = true; } -const DIType *DbgVariable::getType() const { - DIType *Ty = getVariable()->getType(); - // FIXME: isBlockByrefVariable should be reformulated in terms of complex - // addresses instead. - if (Ty->isBlockByrefStruct()) { - /* Byref variables, in Blocks, are declared by the programmer as - "SomeType VarName;", but the compiler creates a - __Block_byref_x_VarName struct, and gives the variable VarName - either the struct, or a pointer to the struct, as its type. This - is necessary for various behind-the-scenes things the compiler - needs to do with by-reference variables in blocks. - - However, as far as the original *programmer* is concerned, the - variable should still have type 'SomeType', as originally declared. - - The following function dives into the __Block_byref_x_VarName - struct to find the original type of the variable. This will be - passed back to the code generating the type for the Debug - Information Entry for the variable 'VarName'. 'VarName' will then - have the original type 'SomeType' in its debug information. - - The original type 'SomeType' will be the type of the field named - 'VarName' inside the __Block_byref_x_VarName struct. - - NOTE: In order for this to not completely fail on the debugger - side, the Debug Information Entry for the variable VarName needs to - have a DW_AT_location that tells the debugger how to unwind through - the pointers and __Block_byref_x_VarName struct to find the actual - value of the variable. The function addBlockByrefType does this. */ - DIType *subType = Ty; - uint16_t tag = Ty->getTag(); - - if (tag == dwarf::DW_TAG_pointer_type) - subType = cast<DIDerivedType>(Ty)->getBaseType(); - - auto Elements = cast<DICompositeType>(subType)->getElements(); - for (unsigned i = 0, N = Elements.size(); i < N; ++i) { - auto *DT = cast<DIDerivedType>(Elements[i]); - if (getName() == DT->getName()) - return DT->getBaseType(); - } +void DebugLocDwarfExpression::disableTemporaryBuffer() { IsBuffering = false; } + +unsigned DebugLocDwarfExpression::getTemporaryBufferSize() { + return TmpBuf ? 
TmpBuf->Bytes.size() : 0; +} + +void DebugLocDwarfExpression::commitTemporaryBuffer() { + if (!TmpBuf) + return; + for (auto Byte : enumerate(TmpBuf->Bytes)) { + const char *Comment = (Byte.index() < TmpBuf->Comments.size()) + ? TmpBuf->Comments[Byte.index()].c_str() + : ""; + OutBS.EmitInt8(Byte.value(), Comment); } - return Ty; + TmpBuf->Bytes.clear(); + TmpBuf->Comments.clear(); +} + +const DIType *DbgVariable::getType() const { + return getVariable()->getType(); } /// Get .debug_loc entry for the instruction range starting at MI. @@ -275,7 +259,7 @@ void DbgVariable::initializeDbgValue(const MachineInstr *DbgValue) { assert(getInlinedAt() == DbgValue->getDebugLoc()->getInlinedAt() && "Wrong inlined-at"); - ValueLoc = llvm::make_unique<DbgValueLoc>(getDebugLocValue(DbgValue)); + ValueLoc = std::make_unique<DbgValueLoc>(getDebugLocValue(DbgValue)); if (auto *E = DbgValue->getDebugExpression()) if (E->getNumElements()) FrameIndexExprs.push_back({0, E}); @@ -551,6 +535,157 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, } } +/// Try to interpret values loaded into registers that forward parameters +/// for \p CallMI. Store parameters with interpreted values into \p Params. +static void collectCallSiteParameters(const MachineInstr *CallMI, + ParamSet &Params) { + auto *MF = CallMI->getMF(); + auto CalleesMap = MF->getCallSitesInfo(); + auto CallFwdRegsInfo = CalleesMap.find(CallMI); + + // There is no information for the call instruction. + if (CallFwdRegsInfo == CalleesMap.end()) + return; + + auto *MBB = CallMI->getParent(); + const auto &TRI = MF->getSubtarget().getRegisterInfo(); + const auto &TII = MF->getSubtarget().getInstrInfo(); + const auto &TLI = MF->getSubtarget().getTargetLowering(); + + // Skip the call instruction. + auto I = std::next(CallMI->getReverseIterator()); + + DenseSet<unsigned> ForwardedRegWorklist; + // Add all the forwarding registers into the ForwardedRegWorklist. + for (auto ArgReg : CallFwdRegsInfo->second) { + bool InsertedReg = ForwardedRegWorklist.insert(ArgReg.Reg).second; + assert(InsertedReg && "Single register used to forward two arguments?"); + (void)InsertedReg; + } + + // We erase, from the ForwardedRegWorklist, those forwarding registers for + // which we successfully describe a loaded value (by using + // the describeLoadedValue()). For those remaining arguments in the working + // list, for which we do not describe a loaded value by + // the describeLoadedValue(), we try to generate an entry value expression + // for their call site value description, if the call is within the entry MBB. + // The RegsForEntryValues maps a forwarding register into the register holding + // the entry value. + // TODO: Handle situations when call site parameter value can be described + // as the entry value within basic blocks other than the first one. + bool ShouldTryEmitEntryVals = MBB->getIterator() == MF->begin(); + DenseMap<unsigned, unsigned> RegsForEntryValues; + + // If the MI is an instruction defining one or more parameters' forwarding + // registers, add those defines. We can currently only describe forwarded + // registers that are explicitly defined, but keep track of implicit defines + // also to remove those registers from the work list.
+ auto getForwardingRegsDefinedByMI = [&](const MachineInstr &MI, + SmallVectorImpl<unsigned> &Explicit, + SmallVectorImpl<unsigned> &Implicit) { + if (MI.isDebugInstr()) + return; + + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg() && MO.isDef() && + Register::isPhysicalRegister(MO.getReg())) { + for (auto FwdReg : ForwardedRegWorklist) { + if (TRI->regsOverlap(FwdReg, MO.getReg())) { + if (MO.isImplicit()) + Implicit.push_back(FwdReg); + else + Explicit.push_back(FwdReg); + break; + } + } + } + } + }; + + auto finishCallSiteParam = [&](DbgValueLoc DbgLocVal, unsigned Reg) { + unsigned FwdReg = Reg; + if (ShouldTryEmitEntryVals) { + auto EntryValReg = RegsForEntryValues.find(Reg); + if (EntryValReg != RegsForEntryValues.end()) + FwdReg = EntryValReg->second; + } + + DbgCallSiteParam CSParm(FwdReg, DbgLocVal); + Params.push_back(CSParm); + ++NumCSParams; + }; + + // Search for a loaded value in forwarding registers. + for (; I != MBB->rend(); ++I) { + // If the next instruction is a call we cannot interpret the parameters' + // forwarding registers, or we have finished the interpretation of all parameters. + if (I->isCall()) + return; + + if (ForwardedRegWorklist.empty()) + return; + + SmallVector<unsigned, 4> ExplicitFwdRegDefs; + SmallVector<unsigned, 4> ImplicitFwdRegDefs; + getForwardingRegsDefinedByMI(*I, ExplicitFwdRegDefs, ImplicitFwdRegDefs); + if (ExplicitFwdRegDefs.empty() && ImplicitFwdRegDefs.empty()) + continue; + + // If the MI clobbers more than one forwarding register we must remove + // all of them from the working list. + for (auto Reg : concat<unsigned>(ExplicitFwdRegDefs, ImplicitFwdRegDefs)) + ForwardedRegWorklist.erase(Reg); + + // The describeLoadedValue() hook currently does not have any information + // about which register it should describe in case of multiple defines, so + // for now we only handle instructions where a forwarded register is (at + // least partially) defined by the instruction's single explicit define. + if (I->getNumExplicitDefs() != 1 || ExplicitFwdRegDefs.empty()) + continue; + unsigned Reg = ExplicitFwdRegDefs[0]; + + if (auto ParamValue = TII->describeLoadedValue(*I)) { + if (ParamValue->first.isImm()) { + int64_t Val = ParamValue->first.getImm(); + DbgValueLoc DbgLocVal(ParamValue->second, Val); + finishCallSiteParam(DbgLocVal, Reg); + } else if (ParamValue->first.isReg()) { + Register RegLoc = ParamValue->first.getReg(); + unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + Register FP = TRI->getFrameRegister(*MF); + bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP); + if (TRI->isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) { + DbgValueLoc DbgLocVal(ParamValue->second, + MachineLocation(RegLoc, + /*IsIndirect=*/IsSPorFP)); + finishCallSiteParam(DbgLocVal, Reg); + } else if (ShouldTryEmitEntryVals) { + ForwardedRegWorklist.insert(RegLoc); + RegsForEntryValues[RegLoc] = Reg; + } + } + } + } + + // Emit the call site parameter's value as an entry value. + if (ShouldTryEmitEntryVals) {
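Stripped of the machine-IR details, the scan above is a shrinking-worklist algorithm: a register leaves the worklist the first time a definition of it is seen, a successfully described definition becomes a parameter, and whatever survives an entry-block scan can only hold the value it had on function entry. A compact model of that shape, with hypothetical Instr and Param types standing in for the MachineInstr machinery:

#include <cstdint>
#include <optional>
#include <set>
#include <utility>
#include <vector>

// Hypothetical stand-ins; not the real MachineInstr/TargetInstrInfo types.
struct Instr {
  std::set<unsigned> Defs;                            // registers this instruction defines
  std::optional<std::pair<unsigned, int64_t>> Loaded; // (reg, value) if describable
};
struct Param { unsigned Reg; int64_t Value; bool IsEntryValue; };

// Walk back from the call: a register leaves the worklist the first time it
// is defined, a described definition becomes a parameter, and registers that
// are never defined before the scan ends still hold their entry values.
std::vector<Param> collect(const std::vector<Instr> &ScanBackFromCall,
                           std::set<unsigned> Worklist, bool InEntryBlock) {
  std::vector<Param> Params;
  for (const Instr &I : ScanBackFromCall) {
    if (Worklist.empty())
      break;
    for (unsigned R : I.Defs) {
      if (!Worklist.erase(R))
        continue; // not a forwarding register
      if (I.Loaded && I.Loaded->first == R)
        Params.push_back({R, I.Loaded->second, /*IsEntryValue=*/false});
    }
  }
  if (InEntryBlock) // fall back to DW_OP_entry_value for the survivors
    for (unsigned R : Worklist)
      Params.push_back({R, 0, /*IsEntryValue=*/true});
  return Params;
}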
+ // Create an expression where the register's entry value is used. + DIExpression *EntryExpr = DIExpression::get( + MF->getFunction().getContext(), {dwarf::DW_OP_LLVM_entry_value, 1}); + for (auto RegEntry : ForwardedRegWorklist) { + unsigned FwdReg = RegEntry; + auto EntryValReg = RegsForEntryValues.find(RegEntry); + if (EntryValReg != RegsForEntryValues.end()) + FwdReg = EntryValReg->second; + + DbgValueLoc DbgLocVal(EntryExpr, MachineLocation(RegEntry)); + DbgCallSiteParam CSParm(FwdReg, DbgLocVal); + Params.push_back(CSParm); + ++NumCSParams; + } + } +} + void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU, DIE &ScopeDIE, const MachineFunction &MF) { @@ -563,10 +698,11 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, // for both tail and non-tail calls. Don't use DW_AT_call_all_source_calls // because one of its requirements is not met: call site entries for // optimized-out calls are elided. - CU.addFlag(ScopeDIE, dwarf::DW_AT_call_all_calls); + CU.addFlag(ScopeDIE, CU.getDwarf5OrGNUAttr(dwarf::DW_AT_call_all_calls)); const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); assert(TII && "TargetInstrInfo not found: cannot label tail calls"); + bool ApplyGNUExtensions = getDwarfVersion() == 4 && tuneForGDB(); // Emit call site entries for each call or tail call in the function. for (const MachineBasicBlock &MBB : MF) { @@ -581,30 +717,66 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, return; // If this is a direct call, find the callee's subprogram. + // In the case of an indirect call, find the register that holds + // the callee. const MachineOperand &CalleeOp = MI.getOperand(0); - if (!CalleeOp.isGlobal()) - continue; - const Function *CalleeDecl = dyn_cast<Function>(CalleeOp.getGlobal()); - if (!CalleeDecl || !CalleeDecl->getSubprogram()) + if (!CalleeOp.isGlobal() && !CalleeOp.isReg()) continue; + unsigned CallReg = 0; + const DISubprogram *CalleeSP = nullptr; + const Function *CalleeDecl = nullptr; + if (CalleeOp.isReg()) { + CallReg = CalleeOp.getReg(); + if (!CallReg) + continue; + } else { + CalleeDecl = dyn_cast<Function>(CalleeOp.getGlobal()); + if (!CalleeDecl || !CalleeDecl->getSubprogram()) + continue; + CalleeSP = CalleeDecl->getSubprogram(); + } + // TODO: Omit call site entries for runtime calls (objc_msgSend, etc). - // TODO: Add support for indirect calls. bool IsTail = TII->isTailCall(MI); - // For tail calls, no return PC information is needed. For regular calls, - // the return PC is needed to disambiguate paths in the call graph which - // could lead to some target function. + // For tail calls under non-GDB tuning, no return PC information is needed. + // For regular calls (and tail calls in GDB tuning), the return PC + // is needed to disambiguate paths in the call graph which could lead to + // some target function. const MCExpr *PCOffset = - IsTail ? nullptr : getFunctionLocalOffsetAfterInsn(&MI); + (IsTail && !tuneForGDB()) ? nullptr + : getFunctionLocalOffsetAfterInsn(&MI); + + // Address of a call-like instruction for a normal call or a jump-like + // instruction for a tail call. This is needed for GDB + DWARF 4 tuning. + const MCSymbol *PCAddr = + ApplyGNUExtensions ? const_cast<MCSymbol*>(getLabelAfterInsn(&MI)) + : nullptr; + + assert((IsTail || PCOffset || PCAddr) && + "Call without return PC information"); - assert((IsTail || PCOffset) && "Call without return PC information"); LLVM_DEBUG(dbgs() << "CallSiteEntry: " << MF.getName() << " -> " << (IsTail ?
" [tail]" : "") - << "\n"); - CU.constructCallSiteEntryDIE(ScopeDIE, *CalleeDecl->getSubprogram(), - IsTail, PCOffset); + << (CalleeDecl ? CalleeDecl->getName() + : StringRef(MF.getSubtarget() + .getRegisterInfo() + ->getName(CallReg))) + << (IsTail ? " [IsTail]" : "") << "\n"); + + DIE &CallSiteDIE = + CU.constructCallSiteEntryDIE(ScopeDIE, CalleeSP, IsTail, PCAddr, + PCOffset, CallReg); + + // GDB and LLDB support call site parameter debug info. + if (Asm->TM.Options.EnableDebugEntryValues && + (tuneForGDB() || tuneForLLDB())) { + ParamSet Params; + // Try to interpret values of call site parameters. + collectCallSiteParameters(&MI, Params); + CU.constructCallSiteParmEntryDIEs(CallSiteDIE, Params); + } } } } @@ -680,7 +852,7 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { CompilationDir = DIUnit->getDirectory(); - auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>( + auto OwnedUnit = std::make_unique<DwarfCompileUnit>( InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder); DwarfCompileUnit &NewCU = *OwnedUnit; InfoHolder.addUnit(std::move(OwnedUnit)); @@ -793,8 +965,6 @@ void DwarfDebug::beginModule() { DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; Holder.setRnglistsTableBaseSym( Asm->createTempSymbol("rnglists_table_base")); - Holder.setLoclistsTableBaseSym( - Asm->createTempSymbol("loclists_table_base")); if (useSplitDwarf()) InfoHolder.setRnglistsTableBaseSym( @@ -907,7 +1077,7 @@ void DwarfDebug::finalizeModuleInfo() { // If we're splitting the dwarf out now that we've got the entire // CU then add the dwo id to it. auto *SkCU = TheCU.getSkeleton(); - if (useSplitDwarf() && !empty(TheCU.getUnitDie().children())) { + if (useSplitDwarf() && !TheCU.getUnitDie().children().empty()) { finishUnitAttributes(TheCU.getCUNode(), TheCU); TheCU.addString(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_name, Asm->TM.Options.MCOptions.SplitDwarfFile); @@ -951,7 +1121,7 @@ void DwarfDebug::finalizeModuleInfo() { // 2.17.3). U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0); else - U.setBaseAddress(TheCU.getRanges().front().getStart()); + U.setBaseAddress(TheCU.getRanges().front().Begin); U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges()); } @@ -959,15 +1129,19 @@ void DwarfDebug::finalizeModuleInfo() { // is a bit pessimistic under LTO. 
if (!AddrPool.isEmpty() && (getDwarfVersion() >= 5 || - (SkCU && !empty(TheCU.getUnitDie().children())))) + (SkCU && !TheCU.getUnitDie().children().empty()))) U.addAddrTableBase(); if (getDwarfVersion() >= 5) { if (U.hasRangeLists()) U.addRnglistsBase(); - if (!DebugLocs.getLists().empty() && !useSplitDwarf()) - U.addLoclistsBase(); + if (!DebugLocs.getLists().empty() && !useSplitDwarf()) { + DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base")); + U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_loclists_base, + DebugLocs.getSym(), + TLOF.getDwarfLoclistsSection()->getBeginSymbol()); + } } auto *CUNode = cast<DICompileUnit>(P.first); @@ -1105,7 +1279,7 @@ void DwarfDebug::collectVariableInfoFromMFTable( continue; ensureAbstractEntityIsCreatedIfScoped(TheCU, Var.first, Scope->getScopeNode()); - auto RegVar = llvm::make_unique<DbgVariable>( + auto RegVar = std::make_unique<DbgVariable>( cast<DILocalVariable>(Var.first), Var.second); RegVar->initializeMMI(VI.Expr, VI.Slot); if (DbgVariable *DbgVar = MFVars.lookup(Var)) @@ -1316,13 +1490,13 @@ DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU, ensureAbstractEntityIsCreatedIfScoped(TheCU, Node, Scope.getScopeNode()); if (isa<const DILocalVariable>(Node)) { ConcreteEntities.push_back( - llvm::make_unique<DbgVariable>(cast<const DILocalVariable>(Node), + std::make_unique<DbgVariable>(cast<const DILocalVariable>(Node), Location)); InfoHolder.addScopeVariable(&Scope, cast<DbgVariable>(ConcreteEntities.back().get())); } else if (isa<const DILabel>(Node)) { ConcreteEntities.push_back( - llvm::make_unique<DbgLabel>(cast<const DILabel>(Node), + std::make_unique<DbgLabel>(cast<const DILabel>(Node), Location, Sym)); InfoHolder.addScopeLabel(&Scope, cast<DbgLabel>(ConcreteEntities.back().get())); @@ -1419,11 +1593,14 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU, LexicalScope *Scope = nullptr; const DILabel *Label = cast<DILabel>(IL.first); + // The scope could have an extra lexical block file. + const DILocalScope *LocalScope = + Label->getScope()->getNonLexicalBlockFileScope(); // Get inlined DILocation if it is inlined label. if (const DILocation *IA = IL.second) - Scope = LScopes.findInlinedScope(Label->getScope(), IA); + Scope = LScopes.findInlinedScope(LocalScope, IA); else - Scope = LScopes.findLexicalScope(Label->getScope()); + Scope = LScopes.findLexicalScope(LocalScope); // If label scope is not found then skip this label. if (!Scope) continue; @@ -1607,6 +1784,9 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) { if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) return; + SectionLabels.insert(std::make_pair(&Asm->getFunctionBegin()->getSection(), + Asm->getFunctionBegin())); + DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit()); // Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function @@ -1654,7 +1834,7 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) { collectEntityInfo(TheCU, SP, Processed); // Add the range of this function to the list of ranges for the CU. - TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd())); + TheCU.addRange({Asm->getFunctionBegin(), Asm->getFunctionEnd()}); // Under -gmlt, skip building the subprogram if there are no inlined // subroutines inside it. 
But with -fdebug-info-for-profiling, the subprogram @@ -1836,9 +2016,10 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU, case dwarf::DW_TAG_union_type: case dwarf::DW_TAG_enumeration_type: return dwarf::PubIndexEntryDescriptor( - dwarf::GIEK_TYPE, CU->getLanguage() != dwarf::DW_LANG_C_plus_plus - ? dwarf::GIEL_STATIC - : dwarf::GIEL_EXTERNAL); + dwarf::GIEK_TYPE, + dwarf::isCPlusPlus((dwarf::SourceLanguage)CU->getLanguage()) + ? dwarf::GIEL_EXTERNAL + : dwarf::GIEL_STATIC); case dwarf::DW_TAG_typedef: case dwarf::DW_TAG_base_type: case dwarf::DW_TAG_subrange_type: @@ -1967,7 +2148,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, DWARFExpression Expr(Data, getDwarfVersion(), PtrSize); using Encoding = DWARFExpression::Operation::Encoding; - uint32_t Offset = 0; + uint64_t Offset = 0; for (auto &Op : Expr) { assert(Op.getCode() != dwarf::DW_OP_const_type && "3 operand ops not yet supported"); @@ -1990,7 +2171,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, if (Comment != End) Comment++; } else { - for (uint32_t J = Offset; J < Op.getOperandEndOffset(I); ++J) + for (uint64_t J = Offset; J < Op.getOperandEndOffset(I); ++J) Streamer.EmitInt8(Data.getData()[J], Comment != End ? *(Comment++) : ""); } Offset = Op.getOperandEndOffset(I); @@ -2020,7 +2201,7 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, if (DIExpr->isEntryValue()) { DwarfExpr.setEntryValueFlag(); - DwarfExpr.addEntryValueExpression(Cursor); + DwarfExpr.beginEntryValueExpression(Cursor); } const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo(); @@ -2083,7 +2264,7 @@ void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry, } // Emit the common part of the DWARF 5 range/locations list tables header. -static void emitListsTableHeaderStart(AsmPrinter *Asm, const DwarfFile &Holder, +static void emitListsTableHeaderStart(AsmPrinter *Asm, MCSymbol *TableStart, MCSymbol *TableEnd) { // Build the table header, which starts with the length field. @@ -2108,7 +2289,7 @@ static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm, const DwarfFile &Holder) { MCSymbol *TableStart = Asm->createTempSymbol("debug_rnglist_table_start"); MCSymbol *TableEnd = Asm->createTempSymbol("debug_rnglist_table_end"); - emitListsTableHeaderStart(Asm, Holder, TableStart, TableEnd); + emitListsTableHeaderStart(Asm, TableStart, TableEnd); Asm->OutStreamer->AddComment("Offset entry count"); Asm->emitInt32(Holder.getRangeLists().size()); @@ -2125,94 +2306,147 @@ static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm, // designates the end of the table for the caller to emit when the table is // complete. static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm, - const DwarfFile &Holder) { + const DwarfDebug &DD) { MCSymbol *TableStart = Asm->createTempSymbol("debug_loclist_table_start"); MCSymbol *TableEnd = Asm->createTempSymbol("debug_loclist_table_end"); - emitListsTableHeaderStart(Asm, Holder, TableStart, TableEnd); + emitListsTableHeaderStart(Asm, TableStart, TableEnd); + + const auto &DebugLocs = DD.getDebugLocs(); // FIXME: Generate the offsets table and use DW_FORM_loclistx with the // DW_AT_loclists_base attribute. Until then set the number of offsets to 0. Asm->OutStreamer->AddComment("Offset entry count"); Asm->emitInt32(0); - Asm->OutStreamer->EmitLabel(Holder.getLoclistsTableBaseSym()); + Asm->OutStreamer->EmitLabel(DebugLocs.getSym()); return TableEnd; } -// Emit locations into the .debug_loc/.debug_rnglists section. 
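For reference, the common header that emitListsTableHeaderStart() opens has the following DWARF32 layout; this is a descriptive sketch of the fields, not an emitter, and the unit length is backpatched through the TableStart/TableEnd label pair in the code above:

#include <cstdint>

// DWARF32 layout of the common .debug_rnglists/.debug_loclists table header.
// UnitLength counts the bytes that follow it, which is why the real code
// brackets the table with TableStart/TableEnd and emits a label difference.
struct ListsTableHeader {
  uint32_t UnitLength;             // TableEnd - TableStart
  uint16_t Version = 5;            // these tables are DWARF 5 only
  uint8_t AddressSize = 8;         // matches MAI->getCodePointerSize()
  uint8_t SegmentSelectorSize = 0;
  uint32_t OffsetEntryCount;       // 0 above, until DW_FORM_loclistx is used
};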
-void DwarfDebug::emitDebugLoc() { - if (DebugLocs.getLists().empty()) - return; +template <typename Ranges, typename PayloadEmitter> +static void emitRangeList( + DwarfDebug &DD, AsmPrinter *Asm, MCSymbol *Sym, const Ranges &R, + const DwarfCompileUnit &CU, unsigned BaseAddressx, unsigned OffsetPair, + unsigned StartxLength, unsigned EndOfList, + StringRef (*StringifyEnum)(unsigned), + bool ShouldUseBaseAddress, + PayloadEmitter EmitPayload) { - bool IsLocLists = getDwarfVersion() >= 5; - MCSymbol *TableEnd = nullptr; - if (IsLocLists) { - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getDwarfLoclistsSection()); - TableEnd = emitLoclistsTableHeader(Asm, useSplitDwarf() ? SkeletonHolder - : InfoHolder); - } else { - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getDwarfLocSection()); - } + auto Size = Asm->MAI->getCodePointerSize(); + bool UseDwarf5 = DD.getDwarfVersion() >= 5; - unsigned char Size = Asm->MAI->getCodePointerSize(); - for (const auto &List : DebugLocs.getLists()) { - Asm->OutStreamer->EmitLabel(List.Label); + // Emit our symbol so we can find the beginning of the range. + Asm->OutStreamer->EmitLabel(Sym); - const DwarfCompileUnit *CU = List.CU; - const MCSymbol *Base = CU->getBaseAddress(); - for (const auto &Entry : DebugLocs.getEntries(List)) { + // Gather all the ranges that apply to the same section so they can share + // a base address entry. + MapVector<const MCSection *, std::vector<decltype(&*R.begin())>> SectionRanges; + + for (const auto &Range : R) + SectionRanges[&Range.Begin->getSection()].push_back(&Range); + + const MCSymbol *CUBase = CU.getBaseAddress(); + bool BaseIsSet = false; + for (const auto &P : SectionRanges) { + auto *Base = CUBase; + if (!Base && ShouldUseBaseAddress) { + const MCSymbol *Begin = P.second.front()->Begin; + const MCSymbol *NewBase = DD.getSectionLabel(&Begin->getSection()); + if (!UseDwarf5) { + Base = NewBase; + BaseIsSet = true; + Asm->OutStreamer->EmitIntValue(-1, Size); + Asm->OutStreamer->AddComment(" base address"); + Asm->OutStreamer->EmitSymbolValue(Base, Size); + } else if (NewBase != Begin || P.second.size() > 1) { + // Only use a base address if + // * the existing pool address doesn't match (NewBase != Begin) + // * or, there's more than one entry to share the base address + Base = NewBase; + BaseIsSet = true; + Asm->OutStreamer->AddComment(StringifyEnum(BaseAddressx)); + Asm->emitInt8(BaseAddressx); + Asm->OutStreamer->AddComment(" base address index"); + Asm->EmitULEB128(DD.getAddressPool().getIndex(Base)); + } + } else if (BaseIsSet && !UseDwarf5) { + BaseIsSet = false; + assert(!Base); + Asm->OutStreamer->EmitIntValue(-1, Size); + Asm->OutStreamer->EmitIntValue(0, Size); + } + + for (const auto *RS : P.second) { + const MCSymbol *Begin = RS->Begin; + const MCSymbol *End = RS->End; + assert(Begin && "Range without a begin symbol?"); + assert(End && "Range without an end symbol?"); if (Base) { - // Set up the range. This range is relative to the entry point of the - // compile unit. This is a hard coded 0 for low_pc when we're emitting - // ranges, or the DW_AT_low_pc on the compile unit otherwise. - if (IsLocLists) { - Asm->OutStreamer->AddComment("DW_LLE_offset_pair"); - Asm->OutStreamer->EmitIntValue(dwarf::DW_LLE_offset_pair, 1); + if (UseDwarf5) { + // Emit offset_pair when we have a base. 
+ Asm->OutStreamer->AddComment(StringifyEnum(OffsetPair)); + Asm->emitInt8(OffsetPair); Asm->OutStreamer->AddComment(" starting offset"); - Asm->EmitLabelDifferenceAsULEB128(Entry.BeginSym, Base); + Asm->EmitLabelDifferenceAsULEB128(Begin, Base); Asm->OutStreamer->AddComment(" ending offset"); - Asm->EmitLabelDifferenceAsULEB128(Entry.EndSym, Base); + Asm->EmitLabelDifferenceAsULEB128(End, Base); } else { - Asm->EmitLabelDifference(Entry.BeginSym, Base, Size); - Asm->EmitLabelDifference(Entry.EndSym, Base, Size); + Asm->EmitLabelDifference(Begin, Base, Size); + Asm->EmitLabelDifference(End, Base, Size); } - - emitDebugLocEntryLocation(Entry, CU); - continue; - } - - // We have no base address. - if (IsLocLists) { - // TODO: Use DW_LLE_base_addressx + DW_LLE_offset_pair, or - // DW_LLE_startx_length in case if there is only a single range. - // That should reduce the size of the debug data emited. - // For now just use the DW_LLE_startx_length for all cases. - Asm->OutStreamer->AddComment("DW_LLE_startx_length"); - Asm->emitInt8(dwarf::DW_LLE_startx_length); - Asm->OutStreamer->AddComment(" start idx"); - Asm->EmitULEB128(AddrPool.getIndex(Entry.BeginSym)); + } else if (UseDwarf5) { + Asm->OutStreamer->AddComment(StringifyEnum(StartxLength)); + Asm->emitInt8(StartxLength); + Asm->OutStreamer->AddComment(" start index"); + Asm->EmitULEB128(DD.getAddressPool().getIndex(Begin)); Asm->OutStreamer->AddComment(" length"); - Asm->EmitLabelDifferenceAsULEB128(Entry.EndSym, Entry.BeginSym); + Asm->EmitLabelDifferenceAsULEB128(End, Begin); } else { - Asm->OutStreamer->EmitSymbolValue(Entry.BeginSym, Size); - Asm->OutStreamer->EmitSymbolValue(Entry.EndSym, Size); + Asm->OutStreamer->EmitSymbolValue(Begin, Size); + Asm->OutStreamer->EmitSymbolValue(End, Size); } - - emitDebugLocEntryLocation(Entry, CU); + EmitPayload(*RS); } + } - if (IsLocLists) { - // .debug_loclists section ends with DW_LLE_end_of_list. - Asm->OutStreamer->AddComment("DW_LLE_end_of_list"); - Asm->OutStreamer->EmitIntValue(dwarf::DW_LLE_end_of_list, 1); - } else { - // Terminate the .debug_loc list with two 0 values. - Asm->OutStreamer->EmitIntValue(0, Size); - Asm->OutStreamer->EmitIntValue(0, Size); - } + if (UseDwarf5) { + Asm->OutStreamer->AddComment(StringifyEnum(EndOfList)); + Asm->emitInt8(EndOfList); + } else { + // Terminate the list with two 0 values. + Asm->OutStreamer->EmitIntValue(0, Size); + Asm->OutStreamer->EmitIntValue(0, Size); } +} + +static void emitLocList(DwarfDebug &DD, AsmPrinter *Asm, const DebugLocStream::List &List) { + emitRangeList( + DD, Asm, List.Label, DD.getDebugLocs().getEntries(List), *List.CU, + dwarf::DW_LLE_base_addressx, dwarf::DW_LLE_offset_pair, + dwarf::DW_LLE_startx_length, dwarf::DW_LLE_end_of_list, + llvm::dwarf::LocListEncodingString, + /* ShouldUseBaseAddress */ true, + [&](const DebugLocStream::Entry &E) { + DD.emitDebugLocEntryLocation(E, List.CU); + }); +} + +// Emit locations into the .debug_loc/.debug_rnglists section. 
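The encoding trade-off the unified emitRangeList() makes is easiest to see in the raw byte stream: one base_addressx entry is paid once and amortized over small ULEB128 offset pairs, while a list without a base falls back to self-contained startx_length entries. A simplified standalone emitter for a single list (the DW_RLE_* values are the real DWARF 5 encodings, and the loclists DW_LLE_* codes for these four entry kinds share the same values; the address-pool lookup is a hypothetical callback):

#include <cstdint>
#include <vector>

enum : uint8_t {
  DW_RLE_end_of_list   = 0x00,
  DW_RLE_base_addressx = 0x01,
  DW_RLE_startx_length = 0x03,
  DW_RLE_offset_pair   = 0x04,
};

static void emitULEB128(std::vector<uint8_t> &Out, uint64_t V) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (V);
}

struct Range { uint64_t Begin, End; };

// Emit one list: pay for a base entry once and encode each range as a small
// offset pair, or emit self-contained startx_length entries. AddrIndex is a
// hypothetical callback standing in for AddressPool::getIndex().
void emitList(std::vector<uint8_t> &Out, const std::vector<Range> &Ranges,
              bool UseBase, uint64_t BaseIndex, uint64_t BaseAddr,
              uint64_t (*AddrIndex)(uint64_t)) {
  if (UseBase) {
    Out.push_back(DW_RLE_base_addressx);
    emitULEB128(Out, BaseIndex);            // index into the address pool
  }
  for (const Range &R : Ranges) {
    if (UseBase) {
      Out.push_back(DW_RLE_offset_pair);
      emitULEB128(Out, R.Begin - BaseAddr); // starting offset
      emitULEB128(Out, R.End - BaseAddr);   // ending offset
    } else {
      Out.push_back(DW_RLE_startx_length);
      emitULEB128(Out, AddrIndex(R.Begin)); // start index
      emitULEB128(Out, R.End - R.Begin);    // length
    }
  }
  Out.push_back(DW_RLE_end_of_list);
}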
+void DwarfDebug::emitDebugLoc() { + if (DebugLocs.getLists().empty()) + return; + + MCSymbol *TableEnd = nullptr; + if (getDwarfVersion() >= 5) { + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfLoclistsSection()); + TableEnd = emitLoclistsTableHeader(Asm, *this); + } else { + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfLocSection()); + } + + for (const auto &List : DebugLocs.getLists()) + emitLocList(*this, Asm, List); if (TableEnd) Asm->OutStreamer->EmitLabel(TableEnd); @@ -2232,9 +2466,9 @@ void DwarfDebug::emitDebugLocDWO() { // Ideally/in v5, this could use SectionLabels to reuse existing addresses // in the address pool to minimize object size/relocations. Asm->emitInt8(dwarf::DW_LLE_startx_length); - unsigned idx = AddrPool.getIndex(Entry.BeginSym); + unsigned idx = AddrPool.getIndex(Entry.Begin); Asm->EmitULEB128(idx); - Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4); + Asm->EmitLabelDifference(Entry.End, Entry.Begin, 4); emitDebugLocEntryLocation(Entry, List.CU); } @@ -2360,7 +2594,7 @@ void DwarfDebug::emitDebugARanges() { // 7.20 in the Dwarf specs requires the table to be aligned to a tuple. unsigned Padding = - OffsetToAlignment(sizeof(int32_t) + ContentSize, TupleSize); + offsetToAlignment(sizeof(int32_t) + ContentSize, Align(TupleSize)); ContentSize += Padding; ContentSize += (List.size() + 1) * TupleSize; @@ -2405,93 +2639,13 @@ void DwarfDebug::emitDebugARanges() { /// Emit a single range list. We handle both DWARF v5 and earlier. static void emitRangeList(DwarfDebug &DD, AsmPrinter *Asm, const RangeSpanList &List) { - - auto DwarfVersion = DD.getDwarfVersion(); - // Emit our symbol so we can find the beginning of the range. - Asm->OutStreamer->EmitLabel(List.getSym()); - // Gather all the ranges that apply to the same section so they can share - // a base address entry. - MapVector<const MCSection *, std::vector<const RangeSpan *>> SectionRanges; - // Size for our labels. - auto Size = Asm->MAI->getCodePointerSize(); - - for (const RangeSpan &Range : List.getRanges()) - SectionRanges[&Range.getStart()->getSection()].push_back(&Range); - - const DwarfCompileUnit &CU = List.getCU(); - const MCSymbol *CUBase = CU.getBaseAddress(); - bool BaseIsSet = false; - for (const auto &P : SectionRanges) { - // Don't bother with a base address entry if there's only one range in - // this section in this range list - for example ranges for a CU will - // usually consist of single regions from each of many sections - // (-ffunction-sections, or just C++ inline functions) except under LTO - // or optnone where there may be holes in a single CU's section - // contributions. - auto *Base = CUBase; - if (!Base && (P.second.size() > 1 || DwarfVersion < 5) && - (CU.getCUNode()->getRangesBaseAddress() || DwarfVersion >= 5)) { - BaseIsSet = true; - // FIXME/use care: This may not be a useful base address if it's not - // the lowest address/range in this object. 
- Base = P.second.front()->getStart(); - if (DwarfVersion >= 5) { - Base = DD.getSectionLabel(&Base->getSection()); - Asm->OutStreamer->AddComment("DW_RLE_base_addressx"); - Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_base_addressx, 1); - Asm->OutStreamer->AddComment(" base address index"); - Asm->EmitULEB128(DD.getAddressPool().getIndex(Base)); - } else { - Asm->OutStreamer->EmitIntValue(-1, Size); - Asm->OutStreamer->AddComment(" base address"); - Asm->OutStreamer->EmitSymbolValue(Base, Size); - } - } else if (BaseIsSet && DwarfVersion < 5) { - BaseIsSet = false; - assert(!Base); - Asm->OutStreamer->EmitIntValue(-1, Size); - Asm->OutStreamer->EmitIntValue(0, Size); - } - - for (const auto *RS : P.second) { - const MCSymbol *Begin = RS->getStart(); - const MCSymbol *End = RS->getEnd(); - assert(Begin && "Range without a begin symbol?"); - assert(End && "Range without an end symbol?"); - if (Base) { - if (DwarfVersion >= 5) { - // Emit DW_RLE_offset_pair when we have a base. - Asm->OutStreamer->AddComment("DW_RLE_offset_pair"); - Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_offset_pair, 1); - Asm->OutStreamer->AddComment(" starting offset"); - Asm->EmitLabelDifferenceAsULEB128(Begin, Base); - Asm->OutStreamer->AddComment(" ending offset"); - Asm->EmitLabelDifferenceAsULEB128(End, Base); - } else { - Asm->EmitLabelDifference(Begin, Base, Size); - Asm->EmitLabelDifference(End, Base, Size); - } - } else if (DwarfVersion >= 5) { - Asm->OutStreamer->AddComment("DW_RLE_startx_length"); - Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_startx_length, 1); - Asm->OutStreamer->AddComment(" start index"); - Asm->EmitULEB128(DD.getAddressPool().getIndex(Begin)); - Asm->OutStreamer->AddComment(" length"); - Asm->EmitLabelDifferenceAsULEB128(End, Begin); - } else { - Asm->OutStreamer->EmitSymbolValue(Begin, Size); - Asm->OutStreamer->EmitSymbolValue(End, Size); - } - } - } - if (DwarfVersion >= 5) { - Asm->OutStreamer->AddComment("DW_RLE_end_of_list"); - Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_end_of_list, 1); - } else { - // Terminate the list with two 0 values. 
- Asm->OutStreamer->EmitIntValue(0, Size); - Asm->OutStreamer->EmitIntValue(0, Size); - } + emitRangeList(DD, Asm, List.getSym(), List.getRanges(), List.getCU(), + dwarf::DW_RLE_base_addressx, dwarf::DW_RLE_offset_pair, + dwarf::DW_RLE_startx_length, dwarf::DW_RLE_end_of_list, + llvm::dwarf::RangeListEncodingString, + List.getCU().getCUNode()->getRangesBaseAddress() || + DD.getDwarfVersion() >= 5, + [](auto) {}); } static void emitDebugRangesImpl(DwarfDebug &DD, AsmPrinter *Asm, @@ -2637,7 +2791,7 @@ void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { - auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>( + auto OwnedUnit = std::make_unique<DwarfCompileUnit>( CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder); DwarfCompileUnit &NewCU = *OwnedUnit; NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection()); @@ -2737,7 +2891,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, bool TopLevelType = TypeUnitsUnderConstruction.empty(); AddrPool.resetUsedFlag(); - auto OwnedUnit = llvm::make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder, + auto OwnedUnit = std::make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder, getDwoLineTable(CU)); DwarfTypeUnit &NewTU = *OwnedUnit; DIE &UnitDie = NewTU.getUnitDie(); @@ -2879,10 +3033,6 @@ uint16_t DwarfDebug::getDwarfVersion() const { return Asm->OutStreamer->getContext().getDwarfVersion(); } -void DwarfDebug::addSectionLabel(const MCSymbol *Sym) { - SectionLabels.insert(std::make_pair(&Sym->getSection(), Sym)); -} - const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) { return SectionLabels.find(S)->second; } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 3ac474e2bdda..c8c511f67c2a 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -153,7 +153,7 @@ public: assert(!ValueLoc && "Already initialized?"); assert(!Value.getExpression()->isFragment() && "Fragments not supported."); - ValueLoc = llvm::make_unique<DbgValueLoc>(Value); + ValueLoc = std::make_unique<DbgValueLoc>(Value); if (auto *E = ValueLoc->getExpression()) if (E->getNumElements()) FrameIndexExprs.push_back({0, E}); @@ -216,7 +216,6 @@ public: return !FrameIndexExprs.empty(); } - bool isBlockByrefVariable() const; const DIType *getType() const; static bool classof(const DbgEntity *N) { @@ -254,6 +253,25 @@ public: } }; +/// Used for tracking debug info about call site parameters. +class DbgCallSiteParam { +private: + unsigned Register; ///< Parameter register at the callee entry point. + DbgValueLoc Value; ///< Corresponding location for the parameter value at + ///< the call site. +public: + DbgCallSiteParam(unsigned Reg, DbgValueLoc Val) + : Register(Reg), Value(Val) { + assert(Reg && "Parameter register cannot be undef"); + } + + unsigned getRegister() const { return Register; } + DbgValueLoc getValue() const { return Value; } +}; + +/// Collection used for storing debug call site parameters. +using ParamSet = SmallVector<DbgCallSiteParam, 4>; + /// Helper used to pair up a symbol and its DWARF compile unit. 
struct SymbolCU { SymbolCU(DwarfCompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {} diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 2858afaa1cf1..1c5a244d7c5d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/Support/ErrorHandling.h" @@ -97,7 +98,7 @@ void DwarfExpression::addAnd(unsigned Mask) { bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, unsigned MachineReg, unsigned MaxSize) { - if (!TRI.isPhysicalRegister(MachineReg)) { + if (!llvm::Register::isPhysicalRegister(MachineReg)) { if (isFrameRegister(TRI, MachineReg)) { DwarfRegs.push_back({-1, 0, nullptr}); return true; @@ -241,15 +242,22 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, return false; } - // Handle simple register locations. - if (!isMemoryLocation() && !HasComplexExpression) { + // Handle simple register locations. If we are supposed to emit + // a call site parameter expression and if that expression is just a register + // location, emit it with addBReg and offset 0, because we should emit a DWARF + // expression representing a value, rather than a location. + if (!isMemoryLocation() && !HasComplexExpression && + (!isParameterValue() || isEntryValue())) { for (auto &Reg : DwarfRegs) { if (Reg.DwarfRegNo >= 0) addReg(Reg.DwarfRegNo, Reg.Comment); addOpPiece(Reg.Size); } - if (isEntryValue() && DwarfVersion >= 4) + if (isEntryValue()) + finalizeEntryValue(); + + if (isEntryValue() && !isParameterValue() && DwarfVersion >= 4) emitOp(dwarf::DW_OP_stack_value); DwarfRegs.clear(); @@ -275,19 +283,27 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, // Pattern-match combinations for which more efficient representations exist. // [Reg, DW_OP_plus_uconst, Offset] --> [DW_OP_breg, Offset]. if (Op && (Op->getOp() == dwarf::DW_OP_plus_uconst)) { - SignedOffset = Op->getArg(0); - ExprCursor.take(); + uint64_t Offset = Op->getArg(0); + uint64_t IntMax = static_cast<uint64_t>(std::numeric_limits<int>::max()); + if (Offset <= IntMax) { + SignedOffset = Offset; + ExprCursor.take(); + } } // [Reg, DW_OP_constu, Offset, DW_OP_plus] --> [DW_OP_breg, Offset] // [Reg, DW_OP_constu, Offset, DW_OP_minus] --> [DW_OP_breg,-Offset] // If Reg is a subregister we need to mask it out before subtracting. if (Op && Op->getOp() == dwarf::DW_OP_constu) { + uint64_t Offset = Op->getArg(0); + uint64_t IntMax = static_cast<uint64_t>(std::numeric_limits<int>::max()); auto N = ExprCursor.peekNext(); - if (N && (N->getOp() == dwarf::DW_OP_plus || - (N->getOp() == dwarf::DW_OP_minus && !SubRegisterSizeInBits))) { - int Offset = Op->getArg(0); - SignedOffset = (N->getOp() == dwarf::DW_OP_minus) ? 
-Offset : Offset; + if (N && N->getOp() == dwarf::DW_OP_plus && Offset <= IntMax) { + SignedOffset = Offset; + ExprCursor.consume(2); + } else if (N && N->getOp() == dwarf::DW_OP_minus && + !SubRegisterSizeInBits && Offset <= IntMax + 1) { + SignedOffset = -static_cast<int64_t>(Offset); ExprCursor.consume(2); } } @@ -300,17 +316,34 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, return true; } -void DwarfExpression::addEntryValueExpression(DIExpressionCursor &ExprCursor) { +void DwarfExpression::beginEntryValueExpression( + DIExpressionCursor &ExprCursor) { auto Op = ExprCursor.take(); - assert(Op && Op->getOp() == dwarf::DW_OP_entry_value); + (void)Op; + assert(Op && Op->getOp() == dwarf::DW_OP_LLVM_entry_value); assert(!isMemoryLocation() && "We don't support entry values of memory locations yet"); + assert(!IsEmittingEntryValue && "Already emitting entry value?"); + assert(Op->getArg(0) == 1 && + "Can currently only emit entry values covering a single operation"); - if (DwarfVersion >= 5) - emitOp(dwarf::DW_OP_entry_value); - else - emitOp(dwarf::DW_OP_GNU_entry_value); - emitUnsigned(Op->getArg(0)); + emitOp(CU.getDwarf5OrGNULocationAtom(dwarf::DW_OP_entry_value)); + IsEmittingEntryValue = true; + enableTemporaryBuffer(); +} + +void DwarfExpression::finalizeEntryValue() { + assert(IsEmittingEntryValue && "Entry value not open?"); + disableTemporaryBuffer(); + + // Emit the entry value's size operand. + unsigned Size = getTemporaryBufferSize(); + emitUnsigned(Size); + + // Emit the entry value's DWARF block operand. + commitTemporaryBuffer(); + + IsEmittingEntryValue = false; } /// Assuming a well-formed expression, match "DW_OP_deref* DW_OP_LLVM_fragment?". @@ -340,7 +373,17 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, while (ExprCursor) { auto Op = ExprCursor.take(); - switch (Op->getOp()) { + uint64_t OpNum = Op->getOp(); + + if (OpNum >= dwarf::DW_OP_reg0 && OpNum <= dwarf::DW_OP_reg31) { + emitOp(OpNum); + continue; + } else if (OpNum >= dwarf::DW_OP_breg0 && OpNum <= dwarf::DW_OP_breg31) { + addBReg(OpNum - dwarf::DW_OP_breg0, Op->getArg(0)); + continue; + } + + switch (OpNum) { case dwarf::DW_OP_LLVM_fragment: { unsigned SizeInBits = Op->getArg(1); unsigned FragmentOffset = Op->getArg(0); @@ -389,10 +432,13 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, case dwarf::DW_OP_lit0: case dwarf::DW_OP_not: case dwarf::DW_OP_dup: - emitOp(Op->getOp()); + emitOp(OpNum); break; case dwarf::DW_OP_deref: assert(!isRegisterLocation()); + // For more detailed explanation see llvm.org/PR43343. + assert(!isParameterValue() && "Parameter entry values should not be " + "dereferenced due to safety reasons."); if (!isMemoryLocation() && ::isMemoryLocation(ExprCursor)) // Turning this into a memory location description makes the deref // implicit. @@ -458,12 +504,21 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, case dwarf::DW_OP_LLVM_tag_offset: TagOffset = Op->getArg(0); break; + case dwarf::DW_OP_regx: + emitOp(dwarf::DW_OP_regx); + emitUnsigned(Op->getArg(0)); + break; + case dwarf::DW_OP_bregx: + emitOp(dwarf::DW_OP_bregx); + emitUnsigned(Op->getArg(0)); + emitSigned(Op->getArg(1)); + break; default: llvm_unreachable("unhandled opcode found in expression"); } } - if (isImplicitLocation()) + if (isImplicitLocation() && !isParameterValue()) // Turn this into an implicit location description. 
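The begin/finalize pair above exists because the first operand of DW_OP_entry_value is the byte size of the DWARF block that follows it, and that size is only known once the block has been produced. A standalone sketch of the staging mechanism (a single-byte size operand for brevity, where the real emitter writes ULEB128):

#include <cstdint>
#include <vector>

// The staging mechanism behind beginEntryValueExpression() and
// finalizeEntryValue(): ops emitted while buffering are held back so the
// size operand can be written first when the entry value is finalized.
class BufferedExprEmitter {
  std::vector<uint8_t> Out, Tmp;
  bool Buffering = false;

public:
  void emitByte(uint8_t B) { (Buffering ? Tmp : Out).push_back(B); }

  void beginEntryValue(uint8_t EntryValueOp) {
    emitByte(EntryValueOp); // DW_OP_entry_value or DW_OP_GNU_entry_value
    Buffering = true;       // later ops land in the temporary buffer
  }

  void finalizeEntryValue() {
    Buffering = false;
    emitByte(static_cast<uint8_t>(Tmp.size()));    // the size operand
    Out.insert(Out.end(), Tmp.begin(), Tmp.end()); // commit the staged block
    Tmp.clear();
  }

  const std::vector<uint8_t> &bytes() const { return Out; }
};

Committing an entry value that covers a single DW_OP_reg5, for instance, yields the byte sequence 0xa3 0x01 0x55: the operation, its size, then the staged block.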
addStackValue(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h index ec2ef6e575f7..1ad46669f9b2 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -13,6 +13,7 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H +#include "ByteStreamer.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" @@ -26,7 +27,6 @@ namespace llvm { class AsmPrinter; class APInt; -class ByteStreamer; class DwarfCompileUnit; class DIELoc; class TargetRegisterInfo; @@ -95,6 +95,13 @@ public: /// Base class containing the logic for constructing DWARF expressions /// independently of whether they are emitted into a DIE or into a .debug_loc /// entry. +/// +/// Some DWARF operations, e.g. DW_OP_entry_value, need to calculate the size +/// of a succeeding DWARF block before the latter is emitted to the output. +/// To handle such cases, data can conditionally be emitted to a temporary +/// buffer, which can later on be committed to the main output. The size of the +/// temporary buffer is queryable, allowing for the size of the data to be +/// emitted before the data is committed. class DwarfExpression { protected: /// Holds information about all subregisters comprising a register location. @@ -104,6 +111,9 @@ protected: const char *Comment; }; + /// Whether we are currently emitting an entry value operation. + bool IsEmittingEntryValue = false; + DwarfCompileUnit &CU; /// The register location, if any. @@ -120,7 +130,7 @@ protected: enum { Unknown = 0, Register, Memory, Implicit }; /// The flags of location description being produced. - enum { EntryValue = 1 }; + enum { EntryValue = 1, CallSiteParamValue }; unsigned LocationKind : 3; unsigned LocationFlags : 2; @@ -147,6 +157,10 @@ public: return LocationFlags & EntryValue; } + bool isParameterValue() { + return LocationFlags & CallSiteParamValue; + } + Optional<uint8_t> TagOffset; protected: @@ -174,6 +188,22 @@ protected: virtual void emitBaseTypeRef(uint64_t Idx) = 0; + /// Start emitting data to the temporary buffer. The data stored in the + /// temporary buffer can be committed to the main output using + /// commitTemporaryBuffer(). + virtual void enableTemporaryBuffer() = 0; + + /// Disable emission to the temporary buffer. This does not commit data + /// in the temporary buffer to the main output. + virtual void disableTemporaryBuffer() = 0; + + /// Return the emitted size, in number of bytes, for the data stored in the + /// temporary buffer. + virtual unsigned getTemporaryBufferSize() = 0; + + /// Commit the data stored in the temporary buffer to the main output. + virtual void commitTemporaryBuffer() = 0; + /// Emit a normalized unsigned constant. void emitConstu(uint64_t Value); @@ -233,6 +263,10 @@ protected: /// expression. See PR21176 for more details. void addStackValue(); + /// Finalize an entry value by emitting its size operand, and committing the + /// DWARF block which has been emitted to the temporary buffer. + void finalizeEntryValue(); + ~DwarfExpression() = default; public: @@ -264,6 +298,11 @@ public: LocationFlags |= EntryValue; } + /// Lock this down to become a call site parameter location. + void setCallSiteParamValueFlag() { + LocationFlags |= CallSiteParamValue; + } + /// Emit a machine register location. 
As an optimization this may also consume /// the prefix of a DwarfExpression if a more efficient representation for /// combining the register location and the first operation exists. @@ -278,8 +317,11 @@ public: DIExpressionCursor &Expr, unsigned MachineReg, unsigned FragmentOffsetInBits = 0); - /// Emit entry value dwarf operation. - void addEntryValueExpression(DIExpressionCursor &ExprCursor); + /// Begin emission of an entry value dwarf operation. The entry value's + /// first operand is the size of the DWARF block (its second operand), + /// which needs to be calculated at time of emission, so we don't emit + /// any operands here. + void beginEntryValueExpression(DIExpressionCursor &ExprCursor); /// Emit all remaining operations in the DIExpressionCursor. /// @@ -299,31 +341,62 @@ public: /// DwarfExpression implementation for .debug_loc entries. class DebugLocDwarfExpression final : public DwarfExpression { - ByteStreamer &BS; + + struct TempBuffer { + SmallString<32> Bytes; + std::vector<std::string> Comments; + BufferByteStreamer BS; + + TempBuffer(bool GenerateComments) : BS(Bytes, Comments, GenerateComments) {} + }; + + std::unique_ptr<TempBuffer> TmpBuf; + BufferByteStreamer &OutBS; + bool IsBuffering = false; + + /// Return the byte streamer that currently is being emitted to. + ByteStreamer &getActiveStreamer() { return IsBuffering ? TmpBuf->BS : OutBS; } void emitOp(uint8_t Op, const char *Comment = nullptr) override; void emitSigned(int64_t Value) override; void emitUnsigned(uint64_t Value) override; void emitData1(uint8_t Value) override; void emitBaseTypeRef(uint64_t Idx) override; + + void enableTemporaryBuffer() override; + void disableTemporaryBuffer() override; + unsigned getTemporaryBufferSize() override; + void commitTemporaryBuffer() override; + bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) override; - public: - DebugLocDwarfExpression(unsigned DwarfVersion, ByteStreamer &BS, DwarfCompileUnit &CU) - : DwarfExpression(DwarfVersion, CU), BS(BS) {} + DebugLocDwarfExpression(unsigned DwarfVersion, BufferByteStreamer &BS, + DwarfCompileUnit &CU) + : DwarfExpression(DwarfVersion, CU), OutBS(BS) {} }; /// DwarfExpression implementation for singular DW_AT_location. class DIEDwarfExpression final : public DwarfExpression { -const AsmPrinter &AP; - DIELoc &DIE; + const AsmPrinter &AP; + DIELoc &OutDIE; + DIELoc TmpDIE; + bool IsBuffering = false; + + /// Return the DIE that currently is being emitted to. + DIELoc &getActiveDIE() { return IsBuffering ? TmpDIE : OutDIE; } void emitOp(uint8_t Op, const char *Comment = nullptr) override; void emitSigned(int64_t Value) override; void emitUnsigned(uint64_t Value) override; void emitData1(uint8_t Value) override; void emitBaseTypeRef(uint64_t Idx) override; + + void enableTemporaryBuffer() override; + void disableTemporaryBuffer() override; + unsigned getTemporaryBufferSize() override; + void commitTemporaryBuffer() override; + bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) override; public: @@ -331,7 +404,7 @@ public: DIELoc *finalize() { DwarfExpression::finalize(); - return &DIE; + return &OutDIE; } }; diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h index 244678ce9dc1..35fa51fb24c4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -32,15 +32,9 @@ class LexicalScope; class MCSection; // Data structure to hold a range for range lists. 
-class RangeSpan { -public: - RangeSpan(MCSymbol *S, MCSymbol *E) : Start(S), End(E) {} - const MCSymbol *getStart() const { return Start; } - const MCSymbol *getEnd() const { return End; } - void setEnd(const MCSymbol *E) { End = E; } - -private: - const MCSymbol *Start, *End; +struct RangeSpan { + const MCSymbol *Begin; + const MCSymbol *End; }; class RangeSpanList { @@ -86,10 +80,6 @@ class DwarfFile { /// The table is shared by all units. MCSymbol *RnglistsTableBaseSym = nullptr; - /// DWARF v5: The symbol that designates the base of the locations list table. - /// The table is shared by all units. - MCSymbol *LoclistsTableBaseSym = nullptr; - /// The variables of a lexical scope. struct ScopeVars { /// We need to sort Args by ArgNo and check for duplicates. This could also @@ -167,9 +157,6 @@ public: MCSymbol *getRnglistsTableBaseSym() const { return RnglistsTableBaseSym; } void setRnglistsTableBaseSym(MCSymbol *Sym) { RnglistsTableBaseSym = Sym; } - MCSymbol *getLoclistsTableBaseSym() const { return LoclistsTableBaseSym; } - void setLoclistsTableBaseSym(MCSymbol *Sym) { LoclistsTableBaseSym = Sym; } - /// \returns false if the variable was merged with a previous one. bool addScopeVariable(LexicalScope *LS, DbgVariable *Var); diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 991ab94b50ab..37c68c085792 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -47,31 +47,42 @@ using namespace llvm; #define DEBUG_TYPE "dwarfdebug" DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, - DwarfCompileUnit &CU, - DIELoc &DIE) - : DwarfExpression(AP.getDwarfVersion(), CU), AP(AP), - DIE(DIE) {} + DwarfCompileUnit &CU, DIELoc &DIE) + : DwarfExpression(AP.getDwarfVersion(), CU), AP(AP), OutDIE(DIE) {} void DIEDwarfExpression::emitOp(uint8_t Op, const char* Comment) { - CU.addUInt(DIE, dwarf::DW_FORM_data1, Op); + CU.addUInt(getActiveDIE(), dwarf::DW_FORM_data1, Op); } void DIEDwarfExpression::emitSigned(int64_t Value) { - CU.addSInt(DIE, dwarf::DW_FORM_sdata, Value); + CU.addSInt(getActiveDIE(), dwarf::DW_FORM_sdata, Value); } void DIEDwarfExpression::emitUnsigned(uint64_t Value) { - CU.addUInt(DIE, dwarf::DW_FORM_udata, Value); + CU.addUInt(getActiveDIE(), dwarf::DW_FORM_udata, Value); } void DIEDwarfExpression::emitData1(uint8_t Value) { - CU.addUInt(DIE, dwarf::DW_FORM_data1, Value); + CU.addUInt(getActiveDIE(), dwarf::DW_FORM_data1, Value); } void DIEDwarfExpression::emitBaseTypeRef(uint64_t Idx) { - CU.addBaseTypeRef(DIE, Idx); + CU.addBaseTypeRef(getActiveDIE(), Idx); } +void DIEDwarfExpression::enableTemporaryBuffer() { + assert(!IsBuffering && "Already buffering?"); + IsBuffering = true; +} + +void DIEDwarfExpression::disableTemporaryBuffer() { IsBuffering = false; } + +unsigned DIEDwarfExpression::getTemporaryBufferSize() { + return TmpDIE.ComputeSize(&AP); +} + +void DIEDwarfExpression::commitTemporaryBuffer() { OutDIE.takeValues(TmpDIE); } + bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) { return MachineReg == TRI.getFrameRegister(*AP.MF); @@ -205,6 +216,10 @@ void DwarfUnit::insertDIE(const DINode *Desc, DIE *D) { MDNodeToDieMap.insert(std::make_pair(Desc, D)); } +void DwarfUnit::insertDIE(DIE *D) { + MDNodeToDieMap.insert(std::make_pair(nullptr, D)); +} + void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) { if (DD->getDwarfVersion() >= 4) Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_flag_present, @@ -718,7 +733,7 @@ 
std::string DwarfUnit::getParentContextString(const DIScope *Context) const { return ""; // FIXME: Decide whether to implement this for non-C++ languages. - if (getLanguage() != dwarf::DW_LANG_C_plus_plus) + if (!dwarf::isCPlusPlus((dwarf::SourceLanguage)getLanguage())) return ""; std::string CS; @@ -942,6 +957,9 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { if (CTy->isAppleBlockExtension()) addFlag(Buffer, dwarf::DW_AT_APPLE_block); + if (CTy->getExportSymbols()) + addFlag(Buffer, dwarf::DW_AT_export_symbols); + // This is outside the DWARF spec, but GDB expects a DW_AT_containing_type // inside C++ composite types to point to the base class with the vtable. // Rust uses DW_AT_containing_type to link a vtable to the type @@ -1696,15 +1714,6 @@ void DwarfUnit::addRnglistsBase() { TLOF.getDwarfRnglistsSection()->getBeginSymbol()); } -void DwarfUnit::addLoclistsBase() { - assert(DD->getDwarfVersion() >= 5 && - "DW_AT_loclists_base requires DWARF version 5 or later"); - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - addSectionLabel(getUnitDie(), dwarf::DW_AT_loclists_base, - DU->getLoclistsTableBaseSym(), - TLOF.getDwarfLoclistsSection()->getBeginSymbol()); -} - void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) { addFlag(D, dwarf::DW_AT_declaration); StringRef Name = CTy->getName(); diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index 56c934a35ae8..46c52a1faf4b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -127,6 +127,8 @@ public: /// the mappings are kept in DwarfDebug. void insertDIE(const DINode *Desc, DIE *D); + void insertDIE(DIE *D); + /// Add a flag that is true to the DIE. void addFlag(DIE &Die, dwarf::Attribute Attribute); @@ -214,15 +216,6 @@ public: /// Add thrown types. void addThrownTypes(DIE &Die, DINodeArray ThrownTypes); - // FIXME: Should be reformulated in terms of addComplexAddress. - /// Start with the address based on the location provided, and generate the - /// DWARF information necessary to find the actual Block variable (navigating - /// the Block struct) based on the starting location. Add the DWARF - /// information to the die. Obsolete, please use addComplexAddress instead. - void addBlockByrefAddress(const DbgVariable &DV, DIE &Die, - dwarf::Attribute Attribute, - const MachineLocation &Location); - /// Add a new type attribute to the specified entity. /// /// This takes and attribute parameter because DW_AT_friend attributes are @@ -279,9 +272,6 @@ public: /// Add the DW_AT_rnglists_base attribute to the unit DIE. void addRnglistsBase(); - /// Add the DW_AT_loclists_base attribute to the unit DIE. - void addLoclistsBase(); - virtual DwarfCompileUnit &getCU() = 0; void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy); diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 99e3687b36b8..31dfaaac836e 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -426,7 +426,7 @@ MCSymbol *EHStreamer::emitExceptionTable() { // EHABI). In this case LSDASection will be NULL. if (LSDASection) Asm->OutStreamer->SwitchSection(LSDASection); - Asm->EmitAlignment(2); + Asm->EmitAlignment(Align(4)); // Emit the LSDA. 
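A detail worth keeping in mind for these otherwise mechanical hunks: the old EmitAlignment() took a log2 exponent while the new one takes an llvm::Align in bytes, so EmitAlignment(2) and EmitAlignment(Align(4)) request the same 4-byte boundary. The padding computed by the renamed offsetToAlignment() seen earlier in DwarfDebug.cpp reduces to the usual round-up formula; a sketch with plain integers rather than the Align type:

#include <cstdint>

// Bytes of padding needed to round Value up to a multiple of Alignment,
// i.e. what offsetToAlignment(Value, Align(N)) computes.
constexpr uint64_t offsetToAlignment(uint64_t Value, uint64_t Alignment) {
  return (Alignment - Value % Alignment) % Alignment;
}

static_assert(offsetToAlignment(13, 8) == 3, "13 + 3 reaches 16");
static_assert(offsetToAlignment(16, 8) == 0, "already aligned");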
MCSymbol *GCCETSym = @@ -602,11 +602,11 @@ MCSymbol *EHStreamer::emitExceptionTable() { } if (HaveTTData) { - Asm->EmitAlignment(2); + Asm->EmitAlignment(Align(4)); emitTypeInfos(TTypeEncoding, TTBaseLabel); } - Asm->EmitAlignment(2); + Asm->EmitAlignment(Align(4)); return GCCETSym; } diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp index 39392b79e960..3849644d1584 100644 --- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -72,7 +72,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, **/ // Align to address width. - AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); + AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8)); // Emit PointCount. OS.AddComment("safe point count"); diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 3145cc90dc73..b4eda5fa8c58 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -129,7 +129,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, report_fatal_error(" Too much descriptor for ocaml GC"); } AP.emitInt16(NumDescriptors); - AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); + AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8)); for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(), IE = Info.funcinfo_end(); @@ -180,7 +180,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, AP.emitInt16(K->StackOffset); } - AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); + AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8)); } } } diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp index 155e91ce61a1..0398675577cd 100644 --- a/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/lib/CodeGen/AsmPrinter/WinException.cpp @@ -982,8 +982,7 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { OS.EmitValueToAlignment(4); OS.EmitLabel(LSDALabel); - const Function *Per = - dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()); + const auto *Per = cast<Function>(F.getPersonalityFn()->stripPointerCasts()); StringRef PerName = Per->getName(); int BaseState = -1; if (PerName == "_except_handler4") { diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp index dc7eaf6a5fe7..27b298dcf6af 100644 --- a/lib/CodeGen/AtomicExpandPass.cpp +++ b/lib/CodeGen/AtomicExpandPass.cpp @@ -382,7 +382,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { Value *NewAddr = Builder.CreateBitCast(Addr, PT); auto *NewLI = Builder.CreateLoad(NewTy, NewAddr); - NewLI->setAlignment(LI->getAlignment()); + NewLI->setAlignment(MaybeAlign(LI->getAlignment())); NewLI->setVolatile(LI->isVolatile()); NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID()); LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n"); @@ -469,7 +469,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { Value *NewAddr = Builder.CreateBitCast(Addr, PT); StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr); - NewSI->setAlignment(SI->getAlignment()); + NewSI->setAlignment(MaybeAlign(SI->getAlignment())); NewSI->setVolatile(SI->isVolatile()); NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID()); LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n"); @@ -1376,7 +1376,7 @@ Value *AtomicExpand::insertRMWCmpXchgLoop( Builder.SetInsertPoint(BB); LoadInst *InitLoaded = 
Builder.CreateLoad(ResultTy, Addr); // Atomics require at least natural alignment. - InitLoaded->setAlignment(ResultTy->getPrimitiveSizeInBits() / 8); + InitLoaded->setAlignment(MaybeAlign(ResultTy->getPrimitiveSizeInBits() / 8)); Builder.CreateBr(LoopBB); // Start the main loop block now that we've taken care of the preliminaries. @@ -1711,7 +1711,7 @@ bool AtomicExpand::expandAtomicOpToLibcall( // 'expected' argument, if present. if (CASExpected) { AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType()); - AllocaCASExpected->setAlignment(AllocaAlignment); + AllocaCASExpected->setAlignment(MaybeAlign(AllocaAlignment)); unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace(); AllocaCASExpected_i8 = @@ -1730,7 +1730,7 @@ bool AtomicExpand::expandAtomicOpToLibcall( Args.push_back(IntValue); } else { AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType()); - AllocaValue->setAlignment(AllocaAlignment); + AllocaValue->setAlignment(MaybeAlign(AllocaAlignment)); AllocaValue_i8 = Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx)); Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64); @@ -1742,7 +1742,7 @@ bool AtomicExpand::expandAtomicOpToLibcall( // 'ret' argument. if (!CASExpected && HasResult && !UseSizedLibcall) { AllocaResult = AllocaBuilder.CreateAlloca(I->getType()); - AllocaResult->setAlignment(AllocaAlignment); + AllocaResult->setAlignment(MaybeAlign(AllocaAlignment)); unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace(); AllocaResult_i8 = Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS)); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index fb54b5d6c8d8..455916eeb82f 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -129,9 +129,10 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { getAnalysis<MachineBlockFrequencyInfo>()); BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo, getAnalysis<MachineBranchProbabilityInfo>()); - return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(), - MF.getSubtarget().getRegisterInfo(), - getAnalysisIfAvailable<MachineModuleInfo>()); + auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); + return Folder.OptimizeFunction( + MF, MF.getSubtarget().getInstrInfo(), MF.getSubtarget().getRegisterInfo(), + MMIWP ? &MMIWP->getMMI() : nullptr); } BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, @@ -161,6 +162,11 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { // Avoid matching if this pointer gets reused. TriedMerging.erase(MBB); + // Update call site info. + std::for_each(MBB->begin(), MBB->end(), [MF](const MachineInstr &MI) { + if (MI.isCall(MachineInstr::IgnoreBundle)) + MF->eraseCallSiteInfo(&MI); + }); // Remove the block. MF->erase(MBB); EHScopeMembership.erase(MBB); @@ -1306,6 +1312,8 @@ static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) { /// result in infinite loops. static bool IsBetterFallthrough(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2) { + assert(MBB1 && MBB2 && "Unknown MachineBasicBlock"); + // Right now, we use a simple heuristic. If MBB2 ends with a call, and // MBB1 doesn't, we prefer to fall through into MBB1. 
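// The setAlignment() calls in the AtomicExpand hunks above reflect an API
// migration: alignment setters now take MaybeAlign rather than a raw
// unsigned, so the legacy value 0 ("alignment unknown") becomes an empty
// optional instead of a magic number. A rough standalone model of that
// encoding, using my own stand-in types rather than llvm::Align /
// llvm::MaybeAlign themselves:

#include <cassert>
#include <cstdint>
#include <optional>

struct Align { uint64_t Value; };         // invariant: a power of two
using MaybeAlign = std::optional<Align>;

MaybeAlign fromRawAlignment(uint64_t A) { // the old unsigned encoding
  if (A == 0)
    return std::nullopt;                  // 0 meant "unknown"
  assert((A & (A - 1)) == 0 && "alignment must be a power of two");
  return Align{A};
}

int main() {
  assert(!fromRawAlignment(0).has_value()); // unknown alignment survives
  assert(fromRawAlignment(8)->Value == 8);  // known 8-byte alignment
}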
This allows us to // optimize branches that branch to either a return block or an assert block @@ -1843,7 +1851,7 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, template <class Container> static void addRegAndItsAliases(unsigned Reg, const TargetRegisterInfo *TRI, Container &Set) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) Set.insert(*AI); } else { @@ -1871,7 +1879,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, for (const MachineOperand &MO : Loc->operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (MO.isUse()) { @@ -1909,7 +1917,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, return Loc; if (!MO.isReg() || MO.isUse()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (Uses.count(Reg)) { @@ -1937,14 +1945,14 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, for (const MachineOperand &MO : PI->operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (MO.isUse()) { addRegAndItsAliases(Reg, TRI, Uses); } else { if (Uses.erase(Reg)) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) Uses.erase(*SubRegs); // Use sub-registers to be conservative } @@ -2010,7 +2018,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { } if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (MO.isDef()) { @@ -2060,13 +2068,13 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { for (const MachineOperand &MO : TIB->operands()) { if (!MO.isReg() || !MO.isUse() || !MO.isKill()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (!AllDefsSet.count(Reg)) { continue; } - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) ActiveDefsSet.erase(*AI); } else { @@ -2078,8 +2086,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { for (const MachineOperand &MO : TIB->operands()) { if (!MO.isReg() || !MO.isDef() || MO.isDead()) continue; - unsigned Reg = MO.getReg(); - if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Reg || Register::isVirtualRegister(Reg)) continue; addRegAndItsAliases(Reg, TRI, ActiveDefsSet); addRegAndItsAliases(Reg, TRI, AllDefsSet); diff --git a/lib/CodeGen/BranchRelaxation.cpp b/lib/CodeGen/BranchRelaxation.cpp index 3ad6266d4f35..6efdc9efa968 100644 --- a/lib/CodeGen/BranchRelaxation.cpp +++ b/lib/CodeGen/BranchRelaxation.cpp @@ -64,19 +64,18 @@ class BranchRelaxation : public MachineFunctionPass { /// Compute the offset immediately following this block. \p MBB is the next /// block. 
unsigned postOffset(const MachineBasicBlock &MBB) const { - unsigned PO = Offset + Size; - unsigned Align = MBB.getAlignment(); - if (Align == 0) + const unsigned PO = Offset + Size; + const Align Alignment = MBB.getAlignment(); + if (Alignment == 1) return PO; - unsigned AlignAmt = 1 << Align; - unsigned ParentAlign = MBB.getParent()->getAlignment(); - if (Align <= ParentAlign) - return PO + OffsetToAlignment(PO, AlignAmt); + const Align ParentAlign = MBB.getParent()->getAlignment(); + if (Alignment <= ParentAlign) + return PO + offsetToAlignment(PO, Alignment); // The alignment of this MBB is larger than the function's alignment, so we // can't tell whether or not it will insert nops. Assume that it will. - return PO + AlignAmt + OffsetToAlignment(PO, AlignAmt); + return PO + Alignment.value() + offsetToAlignment(PO, Alignment); } }; @@ -128,9 +127,8 @@ void BranchRelaxation::verify() { #ifndef NDEBUG unsigned PrevNum = MF->begin()->getNumber(); for (MachineBasicBlock &MBB : *MF) { - unsigned Align = MBB.getAlignment(); - unsigned Num = MBB.getNumber(); - assert(BlockInfo[Num].Offset % (1u << Align) == 0); + const unsigned Num = MBB.getNumber(); + assert(isAligned(MBB.getAlignment(), BlockInfo[Num].Offset)); assert(!Num || BlockInfo[PrevNum].postOffset(MBB) <= BlockInfo[Num].Offset); assert(BlockInfo[Num].Size == computeBlockSize(MBB)); PrevNum = Num; @@ -143,7 +141,7 @@ void BranchRelaxation::verify() { LLVM_DUMP_METHOD void BranchRelaxation::dumpBBs() { for (auto &MBB : *MF) { const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()]; - dbgs() << format("%bb.%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset) + dbgs() << format("%%bb.%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset) << format("size=%#x\n", BBI.Size); } } diff --git a/lib/CodeGen/BreakFalseDeps.cpp b/lib/CodeGen/BreakFalseDeps.cpp index cc4b2caa9bed..709164e5f178 100644 --- a/lib/CodeGen/BreakFalseDeps.cpp +++ b/lib/CodeGen/BreakFalseDeps.cpp @@ -9,12 +9,11 @@ /// \file Break False Dependency pass. /// /// Some instructions have false dependencies which cause unnecessary stalls. -/// For exmaple, instructions that only write part of a register, and implicitly -/// need to read the other parts of the register. This may cause unwanted +/// For example, instructions may write part of a register and implicitly +/// need to read the other parts of the register. This may cause unwanted /// stalls preventing otherwise unrelated instructions from executing in /// parallel in an out-of-order CPU. -/// This pass is aimed at identifying and avoiding these depepndencies when -/// possible. +/// This pass is aimed at identifying and avoiding these dependencies. // //===----------------------------------------------------------------------===// @@ -24,6 +23,7 @@ #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; @@ -109,7 +109,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, MachineOperand &MO = MI->getOperand(OpIdx); assert(MO.isUndef() && "Expected undef machine operand"); - unsigned OriginalReg = MO.getReg(); + Register OriginalReg = MO.getReg(); // Update only undef operands that have reg units that are mapped to one root. 
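// In the postOffset() rewrite above, offsetToAlignment(PO, Alignment) is the
// number of pad bytes needed to round PO up to the next multiple of
// Alignment, and when the block is aligned more strictly than its function
// the code pessimistically budgets a further Alignment.value() bytes of
// possible nops. A standalone check of the padding arithmetic, with my own
// helper mirroring what I take offsetToAlignment to compute:

#include <cassert>
#include <cstdint>

uint64_t offsetToAlignment(uint64_t Offset, uint64_t Alignment) {
  return (-Offset) & (Alignment - 1);    // Alignment must be a power of two
}

int main() {
  assert(offsetToAlignment(0x123, 16) == 0xD); // 0x123 + 0xD == 0x130
  assert(offsetToAlignment(0x130, 16) == 0);   // already aligned: no pad
}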
for (MCRegUnitIterator Unit(OriginalReg, TRI); Unit.isValid(); ++Unit) { @@ -162,7 +162,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, unsigned Pref) { - unsigned reg = MI->getOperand(OpIdx).getReg(); + Register reg = MI->getOperand(OpIdx).getReg(); unsigned Clearance = RDA->getClearance(MI, reg); LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); @@ -178,6 +178,7 @@ void BreakFalseDeps::processDefs(MachineInstr *MI) { assert(!MI->isDebugInstr() && "Won't process debug values"); // Break dependence on undef uses. Do this before updating LiveRegs below. + // This can remove a false dependence with no additional instructions. unsigned OpNum; unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI); if (Pref) { @@ -189,6 +190,11 @@ void BreakFalseDeps::processDefs(MachineInstr *MI) { UndefReads.push_back(std::make_pair(MI, OpNum)); } + // The code below allows the target to create a new instruction to break the + // dependence. That opposes the goal of minimizing size, so bail out now. + if (MF->getFunction().hasMinSize()) + return; + const MCInstrDesc &MCID = MI->getDesc(); for (unsigned i = 0, e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); @@ -209,6 +215,11 @@ void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) { if (UndefReads.empty()) return; + // The code below allows the target to create a new instruction to break the + // dependence. That opposes the goal of minimizing size, so bail out now. + if (MF->getFunction().hasMinSize()) + return; + // Collect this block's live out register units. LiveRegSet.init(*TRI); // We do not need to care about pristine registers as they are just preserved diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 7164fdfb7886..bf97aaee3665 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -40,7 +40,7 @@ void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS, MachineRegisterInfo &MRI = MF.getRegInfo(); VirtRegAuxInfo VRAI(MF, LIS, VRM, MLI, MBFI, norm); for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (MRI.reg_nodbg_empty(Reg)) continue; VRAI.calculateSpillWeightAndHint(LIS.getInterval(Reg)); @@ -48,10 +48,11 @@ void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS, } // Return the preferred allocation register for reg, given a COPY instruction. -static unsigned copyHint(const MachineInstr *mi, unsigned reg, +static Register copyHint(const MachineInstr *mi, unsigned reg, const TargetRegisterInfo &tri, const MachineRegisterInfo &mri) { - unsigned sub, hreg, hsub; + unsigned sub, hsub; + Register hreg; if (mi->getOperand(0).getReg() == reg) { sub = mi->getOperand(0).getSubReg(); hreg = mi->getOperand(1).getReg(); @@ -65,11 +66,11 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg, if (!hreg) return 0; - if (TargetRegisterInfo::isVirtualRegister(hreg)) - return sub == hsub ? hreg : 0; + if (Register::isVirtualRegister(hreg)) + return sub == hsub ? hreg : Register(); const TargetRegisterClass *rc = mri.getRegClass(reg); - unsigned CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg); + Register CopiedPReg = (hsub ? 
tri.getSubReg(hreg, hsub) : hreg); if (rc->contains(CopiedPReg)) return CopiedPReg; @@ -112,7 +113,7 @@ static bool isRematerializable(const LiveInterval &LI, // If the original (pre-splitting) registers match this // copy came from a split. - if (!TargetRegisterInfo::isVirtualRegister(Reg) || + if (!Register::isVirtualRegister(Reg) || VRM->getOriginal(Reg) != Original) return false; @@ -243,7 +244,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, // Get allocation hints from copies. if (!mi->isCopy()) continue; - unsigned hint = copyHint(mi, li.reg, tri, mri); + Register hint = copyHint(mi, li.reg, tri, mri); if (!hint) continue; // Force hweight onto the stack so that x86 doesn't add hidden precision, @@ -251,8 +252,9 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, // // FIXME: we probably shouldn't use floats at all. volatile float hweight = Hint[hint] += weight; - if (TargetRegisterInfo::isVirtualRegister(hint) || mri.isAllocatable(hint)) - CopyHints.insert(CopyHint(hint, hweight, tri.isPhysicalRegister(hint))); + if (Register::isVirtualRegister(hint) || mri.isAllocatable(hint)) + CopyHints.insert( + CopyHint(hint, hweight, Register::isPhysicalRegister(hint))); } Hint.clear(); diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index 497fcb147849..a397039180a4 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -32,7 +32,6 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, TRI(*MF.getSubtarget().getRegisterInfo()), Locs(locs), Context(C) { // No stack is used. StackOffset = 0; - MaxStackArgAlign = 1; clearByValRegsInfo(); UsedRegs.resize((TRI.getNumRegs()+31)/32); @@ -41,20 +40,21 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, /// Allocate space on the stack large enough to pass an argument by value. /// The size and alignment information of the argument is encoded in /// its parameter attribute. 
-void CCState::HandleByVal(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, - int MinSize, int MinAlign, - ISD::ArgFlagsTy ArgFlags) { - unsigned Align = ArgFlags.getByValAlign(); +void CCState::HandleByVal(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, int MinSize, + int MinAlignment, ISD::ArgFlagsTy ArgFlags) { + Align MinAlign(MinAlignment); + Align Alignment(ArgFlags.getByValAlign()); unsigned Size = ArgFlags.getByValSize(); if (MinSize > (int)Size) Size = MinSize; - if (MinAlign > (int)Align) - Align = MinAlign; - ensureMaxAlignment(Align); - MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, Align); + if (MinAlign > Alignment) + Alignment = MinAlign; + ensureMaxAlignment(Alignment); + MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, + Alignment.value()); Size = unsigned(alignTo(Size, MinAlign)); - unsigned Offset = AllocateStack(Size, Align); + unsigned Offset = AllocateStack(Size, Alignment.value()); addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); } @@ -90,13 +90,8 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, for (unsigned i = 0; i != NumArgs; ++i) { MVT ArgVT = Ins[i].VT; ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; - if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { -#ifndef NDEBUG - dbgs() << "Formal argument #" << i << " has unhandled type " - << EVT(ArgVT).getEVTString() << '\n'; -#endif - llvm_unreachable(nullptr); - } + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) + report_fatal_error("unable to allocate function argument #" + Twine(i)); } } @@ -122,13 +117,8 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, for (unsigned i = 0, e = Outs.size(); i != e; ++i) { MVT VT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) { -#ifndef NDEBUG - dbgs() << "Return operand #" << i << " has unhandled type " - << EVT(VT).getEVTString() << '\n'; -#endif - llvm_unreachable(nullptr); - } + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) + report_fatal_error("unable to allocate function return #" + Twine(i)); } } @@ -209,7 +199,7 @@ static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) { void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs, MVT VT, CCAssignFn Fn) { unsigned SavedStackOffset = StackOffset; - unsigned SavedMaxStackArgAlign = MaxStackArgAlign; + Align SavedMaxStackArgAlign = MaxStackArgAlign; unsigned NumLocs = Locs.size(); // Set the 'inreg' flag if it is used for this calling convention. 
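// In the HandleByVal() rewrite that ends the CallingConvLower.cpp diff above,
// the effective alignment of a byval argument is the maximum of its declared
// alignment and the target-imposed minimum, and the allocated size is the
// declared size raised to MinSize and then rounded up with alignTo. A
// standalone recap of that computation (simplified; the real code also calls
// ensureMaxAlignment and the target's HandleByVal hook):

#include <algorithm>
#include <cassert>
#include <cstdint>

uint64_t alignTo(uint64_t Size, uint64_t Alignment) {
  return (Size + Alignment - 1) / Alignment * Alignment;
}

int main() {
  uint64_t Declared = 4, MinAlignment = 8;        // target minimum wins here
  uint64_t Size = 10, MinSize = 16;               // declared size too small
  uint64_t Alignment = std::max(Declared, MinAlignment);
  Size = std::max(Size, MinSize);
  Size = alignTo(Size, MinAlignment);
  assert(Alignment == 8 && Size == 16);
}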
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index c37ed57781d4..ad9525f927e8 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -28,6 +28,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeDetectDeadLanesPass(Registry); initializeDwarfEHPreparePass(Registry); initializeEarlyIfConverterPass(Registry); + initializeEarlyIfPredicatorPass(Registry); initializeEarlyMachineLICMPass(Registry); initializeEarlyTailDuplicatePass(Registry); initializeExpandMemCmpPassPass(Registry); @@ -53,6 +54,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeLocalStackSlotPassPass(Registry); initializeLowerIntrinsicsPass(Registry); initializeMIRCanonicalizerPass(Registry); + initializeMIRNamerPass(Registry); initializeMachineBlockFrequencyInfoPass(Registry); initializeMachineBlockPlacementPass(Registry); initializeMachineBlockPlacementStatsPass(Registry); @@ -63,10 +65,11 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeMachineFunctionPrinterPassPass(Registry); initializeMachineLICMPass(Registry); initializeMachineLoopInfoPass(Registry); - initializeMachineModuleInfoPass(Registry); + initializeMachineModuleInfoWrapperPassPass(Registry); initializeMachineOptimizationRemarkEmitterPassPass(Registry); initializeMachineOutlinerPass(Registry); initializeMachinePipelinerPass(Registry); + initializeModuloScheduleTestPass(Registry); initializeMachinePostDominatorTreePass(Registry); initializeMachineRegionInfoPassPass(Registry); initializeMachineSchedulerPass(Registry); diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 52b4bbea012b..fa4432ea23ec 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -344,7 +344,7 @@ class TypePromotionTransaction; // Get the DominatorTree, building if necessary. DominatorTree &getDT(Function &F) { if (!DT) - DT = llvm::make_unique<DominatorTree>(F); + DT = std::make_unique<DominatorTree>(F); return *DT; } @@ -424,7 +424,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { TLI = SubtargetInfo->getTargetLowering(); TRI = SubtargetInfo->getRegisterInfo(); } - TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); BPI.reset(new BranchProbabilityInfo(F, *LI)); @@ -1524,7 +1524,7 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, const TargetLowering &TLI, const DataLayout &DL) { BasicBlock *UserBB = User->getParent(); DenseMap<BasicBlock *, CastInst *> InsertedTruncs; - TruncInst *TruncI = dyn_cast<TruncInst>(User); + auto *TruncI = cast<TruncInst>(User); bool MadeChange = false; for (Value::user_iterator TruncUI = TruncI->user_begin(), @@ -1682,10 +1682,11 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, TheUse = InsertedShift; } - // If we removed all uses, nuke the shift. + // If we removed all uses, or there are none, nuke the shift. 
if (ShiftI->use_empty()) { salvageDebugInfo(*ShiftI); ShiftI->eraseFromParent(); + MadeChange = true; } return MadeChange; @@ -1811,7 +1812,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { AllocaInst *AI; if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign && DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) - AI->setAlignment(PrefAlign); + AI->setAlignment(MaybeAlign(PrefAlign)); // Global variables can only be aligned if they are defined in this // object (i.e. they are uniquely initialized in this object), and // over-aligning global variables that have an explicit section is @@ -1821,7 +1822,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { GV->getPointerAlignment(*DL) < PrefAlign && DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2) - GV->setAlignment(PrefAlign); + GV->setAlignment(MaybeAlign(PrefAlign)); } // If this is a memcpy (or similar) then we may be able to improve the // alignment @@ -1867,24 +1868,10 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { }); return true; } - case Intrinsic::objectsize: { - // Lower all uses of llvm.objectsize.* - Value *RetVal = - lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true); - - resetIteratorIfInvalidatedWhileCalling(BB, [&]() { - replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); - }); - return true; - } - case Intrinsic::is_constant: { - // If is_constant hasn't folded away yet, lower it to false now. - Constant *RetVal = ConstantInt::get(II->getType(), 0); - resetIteratorIfInvalidatedWhileCalling(BB, [&]() { - replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); - }); - return true; - } + case Intrinsic::objectsize: + llvm_unreachable("llvm.objectsize.* should have been lowered already"); + case Intrinsic::is_constant: + llvm_unreachable("llvm.is.constant.* should have been lowered already"); case Intrinsic::aarch64_stlxr: case Intrinsic::aarch64_stxr: { ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0)); @@ -2024,17 +2011,18 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail /// call. const Function *F = BB->getParent(); - SmallVector<CallInst*, 4> TailCalls; + SmallVector<BasicBlock*, 4> TailCallBBs; if (PN) { for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { // Look through bitcasts. Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts(); CallInst *CI = dyn_cast<CallInst>(IncomingVal); + BasicBlock *PredBB = PN->getIncomingBlock(I); // Make sure the phi value is indeed produced by the tail call. 
- if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && + if (CI && CI->hasOneUse() && CI->getParent() == PredBB && TLI->mayBeEmittedAsTailCall(CI) && attributesPermitTailCall(F, CI, RetI, *TLI)) - TailCalls.push_back(CI); + TailCallBBs.push_back(PredBB); } } else { SmallPtrSet<BasicBlock*, 4> VisitedBBs; @@ -2052,24 +2040,20 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT CallInst *CI = dyn_cast<CallInst>(&*RI); if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) && attributesPermitTailCall(F, CI, RetI, *TLI)) - TailCalls.push_back(CI); + TailCallBBs.push_back(*PI); } } bool Changed = false; - for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { - CallInst *CI = TailCalls[i]; - CallSite CS(CI); - + for (auto const &TailCallBB : TailCallBBs) { // Make sure the call instruction is followed by an unconditional branch to // the return block. - BasicBlock *CallBB = CI->getParent(); - BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator()); + BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator()); if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) continue; - // Duplicate the return into CallBB. - (void)FoldReturnIntoUncondBranch(RetI, BB, CallBB); + // Duplicate the return into TailCallBB. + (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB); ModifiedDT = Changed = true; ++NumRetsDup; } @@ -2683,26 +2667,26 @@ private: void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx, Value *NewVal) { - Actions.push_back(llvm::make_unique<TypePromotionTransaction::OperandSetter>( + Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>( Inst, Idx, NewVal)); } void TypePromotionTransaction::eraseInstruction(Instruction *Inst, Value *NewVal) { Actions.push_back( - llvm::make_unique<TypePromotionTransaction::InstructionRemover>( + std::make_unique<TypePromotionTransaction::InstructionRemover>( Inst, RemovedInsts, NewVal)); } void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst, Value *New) { Actions.push_back( - llvm::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New)); + std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New)); } void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) { Actions.push_back( - llvm::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy)); + std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy)); } Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, @@ -2732,7 +2716,7 @@ Value *TypePromotionTransaction::createZExt(Instruction *Inst, void TypePromotionTransaction::moveBefore(Instruction *Inst, Instruction *Before) { Actions.push_back( - llvm::make_unique<TypePromotionTransaction::InstructionMoveBefore>( + std::make_unique<TypePromotionTransaction::InstructionMoveBefore>( Inst, Before)); } @@ -3048,7 +3032,7 @@ public: To = dyn_cast<PHINode>(OldReplacement); OldReplacement = Get(From); } - assert(Get(To) == To && "Replacement PHI node is already replaced."); + assert(To && Get(To) == To && "Replacement PHI node is already replaced."); Put(From, To); From->replaceAllUsesWith(To); AllPhiNodes.erase(From); @@ -3334,7 +3318,7 @@ private: // So the values are different and does not match. So we need them to // match. (But we register no more than one match per PHI node, so that // we won't later try to replace them twice.) 
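// The one-character fix below hinges on set-style insert() semantics:
// insert() returns an (iterator, bool) pair whose .second is true only when
// the element was newly inserted, so the old negated test registered a match
// only on the *second* encounter of FirstPhi, the opposite of the comment's
// intent; dropping the negation records the match on first sight. The
// contract, shown with std::set (LLVM's small-set types follow the same
// convention):

#include <cassert>
#include <set>

int main() {
  std::set<int> Matched;
  assert(Matched.insert(42).second);  // first insertion succeeds -> true
  assert(!Matched.insert(42).second); // already present -> false
}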
- if (!MatchedPHIs.insert(FirstPhi).second) + if (MatchedPHIs.insert(FirstPhi).second) Matcher.insert({ FirstPhi, SecondPhi }); // But me must check it. WorkList.push_back({ FirstPhi, SecondPhi }); @@ -3412,11 +3396,10 @@ private: Select->setFalseValue(ST.Get(Map[FalseValue])); } else { // Must be a Phi node then. - PHINode *PHI = cast<PHINode>(V); - auto *CurrentPhi = dyn_cast<PHINode>(Current); + auto *PHI = cast<PHINode>(V); // Fill the Phi node with values from predecessors. for (auto B : predecessors(PHI->getParent())) { - Value *PV = CurrentPhi->getIncomingValueForBlock(B); + Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B); assert(Map.find(PV) != Map.end() && "No predecessor Value!"); PHI->addIncoming(ST.Get(Map[PV]), B); } @@ -3785,13 +3768,11 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst, // poisoned value regular value // It should be OK since undef covers valid value. if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) { - const Instruction *ExtInst = - dyn_cast<const Instruction>(*Inst->user_begin()); + const auto *ExtInst = cast<const Instruction>(*Inst->user_begin()); if (ExtInst->hasOneUse()) { - const Instruction *AndInst = - dyn_cast<const Instruction>(*ExtInst->user_begin()); + const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin()); if (AndInst && AndInst->getOpcode() == Instruction::And) { - const ConstantInt *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1)); + const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1)); if (Cst && Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth())) return true; @@ -4793,8 +4774,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, << " for " << *MemoryInst << "\n"); if (SunkAddr->getType() != Addr->getType()) SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); - } else if (AddrSinkUsingGEPs || - (!AddrSinkUsingGEPs.getNumOccurrences() && TM && TTI->useAA())) { + } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && + TM && SubtargetInfo->addrSinkUsingGEPs())) { // By default, we use the GEP-based method when AA is used later. This // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode @@ -5816,7 +5797,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { return false; IRBuilder<> Builder(Load->getNextNode()); - auto *NewAnd = dyn_cast<Instruction>( + auto *NewAnd = cast<Instruction>( Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits))); // Mark this instruction as "inserted by CGP", so that other // optimizations don't touch it. @@ -6193,35 +6174,49 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) { // OpsToSink can contain multiple uses in a use chain (e.g. // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating - // uses must come first, which means they are sunk first, temporarily creating - // invalid IR. This will be fixed once their dominated users are sunk and - // updated. + // uses must come first, so we process the ops in reverse order so as to not + // create invalid IR. 
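// In the tryToSinkFreeOperands() change above, OpsToSink keeps dominating
// uses first, so iterating reverse(OpsToSink) clones the dominated user in a
// chain first (it can still legally read the original dominating value), and
// when the dominating instruction is cloned later the NewInstructions map
// rewires the already-cloned user onto the new clone. Each clone also
// becomes the next insertion point, so definitions land above their users in
// the target block. A toy model of the rewiring lookup; Inst is an
// illustrative stand-in for llvm::Instruction:

#include <map>

struct Inst;
std::map<Inst *, Inst *> NewInstructions; // original -> sunk clone

Inst *remapOperand(Inst *Old) {
  auto It = NewInstructions.find(Old);
  return It == NewInstructions.end() ? Old : It->second; // prefer the clone
}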
BasicBlock *TargetBB = I->getParent(); bool Changed = false; SmallVector<Use *, 4> ToReplace; - for (Use *U : OpsToSink) { + for (Use *U : reverse(OpsToSink)) { auto *UI = cast<Instruction>(U->get()); if (UI->getParent() == TargetBB || isa<PHINode>(UI)) continue; ToReplace.push_back(U); } - SmallPtrSet<Instruction *, 4> MaybeDead; + SetVector<Instruction *> MaybeDead; + DenseMap<Instruction *, Instruction *> NewInstructions; + Instruction *InsertPoint = I; for (Use *U : ToReplace) { auto *UI = cast<Instruction>(U->get()); Instruction *NI = UI->clone(); + NewInstructions[UI] = NI; MaybeDead.insert(UI); LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n"); - NI->insertBefore(I); + NI->insertBefore(InsertPoint); + InsertPoint = NI; InsertedInsts.insert(NI); - U->set(NI); + + // Update the use for the new instruction, making sure that we update the + // sunk instruction uses, if it is part of a chain that has already been + // sunk. + Instruction *OldI = cast<Instruction>(U->getUser()); + if (NewInstructions.count(OldI)) + NewInstructions[OldI]->setOperand(U->getOperandNo(), NI); + else + U->set(NI); Changed = true; } // Remove instructions that are dead after sinking. - for (auto *I : MaybeDead) - if (!I->hasNUsesOrMore(1)) + for (auto *I : MaybeDead) { + if (!I->hasNUsesOrMore(1)) { + LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n"); I->eraseFromParent(); + } + } return Changed; } @@ -7106,7 +7101,6 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) { for (auto &I : reverse(BB)) { if (makeBitReverse(I, *DL, *TLI)) { MadeBitReverse = MadeChange = true; - ModifiedDT = true; break; } } diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 4144c243a341..702e7e244bce 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -187,7 +187,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; const TargetRegisterClass *NewRC = nullptr; @@ -272,7 +272,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) { } if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; if (!MO.isDef()) continue; @@ -303,7 +303,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; if (!MO.isUse()) continue; @@ -457,6 +457,7 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits, if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) Max = SU; } + assert(Max && "Failed to find bottom of the critical path"); #ifndef NDEBUG { @@ -612,7 +613,7 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits, for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) { AntiDepReg = 0; diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index b99be5d7a87c..a169c3cb16b2 100644 --- 
a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -23,6 +23,8 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" @@ -71,39 +73,13 @@ static DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) { // -------------------------------------------------------------------- -DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, - const DFAStateInput (*SIT)[2], - const unsigned *SET): - InstrItins(I), DFAStateInputTable(SIT), DFAStateEntryTable(SET) { - // Make sure DFA types are large enough for the number of terms & resources. - static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= - (8 * sizeof(DFAInput)), - "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput"); - static_assert( - (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)), - "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput"); -} - -// Read the DFA transition table and update CachedTable. -// -// Format of the transition tables: -// DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid -// transitions -// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable -// for the ith state -// -void DFAPacketizer::ReadTable(unsigned int state) { - unsigned ThisState = DFAStateEntryTable[state]; - unsigned NextStateInTable = DFAStateEntryTable[state+1]; - // Early exit in case CachedTable has already contains this - // state's transitions. - if (CachedTable.count(UnsignPair(state, DFAStateInputTable[ThisState][0]))) - return; - - for (unsigned i = ThisState; i < NextStateInTable; i++) - CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] = - DFAStateInputTable[i][1]; -} +// Make sure DFA types are large enough for the number of terms & resources. +static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= + (8 * sizeof(DFAInput)), + "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput"); +static_assert( + (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)), + "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput"); // Return the DFAInput for an instruction class. 
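// The removed ReadTable/CachedTable code above lazily materialized a
// transition function (state, input) -> state out of the generated
// DFAStateInputTable/DFAStateEntryTable pairs; the packetizer now delegates
// that bookkeeping to its automaton member A (canAdd() asks whether a
// transition exists, add() takes it). The getUsedResources() function further
// below recovers instruction I's resources from the cumulative path as
// RS[I] ^ RS[I - 1], which works because each step only adds resource bits.
// A standalone model of the old cached lookup:

#include <map>
#include <optional>
#include <utility>

using State = unsigned;
using Input = unsigned;
std::map<std::pair<State, Input>, State> Transitions; // filled from tables

std::optional<State> step(State S, Input In) {
  auto It = Transitions.find({S, In});
  if (It == Transitions.end())
    return std::nullopt; // no legal transition: instruction can't be added
  return It->second;     // advance the DFA, i.e. reserve the resources
}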
DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) { @@ -129,9 +105,7 @@ DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) { bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) { unsigned InsnClass = MID->getSchedClass(); DFAInput InsnInput = getInsnInput(InsnClass); - UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); - ReadTable(CurrentState); - return CachedTable.count(StateTrans) != 0; + return A.canAdd(InsnInput); } // Reserve the resources occupied by a MCInstrDesc and change the current @@ -139,10 +113,7 @@ bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) { void DFAPacketizer::reserveResources(const MCInstrDesc *MID) { unsigned InsnClass = MID->getSchedClass(); DFAInput InsnInput = getInsnInput(InsnClass); - UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); - ReadTable(CurrentState); - assert(CachedTable.count(StateTrans) != 0); - CurrentState = CachedTable[StateTrans]; + A.add(InsnInput); } // Check if the resources occupied by a machine instruction are available @@ -159,19 +130,33 @@ void DFAPacketizer::reserveResources(MachineInstr &MI) { reserveResources(&MID); } +unsigned DFAPacketizer::getUsedResources(unsigned InstIdx) { + ArrayRef<NfaPath> NfaPaths = A.getNfaPaths(); + assert(!NfaPaths.empty() && "Invalid bundle!"); + const NfaPath &RS = NfaPaths.front(); + + // RS stores the cumulative resources used up to and including the I'th + // instruction. The 0th instruction is the base case. + if (InstIdx == 0) + return RS[0]; + // Return the difference between the cumulative resources used by InstIdx and + // its predecessor. + return RS[InstIdx] ^ RS[InstIdx - 1]; +} + namespace llvm { // This class extends ScheduleDAGInstrs and overrides the schedule method // to build the dependence graph. class DefaultVLIWScheduler : public ScheduleDAGInstrs { private: - AliasAnalysis *AA; + AAResults *AA; /// Ordered list of DAG postprocessing steps. std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; public: DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, - AliasAnalysis *AA); + AAResults *AA); // Actual scheduling work. 
void schedule() override; @@ -189,7 +174,7 @@ protected: DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, - AliasAnalysis *AA) + AAResults *AA) : ScheduleDAGInstrs(MF, &MLI), AA(AA) { CanHandleTerminators = true; } @@ -207,9 +192,10 @@ void DefaultVLIWScheduler::schedule() { } VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf, - MachineLoopInfo &mli, AliasAnalysis *aa) + MachineLoopInfo &mli, AAResults *aa) : MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) { ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget()); + ResourceTracker->setTrackResources(true); VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA); } @@ -224,8 +210,11 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, LLVM_DEBUG({ if (!CurrentPacketMIs.empty()) { dbgs() << "Finalizing packet:\n"; - for (MachineInstr *MI : CurrentPacketMIs) - dbgs() << " * " << *MI; + unsigned Idx = 0; + for (MachineInstr *MI : CurrentPacketMIs) { + unsigned R = ResourceTracker->getUsedResources(Idx++); + dbgs() << " * [res:0x" << utohexstr(R) << "] " << *MI; + } } }); if (CurrentPacketMIs.size() > 1) { diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 049ce7063307..9a537c859a67 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -75,8 +75,8 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isDef()) { - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) { // Don't delete live physreg defs, or any reserved register defs. if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg)) return false; @@ -140,8 +140,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isDef()) { - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) { // Check the subreg set, not the alias set, because a def // of a super-register may still be partially live after // this def. 
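// The recurring unsigned -> Register rewrite in these hunks is mechanical:
// Register wraps the raw register id, and the isPhysicalRegister /
// isVirtualRegister / virtReg2Index predicates that used to be static members
// of TargetRegisterInfo now live on it. A rough standalone stand-in showing
// the encoding as I understand it (virtual register ids have the top bit set,
// 0 means "no register"; the real class is llvm/CodeGen/Register.h):

#include <cassert>
#include <cstdint>

struct Register {
  static bool isVirtualRegister(uint32_t R) { return R & (1u << 31); }
  static bool isPhysicalRegister(uint32_t R) {
    return R && !isVirtualRegister(R);
  }
  static uint32_t index2VirtReg(uint32_t Index) { return Index | (1u << 31); }
  static uint32_t virtReg2Index(uint32_t R) { return R & ~(1u << 31); }
};

int main() {
  uint32_t V = Register::index2VirtReg(5);
  assert(Register::isVirtualRegister(V) && Register::virtReg2Index(V) == 5);
  assert(Register::isPhysicalRegister(17) && !Register::isPhysicalRegister(0));
}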
@@ -159,8 +159,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isUse()) { - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) { for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) LivePhysRegs.set(*AI); } diff --git a/lib/CodeGen/DetectDeadLanes.cpp b/lib/CodeGen/DetectDeadLanes.cpp index fe78acf4d80a..6d5306c1dc0c 100644 --- a/lib/CodeGen/DetectDeadLanes.cpp +++ b/lib/CodeGen/DetectDeadLanes.cpp @@ -154,7 +154,7 @@ static bool isCrossCopy(const MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC, const MachineOperand &MO) { assert(lowersToCopies(MI)); - unsigned SrcReg = MO.getReg(); + Register SrcReg = MO.getReg(); const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); if (DstRC == SrcRC) return false; @@ -194,8 +194,8 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO, LaneBitmask UsedLanes) { if (!MO.readsReg()) return; - unsigned MOReg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(MOReg)) + Register MOReg = MO.getReg(); + if (!Register::isVirtualRegister(MOReg)) return; unsigned MOSubReg = MO.getSubReg(); @@ -203,7 +203,7 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO, UsedLanes = TRI->composeSubRegIndexLaneMask(MOSubReg, UsedLanes); UsedLanes &= MRI->getMaxLaneMaskForVReg(MOReg); - unsigned MORegIdx = TargetRegisterInfo::virtReg2Index(MOReg); + unsigned MORegIdx = Register::virtReg2Index(MOReg); VRegInfo &MORegInfo = VRegInfos[MORegIdx]; LaneBitmask PrevUsedLanes = MORegInfo.UsedLanes; // Any change at all? 
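// The lane tracking in the DetectDeadLanes hunks below treats a LaneBitmask
// as a bitset with one bit per minimal subregister lane of a register: a
// subregister operand's mask is composed through the subreg index, clipped to
// the vreg's maximal mask, and a lane is a dead-lane candidate when the
// accumulated used mask never gains its bit. A toy version of that test
// (uint64_t stand-in; the real type wraps an unsigned integer similarly, as
// far as I can tell):

#include <cassert>
#include <cstdint>

using LaneBitmask = uint64_t;

int main() {
  const LaneBitmask SubLo = 0b01, SubHi = 0b10;
  LaneBitmask Used = 0;
  Used |= SubLo;               // only the low lane is ever read
  assert((Used & SubHi) == 0); // high lane stays unused -> may be dead
}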
@@ -219,7 +219,7 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO, void DetectDeadLanes::transferUsedLanesStep(const MachineInstr &MI, LaneBitmask UsedLanes) { for (const MachineOperand &MO : MI.uses()) { - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) continue; LaneBitmask UsedOnMO = transferUsedLanes(MI, UsedLanes, MO); addUsedLanesOnOperand(MO, UsedOnMO); @@ -230,8 +230,8 @@ LaneBitmask DetectDeadLanes::transferUsedLanes(const MachineInstr &MI, LaneBitmask UsedLanes, const MachineOperand &MO) const { unsigned OpNum = MI.getOperandNo(&MO); - assert(lowersToCopies(MI) && DefinedByCopy[ - TargetRegisterInfo::virtReg2Index(MI.getOperand(0).getReg())]); + assert(lowersToCopies(MI) && + DefinedByCopy[Register::virtReg2Index(MI.getOperand(0).getReg())]); switch (MI.getOpcode()) { case TargetOpcode::COPY: @@ -250,7 +250,7 @@ LaneBitmask DetectDeadLanes::transferUsedLanes(const MachineInstr &MI, return MO2UsedLanes; const MachineOperand &Def = MI.getOperand(0); - unsigned DefReg = Def.getReg(); + Register DefReg = Def.getReg(); const TargetRegisterClass *RC = MRI->getRegClass(DefReg); LaneBitmask MO1UsedLanes; if (RC->CoveredBySubRegs) @@ -285,10 +285,10 @@ void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use, if (MI.getOpcode() == TargetOpcode::PATCHPOINT) return; const MachineOperand &Def = *MI.defs().begin(); - unsigned DefReg = Def.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DefReg)) + Register DefReg = Def.getReg(); + if (!Register::isVirtualRegister(DefReg)) return; - unsigned DefRegIdx = TargetRegisterInfo::virtReg2Index(DefReg); + unsigned DefRegIdx = Register::virtReg2Index(DefReg); if (!DefinedByCopy.test(DefRegIdx)) return; @@ -360,7 +360,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) { if (lowersToCopies(DefMI)) { // Start optimisatically with no used or defined lanes for copy // instructions. The following dataflow analysis will add more bits. - unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg); + unsigned RegIdx = Register::virtReg2Index(Reg); DefinedByCopy.set(RegIdx); PutInWorklist(RegIdx); @@ -377,17 +377,17 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) { for (const MachineOperand &MO : DefMI.uses()) { if (!MO.isReg() || !MO.readsReg()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; LaneBitmask MODefinedLanes; - if (TargetRegisterInfo::isPhysicalRegister(MOReg)) { + if (Register::isPhysicalRegister(MOReg)) { MODefinedLanes = LaneBitmask::getAll(); } else if (isCrossCopy(*MRI, DefMI, DefRC, MO)) { MODefinedLanes = LaneBitmask::getAll(); } else { - assert(TargetRegisterInfo::isVirtualRegister(MOReg)); + assert(Register::isVirtualRegister(MOReg)); if (MRI->hasOneDef(MOReg)) { const MachineOperand &MODef = *MRI->def_begin(MOReg); const MachineInstr &MODefMI = *MODef.getParent(); @@ -428,10 +428,10 @@ LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) { if (lowersToCopies(UseMI)) { assert(UseMI.getDesc().getNumDefs() == 1); const MachineOperand &Def = *UseMI.defs().begin(); - unsigned DefReg = Def.getReg(); + Register DefReg = Def.getReg(); // The used lanes of COPY-like instruction operands are determined by the // following dataflow analysis. - if (TargetRegisterInfo::isVirtualRegister(DefReg)) { + if (Register::isVirtualRegister(DefReg)) { // But ignore copies across incompatible register classes. 
bool CrossCopy = false; if (lowersToCopies(UseMI)) { @@ -470,10 +470,10 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO, if (!lowersToCopies(MI)) return false; const MachineOperand &Def = MI.getOperand(0); - unsigned DefReg = Def.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DefReg)) + Register DefReg = Def.getReg(); + if (!Register::isVirtualRegister(DefReg)) return false; - unsigned DefRegIdx = TargetRegisterInfo::virtReg2Index(DefReg); + unsigned DefRegIdx = Register::virtReg2Index(DefReg); if (!DefinedByCopy.test(DefRegIdx)) return false; @@ -482,8 +482,8 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO, if (UsedLanes.any()) return false; - unsigned MOReg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(MOReg)) { + Register MOReg = MO.getReg(); + if (Register::isVirtualRegister(MOReg)) { const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg); *CrossCopy = isCrossCopy(*MRI, MI, DstRC, MO); } @@ -494,7 +494,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) { // First pass: Populate defs/uses of vregs with initial values unsigned NumVirtRegs = MRI->getNumVirtRegs(); for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); + unsigned Reg = Register::index2VirtReg(RegIdx); // Determine used/defined lanes and add copy instructions to worklist. VRegInfo &Info = VRegInfos[RegIdx]; @@ -508,7 +508,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) { Worklist.pop_front(); WorklistMembers.reset(RegIdx); VRegInfo &Info = VRegInfos[RegIdx]; - unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); + unsigned Reg = Register::index2VirtReg(RegIdx); // Transfer UsedLanes to operands of DefMI (backwards dataflow). MachineOperand &Def = *MRI->def_begin(Reg); @@ -522,7 +522,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "Defined/Used lanes:\n"; for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); + unsigned Reg = Register::index2VirtReg(RegIdx); const VRegInfo &Info = VRegInfos[RegIdx]; dbgs() << printReg(Reg, nullptr) << " Used: " << PrintLaneMask(Info.UsedLanes) @@ -536,10 +536,10 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) { for (MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; - unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg); + unsigned RegIdx = Register::virtReg2Index(Reg); const VRegInfo &RegInfo = VRegInfos[RegIdx]; if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes.none()) { LLVM_DEBUG(dbgs() diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index 0a83760befaa..e5694218b5c3 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineTraceMetrics.h" @@ -140,6 +141,18 @@ private: /// speculated. bool canSpeculateInstrs(MachineBasicBlock *MBB); + /// Return true if all non-terminator instructions in MBB can be safely + /// predicated. 
+  bool canPredicateInstrs(MachineBasicBlock *MBB);
+
+  /// Scan through instruction dependencies and update InsertAfter array.
+  /// Return false if any dependency is incompatible with if conversion.
+  bool InstrDependenciesAllowIfConv(MachineInstr *I);
+
+  /// Predicate all instructions of the basic block with the current condition
+  /// except for terminators. Reverse the condition if ReversePredicate is set.
+  void PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate);
+
   /// Find a valid insertion point in Head.
   bool findInsertionPoint();
@@ -163,11 +176,14 @@ public:
   /// canConvertIf - If the sub-CFG headed by MBB can be if-converted,
   /// initialize the internal state, and return true.
-  bool canConvertIf(MachineBasicBlock *MBB);
+  /// If Predicate is set, try to predicate the block; otherwise try to
+  /// speculatively execute it.
+  bool canConvertIf(MachineBasicBlock *MBB, bool Predicate = false);

   /// convertIf - If-convert the last block passed to canConvertIf(), assuming
   /// it is possible. Add any erased blocks to RemovedBlocks.
-  void convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks);
+  void convertIf(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks,
+                 bool Predicate = false);
 };
 } // end anonymous namespace
@@ -225,37 +241,112 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
   }

   // Check for any dependencies on Head instructions.
-    for (const MachineOperand &MO : I->operands()) {
-      if (MO.isRegMask()) {
-        LLVM_DEBUG(dbgs() << "Won't speculate regmask: " << *I);
-        return false;
-      }
-      if (!MO.isReg())
-        continue;
-      unsigned Reg = MO.getReg();
+    if (!InstrDependenciesAllowIfConv(&(*I)))
+      return false;
+  }
+  return true;
+}

-      // Remember clobbered regunits.
-      if (MO.isDef() && TargetRegisterInfo::isPhysicalRegister(Reg))
-        for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
-          ClobberedRegUnits.set(*Units);
+/// Check that there are no dependencies preventing if conversion.
+///
+/// If an instruction uses any values that are defined in the head basic block,
+/// the defining instructions are added to InsertAfter.
+bool SSAIfConv::InstrDependenciesAllowIfConv(MachineInstr *I) {
+  for (const MachineOperand &MO : I->operands()) {
+    if (MO.isRegMask()) {
+      LLVM_DEBUG(dbgs() << "Won't speculate regmask: " << *I);
+      return false;
+    }
+    if (!MO.isReg())
+      continue;
+    Register Reg = MO.getReg();

-      if (!MO.readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg))
-        continue;
-      MachineInstr *DefMI = MRI->getVRegDef(Reg);
-      if (!DefMI || DefMI->getParent() != Head)
-        continue;
-      if (InsertAfter.insert(DefMI).second)
-        LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " depends on "
-                          << *DefMI);
-      if (DefMI->isTerminator()) {
-        LLVM_DEBUG(dbgs() << "Can't insert instructions below terminator.\n");
-        return false;
-      }
+    // Remember clobbered regunits.
+    if (MO.isDef() && Register::isPhysicalRegister(Reg))
+      for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+        ClobberedRegUnits.set(*Units);
+
+    if (!MO.readsReg() || !Register::isVirtualRegister(Reg))
+      continue;
+    MachineInstr *DefMI = MRI->getVRegDef(Reg);
+    if (!DefMI || DefMI->getParent() != Head)
+      continue;
+    if (InsertAfter.insert(DefMI).second)
+      LLVM_DEBUG(dbgs() << printMBBReference(*I->getParent()) << " depends on "
+                        << *DefMI);
+    if (DefMI->isTerminator()) {
+      LLVM_DEBUG(dbgs() << "Can't insert instructions below terminator.\n");
+      return false;
     }
   }
   return true;
 }

+/// canPredicateInstrs - Returns true if all the instructions in MBB can safely
+/// be predicated.
The terminators are not considered. +/// +/// If instructions use any values that are defined in the head basic block, +/// the defining instructions are added to InsertAfter. +/// +/// Any clobbered regunits are added to ClobberedRegUnits. +/// +bool SSAIfConv::canPredicateInstrs(MachineBasicBlock *MBB) { + // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to + // get right. + if (!MBB->livein_empty()) { + LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n"); + return false; + } + + unsigned InstrCount = 0; + + // Check all instructions, except the terminators. It is assumed that + // terminators never have side effects or define any used register values. + for (MachineBasicBlock::iterator I = MBB->begin(), + E = MBB->getFirstTerminator(); + I != E; ++I) { + if (I->isDebugInstr()) + continue; + + if (++InstrCount > BlockInstrLimit && !Stress) { + LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than " + << BlockInstrLimit << " instructions.\n"); + return false; + } + + // There shouldn't normally be any phis in a single-predecessor block. + if (I->isPHI()) { + LLVM_DEBUG(dbgs() << "Can't predicate: " << *I); + return false; + } + + // Check that instruction is predicable and that it is not already + // predicated. + if (!TII->isPredicable(*I) || TII->isPredicated(*I)) { + return false; + } + + // Check for any dependencies on Head instructions. + if (!InstrDependenciesAllowIfConv(&(*I))) + return false; + } + return true; +} + +// Apply predicate to all instructions in the machine block. +void SSAIfConv::PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate) { + auto Condition = Cond; + if (ReversePredicate) + TII->reverseBranchCondition(Condition); + // Terminators don't need to be predicated as they will be removed. + for (MachineBasicBlock::iterator I = MBB->begin(), + E = MBB->getFirstTerminator(); + I != E; ++I) { + if (I->isDebugInstr()) + continue; + TII->PredicateInstruction(*I, Condition); + } +} /// Find an insertion point in Head for the speculated instructions. The /// insertion point must be: @@ -288,8 +379,8 @@ bool SSAIfConv::findInsertionPoint() { // We're ignoring regmask operands. That is conservatively correct. if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; // I clobbers Reg, so it isn't live before I. if (MO.isDef()) @@ -337,7 +428,7 @@ bool SSAIfConv::findInsertionPoint() { /// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is /// a potential candidate for if-conversion. Fill out the internal state. /// -bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { +bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) { Head = MBB; TBB = FBB = Tail = nullptr; @@ -378,8 +469,9 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { } // This is a triangle or a diamond. - // If Tail doesn't have any phis, there must be side effects. - if (Tail->empty() || !Tail->front().isPHI()) { + // Skip if we cannot predicate and there are no phis skip as there must be + // side effects that can only be handled with predication. 
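// The speculation/predication split introduced above works as follows: the
// speculation path requires both sides to be safe to execute unconditionally
// and merges their results through selects, while the new Predicate path
// guards every instruction with the branch condition (reversed for the false
// block via reverseBranchCondition), which is why canPredicateInstrs checks
// isPredicable() / isPredicated() rather than freedom from side effects. In
// scalar terms the select-based form is:

int speculated(bool Cond, int A, int B) {
  int T = A + 1;       // both sides computed unconditionally in Head
  int F = B + 1;
  return Cond ? T : F; // the select the tail PHIs are rewritten into
}

// A predicated form would execute only the chosen side's effects, so blocks
// whose instructions write memory can still be if-converted on targets that
// support predication.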
+ if (!Predicate && (Tail->empty() || !Tail->front().isPHI())) { LLVM_DEBUG(dbgs() << "No phis in tail.\n"); return false; } @@ -423,8 +515,8 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { if (PI.PHI->getOperand(i+1).getMBB() == FPred) PI.FReg = PI.PHI->getOperand(i).getReg(); } - assert(TargetRegisterInfo::isVirtualRegister(PI.TReg) && "Bad PHI"); - assert(TargetRegisterInfo::isVirtualRegister(PI.FReg) && "Bad PHI"); + assert(Register::isVirtualRegister(PI.TReg) && "Bad PHI"); + assert(Register::isVirtualRegister(PI.FReg) && "Bad PHI"); // Get target information. if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg, @@ -437,10 +529,17 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { // Check that the conditional instructions can be speculated. InsertAfter.clear(); ClobberedRegUnits.reset(); - if (TBB != Tail && !canSpeculateInstrs(TBB)) - return false; - if (FBB != Tail && !canSpeculateInstrs(FBB)) - return false; + if (Predicate) { + if (TBB != Tail && !canPredicateInstrs(TBB)) + return false; + if (FBB != Tail && !canPredicateInstrs(FBB)) + return false; + } else { + if (TBB != Tail && !canSpeculateInstrs(TBB)) + return false; + if (FBB != Tail && !canSpeculateInstrs(FBB)) + return false; + } // Try to find a valid insertion point for the speculated instructions in the // head basic block. @@ -467,7 +566,7 @@ void SSAIfConv::replacePHIInstrs() { for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { PHIInfo &PI = PHIs[i]; LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI); - unsigned DstReg = PI.PHI->getOperand(0).getReg(); + Register DstReg = PI.PHI->getOperand(0).getReg(); TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); PI.PHI->eraseFromParent(); @@ -494,7 +593,7 @@ void SSAIfConv::rewritePHIOperands() { // equal. DstReg = PI.TReg; } else { - unsigned PHIDst = PI.PHI->getOperand(0).getReg(); + Register PHIDst = PI.PHI->getOperand(0).getReg(); DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst)); TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); @@ -521,7 +620,8 @@ void SSAIfConv::rewritePHIOperands() { /// /// Any basic blocks erased will be added to RemovedBlocks. /// -void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { +void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks, + bool Predicate) { assert(Head && Tail && TBB && FBB && "Call canConvertIf first."); // Update statistics. @@ -531,11 +631,16 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { ++NumDiamondsConv; // Move all instructions into Head, except for the terminators. - if (TBB != Tail) + if (TBB != Tail) { + if (Predicate) + PredicateBlock(TBB, /*ReversePredicate=*/false); Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator()); - if (FBB != Tail) + } + if (FBB != Tail) { + if (Predicate) + PredicateBlock(FBB, /*ReversePredicate=*/true); Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator()); - + } // Are there extra Tail predecessors? 
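Schematically, the two conversion modes differ only in what happens to the arm bodies before they are spliced into Head (an editorial sketch in MIR-flavored pseudocode; block and value names are made up):

// Diamond before conversion:                After convertIf:
//
//     Head: ...; br cc -> TBB               Head: ...
//     /                 \                     t0 = add ...  ; speculated, or [cc]-predicated
//  TBB: t0 = add     FBB: t1 = sub            t1 = sub ...  ; speculated, or [!cc]-predicated
//     \                 /                     x  = select cc, t0, t1   ; from the Tail phi
//     Tail: x = phi [t0,TBB], [t1,FBB]        ...           ; Tail merged or branched to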
bool ExtraPreds = Tail->pred_size() != 2; if (ExtraPreds) @@ -587,7 +692,6 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { LLVM_DEBUG(dbgs() << *Head); } - //===----------------------------------------------------------------------===// // EarlyIfConverter Pass //===----------------------------------------------------------------------===// @@ -613,8 +717,6 @@ public: private: bool tryConvertIf(MachineBasicBlock*); - void updateDomTree(ArrayRef<MachineBasicBlock*> Removed); - void updateLoops(ArrayRef<MachineBasicBlock*> Removed); void invalidateTraces(); bool shouldConvertIf(); }; @@ -642,32 +744,36 @@ void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } +namespace { /// Update the dominator tree after if-conversion erased some blocks. -void EarlyIfConverter::updateDomTree(ArrayRef<MachineBasicBlock*> Removed) { +void updateDomTree(MachineDominatorTree *DomTree, const SSAIfConv &IfConv, + ArrayRef<MachineBasicBlock *> Removed) { // convertIf can remove TBB, FBB, and Tail can be merged into Head. // TBB and FBB should not dominate any blocks. // Tail children should be transferred to Head. MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head); - for (unsigned i = 0, e = Removed.size(); i != e; ++i) { - MachineDomTreeNode *Node = DomTree->getNode(Removed[i]); + for (auto B : Removed) { + MachineDomTreeNode *Node = DomTree->getNode(B); assert(Node != HeadNode && "Cannot erase the head node"); while (Node->getNumChildren()) { assert(Node->getBlock() == IfConv.Tail && "Unexpected children"); DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode); } - DomTree->eraseNode(Removed[i]); + DomTree->eraseNode(B); } } /// Update LoopInfo after if-conversion. -void EarlyIfConverter::updateLoops(ArrayRef<MachineBasicBlock*> Removed) { +void updateLoops(MachineLoopInfo *Loops, + ArrayRef<MachineBasicBlock *> Removed) { if (!Loops) return; // If-conversion doesn't change loop structure, and it doesn't mess with back // edges, so updating LoopInfo is simply removing the dead blocks. - for (unsigned i = 0, e = Removed.size(); i != e; ++i) - Loops->removeBlock(Removed[i]); + for (auto B : Removed) + Loops->removeBlock(B); } +} // namespace /// Invalidate MachineTraceMetrics before if-conversion. 
void EarlyIfConverter::invalidateTraces() { @@ -783,8 +889,8 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { SmallVector<MachineBasicBlock*, 4> RemovedBlocks; IfConv.convertIf(RemovedBlocks); Changed = true; - updateDomTree(RemovedBlocks); - updateLoops(RemovedBlocks); + updateDomTree(DomTree, IfConv, RemovedBlocks); + updateLoops(Loops, RemovedBlocks); } return Changed; } @@ -822,3 +928,132 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { return Changed; } + +//===----------------------------------------------------------------------===// +// EarlyIfPredicator Pass +//===----------------------------------------------------------------------===// + +namespace { +class EarlyIfPredicator : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + TargetSchedModel SchedModel; + MachineRegisterInfo *MRI; + MachineDominatorTree *DomTree; + MachineLoopInfo *Loops; + SSAIfConv IfConv; + +public: + static char ID; + EarlyIfPredicator() : MachineFunctionPass(ID) {} + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + StringRef getPassName() const override { return "Early If-predicator"; } + +protected: + bool tryConvertIf(MachineBasicBlock *); + bool shouldConvertIf(); +}; +} // end anonymous namespace + +#undef DEBUG_TYPE +#define DEBUG_TYPE "early-if-predicator" + +char EarlyIfPredicator::ID = 0; +char &llvm::EarlyIfPredicatorID = EarlyIfPredicator::ID; + +INITIALIZE_PASS_BEGIN(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", + false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", false, + false) + +void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// Apply the target heuristic to decide if the transformation is profitable. +bool EarlyIfPredicator::shouldConvertIf() { + if (IfConv.isTriangle()) { + MachineBasicBlock &IfBlock = + (IfConv.TBB == IfConv.Tail) ? *IfConv.FBB : *IfConv.TBB; + + unsigned ExtraPredCost = 0; + unsigned Cycles = 0; + for (MachineInstr &I : IfBlock) { + unsigned NumCycles = SchedModel.computeInstrLatency(&I, false); + if (NumCycles > 1) + Cycles += NumCycles - 1; + ExtraPredCost += TII->getPredicationCost(I); + } + + return TII->isProfitableToIfCvt(IfBlock, Cycles, ExtraPredCost, + BranchProbability::getUnknown()); + } + unsigned TExtra = 0; + unsigned FExtra = 0; + unsigned TCycle = 0; + unsigned FCycle = 0; + for (MachineInstr &I : *IfConv.TBB) { + unsigned NumCycles = SchedModel.computeInstrLatency(&I, false); + if (NumCycles > 1) + TCycle += NumCycles - 1; + TExtra += TII->getPredicationCost(I); + } + for (MachineInstr &I : *IfConv.FBB) { + unsigned NumCycles = SchedModel.computeInstrLatency(&I, false); + if (NumCycles > 1) + FCycle += NumCycles - 1; + FExtra += TII->getPredicationCost(I); + } + return TII->isProfitableToIfCvt(*IfConv.TBB, TCycle, TExtra, *IfConv.FBB, + FCycle, FExtra, + BranchProbability::getUnknown()); +} + +/// Attempt repeated if-conversion on MBB, return true if successful. +/// +bool EarlyIfPredicator::tryConvertIf(MachineBasicBlock *MBB) { + bool Changed = false; + while (IfConv.canConvertIf(MBB, /*Predicate*/ true) && shouldConvertIf()) { + // If-convert MBB and update analyses. 
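The heuristic above reduces each arm to two numbers: cycles beyond the first per instruction, and the summed predication overhead, both then handed to TII->isProfitableToIfCvt. A self-contained sketch of that bookkeeping (Instr, armCost, and the latency values are invented; the real pass queries TargetSchedModel and TII):

#include <cstdio>
#include <utility>
#include <vector>

// Toy instruction: latency in cycles plus the extra cost of predicating it.
struct Instr {
  unsigned Latency;
  unsigned PredCost;
};

// Mirrors the per-arm loop in EarlyIfPredicator::shouldConvertIf().
std::pair<unsigned, unsigned> armCost(const std::vector<Instr> &Arm) {
  unsigned Cycles = 0, Extra = 0;
  for (const Instr &I : Arm) {
    if (I.Latency > 1)
      Cycles += I.Latency - 1; // computeInstrLatency analogue
    Extra += I.PredCost;       // getPredicationCost analogue
  }
  return {Cycles, Extra};
}

int main() {
  std::vector<Instr> TBB{{1, 1}, {3, 1}}, FBB{{2, 1}};
  auto [TCycle, TExtra] = armCost(TBB);
  auto [FCycle, FExtra] = armCost(FBB);
  // The real pass hands these, with an unknown branch probability, to
  // TII->isProfitableToIfCvt and lets the target decide.
  std::printf("T: %u+%u  F: %u+%u\n", TCycle, TExtra, FCycle, FExtra);
}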
+ SmallVector<MachineBasicBlock *, 4> RemovedBlocks; + IfConv.convertIf(RemovedBlocks, /*Predicate*/ true); + Changed = true; + updateDomTree(DomTree, IfConv, RemovedBlocks); + updateLoops(Loops, RemovedBlocks); + } + return Changed; +} + +bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "********** EARLY IF-PREDICATOR **********\n" + << "********** Function: " << MF.getName() << '\n'); + if (skipFunction(MF.getFunction())) + return false; + + const TargetSubtargetInfo &STI = MF.getSubtarget(); + TII = STI.getInstrInfo(); + TRI = STI.getRegisterInfo(); + MRI = &MF.getRegInfo(); + SchedModel.init(&STI); + DomTree = &getAnalysis<MachineDominatorTree>(); + Loops = getAnalysisIfAvailable<MachineLoopInfo>(); + + bool Changed = false; + IfConv.runOnMachineFunction(MF); + + // Visit blocks in dominator tree post-order. The post-order enables nested + // if-conversion in a single pass. The tryConvertIf() function may erase + // blocks, but only blocks dominated by the head block. This makes it safe to + // update the dominator tree while the post-order iterator is still active. + for (auto DomNode : post_order(DomTree)) + if (tryConvertIf(DomNode->getBlock())) + Changed = true; + + return Changed; +} diff --git a/lib/CodeGen/ExecutionDomainFix.cpp b/lib/CodeGen/ExecutionDomainFix.cpp index a2dd5eee33b7..2cca05ea6f55 100644 --- a/lib/CodeGen/ExecutionDomainFix.cpp +++ b/lib/CodeGen/ExecutionDomainFix.cpp @@ -9,6 +9,7 @@ #include "llvm/CodeGen/ExecutionDomainFix.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; diff --git a/lib/CodeGen/ExpandMemCmp.cpp b/lib/CodeGen/ExpandMemCmp.cpp index b425482e6adf..9916f2de0414 100644 --- a/lib/CodeGen/ExpandMemCmp.cpp +++ b/lib/CodeGen/ExpandMemCmp.cpp @@ -795,7 +795,7 @@ public: TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering(); const TargetLibraryInfo *TLI = - &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); const TargetTransformInfo *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); auto PA = runImpl(F, TLI, TTI, TL); diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index 0ab70aff7dc4..1fc57fac1489 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -79,17 +79,17 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) && MI->getOperand(3).isImm() && "Invalid subreg_to_reg"); - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned InsReg = MI->getOperand(2).getReg(); + Register DstReg = MI->getOperand(0).getReg(); + Register InsReg = MI->getOperand(2).getReg(); assert(!MI->getOperand(2).getSubReg() && "SubIdx on physreg?"); unsigned SubIdx = MI->getOperand(3).getImm(); assert(SubIdx != 0 && "Invalid index for insert_subreg"); - unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx); + Register DstSubReg = TRI->getSubReg(DstReg, SubIdx); - assert(TargetRegisterInfo::isPhysicalRegister(DstReg) && + assert(Register::isPhysicalRegister(DstReg) && "Insert destination must be in a physical register"); - assert(TargetRegisterInfo::isPhysicalRegister(InsReg) && + assert(Register::isPhysicalRegister(InsReg) && "Inserted value must be in a physical register"); LLVM_DEBUG(dbgs() << "subreg: CONVERTING: " << *MI); diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index 
9c53550eaa9d..c1d22ef89195 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -72,7 +72,7 @@ GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { return *I->second; GCStrategy *S = getGCStrategy(F.getGC()); - Functions.push_back(llvm::make_unique<GCFunctionInfo>(F, *S)); + Functions.push_back(std::make_unique<GCFunctionInfo>(F, *S)); GCFunctionInfo *GFI = Functions.back().get(); FInfoMap[&F] = GFI; return *GFI; diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp index 90571d090bfb..0dc0a5bce747 100644 --- a/lib/CodeGen/GCRootLowering.cpp +++ b/lib/CodeGen/GCRootLowering.cpp @@ -249,7 +249,7 @@ GCMachineCodeAnalysis::GCMachineCodeAnalysis() : MachineFunctionPass(ID) {} void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); AU.setPreservesAll(); - AU.addRequired<MachineModuleInfo>(); + AU.addRequired<MachineModuleInfoWrapperPass>(); AU.addRequired<GCModuleInfo>(); } @@ -310,7 +310,7 @@ bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { return false; FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(MF.getFunction()); - MMI = &getAnalysis<MachineModuleInfo>(); + MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); TII = MF.getSubtarget().getInstrInfo(); // Find the size of the stack frame. There may be no correct static frame diff --git a/lib/CodeGen/GlobalISel/CSEInfo.cpp b/lib/CodeGen/GlobalISel/CSEInfo.cpp index 4518dbee1a9f..7d9d812d34bc 100644 --- a/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -52,6 +52,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) { case TargetOpcode::G_ANYEXT: case TargetOpcode::G_UNMERGE_VALUES: case TargetOpcode::G_TRUNC: + case TargetOpcode::G_GEP: return true; } return false; @@ -65,9 +66,9 @@ std::unique_ptr<CSEConfigBase> llvm::getStandardCSEConfigForOpt(CodeGenOpt::Level Level) { std::unique_ptr<CSEConfigBase> Config; if (Level == CodeGenOpt::None) - Config = make_unique<CSEConfigConstantOnly>(); + Config = std::make_unique<CSEConfigConstantOnly>(); else - Config = make_unique<CSEConfigFull>(); + Config = std::make_unique<CSEConfigFull>(); return Config; } @@ -332,7 +333,7 @@ GISelInstProfileBuilder::addNodeIDFlag(unsigned Flag) const { const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand( const MachineOperand &MO) const { if (MO.isReg()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!MO.isDef()) addNodeIDRegNum(Reg); LLT Ty = MRI.getType(Reg); diff --git a/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index 461bc6038c2c..51a74793f029 100644 --- a/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -162,6 +162,17 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc, return buildConstant(DstOps[0], Cst->getSExtValue()); break; } + case TargetOpcode::G_SEXT_INREG: { + assert(DstOps.size() == 1 && "Invalid dst ops"); + assert(SrcOps.size() == 2 && "Invalid src ops"); + const DstOp &Dst = DstOps[0]; + const SrcOp &Src0 = SrcOps[0]; + const SrcOp &Src1 = SrcOps[1]; + if (auto MaybeCst = + ConstantFoldExtOp(Opc, Src0.getReg(), Src1.getImm(), *getMRI())) + return buildConstant(Dst, MaybeCst->getSExtValue()); + break; + } } bool CanCopy = checkCopyToDefsPossible(DstOps); if (!canPerformCSEForOpc(Opc)) diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp index a5d8205a34a8..cdad92f7db4f 100644 --- 
a/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -11,14 +11,16 @@ /// //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #define DEBUG_TYPE "call-lowering" @@ -32,66 +34,70 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS, ArrayRef<ArrayRef<Register>> ArgRegs, Register SwiftErrorVReg, std::function<unsigned()> GetCalleeReg) const { + CallLoweringInfo Info; auto &DL = CS.getParent()->getParent()->getParent()->getDataLayout(); // First step is to marshall all the function's parameters into the correct // physregs and memory locations. Gather the sequence of argument types that // we'll pass to the assigner function. - SmallVector<ArgInfo, 8> OrigArgs; unsigned i = 0; unsigned NumFixedArgs = CS.getFunctionType()->getNumParams(); for (auto &Arg : CS.args()) { ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{}, i < NumFixedArgs}; setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CS); - // We don't currently support swiftself args. - if (OrigArg.Flags.isSwiftSelf()) - return false; - OrigArgs.push_back(OrigArg); + Info.OrigArgs.push_back(OrigArg); ++i; } - MachineOperand Callee = MachineOperand::CreateImm(0); if (const Function *F = CS.getCalledFunction()) - Callee = MachineOperand::CreateGA(F, 0); + Info.Callee = MachineOperand::CreateGA(F, 0); else - Callee = MachineOperand::CreateReg(GetCalleeReg(), false); - - ArgInfo OrigRet{ResRegs, CS.getType(), ISD::ArgFlagsTy{}}; - if (!OrigRet.Ty->isVoidTy()) - setArgFlags(OrigRet, AttributeList::ReturnIndex, DL, CS); - - return lowerCall(MIRBuilder, CS.getCallingConv(), Callee, OrigRet, OrigArgs, - SwiftErrorVReg); + Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false); + + Info.OrigRet = ArgInfo{ResRegs, CS.getType(), ISD::ArgFlagsTy{}}; + if (!Info.OrigRet.Ty->isVoidTy()) + setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CS); + + Info.KnownCallees = + CS.getInstruction()->getMetadata(LLVMContext::MD_callees); + Info.CallConv = CS.getCallingConv(); + Info.SwiftErrorVReg = SwiftErrorVReg; + Info.IsMustTailCall = CS.isMustTailCall(); + Info.IsTailCall = CS.isTailCall() && + isInTailCallPosition(CS, MIRBuilder.getMF().getTarget()); + Info.IsVarArg = CS.getFunctionType()->isVarArg(); + return lowerCall(MIRBuilder, Info); } template <typename FuncInfoTy> void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const FuncInfoTy &FuncInfo) const { + auto &Flags = Arg.Flags[0]; const AttributeList &Attrs = FuncInfo.getAttributes(); if (Attrs.hasAttribute(OpIdx, Attribute::ZExt)) - Arg.Flags.setZExt(); + Flags.setZExt(); if (Attrs.hasAttribute(OpIdx, Attribute::SExt)) - Arg.Flags.setSExt(); + Flags.setSExt(); if (Attrs.hasAttribute(OpIdx, Attribute::InReg)) - Arg.Flags.setInReg(); + Flags.setInReg(); if (Attrs.hasAttribute(OpIdx, Attribute::StructRet)) - Arg.Flags.setSRet(); + Flags.setSRet(); if (Attrs.hasAttribute(OpIdx, Attribute::SwiftSelf)) - Arg.Flags.setSwiftSelf(); + 
Flags.setSwiftSelf(); if (Attrs.hasAttribute(OpIdx, Attribute::SwiftError)) - Arg.Flags.setSwiftError(); + Flags.setSwiftError(); if (Attrs.hasAttribute(OpIdx, Attribute::ByVal)) - Arg.Flags.setByVal(); + Flags.setByVal(); if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca)) - Arg.Flags.setInAlloca(); + Flags.setInAlloca(); - if (Arg.Flags.isByVal() || Arg.Flags.isInAlloca()) { + if (Flags.isByVal() || Flags.isInAlloca()) { Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType(); auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType(); - Arg.Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy)); + Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. @@ -100,11 +106,11 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, FrameAlign = FuncInfo.getParamAlignment(OpIdx - 2); else FrameAlign = getTLI()->getByValTypeAlignment(ElementTy, DL); - Arg.Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Attrs.hasAttribute(OpIdx, Attribute::Nest)) - Arg.Flags.setNest(); - Arg.Flags.setOrigAlign(DL.getABITypeAlignment(Arg.Ty)); + Flags.setNest(); + Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty))); } template void @@ -159,7 +165,7 @@ void CallLowering::unpackRegs(ArrayRef<Register> DstRegs, Register SrcReg, } bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder, - ArrayRef<ArgInfo> Args, + SmallVectorImpl<ArgInfo> &Args, ValueHandler &Handler) const { MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); @@ -171,7 +177,7 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder, bool CallLowering::handleAssignments(CCState &CCInfo, SmallVectorImpl<CCValAssign> &ArgLocs, MachineIRBuilder &MIRBuilder, - ArrayRef<ArgInfo> Args, + SmallVectorImpl<ArgInfo> &Args, ValueHandler &Handler) const { MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); @@ -180,14 +186,99 @@ bool CallLowering::handleAssignments(CCState &CCInfo, unsigned NumArgs = Args.size(); for (unsigned i = 0; i != NumArgs; ++i) { MVT CurVT = MVT::getVT(Args[i].Ty); - if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo)) { - // Try to use the register type if we couldn't assign the VT. - if (!Handler.isArgumentHandler() || !CurVT.isValid()) + if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], + Args[i].Flags[0], CCInfo)) { + if (!CurVT.isValid()) return false; - CurVT = TLI->getRegisterTypeForCallingConv( + MVT NewVT = TLI->getRegisterTypeForCallingConv( F.getContext(), F.getCallingConv(), EVT(CurVT)); - if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo)) - return false; + + // If we need to split the type over multiple regs, check it's a scenario + // we currently support. + unsigned NumParts = TLI->getNumRegistersForCallingConv( + F.getContext(), F.getCallingConv(), CurVT); + if (NumParts > 1) { + // For now only handle exact splits. + if (NewVT.getSizeInBits() * NumParts != CurVT.getSizeInBits()) + return false; + } + + // For incoming arguments (physregs to vregs), we could have values in + // physregs (or memlocs) which we want to extract and copy to vregs. + // During this, we might have to deal with the LLT being split across + // multiple regs, so we have to record this information for later. + // + // If we have outgoing args, then we have the opposite case. 
We have a + // vreg with an LLT which we want to assign to a physical location, and + // we might have to record that the value has to be split later. + if (Handler.isIncomingArgumentHandler()) { + if (NumParts == 1) { + // Try to use the register type if we couldn't assign the VT. + if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], + Args[i].Flags[0], CCInfo)) + return false; + } else { + // We're handling an incoming arg which is split over multiple regs. + // E.g. passing an s128 on AArch64. + ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; + Args[i].OrigRegs.push_back(Args[i].Regs[0]); + Args[i].Regs.clear(); + Args[i].Flags.clear(); + LLT NewLLT = getLLTForMVT(NewVT); + // For each split register, create and assign a vreg that will store + // the incoming component of the larger value. These will later be + // merged to form the final vreg. + for (unsigned Part = 0; Part < NumParts; ++Part) { + Register Reg = + MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT); + ISD::ArgFlagsTy Flags = OrigFlags; + if (Part == 0) { + Flags.setSplit(); + } else { + Flags.setOrigAlign(Align::None()); + if (Part == NumParts - 1) + Flags.setSplitEnd(); + } + Args[i].Regs.push_back(Reg); + Args[i].Flags.push_back(Flags); + if (Handler.assignArg(i + Part, NewVT, NewVT, CCValAssign::Full, + Args[i], Args[i].Flags[Part], CCInfo)) { + // Still couldn't assign this smaller part type for some reason. + return false; + } + } + } + } else { + // Handling an outgoing arg that might need to be split. + if (NumParts < 2) + return false; // Don't know how to deal with this type combination. + + // This type is passed via multiple registers in the calling convention. + // We need to extract the individual parts. + Register LargeReg = Args[i].Regs[0]; + LLT SmallTy = LLT::scalar(NewVT.getSizeInBits()); + auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg); + assert(Unmerge->getNumOperands() == NumParts + 1); + ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; + // We're going to replace the regs and flags with the split ones. + Args[i].Regs.clear(); + Args[i].Flags.clear(); + for (unsigned PartIdx = 0; PartIdx < NumParts; ++PartIdx) { + ISD::ArgFlagsTy Flags = OrigFlags; + if (PartIdx == 0) { + Flags.setSplit(); + } else { + Flags.setOrigAlign(Align::None()); + if (PartIdx == NumParts - 1) + Flags.setSplitEnd(); + } + Args[i].Regs.push_back(Unmerge.getReg(PartIdx)); + Args[i].Flags.push_back(Flags); + if (Handler.assignArg(i + PartIdx, NewVT, NewVT, CCValAssign::Full, + Args[i], Args[i].Flags[PartIdx], CCInfo)) + return false; + } + } } } @@ -202,18 +293,32 @@ bool CallLowering::handleAssignments(CCState &CCInfo, continue; } - assert(Args[i].Regs.size() == 1 && - "Can't handle multiple virtual regs yet"); - // FIXME: Pack registers if we have more than one. Register ArgReg = Args[i].Regs[0]; + MVT OrigVT = MVT::getVT(Args[i].Ty); + MVT VAVT = VA.getValVT(); if (VA.isRegLoc()) { - MVT OrigVT = MVT::getVT(Args[i].Ty); - MVT VAVT = VA.getValVT(); - if (Handler.isArgumentHandler() && VAVT != OrigVT) { - if (VAVT.getSizeInBits() < OrigVT.getSizeInBits()) - return false; // Can't handle this type of arg yet. + if (Handler.isIncomingArgumentHandler() && VAVT != OrigVT) { + if (VAVT.getSizeInBits() < OrigVT.getSizeInBits()) { + // Expected to be multiple regs for a single incoming arg. 
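Both split paths above tag the parts identically: the first part is marked Split, every later part drops its original alignment, and the last part is marked SplitEnd, so the value can be reassembled later (buildMerge on the incoming side, buildUnmerge feeding the outgoing side). A toy sketch of the tagging (PartFlags and splitFlags are invented; the real bits live on ISD::ArgFlagsTy):

#include <cassert>
#include <vector>

// Invented stand-in for the ArgFlagsTy bits used by the split logic.
struct PartFlags {
  bool Split = false;         // opens the split
  bool SplitEnd = false;      // closes the split
  bool KeepsOrigAlign = true; // later parts get Align::None() above
};

// Tag NumParts registers that jointly carry one value, e.g. an s128 passed
// as 2 x s64 (the AArch64 example in the hunk above).
std::vector<PartFlags> splitFlags(unsigned NumParts) {
  assert(NumParts > 1 && "not a split");
  std::vector<PartFlags> Flags(NumParts);
  Flags.front().Split = true;
  for (unsigned P = 1; P < NumParts; ++P)
    Flags[P].KeepsOrigAlign = false;
  Flags.back().SplitEnd = true;
  return Flags;
}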
+ unsigned NumArgRegs = Args[i].Regs.size(); + if (NumArgRegs < 2) + return false; + + assert((j + (NumArgRegs - 1)) < ArgLocs.size() && + "Too many regs for number of args"); + for (unsigned Part = 0; Part < NumArgRegs; ++Part) { + // There should be Regs.size() ArgLocs per argument. + VA = ArgLocs[j + Part]; + Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA); + } + j += NumArgRegs - 1; + // Merge the split registers into the expected larger result vreg + // of the original call. + MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs); + continue; + } const LLT VATy(VAVT); Register NewReg = MIRBuilder.getMRI()->createGenericVirtualRegister(VATy); @@ -234,10 +339,28 @@ bool CallLowering::handleAssignments(CCState &CCInfo, } else { MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0); } + } else if (!Handler.isIncomingArgumentHandler()) { + assert((j + (Args[i].Regs.size() - 1)) < ArgLocs.size() && + "Too many regs for number of args"); + // This is an outgoing argument that might have been split. + for (unsigned Part = 0; Part < Args[i].Regs.size(); ++Part) { + // There should be Regs.size() ArgLocs per argument. + VA = ArgLocs[j + Part]; + Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA); + } + j += Args[i].Regs.size() - 1; } else { Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA); } } else if (VA.isMemLoc()) { + // Don't currently support loading/storing a type that needs to be split + // to the stack. Should be easy, just not implemented yet. + if (Args[i].Regs.size() > 1) { + LLVM_DEBUG( + dbgs() + << "Load/store a split arg to/from the stack not implemented yet"); + return false; + } MVT VT = MVT::getVT(Args[i].Ty); unsigned Size = VT == MVT::iPTR ? DL.getPointerSize() : alignTo(VT.getSizeInBits(), 8) / 8; @@ -253,6 +376,81 @@ bool CallLowering::handleAssignments(CCState &CCInfo, return true; } +bool CallLowering::analyzeArgInfo(CCState &CCState, + SmallVectorImpl<ArgInfo> &Args, + CCAssignFn &AssignFnFixed, + CCAssignFn &AssignFnVarArg) const { + for (unsigned i = 0, e = Args.size(); i < e; ++i) { + MVT VT = MVT::getVT(Args[i].Ty); + CCAssignFn &Fn = Args[i].IsFixed ? AssignFnFixed : AssignFnVarArg; + if (Fn(i, VT, VT, CCValAssign::Full, Args[i].Flags[0], CCState)) { + // Bail out on anything we can't handle. + LLVM_DEBUG(dbgs() << "Cannot analyze " << EVT(VT).getEVTString() + << " (arg number = " << i << "\n"); + return false; + } + } + return true; +} + +bool CallLowering::resultsCompatible(CallLoweringInfo &Info, + MachineFunction &MF, + SmallVectorImpl<ArgInfo> &InArgs, + CCAssignFn &CalleeAssignFnFixed, + CCAssignFn &CalleeAssignFnVarArg, + CCAssignFn &CallerAssignFnFixed, + CCAssignFn &CallerAssignFnVarArg) const { + const Function &F = MF.getFunction(); + CallingConv::ID CalleeCC = Info.CallConv; + CallingConv::ID CallerCC = F.getCallingConv(); + + if (CallerCC == CalleeCC) + return true; + + SmallVector<CCValAssign, 16> ArgLocs1; + CCState CCInfo1(CalleeCC, false, MF, ArgLocs1, F.getContext()); + if (!analyzeArgInfo(CCInfo1, InArgs, CalleeAssignFnFixed, + CalleeAssignFnVarArg)) + return false; + + SmallVector<CCValAssign, 16> ArgLocs2; + CCState CCInfo2(CallerCC, false, MF, ArgLocs2, F.getContext()); + if (!analyzeArgInfo(CCInfo2, InArgs, CallerAssignFnFixed, + CalleeAssignFnVarArg)) + return false; + + // We need the argument locations to match up exactly. If there's more in + // one than the other, then we are done. 
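The walk that follows is a plain element-wise comparison of two location vectors. The same idea as self-contained C++ (Loc and locationsMatch are invented stand-ins for CCValAssign and the loop in resultsCompatible):

#include <cstdint>
#include <vector>

// A value is assigned either to a register or to a stack offset.
struct Loc {
  bool IsReg;
  unsigned Reg;      // meaningful when IsReg
  int64_t MemOffset; // meaningful when !IsReg
};

// Caller and callee conventions are compatible only if every argument lands
// in exactly the same place under both.
bool locationsMatch(const std::vector<Loc> &A, const std::vector<Loc> &B) {
  if (A.size() != B.size())
    return false;
  for (size_t i = 0; i < A.size(); ++i) {
    if (A[i].IsReg != B[i].IsReg)
      return false;
    if (A[i].IsReg ? A[i].Reg != B[i].Reg
                   : A[i].MemOffset != B[i].MemOffset)
      return false;
  }
  return true;
}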
+ if (ArgLocs1.size() != ArgLocs2.size()) + return false; + + // Make sure that each location is passed in exactly the same way. + for (unsigned i = 0, e = ArgLocs1.size(); i < e; ++i) { + const CCValAssign &Loc1 = ArgLocs1[i]; + const CCValAssign &Loc2 = ArgLocs2[i]; + + // We need both of them to be the same. So if one is a register and one + // isn't, we're done. + if (Loc1.isRegLoc() != Loc2.isRegLoc()) + return false; + + if (Loc1.isRegLoc()) { + // If they don't have the same register location, we're done. + if (Loc1.getLocReg() != Loc2.getLocReg()) + return false; + + // They matched, so we can move to the next ArgLoc. + continue; + } + + // Loc1 wasn't a RegLoc, so they both must be MemLocs. Check if they match. + if (Loc1.getLocMemOffset() != Loc2.getLocMemOffset()) + return false; + } + + return true; +} + Register CallLowering::ValueHandler::extendRegister(Register ValReg, CCValAssign &VA) { LLT LocTy{VA.getLocVT()}; diff --git a/lib/CodeGen/GlobalISel/Combiner.cpp b/lib/CodeGen/GlobalISel/Combiner.cpp index 31cb1dbbc9b5..b4562a5c6601 100644 --- a/lib/CodeGen/GlobalISel/Combiner.cpp +++ b/lib/CodeGen/GlobalISel/Combiner.cpp @@ -27,6 +27,18 @@ using namespace llvm; +namespace llvm { +cl::OptionCategory GICombinerOptionCategory( + "GlobalISel Combiner", + "Control the rules which are enabled. These options all take a comma " + "separated list of rules to disable and may be specified by number " + "or number range (e.g. 1-10)." +#ifndef NDEBUG + " They may also be specified by name." +#endif +); +} // end namespace llvm + namespace { /// This class acts as the glue the joins the CombinerHelper to the overall /// Combine algorithm. The CombinerHelper is intended to report the @@ -92,7 +104,7 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF, return false; Builder = - CSEInfo ? make_unique<CSEMIRBuilder>() : make_unique<MachineIRBuilder>(); + CSEInfo ? std::make_unique<CSEMIRBuilder>() : std::make_unique<MachineIRBuilder>(); MRI = &MF.getRegInfo(); Builder->setMF(MF); if (CSEInfo) diff --git a/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 9cbf3dd83ff1..854769d283f7 100644 --- a/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -8,19 +8,36 @@ #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" #include "llvm/CodeGen/GlobalISel/Combiner.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" #define DEBUG_TYPE "gi-combiner" using namespace llvm; +// Option to allow testing of the combiner while no targets know about indexed +// addressing. 
+static cl::opt<bool> + ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), + cl::desc("Force all indexed operations to be " + "legal for the GlobalISel combiner")); + + CombinerHelper::CombinerHelper(GISelChangeObserver &Observer, - MachineIRBuilder &B) - : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer) {} + MachineIRBuilder &B, GISelKnownBits *KB, + MachineDominatorTree *MDT) + : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), + KB(KB), MDT(MDT) { + (void)this->KB; +} void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const { @@ -55,8 +72,8 @@ bool CombinerHelper::tryCombineCopy(MachineInstr &MI) { bool CombinerHelper::matchCombineCopy(MachineInstr &MI) { if (MI.getOpcode() != TargetOpcode::COPY) return false; - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); LLT DstTy = MRI.getType(DstReg); LLT SrcTy = MRI.getType(SrcReg); // Simple Copy Propagation. @@ -66,12 +83,183 @@ bool CombinerHelper::matchCombineCopy(MachineInstr &MI) { return false; } void CombinerHelper::applyCombineCopy(MachineInstr &MI) { - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); MI.eraseFromParent(); replaceRegWith(MRI, DstReg, SrcReg); } +bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) { + bool IsUndef = false; + SmallVector<Register, 4> Ops; + if (matchCombineConcatVectors(MI, IsUndef, Ops)) { + applyCombineConcatVectors(MI, IsUndef, Ops); + return true; + } + return false; +} + +bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef, + SmallVectorImpl<Register> &Ops) { + assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && + "Invalid instruction"); + IsUndef = true; + MachineInstr *Undef = nullptr; + + // Walk over all the operands of concat vectors and check if they are + // build_vector themselves or undef. + // Then collect their operands in Ops. + for (const MachineOperand &MO : MI.operands()) { + // Skip the instruction definition. + if (MO.isDef()) + continue; + Register Reg = MO.getReg(); + MachineInstr *Def = MRI.getVRegDef(Reg); + assert(Def && "Operand not defined"); + switch (Def->getOpcode()) { + case TargetOpcode::G_BUILD_VECTOR: + IsUndef = false; + // Remember the operands of the build_vector to fold + // them into the yet-to-build flattened concat vectors. + for (const MachineOperand &BuildVecMO : Def->operands()) { + // Skip the definition. + if (BuildVecMO.isDef()) + continue; + Ops.push_back(BuildVecMO.getReg()); + } + break; + case TargetOpcode::G_IMPLICIT_DEF: { + LLT OpType = MRI.getType(Reg); + // Keep one undef value for all the undef operands. + if (!Undef) { + Builder.setInsertPt(*MI.getParent(), MI); + Undef = Builder.buildUndef(OpType.getScalarType()); + } + assert(MRI.getType(Undef->getOperand(0).getReg()) == + OpType.getScalarType() && + "All undefs should have the same type"); + // Break the undef vector in as many scalar elements as needed + // for the flattening. 
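By this point the match routine has reduced every concat operand to either a build_vector's scalars or a per-element undef. The flattening decision in isolation (Operand and flattenConcat are invented toy types; -1 marks an undef scalar):

#include <vector>

// Toy concat operand: a build_vector's elements, an undef vector of known
// width, or anything else (which defeats the combine).
struct Operand {
  enum Kind { BuildVector, Undef, Other } K;
  unsigned NumElts = 0;  // for Undef
  std::vector<int> Elts; // for BuildVector
};

// Mirrors matchCombineConcatVectors: all operands must be build_vectors or
// undefs; the result is one flat scalar list for a single build_vector.
bool flattenConcat(const std::vector<Operand> &Ops, std::vector<int> &Out,
                   bool &IsUndef) {
  IsUndef = true;
  for (const Operand &Op : Ops) {
    switch (Op.K) {
    case Operand::BuildVector:
      IsUndef = false;
      Out.insert(Out.end(), Op.Elts.begin(), Op.Elts.end());
      break;
    case Operand::Undef:
      Out.insert(Out.end(), Op.NumElts, -1); // one undef per element
      break;
    default:
      return false;
    }
  }
  return true;
}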
+ for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements(); + EltIdx != EltEnd; ++EltIdx) + Ops.push_back(Undef->getOperand(0).getReg()); + break; + } + default: + return false; + } + } + return true; +} +void CombinerHelper::applyCombineConcatVectors( + MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) { + // We determined that the concat_vectors can be flatten. + // Generate the flattened build_vector. + Register DstReg = MI.getOperand(0).getReg(); + Builder.setInsertPt(*MI.getParent(), MI); + Register NewDstReg = MRI.cloneVirtualRegister(DstReg); + + // Note: IsUndef is sort of redundant. We could have determine it by + // checking that at all Ops are undef. Alternatively, we could have + // generate a build_vector of undefs and rely on another combine to + // clean that up. For now, given we already gather this information + // in tryCombineConcatVectors, just save compile time and issue the + // right thing. + if (IsUndef) + Builder.buildUndef(NewDstReg); + else + Builder.buildBuildVector(NewDstReg, Ops); + MI.eraseFromParent(); + replaceRegWith(MRI, DstReg, NewDstReg); +} + +bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) { + SmallVector<Register, 4> Ops; + if (matchCombineShuffleVector(MI, Ops)) { + applyCombineShuffleVector(MI, Ops); + return true; + } + return false; +} + +bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI, + SmallVectorImpl<Register> &Ops) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && + "Invalid instruction kind"); + LLT DstType = MRI.getType(MI.getOperand(0).getReg()); + Register Src1 = MI.getOperand(1).getReg(); + LLT SrcType = MRI.getType(Src1); + unsigned DstNumElts = DstType.getNumElements(); + unsigned SrcNumElts = SrcType.getNumElements(); + + // If the resulting vector is smaller than the size of the source + // vectors being concatenated, we won't be able to replace the + // shuffle vector into a concat_vectors. + // + // Note: We may still be able to produce a concat_vectors fed by + // extract_vector_elt and so on. It is less clear that would + // be better though, so don't bother for now. + if (DstNumElts < 2 * SrcNumElts) + return false; + + // Check that the shuffle mask can be broken evenly between the + // different sources. + if (DstNumElts % SrcNumElts != 0) + return false; + + // Mask length is a multiple of the source vector length. + // Check if the shuffle is some kind of concatenation of the input + // vectors. + unsigned NumConcat = DstNumElts / SrcNumElts; + SmallVector<int, 8> ConcatSrcs(NumConcat, -1); + SmallVector<int, 8> Mask; + ShuffleVectorInst::getShuffleMask(MI.getOperand(3).getShuffleMask(), Mask); + for (unsigned i = 0; i != DstNumElts; ++i) { + int Idx = Mask[i]; + // Undef value. + if (Idx < 0) + continue; + // Ensure the indices in each SrcType sized piece are sequential and that + // the same source is used for the whole piece. + if ((Idx % SrcNumElts != (i % SrcNumElts)) || + (ConcatSrcs[i / SrcNumElts] >= 0 && + ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) + return false; + // Remember which source this index came from. + ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; + } + + // The shuffle is concatenating multiple vectors together. + // Collect the different operands for that. 
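The mask walk above lifts out cleanly. A runnable version over plain ints (maskIsConcat and ConcatSrcs mirror the hunk; no LLVM types involved):

#include <vector>

// Decide whether a shuffle mask merely concatenates whole source vectors.
// ConcatSrcs records which source (0 or 1; -1 = undef) supplies each
// SrcNumElts-wide piece of the result.
bool maskIsConcat(const std::vector<int> &Mask, int SrcNumElts,
                  std::vector<int> &ConcatSrcs) {
  int DstNumElts = (int)Mask.size();
  if (DstNumElts < 2 * SrcNumElts || DstNumElts % SrcNumElts != 0)
    return false;
  ConcatSrcs.assign(DstNumElts / SrcNumElts, -1);
  for (int i = 0; i != DstNumElts; ++i) {
    int Idx = Mask[i];
    if (Idx < 0)
      continue; // undef element is compatible with anything
    // Indices within a piece must be sequential and come from one source.
    if (Idx % SrcNumElts != i % SrcNumElts)
      return false;
    int &Src = ConcatSrcs[i / SrcNumElts];
    if (Src >= 0 && Src != Idx / SrcNumElts)
      return false;
    Src = Idx / SrcNumElts;
  }
  return true;
}

With two 2-element sources, the mask {0,1,2,3} passes and yields pieces {0,1}, i.e. concat(Src1,Src2), while {0,2,1,3} fails the sequential-index test on its second element.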
+ Register UndefReg; + Register Src2 = MI.getOperand(2).getReg(); + for (auto Src : ConcatSrcs) { + if (Src < 0) { + if (!UndefReg) { + Builder.setInsertPt(*MI.getParent(), MI); + UndefReg = Builder.buildUndef(SrcType).getReg(0); + } + Ops.push_back(UndefReg); + } else if (Src == 0) + Ops.push_back(Src1); + else + Ops.push_back(Src2); + } + return true; +} + +void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI, + const ArrayRef<Register> Ops) { + Register DstReg = MI.getOperand(0).getReg(); + Builder.setInsertPt(*MI.getParent(), MI); + Register NewDstReg = MRI.cloneVirtualRegister(DstReg); + + Builder.buildConcatVectors(NewDstReg, Ops); + + MI.eraseFromParent(); + replaceRegWith(MRI, DstReg, NewDstReg); +} + namespace { /// Select a preference between two uses. CurrentUse is the current preference @@ -279,7 +467,7 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI, // up the type and extend so that it uses the preferred use. if (UseMI->getOpcode() == Preferred.ExtendOpcode || UseMI->getOpcode() == TargetOpcode::G_ANYEXT) { - unsigned UseDstReg = UseMI->getOperand(0).getReg(); + Register UseDstReg = UseMI->getOperand(0).getReg(); MachineOperand &UseSrcMO = UseMI->getOperand(1); const LLT &UseDstTy = MRI.getType(UseDstReg); if (UseDstReg != ChosenDstReg) { @@ -342,8 +530,212 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI, Observer.changedInstr(MI); } -bool CombinerHelper::matchCombineBr(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_BR && "Expected a G_BR"); +bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) { + assert(DefMI.getParent() == UseMI.getParent()); + if (&DefMI == &UseMI) + return false; + + // Loop through the basic block until we find one of the instructions. + MachineBasicBlock::const_iterator I = DefMI.getParent()->begin(); + for (; &*I != &DefMI && &*I != &UseMI; ++I) + return &*I == &DefMI; + + llvm_unreachable("Block must contain instructions"); +} + +bool CombinerHelper::dominates(MachineInstr &DefMI, MachineInstr &UseMI) { + if (MDT) + return MDT->dominates(&DefMI, &UseMI); + else if (DefMI.getParent() != UseMI.getParent()) + return false; + + return isPredecessor(DefMI, UseMI); +} + +bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, + Register &Base, Register &Offset) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + +#ifndef NDEBUG + unsigned Opcode = MI.getOpcode(); + assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD || + Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE); +#endif + + Base = MI.getOperand(1).getReg(); + MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base); + if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) + return false; + + LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI); + + for (auto &Use : MRI.use_instructions(Base)) { + if (Use.getOpcode() != TargetOpcode::G_GEP) + continue; + + Offset = Use.getOperand(2).getReg(); + if (!ForceLegalIndexing && + !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) { + LLVM_DEBUG(dbgs() << " Ignoring candidate with illegal addrmode: " + << Use); + continue; + } + + // Make sure the offset calculation is before the potentially indexed op. + // FIXME: we really care about dependency here. The offset calculation might + // be movable. 
+ MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset); + if (!OffsetDef || !dominates(*OffsetDef, MI)) { + LLVM_DEBUG(dbgs() << " Ignoring candidate with offset after mem-op: " + << Use); + continue; + } + + // FIXME: check whether all uses of Base are load/store with foldable + // addressing modes. If so, using the normal addr-modes is better than + // forming an indexed one. + + bool MemOpDominatesAddrUses = true; + for (auto &GEPUse : MRI.use_instructions(Use.getOperand(0).getReg())) { + if (!dominates(MI, GEPUse)) { + MemOpDominatesAddrUses = false; + break; + } + } + + if (!MemOpDominatesAddrUses) { + LLVM_DEBUG( + dbgs() << " Ignoring candidate as memop does not dominate uses: " + << Use); + continue; + } + + LLVM_DEBUG(dbgs() << " Found match: " << Use); + Addr = Use.getOperand(0).getReg(); + return true; + } + + return false; +} + +bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr, + Register &Base, Register &Offset) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + +#ifndef NDEBUG + unsigned Opcode = MI.getOpcode(); + assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD || + Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE); +#endif + + Addr = MI.getOperand(1).getReg(); + MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_GEP, Addr, MRI); + if (!AddrDef || MRI.hasOneUse(Addr)) + return false; + + Base = AddrDef->getOperand(1).getReg(); + Offset = AddrDef->getOperand(2).getReg(); + + LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI); + + if (!ForceLegalIndexing && + !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) { + LLVM_DEBUG(dbgs() << " Skipping, not legal for target"); + return false; + } + + MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI); + if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { + LLVM_DEBUG(dbgs() << " Skipping, frame index would need copy anyway."); + return false; + } + + if (MI.getOpcode() == TargetOpcode::G_STORE) { + // Would require a copy. + if (Base == MI.getOperand(0).getReg()) { + LLVM_DEBUG(dbgs() << " Skipping, storing base so need copy anyway."); + return false; + } + + // We're expecting one use of Addr in MI, but it could also be the + // value stored, which isn't actually dominated by the instruction. + if (MI.getOperand(0).getReg() == Addr) { + LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses"); + return false; + } + } + + // FIXME: check whether all uses of the base pointer are constant GEPs. That + // might allow us to end base's liveness here by adjusting the constant. 
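For orientation, here are the two shapes these helpers search for and what tryCombineIndexedLoadStore below turns them into (editorial pseudocode; the operand order, with the written-back address as a second def and a trailing 1/0 immediate for pre/post, is read off the builder calls in the next hunk):

// Post-indexed (findPostIndexCandidate):
//   %v  = G_LOAD %base            becomes   %v, %wb = G_INDEXED_LOAD %base, %off, 0
//   %wb = G_GEP  %base, %off
//
// Pre-indexed (findPreIndexCandidate):
//   %wb = G_GEP  %base, %off      becomes   %v, %wb = G_INDEXED_LOAD %base, %off, 1
//   %v  = G_LOAD %wb

In both cases the rewrite is only sound under the dominance checks above: the offset must be available at the memory op, and the memory op must dominate every remaining user of the written-back address.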
+ + for (auto &UseMI : MRI.use_instructions(Addr)) { + if (!dominates(MI, UseMI)) { + LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses."); + return false; + } + } + + return true; +} + +bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) { + unsigned Opcode = MI.getOpcode(); + if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD && + Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE) + return false; + + bool IsStore = Opcode == TargetOpcode::G_STORE; + Register Addr, Base, Offset; + bool IsPre = findPreIndexCandidate(MI, Addr, Base, Offset); + if (!IsPre && !findPostIndexCandidate(MI, Addr, Base, Offset)) + return false; + + + unsigned NewOpcode; + switch (Opcode) { + case TargetOpcode::G_LOAD: + NewOpcode = TargetOpcode::G_INDEXED_LOAD; + break; + case TargetOpcode::G_SEXTLOAD: + NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD; + break; + case TargetOpcode::G_ZEXTLOAD: + NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD; + break; + case TargetOpcode::G_STORE: + NewOpcode = TargetOpcode::G_INDEXED_STORE; + break; + default: + llvm_unreachable("Unknown load/store opcode"); + } + + MachineInstr &AddrDef = *MRI.getUniqueVRegDef(Addr); + MachineIRBuilder MIRBuilder(MI); + auto MIB = MIRBuilder.buildInstr(NewOpcode); + if (IsStore) { + MIB.addDef(Addr); + MIB.addUse(MI.getOperand(0).getReg()); + } else { + MIB.addDef(MI.getOperand(0).getReg()); + MIB.addDef(Addr); + } + + MIB.addUse(Base); + MIB.addUse(Offset); + MIB.addImm(IsPre); + MI.eraseFromParent(); + AddrDef.eraseFromParent(); + + LLVM_DEBUG(dbgs() << " Combinined to indexed operation"); + return true; +} + +bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) { + if (MI.getOpcode() != TargetOpcode::G_BR) + return false; + // Try to match the following: // bb1: // %c(s32) = G_ICMP pred, %a, %b @@ -380,9 +772,14 @@ bool CombinerHelper::matchCombineBr(MachineInstr &MI) { return true; } -bool CombinerHelper::tryCombineBr(MachineInstr &MI) { - if (!matchCombineBr(MI)) +bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) { + if (!matchElideBrByInvertingCond(MI)) return false; + applyElideBrByInvertingCond(MI); + return true; +} + +void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) { MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB(); MachineBasicBlock::iterator BrIt(MI); MachineInstr *BrCond = &*std::prev(BrIt); @@ -401,11 +798,509 @@ bool CombinerHelper::tryCombineBr(MachineInstr &MI) { BrCond->getOperand(1).setMBB(BrTarget); Observer.changedInstr(*BrCond); MI.eraseFromParent(); +} + +static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { + // On Darwin, -Os means optimize for size without hurting performance, so + // only really optimize for size when -Oz (MinSize) is used. + if (MF.getTarget().getTargetTriple().isOSDarwin()) + return MF.getFunction().hasMinSize(); + return MF.getFunction().hasOptSize(); +} + +// Returns a list of types to use for memory op lowering in MemOps. A partial +// port of findOptimalMemOpLowering in TargetLowering. +static bool findGISelOptimalMemOpLowering( + std::vector<LLT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign, + unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, + bool AllowOverlap, unsigned DstAS, unsigned SrcAS, + const AttributeList &FuncAttributes, const TargetLowering &TLI) { + // If 'SrcAlign' is zero, that means the memory operation does not need to + // load the value, i.e. memset or memcpy from constant string. 
Otherwise, + // it's the inferred alignment of the source. 'DstAlign', on the other hand, + // is the specified alignment of the memory operation. If it is zero, that + // means it's possible to change the alignment of the destination. + // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does + // not need to be loaded. + if (SrcAlign != 0 && SrcAlign < DstAlign) + return false; + + LLT Ty = TLI.getOptimalMemOpLLT(Size, DstAlign, SrcAlign, IsMemset, + ZeroMemset, MemcpyStrSrc, FuncAttributes); + + if (Ty == LLT()) { + // Use the largest scalar type whose alignment constraints are satisfied. + // We only need to check DstAlign here as SrcAlign is always greater or + // equal to DstAlign (or zero). + Ty = LLT::scalar(64); + while (DstAlign && DstAlign < Ty.getSizeInBytes() && + !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, DstAlign)) + Ty = LLT::scalar(Ty.getSizeInBytes()); + assert(Ty.getSizeInBits() > 0 && "Could not find valid type"); + // FIXME: check for the largest legal type we can load/store to. + } + + unsigned NumMemOps = 0; + while (Size != 0) { + unsigned TySize = Ty.getSizeInBytes(); + while (TySize > Size) { + // For now, only use non-vector load / store's for the left-over pieces. + LLT NewTy = Ty; + // FIXME: check for mem op safety and legality of the types. Not all of + // SDAGisms map cleanly to GISel concepts. + if (NewTy.isVector()) + NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32); + NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1)); + unsigned NewTySize = NewTy.getSizeInBytes(); + assert(NewTySize > 0 && "Could not find appropriate type"); + + // If the new LLT cannot cover all of the remaining bits, then consider + // issuing a (or a pair of) unaligned and overlapping load / store. + bool Fast; + // Need to get a VT equivalent for allowMisalignedMemoryAccesses(). + MVT VT = getMVTForLLT(Ty); + if (NumMemOps && AllowOverlap && NewTySize < Size && + TLI.allowsMisalignedMemoryAccesses( + VT, DstAS, DstAlign, MachineMemOperand::MONone, &Fast) && + Fast) + TySize = Size; + else { + Ty = NewTy; + TySize = NewTySize; + } + } + + if (++NumMemOps > Limit) + return false; + + MemOps.push_back(Ty); + Size -= TySize; + } + + return true; +} + +static Type *getTypeForLLT(LLT Ty, LLVMContext &C) { + if (Ty.isVector()) + return VectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()), + Ty.getNumElements()); + return IntegerType::get(C, Ty.getSizeInBits()); +} + +// Get a vectorized representation of the memset value operand, GISel edition. +static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { + MachineRegisterInfo &MRI = *MIB.getMRI(); + unsigned NumBits = Ty.getScalarSizeInBits(); + auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); + if (!Ty.isVector() && ValVRegAndVal) { + unsigned KnownVal = ValVRegAndVal->Value; + APInt Scalar = APInt(8, KnownVal); + APInt SplatVal = APInt::getSplat(NumBits, Scalar); + return MIB.buildConstant(Ty, SplatVal).getReg(0); + } + // FIXME: for vector types create a G_BUILD_VECTOR. + if (Ty.isVector()) + return Register(); + + // Extend the byte value to the larger type, and then multiply by a magic + // value 0x010101... in order to replicate it across every byte. 
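The byte replication mentioned in the comment above is the classic multiply-by-0x01...01 splat. Runnable on plain integers (splatByte is invented; the combiner builds the same thing from a G_ZEXT, a G_CONSTANT holding the magic, and a G_MUL; assumes Bits is a multiple of 8 and at most 64):

#include <cstdint>
#include <cstdio>

// Replicate an 8-bit value across every byte of a Bits-wide integer.
uint64_t splatByte(uint8_t V, unsigned Bits) {
  uint64_t Magic = 0x0101010101010101ull >> (64 - Bits); // one 0x01 per byte
  return (uint64_t)V * Magic; // e.g. 0xAB -> 0xABABABAB for Bits == 32
}

int main() {
  std::printf("%llx\n", (unsigned long long)splatByte(0xAB, 32)); // abababab
}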
+ LLT ExtType = Ty.getScalarType(); + auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val); + if (NumBits > 8) { + APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); + auto MagicMI = MIB.buildConstant(ExtType, Magic); + Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0); + } + + assert(ExtType == Ty && "Vector memset value type not supported yet"); + return Val; +} + +bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val, + unsigned KnownLen, unsigned Align, + bool IsVolatile) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + auto &DL = MF.getDataLayout(); + LLVMContext &C = MF.getFunction().getContext(); + + assert(KnownLen != 0 && "Have a zero length memset length!"); + + bool DstAlignCanChange = false; + MachineFrameInfo &MFI = MF.getFrameInfo(); + bool OptSize = shouldLowerMemFuncForSize(MF); + + MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); + if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) + DstAlignCanChange = true; + + unsigned Limit = TLI.getMaxStoresPerMemset(OptSize); + std::vector<LLT> MemOps; + + const auto &DstMMO = **MI.memoperands_begin(); + MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); + + auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); + bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0; + + if (!findGISelOptimalMemOpLowering( + MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Align), 0, + /*IsMemset=*/true, + /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false, + /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), ~0u, + MF.getFunction().getAttributes(), TLI)) + return false; + + if (DstAlignCanChange) { + // Get an estimate of the type from the LLT. + Type *IRTy = getTypeForLLT(MemOps[0], C); + unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy); + if (NewAlign > Align) { + Align = NewAlign; + unsigned FI = FIDef->getOperand(1).getIndex(); + // Give the stack frame object a larger alignment if needed. + if (MFI.getObjectAlignment(FI) < Align) + MFI.setObjectAlignment(FI, Align); + } + } + + MachineIRBuilder MIB(MI); + // Find the largest store and generate the bit pattern for it. + LLT LargestTy = MemOps[0]; + for (unsigned i = 1; i < MemOps.size(); i++) + if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits()) + LargestTy = MemOps[i]; + + // The memset stored value is always defined as an s8, so in order to make it + // work with larger store types we need to repeat the bit pattern across the + // wider type. + Register MemSetValue = getMemsetValue(Val, LargestTy, MIB); + + if (!MemSetValue) + return false; + + // Generate the stores. For each store type in the list, we generate the + // matching store of that type to the destination address. + LLT PtrTy = MRI.getType(Dst); + unsigned DstOff = 0; + unsigned Size = KnownLen; + for (unsigned I = 0; I < MemOps.size(); I++) { + LLT Ty = MemOps[I]; + unsigned TySize = Ty.getSizeInBytes(); + if (TySize > Size) { + // Issuing an unaligned load / store pair that overlaps with the previous + // pair. Adjust the offset accordingly. + assert(I == MemOps.size() - 1 && I != 0); + DstOff -= TySize - Size; + } + + // If this store is smaller than the largest store see whether we can get + // the smaller value for free with a truncate. 
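When the last planned store would run past the end of the region, the loop above backs the offset up and issues one overlapping store instead of a chain of narrower ones. The offset arithmetic in isolation (planStores and StoreOp are invented; sizes are in bytes and assumed to come from a findGISelOptimalMemOpLowering-style query):

#include <algorithm>
#include <cstdio>
#include <vector>

struct StoreOp {
  unsigned Offset;
  unsigned Size;
};

// Cover Len bytes with the given op sizes, letting the final op overlap its
// predecessor (the DstOff -= TySize - Size adjustment above).
std::vector<StoreOp> planStores(unsigned Len,
                                const std::vector<unsigned> &Sizes) {
  std::vector<StoreOp> Plan;
  unsigned Off = 0, Remaining = Len;
  for (unsigned Sz : Sizes) {
    if (Sz > Remaining)
      Off -= Sz - Remaining; // unaligned, overlapping tail store
    Plan.push_back({Off, Sz});
    Off += Sz;
    Remaining -= std::min(Sz, Remaining);
  }
  return Plan;
}

int main() { // 15 bytes become an 8-byte store at 0 and another at offset 7
  for (auto [O, S] : planStores(15, {8, 8}))
    std::printf("store %u bytes at offset %u\n", S, O);
}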
+ Register Value = MemSetValue; + if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) { + MVT VT = getMVTForLLT(Ty); + MVT LargestVT = getMVTForLLT(LargestTy); + if (!LargestTy.isVector() && !Ty.isVector() && + TLI.isTruncateFree(LargestVT, VT)) + Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0); + else + Value = getMemsetValue(Val, Ty, MIB); + if (!Value) + return false; + } + + auto *StoreMMO = + MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes()); + + Register Ptr = Dst; + if (DstOff != 0) { + auto Offset = + MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff); + Ptr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0); + } + + MIB.buildStore(Value, Ptr, *StoreMMO); + DstOff += Ty.getSizeInBytes(); + Size -= TySize; + } + + MI.eraseFromParent(); + return true; +} + + +bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, + Register Src, unsigned KnownLen, + unsigned DstAlign, unsigned SrcAlign, + bool IsVolatile) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + auto &DL = MF.getDataLayout(); + LLVMContext &C = MF.getFunction().getContext(); + + assert(KnownLen != 0 && "Have a zero length memcpy length!"); + + bool DstAlignCanChange = false; + MachineFrameInfo &MFI = MF.getFrameInfo(); + bool OptSize = shouldLowerMemFuncForSize(MF); + unsigned Alignment = MinAlign(DstAlign, SrcAlign); + + MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); + if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) + DstAlignCanChange = true; + + // FIXME: infer better src pointer alignment like SelectionDAG does here. + // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining + // if the memcpy is in a tail call position. + + unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize); + std::vector<LLT> MemOps; + + const auto &DstMMO = **MI.memoperands_begin(); + const auto &SrcMMO = **std::next(MI.memoperands_begin()); + MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); + MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo(); + + if (!findGISelOptimalMemOpLowering( + MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment), + SrcAlign, + /*IsMemset=*/false, + /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, + /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), + SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI)) + return false; + + if (DstAlignCanChange) { + // Get an estimate of the type from the LLT. + Type *IRTy = getTypeForLLT(MemOps[0], C); + unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy); + + // Don't promote to an alignment that would require dynamic stack + // realignment. + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (!TRI->needsStackRealignment(MF)) + while (NewAlign > Alignment && + DL.exceedsNaturalStackAlignment(Align(NewAlign))) + NewAlign /= 2; + + if (NewAlign > Alignment) { + Alignment = NewAlign; + unsigned FI = FIDef->getOperand(1).getIndex(); + // Give the stack frame object a larger alignment if needed. + if (MFI.getObjectAlignment(FI) < Alignment) + MFI.setObjectAlignment(FI, Alignment); + } + } + + LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n"); + + MachineIRBuilder MIB(MI); + // Now we need to emit a pair of load and stores for each of the types we've + // collected. I.e. for each type, generate a load from the source pointer of + // that type width, and then generate a corresponding store to the dest buffer + // of that value loaded. 
This can result in a sequence of loads and stores + of mixed types, depending on what the target specifies as good types to use. + unsigned CurrOffset = 0; + LLT PtrTy = MRI.getType(Src); + unsigned Size = KnownLen; + for (auto CopyTy : MemOps) { + // Issuing an unaligned load / store pair that overlaps with the previous + // pair. Adjust the offset accordingly. + if (CopyTy.getSizeInBytes() > Size) + CurrOffset -= CopyTy.getSizeInBytes() - Size; + + // Construct MMOs for the accesses. + auto *LoadMMO = + MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes()); + auto *StoreMMO = + MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes()); + + // Create the load. + Register LoadPtr = Src; + Register Offset; + if (CurrOffset != 0) { + Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset) + .getReg(0); + LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0); + } + auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO); + + // Create the store. + Register StorePtr = + CurrOffset == 0 ? Dst : MIB.buildGEP(PtrTy, Dst, Offset).getReg(0); + MIB.buildStore(LdVal, StorePtr, *StoreMMO); + CurrOffset += CopyTy.getSizeInBytes(); + Size -= CopyTy.getSizeInBytes(); + } + + MI.eraseFromParent(); return true; } +bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, + Register Src, unsigned KnownLen, + unsigned DstAlign, unsigned SrcAlign, + bool IsVolatile) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + auto &DL = MF.getDataLayout(); + LLVMContext &C = MF.getFunction().getContext(); + + assert(KnownLen != 0 && "Have a zero length memmove length!"); + + bool DstAlignCanChange = false; + MachineFrameInfo &MFI = MF.getFrameInfo(); + bool OptSize = shouldLowerMemFuncForSize(MF); + unsigned Alignment = MinAlign(DstAlign, SrcAlign); + + MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); + if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) + DstAlignCanChange = true; + + unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize); + std::vector<LLT> MemOps; + + const auto &DstMMO = **MI.memoperands_begin(); + const auto &SrcMMO = **std::next(MI.memoperands_begin()); + MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); + MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo(); + + // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due + // to a bug in its findOptimalMemOpLowering implementation. For now do the + // same thing here. + if (!findGISelOptimalMemOpLowering( + MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment), + SrcAlign, + /*IsMemset=*/false, + /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, + /*AllowOverlap=*/false, DstPtrInfo.getAddrSpace(), + SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI)) + return false; + + if (DstAlignCanChange) { + // Get an estimate of the type from the LLT. + Type *IRTy = getTypeForLLT(MemOps[0], C); + unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy); + + // Don't promote to an alignment that would require dynamic stack + // realignment. + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (!TRI->needsStackRealignment(MF)) + while (NewAlign > Alignment && + DL.exceedsNaturalStackAlignment(Align(NewAlign))) + NewAlign /= 2; + + if (NewAlign > Alignment) { + Alignment = NewAlign; + unsigned FI = FIDef->getOperand(1).getIndex(); + // Give the stack frame object a larger alignment if needed.
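A standalone worked example (outside the patch) of the offset back-off both the memset and memcpy expansion loops above use: copying 7 bytes with two 4-byte operations pulls the second access back to offset 3, so the tail is covered by overlap rather than by a third, smaller access.

#include <cassert>

int main() {
  int Size = 7;                    // remaining bytes to cover
  int Off = 0;
  const int OpBytes[2] = {4, 4};   // type sizes chosen by the lowering
  int Starts[2];
  for (int I = 0; I != 2; ++I) {
    if (OpBytes[I] > Size)
      Off -= OpBytes[I] - Size;    // overlap the previous access
    Starts[I] = Off;
    Off += OpBytes[I];
    Size -= OpBytes[I];
  }
  assert(Starts[0] == 0 && Starts[1] == 3);  // bytes [0,4) and [3,7)
}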
+ if (MFI.getObjectAlignment(FI) < Alignment) + MFI.setObjectAlignment(FI, Alignment); + } + } + + LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n"); + + MachineIRBuilder MIB(MI); + // Memmove requires that we perform the loads first before issuing the stores. + // Apart from that, this loop is pretty much doing the same thing as the + // memcpy codegen function. + unsigned CurrOffset = 0; + LLT PtrTy = MRI.getType(Src); + SmallVector<Register, 16> LoadVals; + for (auto CopyTy : MemOps) { + // Construct MMO for the load. + auto *LoadMMO = + MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes()); + + // Create the load. + Register LoadPtr = Src; + if (CurrOffset != 0) { + auto Offset = + MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); + LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0); + } + LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0)); + CurrOffset += CopyTy.getSizeInBytes(); + } + + CurrOffset = 0; + for (unsigned I = 0; I < MemOps.size(); ++I) { + LLT CopyTy = MemOps[I]; + // Now store the values loaded. + auto *StoreMMO = + MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes()); + + Register StorePtr = Dst; + if (CurrOffset != 0) { + auto Offset = + MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); + StorePtr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0); + } + MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO); + CurrOffset += CopyTy.getSizeInBytes(); + } + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { + // This combine is fairly complex so it's not written with a separate + // matcher function. + assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); + Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID(); + assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove || + ID == Intrinsic::memset) && + "Expected a memcpy like intrinsic"); + + auto MMOIt = MI.memoperands_begin(); + const MachineMemOperand *MemOp = *MMOIt; + bool IsVolatile = MemOp->isVolatile(); + // Don't try to optimize volatile. + if (IsVolatile) + return false; + + unsigned DstAlign = MemOp->getBaseAlignment(); + unsigned SrcAlign = 0; + Register Dst = MI.getOperand(1).getReg(); + Register Src = MI.getOperand(2).getReg(); + Register Len = MI.getOperand(3).getReg(); + + if (ID != Intrinsic::memset) { + assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI"); + MemOp = *(++MMOIt); + SrcAlign = MemOp->getBaseAlignment(); + } + + // See if this is a constant length copy + auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI); + if (!LenVRegAndVal) + return false; // Leave it to the legalizer to lower it to a libcall. 
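A sketch of the length gating that follows (plain C++, with std::optional standing in for getConstantVRegValWithLookThrough's result; the enum and function names here are illustrative only): only small, compile-time-constant lengths are expanded inline, a zero length just erases the call, and everything else is left for the legalizer's libcall path.

#include <cassert>
#include <cstdint>
#include <optional>

enum class Action { Libcall, Erase, ExpandInline };

Action classify(std::optional<uint64_t> KnownLen, uint64_t MaxLen) {
  if (!KnownLen)
    return Action::Libcall;       // length is not a compile-time constant
  if (*KnownLen == 0)
    return Action::Erase;         // nothing to copy or set; drop the call
  if (MaxLen && *KnownLen > MaxLen)
    return Action::Libcall;       // too large to be worth expanding inline
  return Action::ExpandInline;
}

int main() {
  assert(classify(std::nullopt, 8) == Action::Libcall);
  assert(classify(0, 8) == Action::Erase);
  assert(classify(16, 8) == Action::Libcall && classify(4, 8) == Action::ExpandInline);
}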
+ unsigned KnownLen = LenVRegAndVal->Value; + + if (KnownLen == 0) { + MI.eraseFromParent(); + return true; + } + + if (MaxLen && KnownLen > MaxLen) + return false; + + if (ID == Intrinsic::memcpy) + return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); + if (ID == Intrinsic::memmove) + return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); + if (ID == Intrinsic::memset) + return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile); + return false; +} + bool CombinerHelper::tryCombine(MachineInstr &MI) { if (tryCombineCopy(MI)) return true; - return tryCombineExtendingLoads(MI); + if (tryCombineExtendingLoads(MI)) + return true; + if (tryCombineIndexedLoadStore(MI)) + return true; + return false; } diff --git a/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/lib/CodeGen/GlobalISel/GISelKnownBits.cpp new file mode 100644 index 000000000000..be8efa8795f3 --- /dev/null +++ b/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -0,0 +1,383 @@ +//===- lib/CodeGen/GlobalISel/GISelKnownBits.cpp --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// Provides analysis for querying information about KnownBits during GISel +/// passes. +// +//===----------------------------------------------------------------------===// +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" + +#define DEBUG_TYPE "gisel-known-bits" + +using namespace llvm; + +char llvm::GISelKnownBitsAnalysis::ID = 0; + +INITIALIZE_PASS_BEGIN(GISelKnownBitsAnalysis, DEBUG_TYPE, + "Analysis for ComputingKnownBits", false, true) +INITIALIZE_PASS_END(GISelKnownBitsAnalysis, DEBUG_TYPE, + "Analysis for ComputingKnownBits", false, true) + +GISelKnownBits::GISelKnownBits(MachineFunction &MF) + : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()), + DL(MF.getFunction().getParent()->getDataLayout()) {} + +Align GISelKnownBits::inferAlignmentForFrameIdx(int FrameIdx, int Offset, + const MachineFunction &MF) { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + return commonAlignment(Align(MFI.getObjectAlignment(FrameIdx)), Offset); + // TODO: How to handle cases with Base + Offset? +} + +MaybeAlign GISelKnownBits::inferPtrAlignment(const MachineInstr &MI) { + if (MI.getOpcode() == TargetOpcode::G_FRAME_INDEX) { + int FrameIdx = MI.getOperand(1).getIndex(); + return inferAlignmentForFrameIdx(FrameIdx, 0, *MI.getMF()); + } + return None; +} + +void GISelKnownBits::computeKnownBitsForFrameIndex(Register R, KnownBits &Known, + const APInt &DemandedElts, + unsigned Depth) { + const MachineInstr &MI = *MRI.getVRegDef(R); + computeKnownBitsForAlignment(Known, inferPtrAlignment(MI)); +} + +void GISelKnownBits::computeKnownBitsForAlignment(KnownBits &Known, + MaybeAlign Alignment) { + if (Alignment) + // The low bits are known zero if the pointer is aligned.
+ Known.Zero.setLowBits(Log2(Alignment)); +} + +KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) { + return getKnownBits(MI.getOperand(0).getReg()); +} + +KnownBits GISelKnownBits::getKnownBits(Register R) { + KnownBits Known; + LLT Ty = MRI.getType(R); + APInt DemandedElts = + Ty.isVector() ? APInt::getAllOnesValue(Ty.getNumElements()) : APInt(1, 1); + computeKnownBitsImpl(R, Known, DemandedElts); + return Known; +} + +bool GISelKnownBits::signBitIsZero(Register R) { + LLT Ty = MRI.getType(R); + unsigned BitWidth = Ty.getScalarSizeInBits(); + return maskedValueIsZero(R, APInt::getSignMask(BitWidth)); +} + +APInt GISelKnownBits::getKnownZeroes(Register R) { + return getKnownBits(R).Zero; +} + +APInt GISelKnownBits::getKnownOnes(Register R) { return getKnownBits(R).One; } + +void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, + const APInt &DemandedElts, + unsigned Depth) { + MachineInstr &MI = *MRI.getVRegDef(R); + unsigned Opcode = MI.getOpcode(); + LLT DstTy = MRI.getType(R); + + // Handle the case where this is called on a register that does not have a + // type constraint (i.e. it has a register class constraint instead). This is + // unlikely to occur except by looking through copies but it is possible for + // the initial register being queried to be in this state. + if (!DstTy.isValid()) { + Known = KnownBits(); + return; + } + + unsigned BitWidth = DstTy.getSizeInBits(); + Known = KnownBits(BitWidth); // Don't know anything + + if (DstTy.isVector()) + return; // TODO: Handle vectors. + + if (Depth == getMaxDepth()) + return; + + if (!DemandedElts) + return; // No demanded elts, better to assume we don't know anything. + + KnownBits Known2; + + switch (Opcode) { + default: + TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI, + Depth); + break; + case TargetOpcode::COPY: { + MachineOperand Dst = MI.getOperand(0); + MachineOperand Src = MI.getOperand(1); + // Look through trivial copies but don't look through trivial copies of the + // form `%1:(s32) = OP %0:gpr32` because known-bits analysis is currently + // unable to determine the bit width of a register class. + // + // We can't use NoSubRegister by name as it's defined by each target but + // it's always defined to be 0 by tablegen. + if (Dst.getSubReg() == 0 /*NoSubRegister*/ && Src.getReg().isVirtual() && + Src.getSubReg() == 0 /*NoSubRegister*/ && + MRI.getType(Src.getReg()).isValid()) { + // Don't increment Depth for this one since we didn't do any work. + computeKnownBitsImpl(Src.getReg(), Known, DemandedElts, Depth); + } + break; + } + case TargetOpcode::G_CONSTANT: { + auto CstVal = getConstantVRegVal(R, MRI); + if (!CstVal) + break; + Known.One = *CstVal; + Known.Zero = ~Known.One; + break; + } + case TargetOpcode::G_FRAME_INDEX: { + computeKnownBitsForFrameIndex(R, Known, DemandedElts); + break; + } + case TargetOpcode::G_SUB: { + // If low bits are known to be zero in both operands, then we know they are + // going to be 0 in the result. Both addition and complement operations + // preserve the low zero bits.
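A worked instance (outside the patch) of the alignment rule in computeKnownBitsForAlignment above: a 16-byte-aligned frame object has Log2(16) = 4 known-zero low bits, which is enough to fold masks like ptr & 15 to a constant. A plain C++ sketch of the Log2 step:

#include <cassert>
#include <cstdint>

unsigned log2PowerOfTwo(uint64_t AlignBytes) {
  unsigned N = 0;
  while (AlignBytes > 1) { AlignBytes >>= 1; ++N; }
  return N;  // valid for powers of two, as alignments always are
}

int main() {
  assert(log2PowerOfTwo(16) == 4);
  // Any pointer P with this alignment satisfies (P & 0xF) == 0, so a
  // known-bits query can prove the mask away without seeing P itself.
}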
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + unsigned KnownZeroLow = Known2.countMinTrailingZeros(); + if (KnownZeroLow == 0) + break; + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts, + Depth + 1); + KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros()); + Known.Zero.setLowBits(KnownZeroLow); + break; + } + case TargetOpcode::G_XOR: { + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); + Known.Zero = KnownZeroOut; + break; + } + case TargetOpcode::G_GEP: { + // G_GEP is like G_ADD. FIXME: Is this true for all targets? + LLT Ty = MRI.getType(MI.getOperand(1).getReg()); + if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace())) + break; + LLVM_FALLTHROUGH; + } + case TargetOpcode::G_ADD: { + // Output known-0 bits are known if clear or set in both the low clear bits + // common to both LHS & RHS. For example, 8+(X<<3) is known to have the + // low 3 bits clear. + // Output known-0 bits are also known if the top bits of each input are + // known to be clear. For example, if one input has the top 10 bits clear + // and the other has the top 8 bits clear, we know the top 7 bits of the + // output must be clear. + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + unsigned KnownZeroHigh = Known2.countMinLeadingZeros(); + unsigned KnownZeroLow = Known2.countMinTrailingZeros(); + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts, + Depth + 1); + KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros()); + KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros()); + Known.Zero.setLowBits(KnownZeroLow); + if (KnownZeroHigh > 1) + Known.Zero.setHighBits(KnownZeroHigh - 1); + break; + } + case TargetOpcode::G_AND: { + // If either the LHS or the RHS are Zero, the result is zero. + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + + // Output known-1 bits are only known if set in both the LHS & RHS. + Known.One &= Known2.One; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + Known.Zero |= Known2.Zero; + break; + } + case TargetOpcode::G_OR: { + // If either the LHS or the RHS are Zero, the result is zero. + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + + // Output known-0 bits are only known if clear in both the LHS & RHS. + Known.Zero &= Known2.Zero; + // Output known-1 are known to be set if set in either the LHS | RHS. + Known.One |= Known2.One; + break; + } + case TargetOpcode::G_MUL: { + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + // If low bits are zero in either operand, output low known-0 bits. + // Also compute a conservative estimate for high known-0 bits. 
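The 8+(X<<3) example in the G_ADD comment above, checked with concrete numbers (a standalone sketch, not the APInt code): both addends are multiples of 8, so the sum keeps at least three trailing zero bits.

#include <cassert>
#include <cstdint>

unsigned trailingZeros(uint32_t V) {
  unsigned N = 0;
  while (N < 32 && !(V & (1u << N))) ++N;
  return N;
}

int main() {
  for (uint32_t X : {1u, 5u, 123u}) {
    uint32_t Sum = 8 + (X << 3);       // both terms have >= 3 trailing zeros
    assert(trailingZeros(Sum) >= 3);   // so the low 3 bits are provably clear
  }
}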
+ // More trickiness is possible, but this is sufficient for the + // interesting case of alignment computation. + unsigned TrailZ = + Known.countMinTrailingZeros() + Known2.countMinTrailingZeros(); + unsigned LeadZ = + std::max(Known.countMinLeadingZeros() + Known2.countMinLeadingZeros(), + BitWidth) - + BitWidth; + + Known.resetAll(); + Known.Zero.setLowBits(std::min(TrailZ, BitWidth)); + Known.Zero.setHighBits(std::min(LeadZ, BitWidth)); + break; + } + case TargetOpcode::G_SELECT: { + computeKnownBitsImpl(MI.getOperand(3).getReg(), Known, DemandedElts, + Depth + 1); + // If we don't know any bits, early out. + if (Known.isUnknown()) + break; + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts, + Depth + 1); + // Only known if known in both the LHS and RHS. + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + break; + } + case TargetOpcode::G_FCMP: + case TargetOpcode::G_ICMP: { + if (TL.getBooleanContents(DstTy.isVector(), + Opcode == TargetOpcode::G_FCMP) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + Known.Zero.setBitsFrom(1); + break; + } + case TargetOpcode::G_SEXT: { + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); + // If the sign bit is known to be zero or one, then sext will extend + // it to the top bits, else it will just zext. + Known = Known.sext(BitWidth); + break; + } + case TargetOpcode::G_ANYEXT: { + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); + Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */); + break; + } + case TargetOpcode::G_LOAD: { + if (MI.hasOneMemOperand()) { + const MachineMemOperand *MMO = *MI.memoperands_begin(); + if (const MDNode *Ranges = MMO->getRanges()) { + computeKnownBitsFromRangeMetadata(*Ranges, Known); + } + } + break; + } + case TargetOpcode::G_ZEXTLOAD: { + // Everything above the retrieved bits is zero + if (MI.hasOneMemOperand()) + Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits()); + break; + } + case TargetOpcode::G_ASHR: + case TargetOpcode::G_LSHR: + case TargetOpcode::G_SHL: { + KnownBits RHSKnown; + computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, + Depth + 1); + if (!RHSKnown.isConstant()) { + LLVM_DEBUG( + MachineInstr *RHSMI = MRI.getVRegDef(MI.getOperand(2).getReg()); + dbgs() << '[' << Depth << "] Shift not known constant: " << *RHSMI); + break; + } + uint64_t Shift = RHSKnown.getConstant().getZExtValue(); + LLVM_DEBUG(dbgs() << '[' << Depth << "] Shift is " << Shift << '\n'); + + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); + + switch (Opcode) { + case TargetOpcode::G_ASHR: + Known.Zero = Known.Zero.ashr(Shift); + Known.One = Known.One.ashr(Shift); + break; + case TargetOpcode::G_LSHR: + Known.Zero = Known.Zero.lshr(Shift); + Known.One = Known.One.lshr(Shift); + Known.Zero.setBitsFrom(Known.Zero.getBitWidth() - Shift); + break; + case TargetOpcode::G_SHL: + Known.Zero = Known.Zero.shl(Shift); + Known.One = Known.One.shl(Shift); + Known.Zero.setBits(0, Shift); + break; + } + break; + } + case TargetOpcode::G_INTTOPTR: + case TargetOpcode::G_PTRTOINT: + // Fall through and handle them the same as zext/trunc. + LLVM_FALLTHROUGH; + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_TRUNC: { + Register SrcReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + unsigned SrcBitWidth = SrcTy.isPointer() + ? 
DL.getIndexSizeInBits(SrcTy.getAddressSpace()) + : SrcTy.getSizeInBits(); + assert(SrcBitWidth && "SrcBitWidth can't be zero"); + Known = Known.zextOrTrunc(SrcBitWidth, true); + computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); + Known = Known.zextOrTrunc(BitWidth, true); + if (BitWidth > SrcBitWidth) + Known.Zero.setBitsFrom(SrcBitWidth); + break; + } + } + + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + LLVM_DEBUG(dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" + << Depth << "] Computed for: " << MI << "[" << Depth + << "] Known: 0x" + << (Known.Zero | Known.One).toString(16, false) << "\n" + << "[" << Depth << "] Zero: 0x" + << Known.Zero.toString(16, false) << "\n" + << "[" << Depth << "] One: 0x" + << Known.One.toString(16, false) << "\n"); +} + +void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) { + return false; +} diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index 6e99bdbd8264..45cef4aca888 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -334,7 +335,7 @@ bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) { bool IRTranslator::translateCompare(const User &U, MachineIRBuilder &MIRBuilder) { - const CmpInst *CI = dyn_cast<CmpInst>(&U); + auto *CI = dyn_cast<CmpInst>(&U); Register Op0 = getOrCreateVReg(*U.getOperand(0)); Register Op1 = getOrCreateVReg(*U.getOperand(1)); Register Res = getOrCreateVReg(U); @@ -345,11 +346,12 @@ bool IRTranslator::translateCompare(const User &U, MIRBuilder.buildICmp(Pred, Res, Op0, Op1); else if (Pred == CmpInst::FCMP_FALSE) MIRBuilder.buildCopy( - Res, getOrCreateVReg(*Constant::getNullValue(CI->getType()))); + Res, getOrCreateVReg(*Constant::getNullValue(U.getType()))); else if (Pred == CmpInst::FCMP_TRUE) MIRBuilder.buildCopy( - Res, getOrCreateVReg(*Constant::getAllOnesValue(CI->getType()))); + Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType()))); else { + assert(CI && "Instruction should be CmpInst"); MIRBuilder.buildInstr(TargetOpcode::G_FCMP, {Res}, {Pred, Op0, Op1}, MachineInstr::copyFlagsFromInstruction(*CI)); } @@ -588,8 +590,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, Register CondRHS = getOrCreateVReg(*CB.CmpRHS); Cond = MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0); } else { - assert(CB.PredInfo.Pred == CmpInst::ICMP_ULE && - "Can only handle ULE ranges"); + assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE && + "Can only handle SLE ranges"); const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); @@ -598,7 +600,7 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { Register CondRHS = getOrCreateVReg(*CB.CmpRHS); Cond = - MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, CmpOpReg, CondRHS).getReg(0); + MIB.buildICmp(CmpInst::ICMP_SLE, i1Ty, CmpOpReg, CondRHS).getReg(0); } else { const LLT &CmpTy = MRI->getType(CmpOpReg); 
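The buildSub that follows implements the classic biased range check for switch clusters; a standalone sketch of the identity, assuming 32-bit operands: Low <= X <= High holds exactly when (X - Low) <= (High - Low) compared as unsigned values, so one compare replaces two.

#include <cassert>
#include <cstdint>

bool inRange(int32_t X, int32_t Low, int32_t High) {
  uint32_t Biased = (uint32_t)X - (uint32_t)Low;  // wraparound, no signed UB
  uint32_t Span = (uint32_t)High - (uint32_t)Low;
  return Biased <= Span;                          // single unsigned compare
}

int main() {
  assert(inRange(5, 3, 9) && !inRange(2, 3, 9) && !inRange(10, 3, 9));
  assert(inRange(-4, -5, -1) && !inRange(0, -5, -1));
}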
auto Sub = MIB.buildSub({CmpTy}, CmpOpReg, CondLHS); @@ -728,7 +730,7 @@ bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I, MHS = nullptr; } else { // Check I->Low <= Cond <= I->High. - Pred = CmpInst::ICMP_ULE; + Pred = CmpInst::ICMP_SLE; LHS = I->Low; MHS = Cond; RHS = I->High; @@ -879,7 +881,8 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { return true; } - + const MDNode *Ranges = + Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr; for (unsigned i = 0; i < Regs.size(); ++i) { Register Addr; MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8); @@ -888,7 +891,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { unsigned BaseAlign = getMemOpAlignment(LI); auto MMO = MF->getMachineMemOperand( Ptr, Flags, (MRI->getType(Regs[i]).getSizeInBits() + 7) / 8, - MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr, + MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), Ranges, LI.getSyncScopeID(), LI.getOrdering()); MIRBuilder.buildLoad(Regs[i], Addr, *MMO); } @@ -1075,36 +1078,29 @@ bool IRTranslator::translateGetElementPtr(const User &U, } if (Offset != 0) { - Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL); auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset); - MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetMIB.getReg(0)); - - BaseReg = NewBaseReg; + BaseReg = + MIRBuilder.buildGEP(PtrTy, BaseReg, OffsetMIB.getReg(0)).getReg(0); Offset = 0; } Register IdxReg = getOrCreateVReg(*Idx); - if (MRI->getType(IdxReg) != OffsetTy) { - Register NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy); - MIRBuilder.buildSExtOrTrunc(NewIdxReg, IdxReg); - IdxReg = NewIdxReg; - } + if (MRI->getType(IdxReg) != OffsetTy) + IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0); // N = N + Idx * ElementSize; // Avoid doing it for ElementSize of 1. Register GepOffsetReg; if (ElementSize != 1) { - GepOffsetReg = MRI->createGenericVirtualRegister(OffsetTy); auto ElementSizeMIB = MIRBuilder.buildConstant( getLLTForType(*OffsetIRTy, *DL), ElementSize); - MIRBuilder.buildMul(GepOffsetReg, ElementSizeMIB.getReg(0), IdxReg); + GepOffsetReg = + MIRBuilder.buildMul(OffsetTy, ElementSizeMIB, IdxReg).getReg(0); } else GepOffsetReg = IdxReg; - Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); - MIRBuilder.buildGEP(NewBaseReg, BaseReg, GepOffsetReg); - BaseReg = NewBaseReg; + BaseReg = MIRBuilder.buildGEP(PtrTy, BaseReg, GepOffsetReg).getReg(0); } } @@ -1119,54 +1115,51 @@ bool IRTranslator::translateGetElementPtr(const User &U, return true; } -bool IRTranslator::translateMemfunc(const CallInst &CI, +bool IRTranslator::translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder, - unsigned ID) { + Intrinsic::ID ID) { // If the source is undef, then just emit a nop. 
- if (isa<UndefValue>(CI.getArgOperand(1))) { - switch (ID) { - case Intrinsic::memmove: - case Intrinsic::memcpy: - case Intrinsic::memset: - return true; - default: - break; - } - } - - LLT SizeTy = getLLTForType(*CI.getArgOperand(2)->getType(), *DL); - Type *DstTy = CI.getArgOperand(0)->getType(); - if (cast<PointerType>(DstTy)->getAddressSpace() != 0 || - SizeTy.getSizeInBits() != DL->getPointerSizeInBits(0)) - return false; + if (isa<UndefValue>(CI.getArgOperand(1))) + return true; - SmallVector<CallLowering::ArgInfo, 8> Args; - for (int i = 0; i < 3; ++i) { - const auto &Arg = CI.getArgOperand(i); - Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType()); + ArrayRef<Register> Res; + auto ICall = MIRBuilder.buildIntrinsic(ID, Res, true); + for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) + ICall.addUse(getOrCreateVReg(**AI)); + + unsigned DstAlign = 0, SrcAlign = 0; + unsigned IsVol = + cast<ConstantInt>(CI.getArgOperand(CI.getNumArgOperands() - 1)) + ->getZExtValue(); + + if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) { + DstAlign = std::max<unsigned>(MCI->getDestAlignment(), 1); + SrcAlign = std::max<unsigned>(MCI->getSourceAlignment(), 1); + } else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) { + DstAlign = std::max<unsigned>(MMI->getDestAlignment(), 1); + SrcAlign = std::max<unsigned>(MMI->getSourceAlignment(), 1); + } else { + auto *MSI = cast<MemSetInst>(&CI); + DstAlign = std::max<unsigned>(MSI->getDestAlignment(), 1); } - const char *Callee; - switch (ID) { - case Intrinsic::memmove: - case Intrinsic::memcpy: { - Type *SrcTy = CI.getArgOperand(1)->getType(); - if(cast<PointerType>(SrcTy)->getAddressSpace() != 0) - return false; - Callee = ID == Intrinsic::memcpy ? "memcpy" : "memmove"; - break; - } - case Intrinsic::memset: - Callee = "memset"; - break; - default: - return false; - } + // We need to propagate the tail call flag from the IR inst as an argument. + // Otherwise, we have to pessimize and assume later that we cannot tail call + // any memory intrinsics. + ICall.addImm(CI.isTailCall() ? 1 : 0); - return CLI->lowerCall(MIRBuilder, CI.getCallingConv(), - MachineOperand::CreateES(Callee), - CallLowering::ArgInfo({0}, CI.getType()), Args); + // Create mem operands to store the alignment and volatile info. + auto VolFlag = IsVol ? 
MachineMemOperand::MOVolatile : MachineMemOperand::MONone; + ICall.addMemOperand(MF->getMachineMemOperand( + MachinePointerInfo(CI.getArgOperand(0)), + MachineMemOperand::MOStore | VolFlag, 1, DstAlign)); + if (ID != Intrinsic::memset) + ICall.addMemOperand(MF->getMachineMemOperand( + MachinePointerInfo(CI.getArgOperand(1)), + MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign)); + + return true; } void IRTranslator::getStackGuard(Register DstReg, @@ -1186,7 +1179,7 @@ void IRTranslator::getStackGuard(Register DstReg, MachineMemOperand::MODereferenceable; MachineMemOperand *MemRef = MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8, - DL->getPointerABIAlignment(0)); + DL->getPointerABIAlignment(0).value()); MIB.setMemRefs({MemRef}); } @@ -1208,6 +1201,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { break; case Intrinsic::bswap: return TargetOpcode::G_BSWAP; + case Intrinsic::bitreverse: + return TargetOpcode::G_BITREVERSE; case Intrinsic::ceil: return TargetOpcode::G_FCEIL; case Intrinsic::cos: @@ -1383,16 +1378,17 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. - MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression()); + MIRBuilder.buildDirectDbgValue(0, DI.getVariable(), DI.getExpression()); } else if (const auto *CI = dyn_cast<Constant>(V)) { MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression()); } else { - Register Reg = getOrCreateVReg(*V); - // FIXME: This does not handle register-indirect values at offset 0. The - // direct/indirect thing shouldn't really be handled by something as - // implicit as reg+noreg vs reg+imm in the first palce, but it seems - // pretty baked in right now. - MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression()); + for (Register Reg : getOrCreateVRegs(*V)) { + // FIXME: This does not handle register-indirect values at offset 0. The + // direct/indirect thing shouldn't really be handled by something as + // implicit as reg+noreg vs reg+imm in the first place, but it seems + // pretty baked in right now. + MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression()); + } } return true; } @@ -1433,7 +1429,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: - return translateMemfunc(CI, MIRBuilder, ID); + return translateMemFunc(CI, MIRBuilder, ID); case Intrinsic::eh_typeid_for: { GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0)); Register Reg = getOrCreateVReg(CI); @@ -1441,18 +1437,12 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, MIRBuilder.buildConstant(Reg, TypeID); return true; } - case Intrinsic::objectsize: { - // If we don't know by now, we're never going to know. - const ConstantInt *Min = cast<ConstantInt>(CI.getArgOperand(1)); + case Intrinsic::objectsize: + llvm_unreachable("llvm.objectsize.* should have been lowered already"); - MIRBuilder.buildConstant(getOrCreateVReg(CI), Min->isZero() ? -1ULL : 0); - return true; - } case Intrinsic::is_constant: - // If this wasn't constant-folded away by now, then it's not a - // constant. 
- MIRBuilder.buildConstant(getOrCreateVReg(CI), 0); - return true; + llvm_unreachable("llvm.is.constant.* should have been lowered already"); + case Intrinsic::stackguard: getStackGuard(getOrCreateVReg(CI), MIRBuilder); return true; @@ -1551,6 +1541,46 @@ bool IRTranslator::translateInlineAsm(const CallInst &CI, return true; } +bool IRTranslator::translateCallSite(const ImmutableCallSite &CS, + MachineIRBuilder &MIRBuilder) { + const Instruction &I = *CS.getInstruction(); + ArrayRef<Register> Res = getOrCreateVRegs(I); + + SmallVector<ArrayRef<Register>, 8> Args; + Register SwiftInVReg = 0; + Register SwiftErrorVReg = 0; + for (auto &Arg : CS.args()) { + if (CLI->supportSwiftError() && isSwiftError(Arg)) { + assert(SwiftInVReg == 0 && "Expected only one swift error argument"); + LLT Ty = getLLTForType(*Arg->getType(), *DL); + SwiftInVReg = MRI->createGenericVirtualRegister(Ty); + MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt( + &I, &MIRBuilder.getMBB(), Arg)); + Args.emplace_back(makeArrayRef(SwiftInVReg)); + SwiftErrorVReg = + SwiftError.getOrCreateVRegDefAt(&I, &MIRBuilder.getMBB(), Arg); + continue; + } + Args.push_back(getOrCreateVRegs(*Arg)); + } + + // We don't set HasCalls on MFI here yet because call lowering may decide to + // optimize into tail calls. Instead, we defer that to selection where a final + // scan is done to check if any instructions are calls. + bool Success = + CLI->lowerCall(MIRBuilder, CS, Res, Args, SwiftErrorVReg, + [&]() { return getOrCreateVReg(*CS.getCalledValue()); }); + + // Check if we just inserted a tail call. + if (Success) { + assert(!HasTailCall && "Can't tail call return twice from block?"); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + HasTailCall = TII->isTailCall(*std::prev(MIRBuilder.getInsertPt())); + } + + return Success; +} + bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { const CallInst &CI = cast<CallInst>(U); auto TII = MF->getTarget().getIntrinsicInfo(); @@ -1570,34 +1600,8 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F)); } - if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) { - ArrayRef<Register> Res = getOrCreateVRegs(CI); - - SmallVector<ArrayRef<Register>, 8> Args; - Register SwiftInVReg = 0; - Register SwiftErrorVReg = 0; - for (auto &Arg: CI.arg_operands()) { - if (CLI->supportSwiftError() && isSwiftError(Arg)) { - assert(SwiftInVReg == 0 && "Expected only one swift error argument"); - LLT Ty = getLLTForType(*Arg->getType(), *DL); - SwiftInVReg = MRI->createGenericVirtualRegister(Ty); - MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt( - &CI, &MIRBuilder.getMBB(), Arg)); - Args.emplace_back(makeArrayRef(SwiftInVReg)); - SwiftErrorVReg = - SwiftError.getOrCreateVRegDefAt(&CI, &MIRBuilder.getMBB(), Arg); - continue; - } - Args.push_back(getOrCreateVRegs(*Arg)); - } - - MF->getFrameInfo().setHasCalls(true); - bool Success = - CLI->lowerCall(MIRBuilder, &CI, Res, Args, SwiftErrorVReg, - [&]() { return getOrCreateVReg(*CI.getCalledValue()); }); - - return Success; - } + if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) + return translateCallSite(&CI, MIRBuilder); assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic"); @@ -1615,14 +1619,29 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { if (isa<FPMathOperator>(CI)) MIB->copyIRFlags(CI); - for (auto &Arg : CI.arg_operands()) { + for (auto &Arg 
: enumerate(CI.arg_operands())) { // Some intrinsics take metadata parameters. Reject them. - if (isa<MetadataAsValue>(Arg)) - return false; - ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg); - if (VRegs.size() > 1) + if (isa<MetadataAsValue>(Arg.value())) return false; - MIB.addUse(VRegs[0]); + + // If this is required to be an immediate, don't materialize it in a + // register. + if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg.value())) { + // imm arguments are more convenient than cimm (and realistically + // probably sufficient), so use them. + assert(CI->getBitWidth() <= 64 && + "large intrinsic immediates not handled"); + MIB.addImm(CI->getSExtValue()); + } else { + MIB.addFPImm(cast<ConstantFP>(Arg.value())); + } + } else { + ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value()); + if (VRegs.size() > 1) + return false; + MIB.addUse(VRegs[0]); + } } // Add a MachineMemOperand if it is a target mem intrinsic. @@ -1630,13 +1649,14 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { TargetLowering::IntrinsicInfo Info; // TODO: Add a GlobalISel version of getTgtMemIntrinsic. if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) { - unsigned Align = Info.align; - if (Align == 0) - Align = DL->getABITypeAlignment(Info.memVT.getTypeForEVT(F->getContext())); + MaybeAlign Align = Info.align; + if (!Align) + Align = MaybeAlign( + DL->getABITypeAlignment(Info.memVT.getTypeForEVT(F->getContext()))); uint64_t Size = Info.memVT.getStoreSize(); - MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal), - Info.flags, Size, Align)); + MIB.addMemOperand(MF->getMachineMemOperand( + MachinePointerInfo(Info.ptrVal), Info.flags, Size, Align->value())); } return true; @@ -1672,30 +1692,7 @@ bool IRTranslator::translateInvoke(const User &U, MCSymbol *BeginSymbol = Context.createTempSymbol(); MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol); - ArrayRef<Register> Res; - if (!I.getType()->isVoidTy()) - Res = getOrCreateVRegs(I); - SmallVector<ArrayRef<Register>, 8> Args; - Register SwiftErrorVReg = 0; - Register SwiftInVReg = 0; - for (auto &Arg : I.arg_operands()) { - if (CLI->supportSwiftError() && isSwiftError(Arg)) { - assert(SwiftInVReg == 0 && "Expected only one swift error argument"); - LLT Ty = getLLTForType(*Arg->getType(), *DL); - SwiftInVReg = MRI->createGenericVirtualRegister(Ty); - MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt( - &I, &MIRBuilder.getMBB(), Arg)); - Args.push_back(makeArrayRef(SwiftInVReg)); - SwiftErrorVReg = - SwiftError.getOrCreateVRegDefAt(&I, &MIRBuilder.getMBB(), Arg); - continue; - } - - Args.push_back(getOrCreateVRegs(*Arg)); - } - - if (!CLI->lowerCall(MIRBuilder, &I, Res, Args, SwiftErrorVReg, - [&]() { return getOrCreateVReg(*I.getCalledValue()); })) + if (!translateCallSite(&I, MIRBuilder)) return false; MCSymbol *EndSymbol = Context.createTempSymbol(); @@ -1811,36 +1808,25 @@ bool IRTranslator::translateAlloca(const User &U, Register AllocSize = MRI->createGenericVirtualRegister(IntPtrTy); Register TySize = - getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, -DL->getTypeAllocSize(Ty))); + getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, DL->getTypeAllocSize(Ty))); MIRBuilder.buildMul(AllocSize, NumElts, TySize); - LLT PtrTy = getLLTForType(*AI.getType(), *DL); - auto &TLI = *MF->getSubtarget().getTargetLowering(); - Register SPReg = TLI.getStackPointerRegisterToSaveRestore(); - - Register SPTmp = 
MRI->createGenericVirtualRegister(PtrTy); - MIRBuilder.buildCopy(SPTmp, SPReg); - - Register AllocTmp = MRI->createGenericVirtualRegister(PtrTy); - MIRBuilder.buildGEP(AllocTmp, SPTmp, AllocSize); - - // Handle alignment. We have to realign if the allocation granule was smaller - than stack alignment, or the specific alloca requires more than stack - alignment. unsigned StackAlign = MF->getSubtarget().getFrameLowering()->getStackAlignment(); - Align = std::max(Align, StackAlign); - if (Align > StackAlign || DL->getTypeAllocSize(Ty) % StackAlign != 0) { - // Round the size of the allocation up to the stack alignment size - // by add SA-1 to the size. This doesn't overflow because we're computing - // an address inside an alloca. - Register AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy); - MIRBuilder.buildPtrMask(AlignedAlloc, AllocTmp, Log2_32(Align)); - AllocTmp = AlignedAlloc; - } + if (Align <= StackAlign) + Align = 0; + + // Round the size of the allocation up to the stack alignment size + // by adding SA-1 to the size. This doesn't overflow because we're computing + // an address inside an alloca. + auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign - 1); + auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne, + MachineInstr::NoUWrap); + auto AlignCst = + MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign - 1)); + auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst); - MIRBuilder.buildCopy(SPReg, AllocTmp); - MIRBuilder.buildCopy(getOrCreateVReg(AI), AllocTmp); + MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Align); MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, &AI); assert(MF->getFrameInfo().hasVarSizedObjects()); @@ -1926,7 +1912,7 @@ bool IRTranslator::translateShuffleVector(const User &U, .addDef(getOrCreateVReg(U)) .addUse(getOrCreateVReg(*U.getOperand(0))) .addUse(getOrCreateVReg(*U.getOperand(1))) - .addUse(getOrCreateVReg(*U.getOperand(2))); + .addShuffleMask(cast<Constant>(U.getOperand(2))); return true; } @@ -1991,7 +1977,6 @@ bool IRTranslator::translateAtomicRMW(const User &U, unsigned Opcode = 0; switch (I.getOperation()) { default: - llvm_unreachable("Unknown atomicrmw op"); return false; case AtomicRMWInst::Xchg: Opcode = TargetOpcode::G_ATOMICRMW_XCHG; break; @@ -2026,6 +2011,12 @@ bool IRTranslator::translateAtomicRMW(const User &U, case AtomicRMWInst::UMin: Opcode = TargetOpcode::G_ATOMICRMW_UMIN; break; + case AtomicRMWInst::FAdd: + Opcode = TargetOpcode::G_ATOMICRMW_FADD; + break; + case AtomicRMWInst::FSub: + Opcode = TargetOpcode::G_ATOMICRMW_FSUB; + break; } MIRBuilder.buildAtomicRMW( @@ -2197,6 +2188,20 @@ void IRTranslator::finalizeFunction() { FuncInfo.clear(); } +/// Returns true if a BasicBlock \p BB within a variadic function contains a +/// variadic musttail call. +static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) { + if (!IsVarArg) + return false; + + // Walk the block backwards, because tail calls usually only appear at the end + of a block.
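A worked example (outside the patch) of the size rounding in translateAlloca above, assuming an 8-byte stack alignment: adding StackAlign-1 and masking with its complement, as the SAMinusOne/AlignCst pair does, rounds a 13-byte dynamic alloca up to 16 bytes.

#include <cstdint>

constexpr uint64_t alignUp(uint64_t Size, uint64_t A) {
  return (Size + A - 1) & ~(A - 1);  // A must be a power of two
}

static_assert(alignUp(13, 8) == 16, "13-byte alloca reserves 16 bytes");
static_assert(alignUp(16, 8) == 16, "already-aligned sizes are unchanged");

int main() {}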
+ return std::any_of(BB.rbegin(), BB.rend(), [](const Instruction &I) { + const auto *CI = dyn_cast<CallInst>(&I); + return CI && CI->isMustTailCall(); + }); +} + bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { MF = &CurMF; const Function &F = MF->getFunction(); @@ -2212,26 +2217,26 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { : TPC->isGISelCSEEnabled(); if (EnableCSE) { - EntryBuilder = make_unique<CSEMIRBuilder>(CurMF); + EntryBuilder = std::make_unique<CSEMIRBuilder>(CurMF); CSEInfo = &Wrapper.get(TPC->getCSEConfig()); EntryBuilder->setCSEInfo(CSEInfo); - CurBuilder = make_unique<CSEMIRBuilder>(CurMF); + CurBuilder = std::make_unique<CSEMIRBuilder>(CurMF); CurBuilder->setCSEInfo(CSEInfo); } else { - EntryBuilder = make_unique<MachineIRBuilder>(); - CurBuilder = make_unique<MachineIRBuilder>(); + EntryBuilder = std::make_unique<MachineIRBuilder>(); + CurBuilder = std::make_unique<MachineIRBuilder>(); } CLI = MF->getSubtarget().getCallLowering(); CurBuilder->setMF(*MF); EntryBuilder->setMF(*MF); MRI = &MF->getRegInfo(); DL = &F.getParent()->getDataLayout(); - ORE = llvm::make_unique<OptimizationRemarkEmitter>(&F); + ORE = std::make_unique<OptimizationRemarkEmitter>(&F); FuncInfo.MF = MF; FuncInfo.BPI = nullptr; const auto &TLI = *MF->getSubtarget().getTargetLowering(); const TargetMachine &TM = MF->getTarget(); - SL = make_unique<GISelSwitchLowering>(this, FuncInfo); + SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo); SL->init(TLI, TM, *DL); EnableOpts = TM.getOptLevel() != CodeGenOpt::None && !skipFunction(F); @@ -2258,6 +2263,9 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { SwiftError.setFunction(CurMF); SwiftError.createEntriesInEntryBlock(DbgLoc); + bool IsVarArg = F.isVarArg(); + bool HasMustTailInVarArgFn = false; + // Create all blocks, in IR order, to preserve the layout. for (const BasicBlock &BB: F) { auto *&MBB = BBToMBB[&BB]; @@ -2267,8 +2275,13 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { if (BB.hasAddressTaken()) MBB->setHasAddressTaken(); + + if (!HasMustTailInVarArgFn) + HasMustTailInVarArgFn = checkForMustTailInVarArgFn(IsVarArg, BB); } + MF->getFrameInfo().setHasMustTailInVarArgFunc(HasMustTailInVarArgFn); + // Make our arguments/constants entry block fallthrough to the IR entry block. EntryBB->addSuccessor(&getMBB(F.front())); @@ -2286,18 +2299,6 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { } } - // We don't currently support translating swifterror or swiftself functions. - for (auto &Arg : F.args()) { - if (Arg.hasSwiftSelfAttr()) { - OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", - F.getSubprogram(), &F.getEntryBlock()); - R << "unable to lower arguments due to swiftself: " - << ore::NV("Prototype", F.getType()); - reportTranslationError(*MF, *TPC, *ORE, R); - return false; - } - } - if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs)) { OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", F.getSubprogram(), &F.getEntryBlock()); @@ -2322,8 +2323,15 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { // Set the insertion point of all the following translations to // the end of this basic block. CurBuilder->setMBB(MBB); - + HasTailCall = false; for (const Instruction &Inst : *BB) { + // If we translated a tail call in the last step, then we know + // everything after the call is either a return, or something that is + // handled by the call itself. (E.g. 
a lifetime marker or assume + // intrinsic.) In this case, we should stop translating the block and + // move on. + if (HasTailCall) + break; #ifndef NDEBUG Verifier.setCurrentInst(&Inst); #endif // ifndef NDEBUG diff --git a/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 70694fe6b6c8..7c4fd2d140d3 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -12,11 +12,14 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -45,6 +48,7 @@ INITIALIZE_PASS_BEGIN(InstructionSelect, DEBUG_TYPE, "Select target instructions out of generic instructions", false, false) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE, "Select target instructions out of generic instructions", false, false) @@ -53,6 +57,8 @@ InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) { } void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetPassConfig>(); + AU.addRequired<GISelKnownBitsAnalysis>(); + AU.addPreserved<GISelKnownBitsAnalysis>(); getSelectionDAGFallbackAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -64,11 +70,13 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { return false; LLVM_DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n'); + GISelKnownBits &KB = getAnalysis<GISelKnownBitsAnalysis>().get(MF); const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); - const InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector(); + InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector(); CodeGenCoverage CoverageInfo; assert(ISel && "Cannot work without InstructionSelector"); + ISel->setupMF(MF, KB, CoverageInfo); // An optimization remark emitter. Used to report failures. MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); @@ -124,7 +132,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { continue; } - if (!ISel->select(MI, CoverageInfo)) { + if (!ISel->select(MI)) { // FIXME: It would be nice to dump all inserted instructions. It's // not obvious how, esp. considering select() can insert after MI. 
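With GISelKnownBitsAnalysis now handed to the selector via setupMF above, selection patterns can ask bit-level questions about their operands. A minimal sketch of the kind of query involved, with a plain mask standing in for the KnownBits type (names here are illustrative, not the LLVM API):

#include <cassert>
#include <cstdint>

// Count how many low bits are provably zero, given a mask of known-zero bits.
unsigned minTrailingZeros(uint32_t KnownZeroMask) {
  unsigned N = 0;
  while (N < 32 && (KnownZeroMask & (1u << N)))
    ++N;
  return N;
}

int main() {
  // A value known to be X << 2 has at least two known-zero low bits, which a
  // selector could use to justify a scaled addressing mode, for example.
  assert(minTrailingZeros(0x3) == 2);
}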
reportGISelFailure(MF, TPC, MORE, "gisel-select", "cannot select", MI); @@ -159,10 +167,10 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { --MII; if (MI.getOpcode() != TargetOpcode::COPY) continue; - unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned DstReg = MI.getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(SrcReg) && - TargetRegisterInfo::isVirtualRegister(DstReg)) { + Register SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + if (Register::isVirtualRegister(SrcReg) && + Register::isVirtualRegister(DstReg)) { auto SrcRC = MRI.getRegClass(SrcReg); auto DstRC = MRI.getRegClass(DstReg); if (SrcRC == DstRC) { @@ -179,7 +187,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { // that the size of the now-constrained vreg is unchanged and that it has a // register class. for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - unsigned VReg = TargetRegisterInfo::index2VirtReg(I); + unsigned VReg = Register::index2VirtReg(I); MachineInstr *MI = nullptr; if (!MRI.def_empty(VReg)) @@ -217,6 +225,22 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { auto &TLI = *MF.getSubtarget().getTargetLowering(); TLI.finalizeLowering(MF); + // Determine if there are any calls in this machine function. Ported from + // SelectionDAG. + MachineFrameInfo &MFI = MF.getFrameInfo(); + for (const auto &MBB : MF) { + if (MFI.hasCalls() && MF.hasInlineAsm()) + break; + + for (const auto &MI : MBB) { + if ((MI.isCall() && !MI.isReturn()) || MI.isStackAligningInlineAsm()) + MFI.setHasCalls(true); + if (MI.isInlineAsm()) + MF.setHasInlineAsm(true); + } + } + + LLVM_DEBUG({ dbgs() << "Rules covered by selecting function: " << MF.getName() << ":"; for (auto RuleID : CoverageInfo.covered()) diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 2ad35b3a72c9..28143b30d4e8 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -79,5 +79,5 @@ bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI, return true; return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() && - !MI.hasUnmodeledSideEffects() && empty(MI.implicit_operands()); + !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty(); } diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp index b5b26bff34bb..1593e21fe07e 100644 --- a/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -184,11 +184,11 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { : TPC.isGISelCSEEnabled(); if (EnableCSE) { - MIRBuilder = make_unique<CSEMIRBuilder>(); + MIRBuilder = std::make_unique<CSEMIRBuilder>(); CSEInfo = &Wrapper.get(TPC.getCSEConfig()); MIRBuilder->setCSEInfo(CSEInfo); } else - MIRBuilder = make_unique<MachineIRBuilder>(); + MIRBuilder = std::make_unique<MachineIRBuilder>(); // This observer keeps the worklist updated. LegalizerWorkListManager WorkListObserver(InstList, ArtifactList); // We want both WorkListObserver as well as CSEInfo to observe all changes. 
@@ -206,8 +206,16 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { auto RemoveDeadInstFromLists = [&WrapperObserver](MachineInstr *DeadMI) { WrapperObserver.erasingInstr(*DeadMI); }; + auto stopLegalizing = [&](MachineInstr &MI) { + Helper.MIRBuilder.stopObservingChanges(); + reportGISelFailure(MF, TPC, MORE, "gisel-legalize", + "unable to legalize instruction", MI); + }; bool Changed = false; + SmallVector<MachineInstr *, 128> RetryList; do { + assert(RetryList.empty() && "Expected no instructions in RetryList"); + unsigned NumArtifacts = ArtifactList.size(); while (!InstList.empty()) { MachineInstr &MI = *InstList.pop_back_val(); assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); @@ -222,14 +230,31 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // Error out if we couldn't legalize this instruction. We may want to // fall back to DAG ISel instead in the future. if (Res == LegalizerHelper::UnableToLegalize) { - Helper.MIRBuilder.stopObservingChanges(); - reportGISelFailure(MF, TPC, MORE, "gisel-legalize", - "unable to legalize instruction", MI); + // Move illegal artifacts to RetryList instead of aborting because + // legalizing InstList may generate artifacts that allow + // ArtifactCombiner to combine them away. + if (isArtifact(MI)) { + RetryList.push_back(&MI); + continue; + } + stopLegalizing(MI); return false; } WorkListObserver.printNewInstrs(); Changed |= Res == LegalizerHelper::Legalized; } + // Try to combine the instructions in RetryList again if there + // are new artifacts. If not, stop legalizing. + if (!RetryList.empty()) { + if (ArtifactList.size() > NumArtifacts) { + while (!RetryList.empty()) + ArtifactList.insert(RetryList.pop_back_val()); + } else { + MachineInstr *MI = *RetryList.begin(); + stopLegalizing(*MI); + return false; + } + } while (!ArtifactList.empty()) { MachineInstr &MI = *ArtifactList.pop_back_val(); assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index f5cf7fc9bd9b..21512e543878 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -171,6 +172,26 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy, return true; } +static LLT getGCDType(LLT OrigTy, LLT TargetTy) { + if (OrigTy.isVector() && TargetTy.isVector()) { + assert(OrigTy.getElementType() == TargetTy.getElementType()); + int GCD = greatestCommonDivisor(OrigTy.getNumElements(), + TargetTy.getNumElements()); + return LLT::scalarOrVector(GCD, OrigTy.getElementType()); + } + + if (OrigTy.isVector() && !TargetTy.isVector()) { + assert(OrigTy.getElementType() == TargetTy); + return TargetTy; + } + + assert(!OrigTy.isVector() && !TargetTy.isVector()); + + int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(), + TargetTy.getSizeInBits()); + return LLT::scalar(GCD); +} + void LegalizerHelper::insertParts(Register DstReg, LLT ResultTy, LLT PartTy, ArrayRef<Register> PartRegs, @@ -219,11 +240,29 @@ void LegalizerHelper::insertParts(Register DstReg, static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
switch (Opcode) { case TargetOpcode::G_SDIV: - assert((Size == 32 || Size == 64) && "Unsupported size"); - return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32; + assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); + switch (Size) { + case 32: + return RTLIB::SDIV_I32; + case 64: + return RTLIB::SDIV_I64; + case 128: + return RTLIB::SDIV_I128; + default: + llvm_unreachable("unexpected size"); + } case TargetOpcode::G_UDIV: - assert((Size == 32 || Size == 64) && "Unsupported size"); - return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32; + assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); + switch (Size) { + case 32: + return RTLIB::UDIV_I32; + case 64: + return RTLIB::UDIV_I64; + case 128: + return RTLIB::UDIV_I128; + default: + llvm_unreachable("unexpected size"); + } case TargetOpcode::G_SREM: assert((Size == 32 || Size == 64) && "Unsupported size"); return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32; @@ -288,6 +327,35 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { llvm_unreachable("Unknown libcall function"); } +/// True if an instruction is in tail position in its caller. Intended for +/// legalizing libcalls as tail calls when possible. +static bool isLibCallInTailPosition(MachineInstr &MI) { + const Function &F = MI.getParent()->getParent()->getFunction(); + + // Conservatively require the attributes of the call to match those of + // the return. Ignore NoAlias and NonNull because they don't affect the + // call sequence. + AttributeList CallerAttrs = F.getAttributes(); + if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex) + .removeAttribute(Attribute::NoAlias) + .removeAttribute(Attribute::NonNull) + .hasAttributes()) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. + if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) || + CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt)) + return false; + + // Only tail call if the following instruction is a standard return. 
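The getRTLibDesc extension above routes 128-bit divisions to runtime libcalls; on most targets the default symbols behind RTLIB::SDIV_I32/I64/I128 are the compiler-rt/libgcc names sketched below (targets can rename them via TargetLowering, so treat this as the typical mapping, not a guarantee):

#include <cassert>
#include <cstring>

const char *sdivLibcallName(unsigned SizeInBits) {
  switch (SizeInBits) {
  case 32:  return "__divsi3";
  case 64:  return "__divdi3";
  case 128: return "__divti3";
  default:  return nullptr;  // no default libcall at this width
  }
}

int main() { assert(std::strcmp(sdivLibcallName(128), "__divti3") == 0); }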
+ auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); + MachineInstr *Next = MI.getNextNode(); + if (!Next || TII.isTailCall(*Next) || !Next->isReturn()) + return false; + + return true; +} + LegalizerHelper::LegalizeResult llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result, @@ -296,9 +364,12 @@ llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); const char *Name = TLI.getLibcallName(Libcall); - MIRBuilder.getMF().getFrameInfo().setHasCalls(true); - if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall), - MachineOperand::CreateES(Name), Result, Args)) + CallLowering::CallLoweringInfo Info; + Info.CallConv = TLI.getLibcallCallingConv(Libcall); + Info.Callee = MachineOperand::CreateES(Name); + Info.OrigRet = Result; + std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs)); + if (!CLI.lowerCall(MIRBuilder, Info)) return LegalizerHelper::UnableToLegalize; return LegalizerHelper::Legalized; @@ -317,6 +388,74 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Args); } +LegalizerHelper::LegalizeResult +llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, + MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); + auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); + + SmallVector<CallLowering::ArgInfo, 3> Args; + // Add all the args, except for the last which is an imm denoting 'tail'. + for (unsigned i = 1; i < MI.getNumOperands() - 1; i++) { + Register Reg = MI.getOperand(i).getReg(); + + // Need derive an IR type for call lowering. + LLT OpLLT = MRI.getType(Reg); + Type *OpTy = nullptr; + if (OpLLT.isPointer()) + OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace()); + else + OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits()); + Args.push_back({Reg, OpTy}); + } + + auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); + auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); + Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID(); + RTLIB::Libcall RTLibcall; + switch (ID) { + case Intrinsic::memcpy: + RTLibcall = RTLIB::MEMCPY; + break; + case Intrinsic::memset: + RTLibcall = RTLIB::MEMSET; + break; + case Intrinsic::memmove: + RTLibcall = RTLIB::MEMMOVE; + break; + default: + return LegalizerHelper::UnableToLegalize; + } + const char *Name = TLI.getLibcallName(RTLibcall); + + MIRBuilder.setInstr(MI); + + CallLowering::CallLoweringInfo Info; + Info.CallConv = TLI.getLibcallCallingConv(RTLibcall); + Info.Callee = MachineOperand::CreateES(Name); + Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx)); + Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() == 1 && + isLibCallInTailPosition(MI); + + std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs)); + if (!CLI.lowerCall(MIRBuilder, Info)) + return LegalizerHelper::UnableToLegalize; + + if (Info.LoweredTailCall) { + assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?"); + // We must have a return following the call to get past + // isLibCallInTailPosition. + assert(MI.getNextNode() && MI.getNextNode()->isReturn() && + "Expected instr following MI to be a return?"); + + // We lowered a tail call, so the call is now the return from the block. + // Delete the old return. 
+ MI.getNextNode()->eraseFromParent(); + } + + return LegalizerHelper::Legalized; +} + static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType) { auto ToMVT = MVT::getVT(ToType); @@ -518,6 +657,65 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_SEXT: { + if (TypeIdx != 0) + return UnableToLegalize; + + Register SrcReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + + // FIXME: support the general case where the requested NarrowTy may not be + // the same as the source type. E.g. s128 = sext(s32) + if ((SrcTy.getSizeInBits() != SizeOp0 / 2) || + SrcTy.getSizeInBits() != NarrowTy.getSizeInBits()) { + LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy << "\n"); + return UnableToLegalize; + } + + // Shift the sign bit of the low register through the high register. + auto ShiftAmt = + MIRBuilder.buildConstant(LLT::scalar(64), NarrowTy.getSizeInBits() - 1); + auto Shift = MIRBuilder.buildAShr(NarrowTy, SrcReg, ShiftAmt); + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {SrcReg, Shift.getReg(0)}); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_ZEXT: { + if (TypeIdx != 0) + return UnableToLegalize; + + LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + uint64_t SizeOp1 = SrcTy.getSizeInBits(); + if (SizeOp0 % SizeOp1 != 0) + return UnableToLegalize; + + // Generate a merge where the bottom bits are taken from the source, and + // zero everything else. + Register ZeroReg = MIRBuilder.buildConstant(SrcTy, 0).getReg(0); + unsigned NumParts = SizeOp0 / SizeOp1; + SmallVector<Register, 4> Srcs = {MI.getOperand(1).getReg()}; + for (unsigned Part = 1; Part < NumParts; ++Part) + Srcs.push_back(ZeroReg); + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Srcs); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_TRUNC: { + if (TypeIdx != 1) + return UnableToLegalize; + + uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + if (NarrowTy.getSizeInBits() * 2 != SizeOp1) { + LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n"); + return UnableToLegalize; + } + + auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg()); + MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Unmerge.getReg(0)); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_ADD: { // FIXME: add support for when SizeOp0 isn't an exact multiple of // NarrowSize. 
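// The decomposition performed for this case, as a standalone C++ sketch of a
// 64-bit add done in 32-bit limbs; the next hunk switches the first limb from
// a G_UADDE fed by a constant-0 carry-in to a plain G_UADDO:
#include <cassert>
#include <cstdint>
uint64_t narrowedAdd(uint64_t A, uint64_t B) {
  uint32_t ALo = (uint32_t)A, AHi = (uint32_t)(A >> 32);
  uint32_t BLo = (uint32_t)B, BHi = (uint32_t)(B >> 32);
  uint32_t Lo = ALo + BLo;          // G_UADDO: no carry-in on the first limb,
  uint32_t Carry = Lo < ALo;        // but it still produces a carry-out
  uint32_t Hi = AHi + BHi + Carry;  // G_UADDE: consumes the previous carry
  return ((uint64_t)Hi << 32) | Lo;
}
int main() { assert(narrowedAdd(0xFFFFFFFFull, 1) == 0x100000000ull); }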
@@ -530,15 +728,17 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); - Register CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1)); - MIRBuilder.buildConstant(CarryIn, 0); - + Register CarryIn; for (int i = 0; i < NumParts; ++i) { Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); - MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i], - Src2Regs[i], CarryIn); + if (i == 0) + MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]); + else { + MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i], + Src2Regs[i], CarryIn); + } DstRegs.push_back(DstReg); CarryIn = CarryOut; @@ -730,7 +930,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, for (unsigned j = 1; j < MI.getNumOperands(); j += 2) MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1)); } - MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI()); + MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI()); MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); Observer.changedInstr(MI); MI.eraseFromParent(); @@ -763,6 +963,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, CmpInst::Predicate Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); + LLT ResTy = MRI.getType(MI.getOperand(0).getReg()); if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) { MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL); @@ -771,18 +972,109 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0); MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero); } else { - const LLT s1 = LLT::scalar(1); - MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, s1, LHSH, RHSH); + MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH); MachineInstrBuilder CmpHEQ = - MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, s1, LHSH, RHSH); + MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH); MachineInstrBuilder CmpLU = MIRBuilder.buildICmp( - ICmpInst::getUnsignedPredicate(Pred), s1, LHSL, RHSL); + ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL); MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH); } Observer.changedInstr(MI); MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_SEXT_INREG: { + if (TypeIdx != 0) + return UnableToLegalize; + + if (!MI.getOperand(2).isImm()) + return UnableToLegalize; + int64_t SizeInBits = MI.getOperand(2).getImm(); + + // So long as the new type has more bits than the bits we're extending we + // don't need to break it apart. + if (NarrowTy.getScalarSizeInBits() >= SizeInBits) { + Observer.changingInstr(MI); + // We don't lose any non-extension bits by truncating the src and + // sign-extending the dst. 
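// The identity the comment above relies on, checked in plain C++: truncating,
// doing the narrow sext_inreg, and sign-extending back preserves every
// non-extension bit. A standalone sketch with SizeInBits = 8, narrow type
// s32, original type s64 (helper names are ours, not LLVM API):
#include <cassert>
#include <cstdint>
int64_t sextInReg64(uint64_t X, int B) { return (int64_t)(X << (64 - B)) >> (64 - B); }
int32_t sextInReg32(uint32_t X, int B) { return (int32_t)(X << (32 - B)) >> (32 - B); }
int main() {
  uint64_t X = 0x123456789ABCDEF0ull;
  assert(sextInReg64(X, 8) == (int64_t)sextInReg32((uint32_t)X, 8));
}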
+ MachineOperand &MO1 = MI.getOperand(1); + auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1.getReg()); + MO1.setReg(TruncMIB->getOperand(0).getReg()); + + MachineOperand &MO2 = MI.getOperand(0); + Register DstExt = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + MIRBuilder.buildInstr(TargetOpcode::G_SEXT, {MO2.getReg()}, {DstExt}); + MO2.setReg(DstExt); + Observer.changedInstr(MI); + return Legalized; + } + + // Break it apart. Components below the extension point are unmodified. The + // component containing the extension point becomes a narrower SEXT_INREG. + // Components above it are ashr'd from the component containing the + // extension point. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; + + // List the registers where the destination will be scattered. + SmallVector<Register, 2> DstRegs; + // List the registers where the source will be split. + SmallVector<Register, 2> SrcRegs; + + // Create all the temporary registers. + for (int i = 0; i < NumParts; ++i) { + Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy); + + SrcRegs.push_back(SrcReg); + } + + // Explode the big arguments into smaller chunks. + MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1).getReg()); + + Register AshrCstReg = + MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1) + ->getOperand(0) + .getReg(); + Register FullExtensionReg = 0; + Register PartialExtensionReg = 0; + + // Do the operation on each small part. + for (int i = 0; i < NumParts; ++i) { + if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits) + DstRegs.push_back(SrcRegs[i]); + else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) { + assert(PartialExtensionReg && + "Expected to visit partial extension before full"); + if (FullExtensionReg) { + DstRegs.push_back(FullExtensionReg); + continue; + } + DstRegs.push_back(MIRBuilder + .buildInstr(TargetOpcode::G_ASHR, {NarrowTy}, + {PartialExtensionReg, AshrCstReg}) + ->getOperand(0) + .getReg()); + FullExtensionReg = DstRegs.back(); + } else { + DstRegs.push_back( + MIRBuilder + .buildInstr( + TargetOpcode::G_SEXT_INREG, {NarrowTy}, + {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()}) + ->getOperand(0) + .getReg()); + PartialExtensionReg = DstRegs.back(); + } + } + + // Gather the destination registers into the final destination. + Register DstReg = MI.getOperand(0).getReg(); + MIRBuilder.buildMerge(DstReg, DstRegs); + MI.eraseFromParent(); + return Legalized; + } } } @@ -892,7 +1184,7 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg); - Register NextResult = I + 1 == NumOps && WideSize == DstSize ? DstReg : + Register NextResult = I + 1 == NumOps && WideTy == DstTy ? 
DstReg : MRI.createGenericVirtualRegister(WideTy); auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset); @@ -903,6 +1195,8 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, if (WideSize > DstSize) MIRBuilder.buildTrunc(DstReg, ResultReg); + else if (DstTy.isPointer()) + MIRBuilder.buildIntToPtr(DstReg, ResultReg); MI.eraseFromParent(); return Legalized; @@ -1218,6 +1512,24 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_BITREVERSE: { + Observer.changingInstr(MI); + + Register DstReg = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(DstReg); + unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits(); + + Register DstExt = MRI.createGenericVirtualRegister(WideTy); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + MI.getOperand(0).setReg(DstExt); + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + + auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits); + auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt); + MIRBuilder.buildTrunc(DstReg, Shift); + Observer.changedInstr(MI); + return Legalized; + } case TargetOpcode::G_ADD: case TargetOpcode::G_AND: case TargetOpcode::G_MUL: @@ -1310,13 +1622,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FPTOSI: case TargetOpcode::G_FPTOUI: - if (TypeIdx != 0) - return UnableToLegalize; Observer.changingInstr(MI); - widenScalarDst(MI, WideTy); + + if (TypeIdx == 0) + widenScalarDst(MI, WideTy); + else + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT); + Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_SITOFP: if (TypeIdx != 1) return UnableToLegalize; @@ -1483,6 +1797,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FMUL: case TargetOpcode::G_FSUB: case TargetOpcode::G_FMA: + case TargetOpcode::G_FMAD: case TargetOpcode::G_FNEG: case TargetOpcode::G_FABS: case TargetOpcode::G_FCANONICALIZE: @@ -1553,6 +1868,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_SEXT_INREG: + if (TypeIdx != 0) + return UnableToLegalize; + + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC); + Observer.changedInstr(MI); + return Legalized; } } @@ -1579,6 +1903,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_SADDO: + case TargetOpcode::G_SSUBO: + return lowerSADDO_SSUBO(MI); case TargetOpcode::G_SMULO: case TargetOpcode::G_UMULO: { // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the @@ -1669,6 +1996,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_FMAD: + return lowerFMad(MI); case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { Register OldValRes = MI.getOperand(0).getReg(); Register SuccessRes = MI.getOperand(1).getReg(); @@ -1690,11 +2019,57 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { LLT DstTy = MRI.getType(DstReg); auto &MMO = **MI.memoperands_begin(); - if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) { - // In the case of G_LOAD, this was a non-extending load already and we're - // about to lower to the 
same instruction. - if (MI.getOpcode() == TargetOpcode::G_LOAD) + if (DstTy.getSizeInBits() == MMO.getSizeInBits()) { + if (MI.getOpcode() == TargetOpcode::G_LOAD) { + // This load needs splitting into power of 2 sized loads. + if (DstTy.isVector()) return UnableToLegalize; + if (isPowerOf2_32(DstTy.getSizeInBits())) + return UnableToLegalize; // Don't know what we're being asked to do. + + // Our strategy here is to generate anyextending loads for the smaller + // types up to next power-2 result type, and then combine the two larger + // result values together, before truncating back down to the non-pow-2 + // type. + // E.g. v1 = i24 load => + // v2 = i32 load (2 byte) + // v3 = i32 load (1 byte) + // v4 = i32 shl v3, 16 + // v5 = i32 or v4, v2 + // v1 = i24 trunc v5 + // By doing this we generate the correct truncate which should get + // combined away as an artifact with a matching extend. + uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits()); + uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize; + + MachineFunction &MF = MIRBuilder.getMF(); + MachineMemOperand *LargeMMO = + MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); + MachineMemOperand *SmallMMO = MF.getMachineMemOperand( + &MMO, LargeSplitSize / 8, SmallSplitSize / 8); + + LLT PtrTy = MRI.getType(PtrReg); + unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits()); + LLT AnyExtTy = LLT::scalar(AnyExtSize); + Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy); + Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy); + auto LargeLoad = + MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO); + + auto OffsetCst = + MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8); + Register GEPReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0)); + auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0), + *SmallMMO); + + auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize); + auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt); + auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad); + MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)}); + MI.eraseFromParent(); + return Legalized; + } MIRBuilder.buildLoad(DstReg, PtrReg, MMO); MI.eraseFromParent(); return Legalized; @@ -1723,6 +2098,51 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return UnableToLegalize; } + case TargetOpcode::G_STORE: { + // Lower a non-power of 2 store into multiple pow-2 stores. + // E.g. split an i24 store into an i16 store + i8 store. + // We do this by first extending the stored value to the next largest power + // of 2 type, and then using truncating stores to store the components. + // By doing this, likewise with G_LOAD, generate an extend that can be + // artifact-combined away instead of leaving behind extracts. + Register SrcReg = MI.getOperand(0).getReg(); + Register PtrReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + MachineMemOperand &MMO = **MI.memoperands_begin(); + if (SrcTy.getSizeInBits() != MMO.getSizeInBits()) + return UnableToLegalize; + if (SrcTy.isVector()) + return UnableToLegalize; + if (isPowerOf2_32(SrcTy.getSizeInBits())) + return UnableToLegalize; // Don't know what we're being asked to do. + + // Extend to the next pow-2. + const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits())); + auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg); + + // Obtain the smaller value by shifting away the larger value. 
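// The resulting memory layout for an i24 store, as a standalone sketch
// (little-endian, and assuming the target tolerates the component stores):
#include <cassert>
#include <cstdint>
#include <cstring>
void storeI24(uint8_t *P, uint32_t V) {  // V is the value anyext'd to i32
  uint16_t Large = (uint16_t)V;          // low LargeSplitSize = 16 bits
  uint8_t Small = (uint8_t)(V >> 16);    // SmallVal: V lshr LargeSplitSize
  std::memcpy(P, &Large, 2);             // truncating i16 store at offset 0
  std::memcpy(P + 2, &Small, 1);         // i8 store through the GEP'd pointer
}
int main() {
  uint8_t Buf[3];
  storeI24(Buf, 0xABCDEF);
  assert(Buf[0] == 0xEF && Buf[1] == 0xCD && Buf[2] == 0xAB);
}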
+ uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits()); + uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize; + auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize); + auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt); + + // Generate the GEP and truncating stores. + LLT PtrTy = MRI.getType(PtrReg); + auto OffsetCst = + MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8); + Register GEPReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0)); + + MachineFunction &MF = MIRBuilder.getMF(); + MachineMemOperand *LargeMMO = + MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); + MachineMemOperand *SmallMMO = + MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8); + MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO); + MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO); + MI.eraseFromParent(); + return Legalized; + } case TargetOpcode::G_CTLZ_ZERO_UNDEF: case TargetOpcode::G_CTTZ_ZERO_UNDEF: case TargetOpcode::G_CTLZ: @@ -1797,6 +2217,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return lowerUITOFP(MI, TypeIdx, Ty); case G_SITOFP: return lowerSITOFP(MI, TypeIdx, Ty); + case G_FPTOUI: + return lowerFPTOUI(MI, TypeIdx, Ty); case G_SMIN: case G_SMAX: case G_UMIN: @@ -1807,6 +2229,31 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { case G_FMINNUM: case G_FMAXNUM: return lowerFMinNumMaxNum(MI); + case G_UNMERGE_VALUES: + return lowerUnmergeValues(MI); + case TargetOpcode::G_SEXT_INREG: { + assert(MI.getOperand(2).isImm() && "Expected immediate"); + int64_t SizeInBits = MI.getOperand(2).getImm(); + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + Register TmpRes = MRI.createGenericVirtualRegister(DstTy); + + auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits); + MIRBuilder.buildInstr(TargetOpcode::G_SHL, {TmpRes}, {SrcReg, MIBSz->getOperand(0).getReg()}); + MIRBuilder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {TmpRes, MIBSz->getOperand(0).getReg()}); + MI.eraseFromParent(); + return Legalized; + } + case G_SHUFFLE_VECTOR: + return lowerShuffleVector(MI); + case G_DYN_STACKALLOC: + return lowerDynStackAlloc(MI); + case G_EXTRACT: + return lowerExtract(MI); + case G_INSERT: + return lowerInsert(MI); } } @@ -2283,6 +2730,105 @@ LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, } LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 1) + return UnableToLegalize; + + const int NumDst = MI.getNumOperands() - 1; + const Register SrcReg = MI.getOperand(NumDst).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + // TODO: Create sequence of extracts. + if (DstTy == NarrowTy) + return UnableToLegalize; + + LLT GCDTy = getGCDType(SrcTy, NarrowTy); + if (DstTy == GCDTy) { + // This would just be a copy of the same unmerge. + // TODO: Create extracts, pad with undef and create intermediate merges. 
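// getGCDType (defined earlier in this file) picks the widest common tile of
// two types; for vectors with matching element types that is the GCD of the
// element counts. A standalone sketch of the arithmetic used just above:
#include <cassert>
#include <numeric>
int gcdNumElements(int OrigElts, int TargetElts) {
  return std::gcd(OrigElts, TargetElts); // e.g. <12 x s16> vs <8 x s16> -> <4 x s16>
}
int main() { assert(gcdNumElements(12, 8) == 4); }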
+ return UnableToLegalize; + } + + auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg); + const int NumUnmerge = Unmerge->getNumOperands() - 1; + const int PartsPerUnmerge = NumDst / NumUnmerge; + + for (int I = 0; I != NumUnmerge; ++I) { + auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); + + for (int J = 0; J != PartsPerUnmerge; ++J) + MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg()); + MIB.addUse(Unmerge.getReg(I)); + } + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy) { + assert(TypeIdx == 0 && "not a vector type index"); + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = DstTy.getElementType(); + + int DstNumElts = DstTy.getNumElements(); + int NarrowNumElts = NarrowTy.getNumElements(); + int NumConcat = (DstNumElts + NarrowNumElts - 1) / NarrowNumElts; + LLT WidenedDstTy = LLT::vector(NarrowNumElts * NumConcat, SrcTy); + + SmallVector<Register, 8> ConcatOps; + SmallVector<Register, 8> SubBuildVector; + + Register UndefReg; + if (WidenedDstTy != DstTy) + UndefReg = MIRBuilder.buildUndef(SrcTy).getReg(0); + + // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as + // necessary. + // + // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2 + // -> <2 x s16> + // + // %4:_(s16) = G_IMPLICIT_DEF + // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1 + // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4 + // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6 + // %3:_(<3 x s16>) = G_EXTRACT %7, 0 + for (int I = 0; I != NumConcat; ++I) { + for (int J = 0; J != NarrowNumElts; ++J) { + int SrcIdx = NarrowNumElts * I + J; + + if (SrcIdx < DstNumElts) { + Register SrcReg = MI.getOperand(SrcIdx + 1).getReg(); + SubBuildVector.push_back(SrcReg); + } else + SubBuildVector.push_back(UndefReg); + } + + auto BuildVec = MIRBuilder.buildBuildVector(NarrowTy, SubBuildVector); + ConcatOps.push_back(BuildVec.getReg(0)); + SubBuildVector.clear(); + } + + if (DstTy == WidenedDstTy) + MIRBuilder.buildConcatVectors(DstReg, ConcatOps); + else { + auto Concat = MIRBuilder.buildConcatVectors(WidenedDstTy, ConcatOps); + MIRBuilder.buildExtract(DstReg, Concat, 0); + } + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { // FIXME: Don't know how to handle secondary types yet. 
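// The undef-padding scheme of fewerElementsVectorBuildVector above, on plain
// arrays: a 3-element build vector split into 2-element chunks, the tail
// padded with an undef stand-in, and the result re-narrowed (the G_EXTRACT
// at offset 0). A standalone sketch:
#include <array>
#include <cassert>
std::array<int, 3> buildVec3(int A, int B, int C) {
  const int Undef = 0;                          // stand-in for G_IMPLICIT_DEF
  std::array<int, 4> Concat = {A, B, C, Undef}; // two <2 x> chunks, concatenated
  return {Concat[0], Concat[1], Concat[2]};     // extract the low 3 elements
}
int main() { assert(buildVec3(1, 2, 3)[2] == 3); }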
@@ -2395,6 +2941,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FDIV: case G_FREM: case G_FMA: + case G_FMAD: case G_FPOW: case G_FEXP: case G_FEXP2: @@ -2411,6 +2958,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FSIN: case G_FSQRT: case G_BSWAP: + case G_BITREVERSE: case G_SDIV: case G_SMIN: case G_SMAX: @@ -2453,6 +3001,10 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy); case G_PHI: return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy); + case G_UNMERGE_VALUES: + return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy); + case G_BUILD_VECTOR: + return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy); case G_LOAD: case G_STORE: return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); @@ -2604,11 +3156,11 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, switch (MI.getOpcode()) { case TargetOpcode::G_SHL: { // Short: ShAmt < NewBitSize - auto LoS = MIRBuilder.buildShl(HalfTy, InH, Amt); + auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt); - auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt); - auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack); - auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); + auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack); + auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt); + auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr); // Long: ShAmt >= NewBitSize auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero. @@ -2622,41 +3174,25 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, ResultRegs[1] = Hi.getReg(0); break; } - case TargetOpcode::G_LSHR: { - // Short: ShAmt < NewBitSize - auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt); - - auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt); - auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack); - auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); - - // Long: ShAmt >= NewBitSize - auto HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero. - auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part. - - auto Lo = MIRBuilder.buildSelect( - HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL)); - auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL); - - ResultRegs[0] = Lo.getReg(0); - ResultRegs[1] = Hi.getReg(0); - break; - } + case TargetOpcode::G_LSHR: case TargetOpcode::G_ASHR: { // Short: ShAmt < NewBitSize - auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt); + auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt}); - auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt); - auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack); - auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); + auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt); + auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack); + auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr); // Long: ShAmt >= NewBitSize - - // Sign of Hi part. - auto HiL = MIRBuilder.buildAShr( - HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1)); - - auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part. + MachineInstrBuilder HiL; + if (MI.getOpcode() == TargetOpcode::G_LSHR) { + HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero. + } else { + auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1); + HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part. 
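// The short/long cases built above and selected between just below, as plain
// C++ for a 64-bit G_LSHR split into 32-bit halves (standalone sketch; the
// branches model the G_SELECTs):
#include <cassert>
#include <cstdint>
uint64_t narrowedLShr(uint64_t In, unsigned Amt) {
  uint32_t InL = (uint32_t)In, InH = (uint32_t)(In >> 32), Lo, Hi;
  if (Amt == 0) {                            // IsZero: pass both halves through
    Lo = InL; Hi = InH;
  } else if (Amt < 32) {                     // IsShort: Amt < NewBitSize
    Hi = InH >> Amt;
    Lo = (InL >> Amt) | (InH << (32 - Amt)); // AmtLack = NewBitSize - Amt
  } else {                                   // long: Amt >= NewBitSize
    Hi = 0;                                  // HiL is zero for lshr
    Lo = InH >> (Amt - 32);                  // AmtExcess = Amt - NewBitSize
  }
  return ((uint64_t)Hi << 32) | Lo;
}
int main() { assert(narrowedLShr(0x8000000000000000ull, 63) == 1); }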
+ } + auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, + {InH, AmtExcess}); // Lo from Hi part. auto Lo = MIRBuilder.buildSelect( HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL)); @@ -2701,12 +3237,22 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, MIRBuilder.setInstr(MI); unsigned Opc = MI.getOpcode(); switch (Opc) { - case TargetOpcode::G_IMPLICIT_DEF: { + case TargetOpcode::G_IMPLICIT_DEF: + case TargetOpcode::G_LOAD: { + if (TypeIdx != 0) + return UnableToLegalize; Observer.changingInstr(MI); moreElementsVectorDst(MI, MoreTy, 0); Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_STORE: + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + moreElementsVectorSrc(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; case TargetOpcode::G_AND: case TargetOpcode::G_OR: case TargetOpcode::G_XOR: @@ -2748,6 +3294,26 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, moreElementsVectorDst(MI, MoreTy, 0); Observer.changedInstr(MI); return Legalized; + case TargetOpcode::G_UNMERGE_VALUES: { + if (TypeIdx != 1) + return UnableToLegalize; + + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + int NumDst = MI.getNumOperands() - 1; + moreElementsVectorSrc(MI, MoreTy, NumDst); + + auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); + for (int I = 0; I != NumDst; ++I) + MIB.addDef(MI.getOperand(I).getReg()); + + int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits(); + for (int I = NumDst; I != NewNumDst; ++I) + MIB.addDef(MRI.createGenericVirtualRegister(DstTy)); + + MIB.addUse(MI.getOperand(NumDst).getReg()); + MI.eraseFromParent(); + return Legalized; + } case TargetOpcode::G_PHI: return moreElementsVectorPhi(MI, TypeIdx, MoreTy); default: @@ -3310,6 +3876,48 @@ LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return UnableToLegalize; } +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); + const LLT S64 = LLT::scalar(64); + const LLT S32 = LLT::scalar(32); + + if (SrcTy != S64 && SrcTy != S32) + return UnableToLegalize; + if (DstTy != S32 && DstTy != S64) + return UnableToLegalize; + + // FPTOSI gives same result as FPTOUI for positive signed integers. + // FPTOUI needs to deal with fp values that convert to unsigned integers + // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp. + + APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits()); + APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle() + : APFloat::IEEEdouble(), + APInt::getNullValue(SrcTy.getSizeInBits())); + TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven); + + MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src); + + MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP); + // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on + // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1. 
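// The same trick in plain C++ for double -> uint64_t, as a standalone sketch
// of this lowering (the threshold and mask are exact powers of two):
#include <cassert>
#include <cstdint>
uint64_t fptoui64(double V) {
  const double Threshold = 9223372036854775808.0; // 2^63
  if (V < Threshold)                              // the FCMP + G_SELECT
    return (uint64_t)(int64_t)V;                  // plain FPTOSI suffices
  return (uint64_t)(int64_t)(V - Threshold)       // FPTOSI on (Value - 2^63)...
         ^ 0x8000000000000000ull;                 // ...then set the top bit
}
int main() { assert(fptoui64(9223372036854775808.0) == 0x8000000000000000ull); }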
+ MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold); + MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub); + MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt); + MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit); + + MachineInstrBuilder FCMP = + MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, DstTy, Src, Threshold); + MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res); + + MI.eraseFromParent(); + return Legalized; +} + static CmpInst::Predicate minMaxToCompare(unsigned Opc) { switch (Opc) { case TargetOpcode::G_SMIN: @@ -3419,3 +4027,251 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) { MI.eraseFromParent(); return Legalized; } + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) { + // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c + Register DstReg = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(DstReg); + unsigned Flags = MI.getFlags(); + + auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2), + Flags); + MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) { + const unsigned NumDst = MI.getNumOperands() - 1; + const Register SrcReg = MI.getOperand(NumDst).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + + Register Dst0Reg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(Dst0Reg); + + + // Expand scalarizing unmerge as bitcast to integer and shift. + if (!DstTy.isVector() && SrcTy.isVector() && + SrcTy.getElementType() == DstTy) { + LLT IntTy = LLT::scalar(SrcTy.getSizeInBits()); + Register Cast = MIRBuilder.buildBitcast(IntTy, SrcReg).getReg(0); + + MIRBuilder.buildTrunc(Dst0Reg, Cast); + + const unsigned DstSize = DstTy.getSizeInBits(); + unsigned Offset = DstSize; + for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) { + auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset); + auto Shift = MIRBuilder.buildLShr(IntTy, Cast, ShiftAmt); + MIRBuilder.buildTrunc(MI.getOperand(I), Shift); + } + + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { + Register DstReg = MI.getOperand(0).getReg(); + Register Src0Reg = MI.getOperand(1).getReg(); + Register Src1Reg = MI.getOperand(2).getReg(); + LLT Src0Ty = MRI.getType(Src0Reg); + LLT DstTy = MRI.getType(DstReg); + LLT IdxTy = LLT::scalar(32); + + const Constant *ShufMask = MI.getOperand(3).getShuffleMask(); + + SmallVector<int, 32> Mask; + ShuffleVectorInst::getShuffleMask(ShufMask, Mask); + + if (DstTy.isScalar()) { + if (Src0Ty.isVector()) + return UnableToLegalize; + + // This is just a SELECT. + assert(Mask.size() == 1 && "Expected a single mask element"); + Register Val; + if (Mask[0] < 0 || Mask[0] > 1) + Val = MIRBuilder.buildUndef(DstTy).getReg(0); + else + Val = Mask[0] == 0 ? Src0Reg : Src1Reg; + MIRBuilder.buildCopy(DstReg, Val); + MI.eraseFromParent(); + return Legalized; + } + + Register Undef; + SmallVector<Register, 32> BuildVec; + LLT EltTy = DstTy.getElementType(); + + for (int Idx : Mask) { + if (Idx < 0) { + if (!Undef.isValid()) + Undef = MIRBuilder.buildUndef(EltTy).getReg(0); + BuildVec.push_back(Undef); + continue; + } + + if (Src0Ty.isScalar()) { + BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg); + } else { + int NumElts = Src0Ty.getNumElements(); + Register SrcVec = Idx < NumElts ? 
Src0Reg : Src1Reg; + int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts; + auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx); + auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK); + BuildVec.push_back(Extract.getReg(0)); + } + } + + MIRBuilder.buildBuildVector(DstReg, BuildVec); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register AllocSize = MI.getOperand(1).getReg(); + unsigned Align = MI.getOperand(2).getImm(); + + const auto &MF = *MI.getMF(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + + LLT PtrTy = MRI.getType(Dst); + LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits()); + + Register SPReg = TLI.getStackPointerRegisterToSaveRestore(); + auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg); + SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp); + + // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't + // have to generate an extra instruction to negate the alloc and then use + // G_GEP to add the negative offset. + auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize); + if (Align) { + APInt AlignMask(IntPtrTy.getSizeInBits(), Align, true); + AlignMask.negate(); + auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask); + Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst); + } + + SPTmp = MIRBuilder.buildCast(PtrTy, Alloc); + MIRBuilder.buildCopy(SPReg, SPTmp); + MIRBuilder.buildCopy(Dst, SPTmp); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerExtract(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + unsigned Offset = MI.getOperand(2).getImm(); + + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); + + if (DstTy.isScalar() && + (SrcTy.isScalar() || + (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) { + LLT SrcIntTy = SrcTy; + if (!SrcTy.isScalar()) { + SrcIntTy = LLT::scalar(SrcTy.getSizeInBits()); + Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0); + } + + if (Offset == 0) + MIRBuilder.buildTrunc(Dst, Src); + else { + auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset); + auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt); + MIRBuilder.buildTrunc(Dst, Shr); + } + + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + Register InsertSrc = MI.getOperand(2).getReg(); + uint64_t Offset = MI.getOperand(3).getImm(); + + LLT DstTy = MRI.getType(Src); + LLT InsertTy = MRI.getType(InsertSrc); + + if (InsertTy.isScalar() && + (DstTy.isScalar() || + (DstTy.isVector() && DstTy.getElementType() == InsertTy))) { + LLT IntDstTy = DstTy; + if (!DstTy.isScalar()) { + IntDstTy = LLT::scalar(DstTy.getSizeInBits()); + Src = MIRBuilder.buildBitcast(IntDstTy, Src).getReg(0); + } + + Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0); + if (Offset != 0) { + auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset); + ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0); + } + + APInt MaskVal = ~APInt::getBitsSet(DstTy.getSizeInBits(), Offset, + InsertTy.getSizeInBits()); + + auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal); + auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask); + auto Or = 
MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc); + + MIRBuilder.buildBitcast(Dst, Or); + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) { + Register Dst0 = MI.getOperand(0).getReg(); + Register Dst1 = MI.getOperand(1).getReg(); + Register LHS = MI.getOperand(2).getReg(); + Register RHS = MI.getOperand(3).getReg(); + const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO; + + LLT Ty = MRI.getType(Dst0); + LLT BoolTy = MRI.getType(Dst1); + + if (IsAdd) + MIRBuilder.buildAdd(Dst0, LHS, RHS); + else + MIRBuilder.buildSub(Dst0, LHS, RHS); + + // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow. + + auto Zero = MIRBuilder.buildConstant(Ty, 0); + + // For an addition, the result should be less than one of the operands (LHS) + // if and only if the other operand (RHS) is negative, otherwise there will + // be overflow. + // For a subtraction, the result should be less than one of the operands + // (LHS) if and only if the other operand (RHS) is (non-zero) positive, + // otherwise there will be overflow. + auto ResultLowerThanLHS = + MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS); + auto ConditionRHS = MIRBuilder.buildICmp( + IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero); + + MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS); + MI.eraseFromParent(); + return Legalized; +} diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 6e1de95b3277..70045512fae5 100644 --- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -215,7 +215,30 @@ bool LegalizeRuleSet::verifyTypeIdxsCoverage(unsigned NumTypeIdxs) const { return true; } const bool AllCovered = (FirstUncovered >= NumTypeIdxs); - LLVM_DEBUG(dbgs() << ".. the first uncovered type index: " << FirstUncovered + if (NumTypeIdxs > 0) + LLVM_DEBUG(dbgs() << ".. the first uncovered type index: " << FirstUncovered + << ", " << (AllCovered ? "OK" : "FAIL") << "\n"); + return AllCovered; +#else + return true; +#endif +} + +bool LegalizeRuleSet::verifyImmIdxsCoverage(unsigned NumImmIdxs) const { +#ifndef NDEBUG + if (Rules.empty()) { + LLVM_DEBUG( + dbgs() << ".. imm index coverage check SKIPPED: no rules defined\n"); + return true; + } + const int64_t FirstUncovered = ImmIdxsCovered.find_first_unset(); + if (FirstUncovered < 0) { + LLVM_DEBUG(dbgs() << ".. imm index coverage check SKIPPED:" + " user-defined predicate detected\n"); + return true; + } + const bool AllCovered = (FirstUncovered >= NumImmIdxs); + LLVM_DEBUG(dbgs() << ".. the first uncovered imm index: " << FirstUncovered << ", " << (AllCovered ? "OK" : "FAIL") << "\n"); return AllCovered; #else @@ -387,8 +410,6 @@ unsigned LegalizerInfo::getActionDefinitionsIdx(unsigned Opcode) const { LLVM_DEBUG(dbgs() << ".. opcode " << Opcode << " is aliased to " << Alias << "\n"); OpcodeIdx = getOpcodeIdxForOpcode(Alias); - LLVM_DEBUG(dbgs() << ".. 
opcode " << Alias << " is aliased to " - << RulesForOpcode[OpcodeIdx].getAlias() << "\n"); assert(RulesForOpcode[OpcodeIdx].getAlias() == 0 && "Cannot chain aliases"); } @@ -412,7 +433,7 @@ LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder( std::initializer_list<unsigned> Opcodes) { unsigned Representative = *Opcodes.begin(); - assert(!empty(Opcodes) && Opcodes.begin() + 1 != Opcodes.end() && + assert(!llvm::empty(Opcodes) && Opcodes.begin() + 1 != Opcodes.end() && "Initializer list must have at least two opcodes"); for (auto I = Opcodes.begin() + 1, E = Opcodes.end(); I != E; ++I) @@ -677,12 +698,23 @@ void LegalizerInfo::verify(const MCInstrInfo &MII) const { ? std::max(OpInfo.getGenericTypeIndex() + 1U, Acc) : Acc; }); + const unsigned NumImmIdxs = std::accumulate( + MCID.opInfo_begin(), MCID.opInfo_end(), 0U, + [](unsigned Acc, const MCOperandInfo &OpInfo) { + return OpInfo.isGenericImm() + ? std::max(OpInfo.getGenericImmIndex() + 1U, Acc) + : Acc; + }); LLVM_DEBUG(dbgs() << MII.getName(Opcode) << " (opcode " << Opcode << "): " << NumTypeIdxs << " type ind" - << (NumTypeIdxs == 1 ? "ex" : "ices") << "\n"); + << (NumTypeIdxs == 1 ? "ex" : "ices") << ", " + << NumImmIdxs << " imm ind" + << (NumImmIdxs == 1 ? "ex" : "ices") << "\n"); const LegalizeRuleSet &RuleSet = getActionDefinitions(Opcode); if (!RuleSet.verifyTypeIdxsCoverage(NumTypeIdxs)) FailedOpcodes.push_back(Opcode); + else if (!RuleSet.verifyImmIdxsCoverage(NumImmIdxs)) + FailedOpcodes.push_back(Opcode); } if (!FailedOpcodes.empty()) { errs() << "The following opcodes have ill-defined legalization rules:"; diff --git a/lib/CodeGen/GlobalISel/Localizer.cpp b/lib/CodeGen/GlobalISel/Localizer.cpp index 3592409710a7..f882ecbf5db3 100644 --- a/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/lib/CodeGen/GlobalISel/Localizer.cpp @@ -79,7 +79,7 @@ bool Localizer::shouldLocalize(const MachineInstr &MI) { return true; case TargetOpcode::G_GLOBAL_VALUE: { unsigned RematCost = TTI->getGISelRematGlobalCost(); - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); unsigned MaxUses = maxUses(RematCost); if (MaxUses == UINT_MAX) return true; // Remats are "free" so always localize. @@ -121,7 +121,7 @@ bool Localizer::localizeInterBlock(MachineFunction &MF, LLVM_DEBUG(dbgs() << "Should localize: " << MI); assert(MI.getDesc().getNumDefs() == 1 && "More than one definition not supported yet"); - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); // Check if all the users of MI are local. // We are going to invalidation the list of use operands, so we // can't use range iterator. @@ -151,7 +151,7 @@ bool Localizer::localizeInterBlock(MachineFunction &MF, LocalizedMI); // Set a new register for the definition. - unsigned NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg)); + Register NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg)); MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg)); LocalizedMI->getOperand(0).setReg(NewReg); NewVRegIt = @@ -177,7 +177,7 @@ bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) { // many users, but this case may be better served by regalloc improvements. for (MachineInstr *MI : LocalizedInstrs) { - unsigned Reg = MI->getOperand(0).getReg(); + Register Reg = MI->getOperand(0).getReg(); MachineBasicBlock &MBB = *MI->getParent(); // All of the user MIs of this reg. 
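// A sketch of the thresholding shouldLocalize applies above: cheap
// rematerializations tolerate many duplicated uses, expensive ones very few.
// The concrete cut-offs here are illustrative, not quoted from the target
// hook:
#include <climits>
unsigned maxUsesSketch(unsigned RematCost) {
  if (RematCost == 1) return UINT_MAX; // remat is "free": always localize
  if (RematCost == 2) return 2;        // hypothetical threshold
  return 1;                            // costly remat: localize single uses only
}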
SmallPtrSet<MachineInstr *, 32> Users; @@ -220,5 +220,6 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) { LocalizedSetVecT LocalizedInstrs; bool Changed = localizeInterBlock(MF, LocalizedInstrs); - return Changed |= localizeIntraBlock(LocalizedInstrs); + Changed |= localizeIntraBlock(LocalizedInstrs); + return Changed; } diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index b7a73326b85c..df770f6664ca 100644 --- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -107,9 +107,13 @@ MachineIRBuilder::buildIndirectDbgValue(Register Reg, const MDNode *Variable, assert( cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) && "Expected inlined-at fields to agree"); + // DBG_VALUE insts now carry IR-level indirection in their DIExpression + // rather than encoding it in the instruction itself. + const DIExpression *DIExpr = cast<DIExpression>(Expr); + DIExpr = DIExpression::append(DIExpr, {dwarf::DW_OP_deref}); return insertInstr(BuildMI(getMF(), getDL(), getTII().get(TargetOpcode::DBG_VALUE), - /*IsIndirect*/ true, Reg, Variable, Expr)); + /*IsIndirect*/ false, Reg, Variable, DIExpr)); } MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI, @@ -120,11 +124,15 @@ MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI, assert( cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) && "Expected inlined-at fields to agree"); + // DBG_VALUE insts now carry IR-level indirection in their DIExpression + // rather than encoding it in the instruction itself. + const DIExpression *DIExpr = cast<DIExpression>(Expr); + DIExpr = DIExpression::append(DIExpr, {dwarf::DW_OP_deref}); return buildInstr(TargetOpcode::DBG_VALUE) .addFrameIndex(FI) - .addImm(0) + .addReg(0) .addMetadata(Variable) - .addMetadata(Expr); + .addMetadata(DIExpr); } MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, @@ -148,7 +156,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, MIB.addReg(0U); } - return MIB.addImm(0).addMetadata(Variable).addMetadata(Expr); + return MIB.addReg(0).addMetadata(Variable).addMetadata(Expr); } MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) { @@ -160,6 +168,17 @@ MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) { return MIB.addMetadata(Label); } +MachineInstrBuilder MachineIRBuilder::buildDynStackAlloc(const DstOp &Res, + const SrcOp &Size, + unsigned Align) { + assert(Res.getLLTTy(*getMRI()).isPointer() && "expected ptr dst type"); + auto MIB = buildInstr(TargetOpcode::G_DYN_STACKALLOC); + Res.addDefToMIB(*getMRI(), MIB); + Size.addSrcToMIB(MIB); + MIB.addImm(Align); + return MIB; +} + MachineInstrBuilder MachineIRBuilder::buildFrameIndex(const DstOp &Res, int Idx) { assert(Res.getLLTTy(*getMRI()).isPointer() && "invalid operand type"); @@ -207,11 +226,7 @@ MachineInstrBuilder MachineIRBuilder::buildGEP(const DstOp &Res, Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch"); assert(Op1.getLLTTy(*getMRI()).isScalar() && "invalid offset type"); - auto MIB = buildInstr(TargetOpcode::G_GEP); - Res.addDefToMIB(*getMRI(), MIB); - Op0.addSrcToMIB(MIB); - Op1.addSrcToMIB(MIB); - return MIB; + return buildInstr(TargetOpcode::G_GEP, {Res}, {Op0, Op1}); } Optional<MachineInstrBuilder> @@ -697,17 +712,19 @@ MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred, MachineInstrBuilder 
MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, - const SrcOp &Op1) { + const SrcOp &Op1, + Optional<unsigned> Flags) { - return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1}); + return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1}, Flags); } MachineInstrBuilder MachineIRBuilder::buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, - const SrcOp &Op1) { + const SrcOp &Op1, + Optional<unsigned> Flags) { - return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1}); + return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1}, Flags); } MachineInstrBuilder @@ -774,26 +791,28 @@ MachineIRBuilder::buildAtomicCmpXchg(Register OldValRes, Register Addr, .addMemOperand(&MMO); } -MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(unsigned Opcode, - Register OldValRes, - Register Addr, - Register Val, - MachineMemOperand &MMO) { +MachineInstrBuilder MachineIRBuilder::buildAtomicRMW( + unsigned Opcode, const DstOp &OldValRes, + const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO) { + #ifndef NDEBUG - LLT OldValResTy = getMRI()->getType(OldValRes); - LLT AddrTy = getMRI()->getType(Addr); - LLT ValTy = getMRI()->getType(Val); + LLT OldValResTy = OldValRes.getLLTTy(*getMRI()); + LLT AddrTy = Addr.getLLTTy(*getMRI()); + LLT ValTy = Val.getLLTTy(*getMRI()); assert(OldValResTy.isScalar() && "invalid operand type"); assert(AddrTy.isPointer() && "invalid operand type"); assert(ValTy.isValid() && "invalid operand type"); assert(OldValResTy == ValTy && "type mismatch"); + assert(MMO.isAtomic() && "not atomic mem operand"); #endif - return buildInstr(Opcode) - .addDef(OldValRes) - .addUse(Addr) - .addUse(Val) - .addMemOperand(&MMO); + auto MIB = buildInstr(Opcode); + OldValRes.addDefToMIB(*getMRI(), MIB); + Addr.addSrcToMIB(MIB); + Val.addSrcToMIB(MIB); + MIB.addMemOperand(&MMO); + return MIB; } MachineInstrBuilder @@ -865,6 +884,21 @@ MachineIRBuilder::buildAtomicRMWUmin(Register OldValRes, Register Addr, } MachineInstrBuilder +MachineIRBuilder::buildAtomicRMWFAdd( + const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FADD, OldValRes, Addr, Val, + MMO); +} + +MachineInstrBuilder +MachineIRBuilder::buildAtomicRMWFSub(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FSUB, OldValRes, Addr, Val, + MMO); +} + +MachineInstrBuilder MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) { return buildInstr(TargetOpcode::G_FENCE) .addImm(Ordering) @@ -1037,8 +1071,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, "input operands do not cover output register"); if (SrcOps.size() == 1) return buildCast(DstOps[0], SrcOps[0]); - if (DstOps[0].getLLTTy(*getMRI()).isVector()) - return buildInstr(TargetOpcode::G_CONCAT_VECTORS, DstOps, SrcOps); + if (DstOps[0].getLLTTy(*getMRI()).isVector()) { + if (SrcOps[0].getLLTTy(*getMRI()).isVector()) + return buildInstr(TargetOpcode::G_CONCAT_VECTORS, DstOps, SrcOps); + return buildInstr(TargetOpcode::G_BUILD_VECTOR, DstOps, SrcOps); + } break; } case TargetOpcode::G_EXTRACT_VECTOR_ELT: { diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 42be88fcf947..f0e35c65c53b 100644 --- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -92,7 +92,7 @@ void RegBankSelect::init(MachineFunction &MF) { MBPI = 
nullptr; } MIRBuilder.setMF(MF); - MORE = llvm::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI); + MORE = std::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI); } void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const { @@ -139,7 +139,7 @@ bool RegBankSelect::repairReg( "need new vreg for each breakdown"); // An empty range of new register means no repairing. - assert(!empty(NewVRegs) && "We should not have to repair"); + assert(!NewVRegs.empty() && "We should not have to repair"); MachineInstr *MI; if (ValMapping.NumBreakDowns == 1) { @@ -154,7 +154,7 @@ bool RegBankSelect::repairReg( std::swap(Src, Dst); assert((RepairPt.getNumInsertPoints() == 1 || - TargetRegisterInfo::isPhysicalRegister(Dst)) && + Register::isPhysicalRegister(Dst)) && "We are about to create several defs for Dst"); // Build the instruction used to repair, then clone it at the right @@ -398,7 +398,7 @@ void RegBankSelect::tryAvoidingSplit( // Check if this is a physical or virtual register. Register Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { // We are going to split every outgoing edges. // Check that this is possible. // FIXME: The machine representation is currently broken @@ -687,8 +687,9 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { // iterator before hand. MachineInstr &MI = *MII++; - // Ignore target-specific instructions: they should use proper regclasses. - if (isTargetSpecificOpcode(MI.getOpcode())) + // Ignore target-specific post-isel instructions: they should use proper + // regclasses. + if (isTargetSpecificOpcode(MI.getOpcode()) && !MI.isPreISelOpcode()) continue; if (!assignInstr(MI)) { diff --git a/lib/CodeGen/GlobalISel/RegisterBank.cpp b/lib/CodeGen/GlobalISel/RegisterBank.cpp index 4e41f338934d..fc9c802693ab 100644 --- a/lib/CodeGen/GlobalISel/RegisterBank.cpp +++ b/lib/CodeGen/GlobalISel/RegisterBank.cpp @@ -12,6 +12,7 @@ #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/Debug.h" #define DEBUG_TYPE "registerbank" diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index 159422e38878..3fcc55286beb 100644 --- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -82,7 +82,7 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const { const RegisterBank * RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI)); assert(Reg && "NoRegister does not have a register bank"); @@ -97,8 +97,7 @@ RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterClass & RegisterBankInfo::getMinimalPhysRegClass(Register Reg, const TargetRegisterInfo &TRI) const { - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && - "Reg must be a physreg"); + assert(Register::isPhysicalRegister(Reg) && "Reg must be a physreg"); const auto &RegRCIt = PhysRegMinimalRCs.find(Reg); if (RegRCIt != PhysRegMinimalRCs.end()) return *RegRCIt->second; @@ -284,7 +283,7 @@ RegisterBankInfo::getPartialMapping(unsigned StartIdx, unsigned Length, ++NumPartialMappingsCreated; auto &PartMapping = MapOfPartialMappings[Hash]; - PartMapping = 
llvm::make_unique<PartialMapping>(StartIdx, Length, RegBank); + PartMapping = std::make_unique<PartialMapping>(StartIdx, Length, RegBank); return *PartMapping; } @@ -318,7 +317,7 @@ RegisterBankInfo::getValueMapping(const PartialMapping *BreakDown, ++NumValueMappingsCreated; auto &ValMapping = MapOfValueMappings[Hash]; - ValMapping = llvm::make_unique<ValueMapping>(BreakDown, NumBreakDowns); + ValMapping = std::make_unique<ValueMapping>(BreakDown, NumBreakDowns); return *ValMapping; } @@ -342,7 +341,7 @@ RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const { // mapping, because we use the pointer of the ValueMapping // to hash and we expect them to uniquely identify an instance // of value mapping. - Res = llvm::make_unique<ValueMapping[]>(std::distance(Begin, End)); + Res = std::make_unique<ValueMapping[]>(std::distance(Begin, End)); unsigned Idx = 0; for (Iterator It = Begin; It != End; ++It, ++Idx) { const ValueMapping *ValMap = *It; @@ -392,7 +391,7 @@ RegisterBankInfo::getInstructionMappingImpl( ++NumInstructionMappingsCreated; auto &InstrMapping = MapOfInstructionMappings[Hash]; - InstrMapping = llvm::make_unique<InstructionMapping>( + InstrMapping = std::make_unique<InstructionMapping>( ID, Cost, OperandsMapping, NumOperands); return *InstrMapping; } @@ -456,7 +455,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { "This mapping is too complex for this function"); iterator_range<SmallVectorImpl<Register>::const_iterator> NewRegs = OpdMapper.getVRegs(OpIdx); - if (empty(NewRegs)) { + if (NewRegs.empty()) { LLVM_DEBUG(dbgs() << " has not been repaired, nothing to be done\n"); continue; } @@ -489,7 +488,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { unsigned RegisterBankInfo::getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { // The size is not directly available for physical registers. // Instead, we need to access a register class that contains Reg and // get the size of that register class. diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp index 766ea1d60bac..45618d7992ad 100644 --- a/lib/CodeGen/GlobalISel/Utils.cpp +++ b/lib/CodeGen/GlobalISel/Utils.cpp @@ -43,10 +43,9 @@ unsigned llvm::constrainOperandRegClass( const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, const MachineOperand &RegMO, unsigned OpIdx) { - unsigned Reg = RegMO.getReg(); + Register Reg = RegMO.getReg(); // Assume physical registers are properly constrained. - assert(TargetRegisterInfo::isVirtualRegister(Reg) && - "PhysReg not implemented"); + assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented"); unsigned ConstrainedReg = constrainRegToClass(MRI, TII, RBI, Reg, RegClass); // If we created a new virtual register because the class is not compatible @@ -73,10 +72,9 @@ unsigned llvm::constrainOperandRegClass( MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II, const MachineOperand &RegMO, unsigned OpIdx) { - unsigned Reg = RegMO.getReg(); + Register Reg = RegMO.getReg(); // Assume physical registers are properly constrained. 
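// What the Register::isVirtualRegister / isPhysicalRegister predicates used
// in these asserts discriminate: in this era of LLVM, virtual register
// numbers carry the top bit as a tag. A simplified standalone sketch that
// ignores the reserved stack-slot range:
#include <cstdint>
constexpr uint32_t VirtRegTag = 1u << 31;
bool isVirtualSketch(uint32_t Reg) { return (Reg & VirtRegTag) != 0; }
bool isPhysicalSketch(uint32_t Reg) { return Reg != 0 && (Reg & VirtRegTag) == 0; }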
- assert(TargetRegisterInfo::isVirtualRegister(Reg) && - "PhysReg not implemented"); + assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented"); const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF); // Some of the target independent instructions, like COPY, may not impose any @@ -130,9 +128,9 @@ bool llvm::constrainSelectedInstRegOperands(MachineInstr &I, LLVM_DEBUG(dbgs() << "Converting operand: " << MO << '\n'); assert(MO.isReg() && "Unsupported non-reg operand"); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // Physical registers don't need to be constrained. - if (TRI.isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) continue; // Register operands with a value of 0 (e.g. predicate operands) don't need @@ -170,9 +168,8 @@ bool llvm::isTriviallyDead(const MachineInstr &MI, if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg) || - !MRI.use_nodbg_empty(Reg)) + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg) || !MRI.use_nodbg_empty(Reg)) return false; } return true; @@ -219,11 +216,33 @@ Optional<int64_t> llvm::getConstantVRegVal(unsigned VReg, } Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( - unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) { + unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs, + bool HandleFConstant) { SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes; MachineInstr *MI; - while ((MI = MRI.getVRegDef(VReg)) && - MI->getOpcode() != TargetOpcode::G_CONSTANT && LookThroughInstrs) { + auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) { + return Opcode == TargetOpcode::G_CONSTANT || + (HandleFConstant && Opcode == TargetOpcode::G_FCONSTANT); + }; + auto GetImmediateValue = [HandleFConstant, + &MRI](const MachineInstr &MI) -> Optional<APInt> { + const MachineOperand &CstVal = MI.getOperand(1); + if (!CstVal.isImm() && !CstVal.isCImm() && + (!HandleFConstant || !CstVal.isFPImm())) + return None; + if (!CstVal.isFPImm()) { + unsigned BitWidth = + MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + APInt Val = CstVal.isImm() ? APInt(BitWidth, CstVal.getImm()) + : CstVal.getCImm()->getValue(); + assert(Val.getBitWidth() == BitWidth && + "Value bitwidth doesn't match definition type"); + return Val; + } + return CstVal.getFPImm()->getValueAPF().bitcastToAPInt(); + }; + while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) && + LookThroughInstrs) { switch (MI->getOpcode()) { case TargetOpcode::G_TRUNC: case TargetOpcode::G_SEXT: @@ -235,7 +254,7 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( break; case TargetOpcode::COPY: VReg = MI->getOperand(1).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(VReg)) + if (Register::isPhysicalRegister(VReg)) return None; break; case TargetOpcode::G_INTTOPTR: @@ -245,16 +264,13 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( return None; } } - if (!MI || MI->getOpcode() != TargetOpcode::G_CONSTANT || - (!MI->getOperand(1).isImm() && !MI->getOperand(1).isCImm())) + if (!MI || !IsConstantOpcode(MI->getOpcode())) return None; - const MachineOperand &CstVal = MI->getOperand(1); - unsigned BitWidth = MRI.getType(MI->getOperand(0).getReg()).getSizeInBits(); - APInt Val = CstVal.isImm() ? 
APInt(BitWidth, CstVal.getImm()) - : CstVal.getCImm()->getValue(); - assert(Val.getBitWidth() == BitWidth && - "Value bitwidth doesn't match definition type"); + Optional<APInt> MaybeVal = GetImmediateValue(*MI); + if (!MaybeVal) + return None; + APInt &Val = *MaybeVal; while (!SeenOpcodes.empty()) { std::pair<unsigned, unsigned> OpcodeAndSize = SeenOpcodes.pop_back_val(); switch (OpcodeAndSize.first) { @@ -291,7 +307,7 @@ llvm::MachineInstr *llvm::getDefIgnoringCopies(Register Reg, if (!DstTy.isValid()) return nullptr; while (DefMI->getOpcode() == TargetOpcode::COPY) { - unsigned SrcReg = DefMI->getOperand(1).getReg(); + Register SrcReg = DefMI->getOperand(1).getReg(); auto SrcTy = MRI.getType(SrcReg); if (!SrcTy.isValid() || SrcTy != DstTy) break; @@ -395,6 +411,40 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, return false; } +Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const unsigned Op1, + uint64_t Imm, + const MachineRegisterInfo &MRI) { + auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI); + if (MaybeOp1Cst) { + LLT Ty = MRI.getType(Op1); + APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true); + switch (Opcode) { + default: + break; + case TargetOpcode::G_SEXT_INREG: + return C1.trunc(Imm).sext(C1.getBitWidth()); + } + } + return None; +} + void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) { AU.addPreserved<StackProtector>(); } + +MVT llvm::getMVTForLLT(LLT Ty) { + if (!Ty.isVector()) + return MVT::getIntegerVT(Ty.getSizeInBits()); + + return MVT::getVectorVT( + MVT::getIntegerVT(Ty.getElementType().getSizeInBits()), + Ty.getNumElements()); +} + +LLT llvm::getLLTForMVT(MVT Ty) { + if (!Ty.isVector()) + return LLT::scalar(Ty.getSizeInBits()); + + return LLT::vector(Ty.getVectorNumElements(), + Ty.getVectorElementType().getSizeInBits()); +} diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index 09201c2e7bae..d4fa45fcb405 100644 --- a/lib/CodeGen/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -456,14 +456,14 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals, bool HasExternal = false; StringRef FirstExternalName; - unsigned MaxAlign = 1; + Align MaxAlign; unsigned CurIdx = 0; for (j = i; j != -1; j = GlobalSet.find_next(j)) { Type *Ty = Globals[j]->getValueType(); // Make sure we use the same alignment AsmPrinter would use. - unsigned Align = DL.getPreferredAlignment(Globals[j]); - unsigned Padding = alignTo(MergedSize, Align) - MergedSize; + Align Alignment(DL.getPreferredAlignment(Globals[j])); + unsigned Padding = alignTo(MergedSize, Alignment) - MergedSize; MergedSize += Padding; MergedSize += DL.getTypeAllocSize(Ty); if (MergedSize > MaxOffset) { @@ -478,7 +478,7 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals, Inits.push_back(Globals[j]->getInitializer()); StructIdxs.push_back(CurIdx++); - MaxAlign = std::max(MaxAlign, Align); + MaxAlign = std::max(MaxAlign, Alignment); if (Globals[j]->hasExternalLinkage() && !HasExternal) { HasExternal = true; diff --git a/lib/CodeGen/HardwareLoops.cpp b/lib/CodeGen/HardwareLoops.cpp index 5f57cabbe865..6a0f98d2e2b4 100644 --- a/lib/CodeGen/HardwareLoops.cpp +++ b/lib/CodeGen/HardwareLoops.cpp @@ -183,7 +183,7 @@ bool HardwareLoops::runOnFunction(Function &F) { TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); DL = &F.getParent()->getDataLayout(); auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); - LibInfo = TLIP ? &TLIP->getTLI() : nullptr; + LibInfo = TLIP ? 
&TLIP->getTLI(F) : nullptr; PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); M = F.getParent(); diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index b17a253fe23f..d9caa5660695 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -285,14 +285,113 @@ namespace { Prediction); } - bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, - unsigned TCycle, unsigned TExtra, - MachineBasicBlock &FBB, - unsigned FCycle, unsigned FExtra, - BranchProbability Prediction) const { - return TCycle > 0 && FCycle > 0 && - TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra, - Prediction); + bool MeetIfcvtSizeLimit(BBInfo &TBBInfo, BBInfo &FBBInfo, + MachineBasicBlock &CommBB, unsigned Dups, + BranchProbability Prediction, bool Forked) const { + const MachineFunction &MF = *TBBInfo.BB->getParent(); + if (MF.getFunction().hasMinSize()) { + MachineBasicBlock::iterator TIB = TBBInfo.BB->begin(); + MachineBasicBlock::iterator FIB = FBBInfo.BB->begin(); + MachineBasicBlock::iterator TIE = TBBInfo.BB->end(); + MachineBasicBlock::iterator FIE = FBBInfo.BB->end(); + + unsigned Dups1, Dups2; + if (!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2, + *TBBInfo.BB, *FBBInfo.BB, + /*SkipUnconditionalBranches*/ true)) + llvm_unreachable("should already have been checked by ValidDiamond"); + + unsigned BranchBytes = 0; + unsigned CommonBytes = 0; + + // Count common instructions at the start of the true and false blocks. + for (auto &I : make_range(TBBInfo.BB->begin(), TIB)) { + LLVM_DEBUG(dbgs() << "Common inst: " << I); + CommonBytes += TII->getInstSizeInBytes(I); + } + for (auto &I : make_range(FBBInfo.BB->begin(), FIB)) { + LLVM_DEBUG(dbgs() << "Common inst: " << I); + CommonBytes += TII->getInstSizeInBytes(I); + } + + // Count instructions at the end of the true and false blocks, after + // the ones we plan to predicate. Analyzable branches will be removed + // (unless this is a forked diamond), and all other instructions are + // common between the two blocks. + for (auto &I : make_range(TIE, TBBInfo.BB->end())) { + if (I.isBranch() && TBBInfo.IsBrAnalyzable && !Forked) { + LLVM_DEBUG(dbgs() << "Saving branch: " << I); + BranchBytes += TII->predictBranchSizeForIfCvt(I); + } else { + LLVM_DEBUG(dbgs() << "Common inst: " << I); + CommonBytes += TII->getInstSizeInBytes(I); + } + } + for (auto &I : make_range(FIE, FBBInfo.BB->end())) { + if (I.isBranch() && FBBInfo.IsBrAnalyzable && !Forked) { + LLVM_DEBUG(dbgs() << "Saving branch: " << I); + BranchBytes += TII->predictBranchSizeForIfCvt(I); + } else { + LLVM_DEBUG(dbgs() << "Common inst: " << I); + CommonBytes += TII->getInstSizeInBytes(I); + } + } + for (auto &I : CommBB.terminators()) { + if (I.isBranch()) { + LLVM_DEBUG(dbgs() << "Saving branch: " << I); + BranchBytes += TII->predictBranchSizeForIfCvt(I); + } + } + + // The common instructions in one branch will be eliminated, halving + // their code size. + CommonBytes /= 2; + + // Count the instructions which we need to predicate. 
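Taken together, the byte accounting above and the counting that follows reduce the minsize decision to a single comparison: predicate only when the bytes recovered (removed branches, plus the halved common prologue and epilogue) outweigh the bytes the target must spend to start predicated blocks. A self-contained sketch of that arithmetic, with made-up byte counts rather than numbers from any real target:

    #include <cstdio>

    int main() {
      // Hypothetical diamond: one 4-byte conditional branch disappears,
      // 8 bytes of instructions are shared by both arms (one copy is
      // deleted, so half those bytes are saved), and predication costs
      // one 2-byte IT-style setup instruction.
      unsigned BranchBytes = 4;
      unsigned CommonBytes = 8 / 2;
      unsigned ExtraPredicateBytes = 2;
      // The same decision the pass makes: convert only if it shrinks code.
      bool Convert = (BranchBytes + CommonBytes) > ExtraPredicateBytes;
      std::printf("if-convert at minsize: %s\n", Convert ? "yes" : "no");
      return 0;
    }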
+ unsigned NumPredicatedInstructions = 0; + for (auto &I : make_range(TIB, TIE)) { + if (!I.isDebugInstr()) { + LLVM_DEBUG(dbgs() << "Predicating: " << I); + NumPredicatedInstructions++; + } + } + for (auto &I : make_range(FIB, FIE)) { + if (!I.isDebugInstr()) { + LLVM_DEBUG(dbgs() << "Predicating: " << I); + NumPredicatedInstructions++; + } + } + + // Even though we're optimising for size at the expense of performance, + // avoid creating really long predicated blocks. + if (NumPredicatedInstructions > 15) + return false; + + // Some targets (e.g. Thumb2) need to insert extra instructions to + // start predicated blocks. + unsigned ExtraPredicateBytes = TII->extraSizeToPredicateInstructions( + MF, NumPredicatedInstructions); + + LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(BranchBytes=" << BranchBytes + << ", CommonBytes=" << CommonBytes + << ", NumPredicatedInstructions=" + << NumPredicatedInstructions + << ", ExtraPredicateBytes=" << ExtraPredicateBytes + << ")\n"); + return (BranchBytes + CommonBytes) > ExtraPredicateBytes; + } else { + unsigned TCycle = TBBInfo.NonPredSize + TBBInfo.ExtraCost - Dups; + unsigned FCycle = FBBInfo.NonPredSize + FBBInfo.ExtraCost - Dups; + bool Res = TCycle > 0 && FCycle > 0 && + TII->isProfitableToIfCvt( + *TBBInfo.BB, TCycle, TBBInfo.ExtraCost2, *FBBInfo.BB, + FCycle, FBBInfo.ExtraCost2, Prediction); + LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(TCycle=" << TCycle + << ", FCycle=" << FCycle + << ", TExtra=" << TBBInfo.ExtraCost2 << ", FExtra=" + << FBBInfo.ExtraCost2 << ") = " << Res << "\n"); + return Res; + } } /// Returns true if Block ends without a terminator. @@ -356,8 +455,10 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { if (!PreRegAlloc) { // Tail merge tend to expose more if-conversion opportunities. BranchFolder BF(true, false, MBFI, *MBPI); - BFChange = BF.OptimizeFunction(MF, TII, ST.getRegisterInfo(), - getAnalysisIfAvailable<MachineModuleInfo>()); + auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); + BFChange = BF.OptimizeFunction( + MF, TII, ST.getRegisterInfo(), + MMIWP ? &MMIWP->getMMI() : nullptr); } LLVM_DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'" @@ -496,8 +597,10 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { if (MadeChange && IfCvtBranchFold) { BranchFolder BF(false, false, MBFI, *MBPI); - BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), - getAnalysisIfAvailable<MachineModuleInfo>()); + auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); + BF.OptimizeFunction( + MF, TII, MF.getSubtarget().getRegisterInfo(), + MMIWP ? 
&MMIWP->getMMI() : nullptr); } MadeChange |= BFChange; @@ -569,6 +672,9 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, bool FalseBranch, unsigned &Dups, BranchProbability Prediction) const { Dups = 0; + if (TrueBBI.BB == FalseBBI.BB) + return false; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) return false; @@ -835,6 +941,8 @@ bool IfConverter::ValidForkedDiamond( TrueBBICalc.BB = TrueBBI.BB; FalseBBICalc.BB = FalseBBI.BB; + TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable; + FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable; if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc)) return false; @@ -892,6 +1000,8 @@ bool IfConverter::ValidDiamond( TrueBBICalc.BB = TrueBBI.BB; FalseBBICalc.BB = FalseBBI.BB; + TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable; + FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable; if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc)) return false; // The size is used to decide whether to if-convert, and the shared portions @@ -912,6 +1022,12 @@ void IfConverter::AnalyzeBranches(BBInfo &BBI) { BBI.BrCond.clear(); BBI.IsBrAnalyzable = !TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond); + if (!BBI.IsBrAnalyzable) { + BBI.TrueBB = nullptr; + BBI.FalseBB = nullptr; + BBI.BrCond.clear(); + } + SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); BBI.IsBrReversible = (RevCond.size() == 0) || !TII->reverseBranchCondition(RevCond); @@ -1173,13 +1289,9 @@ void IfConverter::AnalyzeBlock( if (CanRevCond) { BBInfo TrueBBICalc, FalseBBICalc; - auto feasibleDiamond = [&]() { - bool MeetsSize = MeetIfcvtSizeLimit( - *TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) + - TrueBBICalc.ExtraCost), TrueBBICalc.ExtraCost2, - *FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) + - FalseBBICalc.ExtraCost), FalseBBICalc.ExtraCost2, - Prediction); + auto feasibleDiamond = [&](bool Forked) { + bool MeetsSize = MeetIfcvtSizeLimit(TrueBBICalc, FalseBBICalc, *BB, + Dups + Dups2, Prediction, Forked); bool TrueFeasible = FeasibilityAnalysis(TrueBBI, BBI.BrCond, /* IsTriangle */ false, /* RevCond */ false, /* hasCommonTail */ true); @@ -1191,7 +1303,7 @@ void IfConverter::AnalyzeBlock( if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2, TrueBBICalc, FalseBBICalc)) { - if (feasibleDiamond()) { + if (feasibleDiamond(false)) { // Diamond: // EBB // / \_ @@ -1200,14 +1312,14 @@ void IfConverter::AnalyzeBlock( // \ / // TailBB // Note TailBB can be empty. - Tokens.push_back(llvm::make_unique<IfcvtToken>( + Tokens.push_back(std::make_unique<IfcvtToken>( BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2, (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred)); Enqueued = true; } } else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2, TrueBBICalc, FalseBBICalc)) { - if (feasibleDiamond()) { + if (feasibleDiamond(true)) { // ForkedDiamond: // if TBB and FBB have a common tail that includes their conditional // branch instructions, then we can If Convert this pattern. 
@@ -1218,7 +1330,7 @@ void IfConverter::AnalyzeBlock( // / \ / \ // FalseBB TrueBB FalseBB // - Tokens.push_back(llvm::make_unique<IfcvtToken>( + Tokens.push_back(std::make_unique<IfcvtToken>( BBI, ICForkedDiamond, TNeedSub | FNeedSub, Dups, Dups2, (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred)); Enqueued = true; @@ -1238,7 +1350,7 @@ // | / // FBB Tokens.push_back( - llvm::make_unique<IfcvtToken>(BBI, ICTriangle, TNeedSub, Dups)); + std::make_unique<IfcvtToken>(BBI, ICTriangle, TNeedSub, Dups)); Enqueued = true; } @@ -1247,7 +1359,7 @@ TrueBBI.ExtraCost2, Prediction) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) { Tokens.push_back( - llvm::make_unique<IfcvtToken>(BBI, ICTriangleRev, TNeedSub, Dups)); + std::make_unique<IfcvtToken>(BBI, ICTriangleRev, TNeedSub, Dups)); Enqueued = true; } @@ -1263,7 +1375,7 @@ // | // FBB Tokens.push_back( - llvm::make_unique<IfcvtToken>(BBI, ICSimple, TNeedSub, Dups)); + std::make_unique<IfcvtToken>(BBI, ICSimple, TNeedSub, Dups)); Enqueued = true; } @@ -1275,7 +1387,7 @@ FalseBBI.NonPredSize + FalseBBI.ExtraCost, FalseBBI.ExtraCost2, Prediction.getCompl()) && FeasibilityAnalysis(FalseBBI, RevCond, true)) { - Tokens.push_back(llvm::make_unique<IfcvtToken>(BBI, ICTriangleFalse, + Tokens.push_back(std::make_unique<IfcvtToken>(BBI, ICTriangleFalse, FNeedSub, Dups)); Enqueued = true; } @@ -1287,7 +1399,7 @@ FalseBBI.ExtraCost2, Prediction.getCompl()) && FeasibilityAnalysis(FalseBBI, RevCond, true, true)) { Tokens.push_back( - llvm::make_unique<IfcvtToken>(BBI, ICTriangleFRev, FNeedSub, Dups)); + std::make_unique<IfcvtToken>(BBI, ICTriangleFRev, FNeedSub, Dups)); Enqueued = true; } @@ -1297,7 +1409,7 @@ FalseBBI.ExtraCost2, Prediction.getCompl()) && FeasibilityAnalysis(FalseBBI, RevCond)) { Tokens.push_back( - llvm::make_unique<IfcvtToken>(BBI, ICSimpleFalse, FNeedSub, Dups)); + std::make_unique<IfcvtToken>(BBI, ICSimpleFalse, FNeedSub, Dups)); Enqueued = true; } } @@ -1730,6 +1842,11 @@ bool IfConverter::IfConvertDiamondCommon( ++i; } while (NumDups1 != 0) { + // Since this instruction is going to be deleted, update call + // site info state if the instruction is a call instruction. + if (DI2->isCall(MachineInstr::IgnoreBundle)) + MBB2.getParent()->eraseCallSiteInfo(&*DI2); + ++DI2; if (DI2 == MBB2.end()) break; @@ -1758,14 +1875,27 @@ bool IfConverter::IfConvertDiamondCommon( if (!BBI1->IsBrAnalyzable) verifySameBranchInstructions(&MBB1, &MBB2); #endif - BBI1->NonPredSize -= TII->removeBranch(*BBI1->BB); - // Remove duplicated instructions. + // Remove duplicated instructions from the tail of MBB1: any branch + // instructions, and the common instructions counted by NumDups2. DI1 = MBB1.end(); + while (DI1 != MBB1.begin()) { + MachineBasicBlock::iterator Prev = std::prev(DI1); + if (!Prev->isBranch() && !Prev->isDebugInstr()) + break; + DI1 = Prev; + } for (unsigned i = 0; i != NumDups2; ) { // NumDups2 only counted non-dbg_value instructions, so this won't // run off the head of the list. assert(DI1 != MBB1.begin()); + --DI1; + + // Since this instruction is going to be deleted, update call + // site info state if the instruction is a call instruction. 
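The comment above reflects an invariant that several hunks in this import enforce: the call site info table in MachineFunction is keyed by MachineInstr pointer, so any transform that deletes, clones, or replaces a call must erase, copy, or move the matching entry. A rough sketch of the pattern using the MachineFunction methods these hunks call; syncCallSiteInfo is an illustrative name, not an LLVM API:

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineInstr.h"

    using namespace llvm;

    // Keep call site info in sync when a call instruction is transformed:
    // deleting drops the entry, cloning duplicates it. (When an instruction
    // is replaced outright, moveCallSiteInfo transfers the entry instead,
    // as the InlineSpiller hunk below does.)
    static void syncCallSiteInfo(MachineFunction &MF, MachineInstr *Old,
                                 MachineInstr *New) {
      if (!Old->isCall(MachineInstr::IgnoreBundle))
        return;
      if (!New)
        MF.eraseCallSiteInfo(Old); // instruction is being deleted
      else
        MF.copyCallSiteInfo(Old, New); // instruction was cloned
    }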
+ if (DI1->isCall(MachineInstr::IgnoreBundle)) + MBB1.getParent()->eraseCallSiteInfo(&*DI1); + // skip dbg_value instructions if (!DI1->isDebugInstr()) ++i; @@ -1815,7 +1945,7 @@ bool IfConverter::IfConvertDiamondCommon( for (const MachineOperand &MO : FI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (MO.isDef()) { @@ -1983,7 +2113,7 @@ static bool MaySpeculate(const MachineInstr &MI, for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (MO.isDef() && !LaterRedefs.count(Reg)) @@ -2050,6 +2180,10 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, break; MachineInstr *MI = MF.CloneMachineInstr(&I); + // Make a copy of the call site info. + if (MI->isCall(MachineInstr::IgnoreBundle)) + MF.copyCallSiteInfo(&I,MI); + ToBBI.BB->insert(ToBBI.BB->end(), MI); ToBBI.NonPredSize++; unsigned ExtraPredCost = TII->getPredicationCost(I); diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp index 1e82ea659617..b7dcaec90106 100644 --- a/lib/CodeGen/ImplicitNullChecks.cpp +++ b/lib/CodeGen/ImplicitNullChecks.cpp @@ -278,12 +278,12 @@ bool ImplicitNullChecks::canReorder(const MachineInstr *A, if (!(MOA.isReg() && MOA.getReg())) continue; - unsigned RegA = MOA.getReg(); + Register RegA = MOA.getReg(); for (auto MOB : B->operands()) { if (!(MOB.isReg() && MOB.getReg())) continue; - unsigned RegB = MOB.getReg(); + Register RegB = MOB.getReg(); if (TRI->regsOverlap(RegA, RegB) && (MOA.isDef() || MOB.isDef())) return false; @@ -517,7 +517,7 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // // we must ensure that there are no instructions between the 'test' and // conditional jump that modify %rax. 
- const unsigned PointerReg = MBP.LHS.getReg(); + const Register PointerReg = MBP.LHS.getReg(); assert(MBP.ConditionDef->getParent() == &MBB && "Should be in basic block"); @@ -689,7 +689,7 @@ void ImplicitNullChecks::rewriteNullChecks( for (const MachineOperand &MO : FaultingInstr->operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg || MBB->isLiveIn(Reg)) continue; MBB->addLiveIn(Reg); diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 41ae8061a917..2408f18678e4 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "LiveRangeCalc.h" #include "Spiller.h" #include "SplitKit.h" #include "llvm/ADT/ArrayRef.h" @@ -26,6 +25,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -346,8 +346,7 @@ void InlineSpiller::collectRegsToSpill() { } bool InlineSpiller::isSibling(unsigned Reg) { - return TargetRegisterInfo::isVirtualRegister(Reg) && - VRM.getOriginal(Reg) == Original; + return Register::isVirtualRegister(Reg) && VRM.getOriginal(Reg) == Original; } /// It is beneficial to spill to earlier place in the same BB in case @@ -377,7 +376,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI, assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy"); #endif - unsigned SrcReg = CopyMI.getOperand(1).getReg(); + Register SrcReg = CopyMI.getOperand(1).getReg(); LiveInterval &SrcLI = LIS.getInterval(SrcReg); VNInfo *SrcVNI = SrcLI.getVNInfoAt(Idx); LiveQueryResult SrcQ = SrcLI.Query(Idx); @@ -845,9 +844,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, for (MIBundleOperands MO(*MI); MO.isValid(); ++MO) { if (!MO->isReg()) continue; - unsigned Reg = MO->getReg(); - if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || - MRI.isReserved(Reg)) { + Register Reg = MO->getReg(); + if (!Reg || Register::isVirtualRegister(Reg) || MRI.isReserved(Reg)) { continue; } // Skip non-Defs, including undef uses and internal reads. @@ -869,7 +867,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, --NumSpills; LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI); if (MI->isCall()) - MI->getMF()->updateCallSiteInfo(MI, FoldMI); + MI->getMF()->moveCallSiteInfo(MI, FoldMI); MI->eraseFromParent(); // Insert any new instructions other than FoldMI into the LIS maps. @@ -1111,8 +1109,8 @@ void InlineSpiller::spillAll() { void InlineSpiller::spill(LiveRangeEdit &edit) { ++NumSpilledRanges; Edit = &edit; - assert(!TargetRegisterInfo::isStackSlot(edit.getReg()) - && "Trying to spill a stack slot."); + assert(!Register::isStackSlot(edit.getReg()) && + "Trying to spill a stack slot."); // Share a stack slot among all descendants of Original. Original = VRM.getOriginal(edit.getReg()); StackSlot = VRM.getStackSlot(Original); @@ -1147,7 +1145,7 @@ void HoistSpillHelper::addToMergeableSpills(MachineInstr &Spill, int StackSlot, // save a copy of LiveInterval in StackSlotToOrigLI because the original // LiveInterval may be cleared after all its references are spilled. 
if (StackSlotToOrigLI.find(StackSlot) == StackSlotToOrigLI.end()) { - auto LI = llvm::make_unique<LiveInterval>(OrigLI.reg, OrigLI.weight); + auto LI = std::make_unique<LiveInterval>(OrigLI.reg, OrigLI.weight); LI->assign(OrigLI, Allocator); StackSlotToOrigLI[StackSlot] = std::move(LI); } @@ -1459,7 +1457,7 @@ void HoistSpillHelper::hoistAllSpills() { LiveRangeEdit Edit(nullptr, NewVRegs, MF, LIS, &VRM, this); for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); unsigned Original = VRM.getPreSplitReg(Reg); if (!MRI.def_empty(Reg)) Virt2SiblingsMap[Original].insert(Reg); diff --git a/lib/CodeGen/InterleavedLoadCombinePass.cpp b/lib/CodeGen/InterleavedLoadCombinePass.cpp index 9525da849e2a..770c4952d169 100644 --- a/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -940,8 +940,8 @@ public: /// \param V input value /// \param Result result polynomial static void computePolynomial(Value &V, Polynomial &Result) { - if (isa<BinaryOperator>(&V)) - computePolynomialBinOp(*dyn_cast<BinaryOperator>(&V), Result); + if (auto *BO = dyn_cast<BinaryOperator>(&V)) + computePolynomialBinOp(*BO, Result); else Result = Polynomial(&V); } diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 886ae7e94adb..1c362aec6e67 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -96,14 +96,15 @@ LLVMTargetMachine::getTargetTransformInfo(const Function &F) { /// addPassesToX helper drives creation and initialization of TargetPassConfig. static TargetPassConfig * addPassesToGenerateCode(LLVMTargetMachine &TM, PassManagerBase &PM, - bool DisableVerify, MachineModuleInfo &MMI) { + bool DisableVerify, + MachineModuleInfoWrapperPass &MMIWP) { // Targets may override createPassConfig to provide a target-specific // subclass. TargetPassConfig *PassConfig = TM.createPassConfig(PM); // Set PassConfig options provided by TargetMachine. PassConfig->setDisableVerify(DisableVerify); PM.add(PassConfig); - PM.add(&MMI); + PM.add(&MMIWP); if (PassConfig->addISelPasses()) return nullptr; @@ -139,7 +140,7 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, std::unique_ptr<MCAsmBackend> MAB( getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions)); - auto FOut = llvm::make_unique<formatted_raw_ostream>(Out); + auto FOut = std::make_unique<formatted_raw_ostream>(Out); MCStreamer *S = getTarget().createAsmStreamer( Context, std::move(FOut), Options.MCOptions.AsmVerbose, Options.MCOptions.MCUseDwarfDirectory, InstPrinter, std::move(MCE), @@ -186,17 +187,15 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, return false; } -bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, - raw_pwrite_stream &Out, - raw_pwrite_stream *DwoOut, - CodeGenFileType FileType, - bool DisableVerify, - MachineModuleInfo *MMI) { +bool LLVMTargetMachine::addPassesToEmitFile( + PassManagerBase &PM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, + CodeGenFileType FileType, bool DisableVerify, + MachineModuleInfoWrapperPass *MMIWP) { // Add common CodeGen passes. 
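This hunk belongs to a wider refactor threaded through the import: MachineModuleInfo is no longer a pass itself but is owned by MachineModuleInfoWrapperPass, so producers hand around the wrapper and consumers unwrap it. A minimal sketch of the consumer side, mirroring the getAnalysisIfAvailable pattern in the IfConversion hunks above; getMMIIfAvailable is an illustrative helper, not an LLVM API:

    #include "llvm/CodeGen/MachineModuleInfo.h"
    #include "llvm/Pass.h"

    using namespace llvm;

    // Fetch MachineModuleInfo through its wrapper pass. The wrapper may be
    // absent, so callers must tolerate a null result.
    static MachineModuleInfo *getMMIIfAvailable(Pass &P) {
      auto *MMIWP = P.getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
      return MMIWP ? &MMIWP->getMMI() : nullptr;
    }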
- if (!MMI) - MMI = new MachineModuleInfo(this); + if (!MMIWP) + MMIWP = new MachineModuleInfoWrapperPass(this); TargetPassConfig *PassConfig = - addPassesToGenerateCode(*this, PM, DisableVerify, *MMI); + addPassesToGenerateCode(*this, PM, DisableVerify, *MMIWP); if (!PassConfig) return true; @@ -206,12 +205,13 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, // testing to be meaningful, we need to ensure that the symbols created // are MCSymbolXCOFF variants, which requires that // the TargetLoweringObjectFile instance has been initialized. - MCContext &Ctx = MMI->getContext(); + MCContext &Ctx = MMIWP->getMMI().getContext(); const_cast<TargetLoweringObjectFile &>(*this->getObjFileLowering()) .Initialize(Ctx, *this); } PM.add(createPrintMIRPass(Out)); - } else if (addAsmPrinter(PM, Out, DwoOut, FileType, MMI->getContext())) + } else if (addAsmPrinter(PM, Out, DwoOut, FileType, + MMIWP->getMMI().getContext())) return true; PM.add(createFreeMachineFunctionPass()); @@ -227,15 +227,15 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, raw_pwrite_stream &Out, bool DisableVerify) { // Add common CodeGen passes. - MachineModuleInfo *MMI = new MachineModuleInfo(this); + MachineModuleInfoWrapperPass *MMIWP = new MachineModuleInfoWrapperPass(this); TargetPassConfig *PassConfig = - addPassesToGenerateCode(*this, PM, DisableVerify, *MMI); + addPassesToGenerateCode(*this, PM, DisableVerify, *MMIWP); if (!PassConfig) return true; assert(TargetPassConfig::willCompleteCodeGenPipeline() && "Cannot emit MC with limited codegen pipeline"); - Ctx = &MMI->getContext(); + Ctx = &MMIWP->getMMI().getContext(); if (Options.MCOptions.MCSaveTempLabels) Ctx->setAllowTemporaryLabels(false); diff --git a/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp index 200ac0ba15bf..cef5085ae079 100644 --- a/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp +++ b/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp @@ -73,18 +73,18 @@ LazyMachineBlockFrequencyInfoPass::calculateIfNotAvailable() const { if (!MDT) { LLVM_DEBUG(dbgs() << "Building DominatorTree on the fly\n"); - OwnedMDT = make_unique<MachineDominatorTree>(); + OwnedMDT = std::make_unique<MachineDominatorTree>(); OwnedMDT->getBase().recalculate(*MF); MDT = OwnedMDT.get(); } // Generate LoopInfo from it. 
- OwnedMLI = make_unique<MachineLoopInfo>(); + OwnedMLI = std::make_unique<MachineLoopInfo>(); OwnedMLI->getBase().analyze(MDT->getBase()); MLI = OwnedMLI.get(); } - OwnedMBFI = make_unique<MachineBlockFrequencyInfo>(); + OwnedMBFI = std::make_unique<MachineBlockFrequencyInfo>(); OwnedMBFI->calculate(*MF, MBPI, *MLI); return *OwnedMBFI.get(); } diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index 503821537ed9..ac3ef0e709f3 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp index a669e64692b9..f1b237d83e8c 100644 --- a/lib/CodeGen/LiveDebugValues.cpp +++ b/lib/CodeGen/LiveDebugValues.cpp @@ -7,14 +7,23 @@ //===----------------------------------------------------------------------===// /// /// This pass implements a data flow analysis that propagates debug location -/// information by inserting additional DBG_VALUE instructions into the machine -/// instruction stream. The pass internally builds debug location liveness -/// ranges to determine the points where additional DBG_VALUEs need to be -/// inserted. +/// information by inserting additional DBG_VALUE insts into the machine +/// instruction stream. Before running, each DBG_VALUE inst corresponds to a +/// source assignment of a variable. Afterwards, a DBG_VALUE inst specifies a +/// variable location for the current basic block (see SourceLevelDebugging.rst). /// /// This is a separate pass from DbgValueHistoryCalculator to facilitate /// testing and improve modularity. /// +/// Each variable location is represented by a VarLoc object that identifies the +/// source variable, its current machine-location, and the DBG_VALUE inst that +/// specifies the location. Each VarLoc is indexed in the (function-scope) +/// VarLocMap, giving each VarLoc a unique index. Rather than operate directly +/// on machine locations, the dataflow analysis in this pass identifies +/// locations by their index in the VarLocMap, meaning all the variable +/// locations in a block can be described by a sparse vector of VarLocMap +/// indexes. +/// //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" @@ -68,6 +77,7 @@ using namespace llvm; #define DEBUG_TYPE "livedebugvalues" STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted"); +STATISTIC(NumRemoved, "Number of DBG_VALUE instructions removed"); // If @MI is a DBG_VALUE with debug value described by a defined // register, returns the number of this register. In the other case, returns 0. @@ -179,8 +189,16 @@ private: } }; + /// Identity of the variable at this location. const DebugVariable Var; - const MachineInstr &MI; ///< Only used for cloning a new DBG_VALUE. + + /// The expression applied to this location. + const DIExpression *Expr; + + /// DBG_VALUE to clone var/expr information from if this location + /// is moved. 
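The new file header above captures the heart of this LiveDebugValues rewrite: each distinct location is interned once and identified by a small integer, so per-block dataflow state is just a set of integers. A toy sketch of the scheme: SparseBitVector is the pass's actual VarLocSet, while UniqueVector stands in for VarLocMap (whose definition falls outside these hunks) and the string payload stands in for VarLoc:

    #include "llvm/ADT/SparseBitVector.h"
    #include "llvm/ADT/UniqueVector.h"
    #include <string>

    using namespace llvm;

    void varLocIndexingSketch() {
      UniqueVector<std::string> VarLocIDs; // IDs are stable and start at 1.
      SparseBitVector<> OpenRanges;        // The block's live location IDs.

      // Interning a location yields its index...
      unsigned ID = VarLocIDs.insert("x lives in $rdi");
      OpenRanges.set(ID);

      // ...and joins, kills and transfers become cheap set operations,
      // with the full VarLoc recoverable from its index on demand.
      for (unsigned I : OpenRanges)
        (void)VarLocIDs[I];
    }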
+ const MachineInstr &MI; + mutable UserValueScopes UVS; enum VarLocKind { InvalidKind = 0, @@ -201,9 +219,9 @@ private: const ConstantInt *CImm; } Loc; - VarLoc(const MachineInstr &MI, LexicalScopes &LS, - VarLocKind K = InvalidKind) - : Var(MI), MI(MI), UVS(MI.getDebugLoc(), LS){ + VarLoc(const MachineInstr &MI, LexicalScopes &LS) + : Var(MI), Expr(MI.getDebugExpression()), MI(MI), + UVS(MI.getDebugLoc(), LS) { static_assert((sizeof(Loc) == sizeof(uint64_t)), "hash does not cover all members of Loc"); assert(MI.isDebugValue() && "not a DBG_VALUE"); @@ -225,17 +243,78 @@ private: "entry values must be register locations"); } - /// The constructor for spill locations. - VarLoc(const MachineInstr &MI, unsigned SpillBase, int SpillOffset, - LexicalScopes &LS) - : Var(MI), MI(MI), UVS(MI.getDebugLoc(), LS) { - assert(MI.isDebugValue() && "not a DBG_VALUE"); - assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE"); - Kind = SpillLocKind; - Loc.SpillLocation = {SpillBase, SpillOffset}; + /// Take the variable and machine-location in DBG_VALUE MI, and build an + /// entry location using the given expression. + static VarLoc CreateEntryLoc(const MachineInstr &MI, LexicalScopes &LS, + const DIExpression *EntryExpr) { + VarLoc VL(MI, LS); + VL.Kind = EntryValueKind; + VL.Expr = EntryExpr; + return VL; + } + + /// Copy the register location in DBG_VALUE MI, updating the register to + /// be NewReg. + static VarLoc CreateCopyLoc(const MachineInstr &MI, LexicalScopes &LS, + unsigned NewReg) { + VarLoc VL(MI, LS); + assert(VL.Kind == RegisterKind); + VL.Loc.RegNo = NewReg; + return VL; + } + + /// Take the variable described by DBG_VALUE MI, and create a VarLoc + /// locating it in the specified spill location. + static VarLoc CreateSpillLoc(const MachineInstr &MI, unsigned SpillBase, + int SpillOffset, LexicalScopes &LS) { + VarLoc VL(MI, LS); + assert(VL.Kind == RegisterKind); + VL.Kind = SpillLocKind; + VL.Loc.SpillLocation = {SpillBase, SpillOffset}; + return VL; } - // Is the Loc field a constant or constant object? + /// Create a DBG_VALUE representing this VarLoc in the given function. + /// Copies variable-specific information such as DILocalVariable and + /// inlining information from the original DBG_VALUE instruction, which may + /// have been several transfers ago. + MachineInstr *BuildDbgValue(MachineFunction &MF) const { + const DebugLoc &DbgLoc = MI.getDebugLoc(); + bool Indirect = MI.isIndirectDebugValue(); + const auto &IID = MI.getDesc(); + const DILocalVariable *Var = MI.getDebugVariable(); + const DIExpression *DIExpr = MI.getDebugExpression(); + + switch (Kind) { + case EntryValueKind: + // An entry value is a register location -- but with an updated + // expression. + return BuildMI(MF, DbgLoc, IID, Indirect, Loc.RegNo, Var, Expr); + case RegisterKind: + // Register locations are like the source DBG_VALUE, but with the + // register number from this VarLoc. + return BuildMI(MF, DbgLoc, IID, Indirect, Loc.RegNo, Var, DIExpr); + case SpillLocKind: { + // Spills are indirect DBG_VALUEs, with a base register and offset. + // Use the original DBG_VALUEs expression to build the spilt location + // on top of. FIXME: spill locations created before this pass runs + // are not recognized, and not handled here. 
+ auto *SpillExpr = DIExpression::prepend( + DIExpr, DIExpression::ApplyOffset, Loc.SpillLocation.SpillOffset); + unsigned Base = Loc.SpillLocation.SpillBase; + return BuildMI(MF, DbgLoc, IID, true, Base, Var, SpillExpr); + } + case ImmediateKind: { + MachineOperand MO = MI.getOperand(0); + return BuildMI(MF, DbgLoc, IID, Indirect, MO, Var, DIExpr); + } + case InvalidKind: + llvm_unreachable("Tried to produce DBG_VALUE for invalid VarLoc"); + } + llvm_unreachable("Unrecognized LiveDebugValues.VarLoc.Kind enum"); + } + + /// Is the Loc field a constant or constant object? bool isConstant() const { return Kind == ImmediateKind; } /// If this variable is described by a register, return it, @@ -251,18 +330,42 @@ bool dominates(MachineBasicBlock &MBB) const { return UVS.dominates(&MBB); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - LLVM_DUMP_METHOD void dump() const { MI.dump(); } + // TRI can be null. + void dump(const TargetRegisterInfo *TRI, raw_ostream &Out = dbgs()) const { + Out << "VarLoc("; + switch (Kind) { + case RegisterKind: + case EntryValueKind: + Out << printReg(Loc.RegNo, TRI); + break; + case SpillLocKind: + Out << printReg(Loc.SpillLocation.SpillBase, TRI); + Out << "[" << Loc.SpillLocation.SpillOffset << "]"; + break; + case ImmediateKind: + Out << Loc.Immediate; + break; + case InvalidKind: + llvm_unreachable("Invalid VarLoc in dump method"); + } + + Out << ", \"" << Var.getVar()->getName() << "\", " << *Expr << ", "; + if (Var.getInlinedAt()) + Out << "!" << Var.getInlinedAt()->getMetadataID() << ")\n"; + else + Out << "(null))\n"; + } #endif bool operator==(const VarLoc &Other) const { return Kind == Other.Kind && Var == Other.Var && - Loc.Hash == Other.Loc.Hash; + Loc.Hash == Other.Loc.Hash && Expr == Other.Expr; } /// This operator guarantees that VarLocs are sorted by Variable first. bool operator<(const VarLoc &Other) const { - return std::tie(Var, Kind, Loc.Hash) < - std::tie(Other.Var, Other.Kind, Other.Loc.Hash); + return std::tie(Var, Kind, Loc.Hash, Expr) < + std::tie(Other.Var, Other.Kind, Other.Loc.Hash, Other.Expr); } }; @@ -271,8 +374,8 @@ private: using VarLocSet = SparseBitVector<>; using VarLocInMBB = SmallDenseMap<const MachineBasicBlock *, VarLocSet>; struct TransferDebugPair { - MachineInstr *TransferInst; - MachineInstr *DebugInst; + MachineInstr *TransferInst; /// Instruction where this transfer occurs. + unsigned LocationID; /// Location number for the transfer dest. }; using TransferMap = SmallVector<TransferDebugPair, 4>; @@ -320,6 +423,14 @@ private: Vars.insert({Var, VarLocID}); } + /// Insert a set of ranges. + void insertFromLocSet(const VarLocSet &ToLoad, const VarLocMap &Map) { + for (unsigned Id : ToLoad) { + const VarLoc &Var = Map[Id]; + insert(Id, Var.Var); + } + } + /// Empty the set. void clear() { VarLocs.clear(); @@ -333,8 +444,18 @@ private: } }; - bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF, - unsigned &Reg); + /// Tests whether this instruction is a spill to a stack location. + bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF); + + /// Decide if @MI is a spill instruction and return true if it is. We use 2 + /// criteria to make this decision: + /// - Is this instruction a store to a spill slot? + /// - Is there a register operand that is both used and killed? + /// TODO: Store optimization can fold spills into other stores (including + /// other spills). We do not handle this yet (more than one memory operand). 
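The SpillLocKind case of BuildDbgValue above is the one non-trivial rebuild: a spilled variable is emitted as an indirect DBG_VALUE on the spill base register, with the frame offset folded into the variable's DIExpression. The expression surgery in isolation; makeSpillExpr is an illustrative name, not part of the pass:

    #include "llvm/IR/DebugInfoMetadata.h"

    using namespace llvm;

    // Describe a value now sitting at [BaseReg + Offset] by prepending the
    // offset to the variable's original expression; the caller then builds
    // the DBG_VALUE with IsIndirect set and BaseReg as the location.
    static DIExpression *makeSpillExpr(const DIExpression *Expr,
                                       int64_t Offset) {
      return DIExpression::prepend(Expr, DIExpression::ApplyOffset, Offset);
    }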
+ bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF, + unsigned &Reg); + /// If a given instruction is identified as a spill, return the spill location /// and set \p Reg to the spilled register. Optional<VarLoc::SpillLoc> isRestoreInstruction(const MachineInstr &MI, @@ -361,13 +482,13 @@ private: void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers, DebugParamMap &DebugEntryVals); - bool transferTerminatorInst(MachineInstr &MI, OpenRangesSet &OpenRanges, - VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs); + bool transferTerminator(MachineBasicBlock *MBB, OpenRangesSet &OpenRanges, + VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs); - bool process(MachineInstr &MI, OpenRangesSet &OpenRanges, + void process(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, TransferMap &Transfers, DebugParamMap &DebugEntryVals, - bool transferChanges, OverlapMap &OverlapFragments, + OverlapMap &OverlapFragments, VarToFragments &SeenFragments); void accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments, @@ -376,7 +497,12 @@ private: bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs, const VarLocMap &VarLocIDs, SmallPtrSet<const MachineBasicBlock *, 16> &Visited, - SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks); + SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks, + VarLocInMBB &PendingInLocs); + + /// Create DBG_VALUE insts for inlocs that have been propagated but + /// had their instruction creation deferred. + void flushPendingLocs(VarLocInMBB &PendingInLocs, VarLocMap &VarLocIDs); bool ExtendRanges(MachineFunction &MF); @@ -518,7 +644,7 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF, const VarLoc &VL = VarLocIDs[VLL]; Out << " Var: " << VL.Var.getVar()->getName(); Out << " MI: "; - VL.dump(); + VL.dump(TRI, Out); } } Out << "\n"; @@ -567,11 +693,7 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI, ID = VarLocIDs.insert(VL); OpenRanges.insert(ID, VL.Var); } else if (MI.hasOneMemOperand()) { - // It's a stack spill -- fetch spill base and offset. - VarLoc::SpillLoc SpillLocation = extractSpillBaseRegAndOffset(MI); - VarLoc VL(MI, SpillLocation.SpillBase, SpillLocation.SpillOffset, LS); - ID = VarLocIDs.insert(VL); - OpenRanges.insert(ID, VL.Var); + llvm_unreachable("DBG_VALUE with mem operand encountered after regalloc?"); } else { // This must be an undefined location. We should leave OpenRanges closed. 
assert(MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == 0 && @@ -585,7 +707,6 @@ void LiveDebugValues::emitEntryValues(MachineInstr &MI, TransferMap &Transfers, DebugParamMap &DebugEntryVals, SparseBitVector<> &KillSet) { - MachineFunction *MF = MI.getParent()->getParent(); for (unsigned ID : KillSet) { if (!VarLocIDs[ID].Var.getVar()->isParameter()) continue; @@ -600,20 +721,12 @@ void LiveDebugValues::emitEntryValues(MachineInstr &MI, auto ParamDebugInstr = DebugEntryVals[CurrDebugInstr->getDebugVariable()]; DIExpression *NewExpr = DIExpression::prepend( ParamDebugInstr->getDebugExpression(), DIExpression::EntryValue); - MachineInstr *EntryValDbgMI = - BuildMI(*MF, ParamDebugInstr->getDebugLoc(), ParamDebugInstr->getDesc(), - ParamDebugInstr->isIndirectDebugValue(), - ParamDebugInstr->getOperand(0).getReg(), - ParamDebugInstr->getDebugVariable(), NewExpr); - - if (ParamDebugInstr->isIndirectDebugValue()) - EntryValDbgMI->getOperand(1).setImm( - ParamDebugInstr->getOperand(1).getImm()); - - Transfers.push_back({&MI, EntryValDbgMI}); - VarLoc VL(*EntryValDbgMI, LS); - unsigned EntryValLocID = VarLocIDs.insert(VL); - OpenRanges.insert(EntryValLocID, VL.Var); + + VarLoc EntryLoc = VarLoc::CreateEntryLoc(*ParamDebugInstr, LS, NewExpr); + + unsigned EntryValLocID = VarLocIDs.insert(EntryLoc); + Transfers.push_back({&MI, EntryValLocID}); + OpenRanges.insert(EntryValLocID, EntryLoc.Var); } } @@ -627,21 +740,19 @@ void LiveDebugValues::insertTransferDebugPair( VarLocMap &VarLocIDs, unsigned OldVarID, TransferKind Kind, unsigned NewReg) { const MachineInstr *DebugInstr = &VarLocIDs[OldVarID].MI; - MachineFunction *MF = MI.getParent()->getParent(); - MachineInstr *NewDebugInstr; auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &DebugInstr, - &VarLocIDs](VarLoc &VL, MachineInstr *NewDebugInstr) { + &VarLocIDs](VarLoc &VL) { unsigned LocId = VarLocIDs.insert(VL); // Close this variable's previous location range. DebugVariable V(*DebugInstr); OpenRanges.erase(V); + // Record the new location as an open range, and a postponed transfer + // inserting a DBG_VALUE for this location. OpenRanges.insert(LocId, VL.Var); - // The newly created DBG_VALUE instruction NewDebugInstr must be inserted - // after MI. Keep track of the pairing. - TransferDebugPair MIP = {&MI, NewDebugInstr}; + TransferDebugPair MIP = {&MI, LocId}; Transfers.push_back(MIP); }; @@ -653,37 +764,25 @@ void LiveDebugValues::insertTransferDebugPair( "No register supplied when handling a copy of a debug value"); // Create a DBG_VALUE instruction to describe the Var in its new // register location. - NewDebugInstr = BuildMI( - *MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), - DebugInstr->isIndirectDebugValue(), NewReg, - DebugInstr->getDebugVariable(), DebugInstr->getDebugExpression()); - if (DebugInstr->isIndirectDebugValue()) - NewDebugInstr->getOperand(1).setImm(DebugInstr->getOperand(1).getImm()); - VarLoc VL(*NewDebugInstr, LS); - ProcessVarLoc(VL, NewDebugInstr); - LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register copy: "; - NewDebugInstr->print(dbgs(), /*IsStandalone*/false, - /*SkipOpers*/false, /*SkipDebugLoc*/false, - /*AddNewLine*/true, TII)); + VarLoc VL = VarLoc::CreateCopyLoc(*DebugInstr, LS, NewReg); + ProcessVarLoc(VL); + LLVM_DEBUG({ + dbgs() << "Creating VarLoc for register copy:"; + VL.dump(TRI); + }); return; } case TransferKind::TransferSpill: { // Create a DBG_VALUE instruction to describe the Var in its spilled // location. 
VarLoc::SpillLoc SpillLocation = extractSpillBaseRegAndOffset(MI); - auto *SpillExpr = DIExpression::prepend(DebugInstr->getDebugExpression(), - DIExpression::ApplyOffset, - SpillLocation.SpillOffset); - NewDebugInstr = BuildMI( - *MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), true, - SpillLocation.SpillBase, DebugInstr->getDebugVariable(), SpillExpr); - VarLoc VL(*NewDebugInstr, SpillLocation.SpillBase, - SpillLocation.SpillOffset, LS); - ProcessVarLoc(VL, NewDebugInstr); - LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: "; - NewDebugInstr->print(dbgs(), /*IsStandalone*/false, - /*SkipOpers*/false, /*SkipDebugLoc*/false, - /*AddNewLine*/true, TII)); + VarLoc VL = VarLoc::CreateSpillLoc(*DebugInstr, SpillLocation.SpillBase, + SpillLocation.SpillOffset, LS); + ProcessVarLoc(VL); + LLVM_DEBUG({ + dbgs() << "Creating VarLoc for spill:"; + VL.dump(TRI); + }); return; } case TransferKind::TransferRestore: { @@ -691,15 +790,14 @@ void LiveDebugValues::insertTransferDebugPair( "No register supplied when handling a restore of a debug value"); MachineFunction *MF = MI.getMF(); DIBuilder DIB(*const_cast<Function &>(MF->getFunction()).getParent()); - NewDebugInstr = - BuildMI(*MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), false, - NewReg, DebugInstr->getDebugVariable(), DIB.createExpression()); - VarLoc VL(*NewDebugInstr, LS); - ProcessVarLoc(VL, NewDebugInstr); - LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register restore: "; - NewDebugInstr->print(dbgs(), /*IsStandalone*/false, - /*SkipOpers*/false, /*SkipDebugLoc*/false, - /*AddNewLine*/true, TII)); + // DebugInstr refers to the pre-spill location, therefore we can reuse + // its expression. + VarLoc VL = VarLoc::CreateCopyLoc(*DebugInstr, LS, NewReg); + ProcessVarLoc(VL); + LLVM_DEBUG({ + dbgs() << "Creating VarLoc for restore:"; + VL.dump(TRI); + }); return; } } @@ -719,7 +817,7 @@ void LiveDebugValues::transferRegisterDef( // instructions never clobber SP, because some backends (e.g., AArch64) // never list SP in the regmask. if (MO.isReg() && MO.isDef() && MO.getReg() && - TRI->isPhysicalRegister(MO.getReg()) && + Register::isPhysicalRegister(MO.getReg()) && !(MI.isCall() && MO.getReg() == SP)) { // Remove ranges of all aliased registers. for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI) @@ -748,16 +846,8 @@ void LiveDebugValues::transferRegisterDef( } } -/// Decide if @MI is a spill instruction and return true if it is. We use 2 -/// criteria to make this decision: -/// - Is this instruction a store to a spill slot? -/// - Is there a register operand that is both used and killed? -/// TODO: Store optimization can fold spills into other stores (including -/// other spills). We do not handle this yet (more than one memory operand). bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI, - MachineFunction *MF, unsigned &Reg) { - SmallVector<const MachineMemOperand*, 1> Accesses; - + MachineFunction *MF) { // TODO: Handle multiple stores folded into one. if (!MI.hasOneMemOperand()) return false; @@ -766,6 +856,14 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI, return false; // This is not a spill instruction, since no valid size was // returned from either function. 
+ return true; +} + +bool LiveDebugValues::isLocationSpill(const MachineInstr &MI, + MachineFunction *MF, unsigned &Reg) { + if (!isSpillInstruction(MI, MF)) + return false; + auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) { if (!MO.isReg() || !MO.isUse()) { Reg = 0; @@ -834,7 +932,37 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI, LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump();); - if (isSpillInstruction(MI, MF, Reg)) { + // First, if there are any DBG_VALUEs pointing at a spill slot that is + // written to, then close the variable location. The value in memory + // will have changed. + VarLocSet KillSet; + if (isSpillInstruction(MI, MF)) { + Loc = extractSpillBaseRegAndOffset(MI); + for (unsigned ID : OpenRanges.getVarLocs()) { + const VarLoc &VL = VarLocIDs[ID]; + if (VL.Kind == VarLoc::SpillLocKind && VL.Loc.SpillLocation == *Loc) { + // This location is overwritten by the current instruction -- terminate + // the open range, and insert an explicit DBG_VALUE $noreg. + // + // Doing this at a later stage would require re-interpreting all + // DBG_VALUEs and DIExpressions to identify whether they point at + // memory, and then analysing all memory writes to see if they + // overwrite that memory, which is expensive. + // + // At this stage, we already know which DBG_VALUEs are for spills and + // where they are located; it's best to handle overwrites now. + KillSet.set(ID); + VarLoc UndefVL = VarLoc::CreateCopyLoc(VL.MI, LS, 0); + unsigned UndefLocID = VarLocIDs.insert(UndefVL); + Transfers.push_back({&MI, UndefLocID}); + } + } + OpenRanges.erase(KillSet, VarLocIDs); + } + + // Try to recognise spill and restore instructions that may create a new + // variable location. + if (isLocationSpill(MI, MF, Reg)) { TKind = TransferKind::TransferSpill; LLVM_DEBUG(dbgs() << "Recognized as spill: "; MI.dump();); LLVM_DEBUG(dbgs() << "Register: " << Reg << " " << printReg(Reg, TRI) @@ -854,6 +982,7 @@ LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '(' << VarLocIDs[ID].Var.getVar()->getName() << ")\n"); } else if (TKind == TransferKind::TransferRestore && + VarLocIDs[ID].Kind == VarLoc::SpillLocKind && VarLocIDs[ID].Loc.SpillLocation == *Loc) { LLVM_DEBUG(dbgs() << "Restoring Register " << printReg(Reg, TRI) << '(' << VarLocIDs[ID].Var.getVar()->getName() << ")\n"); @@ -885,8 +1014,8 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI, return false; }; - unsigned SrcReg = SrcRegOp->getReg(); - unsigned DestReg = DestRegOp->getReg(); + Register SrcReg = SrcRegOp->getReg(); + Register DestReg = DestRegOp->getReg(); // We want to recognize instructions where destination register is callee // saved register. If register that could be clobbered by the call is @@ -906,26 +1035,20 @@ } /// Terminate all open ranges at the end of the current basic block. 
-bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI, - OpenRangesSet &OpenRanges, - VarLocInMBB &OutLocs, - const VarLocMap &VarLocIDs) { +bool LiveDebugValues::transferTerminator(MachineBasicBlock *CurMBB, + OpenRangesSet &OpenRanges, + VarLocInMBB &OutLocs, + const VarLocMap &VarLocIDs) { bool Changed = false; - const MachineBasicBlock *CurMBB = MI.getParent(); - if (!(MI.isTerminator() || (&MI == &CurMBB->back()))) - return false; - - if (OpenRanges.empty()) - return false; LLVM_DEBUG(for (unsigned ID : OpenRanges.getVarLocs()) { // Copy OpenRanges to OutLocs, if not already present. dbgs() << "Add to OutLocs in MBB #" << CurMBB->getNumber() << ": "; - VarLocIDs[ID].dump(); + VarLocIDs[ID].dump(TRI); }); VarLocSet &VLS = OutLocs[CurMBB]; - Changed = VLS |= OpenRanges.getVarLocs(); + Changed = VLS != OpenRanges.getVarLocs(); // New OutLocs set may be different due to spill, restore or register // copy instruction processing. if (Changed) @@ -995,26 +1118,17 @@ void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI, } /// This routine creates OpenRanges and OutLocs. -bool LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges, +void LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, - TransferMap &Transfers, DebugParamMap &DebugEntryVals, - bool transferChanges, + TransferMap &Transfers, + DebugParamMap &DebugEntryVals, OverlapMap &OverlapFragments, VarToFragments &SeenFragments) { - bool Changed = false; transferDebugValue(MI, OpenRanges, VarLocIDs); transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers, DebugEntryVals); - if (transferChanges) { - transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers); - transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers); - } else { - // Build up a map of overlapping fragments on the first run through. - if (MI.isDebugValue()) - accumulateFragmentMap(MI, SeenFragments, OverlapFragments); - } - Changed = transferTerminatorInst(MI, OpenRanges, OutLocs, VarLocIDs); - return Changed; + transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers); + transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers); } /// This routine joins the analysis results of all incoming edges in @MBB by @@ -1024,7 +1138,8 @@ bool LiveDebugValues::join( MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs, const VarLocMap &VarLocIDs, SmallPtrSet<const MachineBasicBlock *, 16> &Visited, - SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks) { + SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks, + VarLocInMBB &PendingInLocs) { LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n"); bool Changed = false; @@ -1034,9 +1149,11 @@ bool LiveDebugValues::join( // can be joined. int NumVisited = 0; for (auto p : MBB.predecessors()) { - // Ignore unvisited predecessor blocks. As we are processing - // the blocks in reverse post-order any unvisited block can - // be considered to not remove any incoming values. + // Ignore backedges if we have not visited the predecessor yet. As the + // predecessor hasn't yet had locations propagated into it, most locations + // will not yet be valid, so treat them as all being uninitialized and + // potentially valid. If a location guessed to be correct here is + // invalidated later, we will remove it when we revisit this block. 
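The join described above is a conventional forward dataflow meet: a block's in-locations are the intersection of the out-locations of all predecessors visited so far, with unvisited backedge predecessors treated optimistically and corrected on revisit. Reduced to the underlying bit-vector operation, as a sketch rather than the pass's actual join signature:

    #include "llvm/ADT/SparseBitVector.h"
    #include <vector>

    using namespace llvm;

    // Intersect the out-sets of all *visited* predecessors. An empty list
    // (the entry block, or all-backedge predecessors) yields the empty set.
    static SparseBitVector<>
    joinPredOutLocs(const std::vector<const SparseBitVector<> *> &PredOuts) {
      SparseBitVector<> InLocs;
      bool First = true;
      for (const SparseBitVector<> *Out : PredOuts) {
        if (First) {
          InLocs = *Out;
          First = false;
        } else {
          InLocs &= *Out;
        }
      }
      return InLocs;
    }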
if (!Visited.count(p)) { LLVM_DEBUG(dbgs() << " ignoring unvisited pred MBB: " << p->getNumber() << "\n"); @@ -1086,44 +1203,59 @@ bool LiveDebugValues::join( // is the entry block which has no predecessor. assert((NumVisited || MBB.pred_empty()) && "Should have processed at least one predecessor"); - if (InLocsT.empty()) - return false; VarLocSet &ILS = InLocs[&MBB]; + VarLocSet &Pending = PendingInLocs[&MBB]; - // Insert DBG_VALUE instructions, if not already inserted. + // New locations will have DBG_VALUE insts inserted at the start of the + // block, after location propagation has finished. Record the insertions + // that we need to perform in the Pending set. VarLocSet Diff = InLocsT; Diff.intersectWithComplement(ILS); for (auto ID : Diff) { - // This VarLoc is not found in InLocs i.e. it is not yet inserted. So, a - // new range is started for the var from the mbb's beginning by inserting - // a new DBG_VALUE. process() will end this range however appropriate. - const VarLoc &DiffIt = VarLocIDs[ID]; - const MachineInstr *DebugInstr = &DiffIt.MI; - MachineInstr *MI = nullptr; - if (DiffIt.isConstant()) { - MachineOperand MO(DebugInstr->getOperand(0)); - MI = BuildMI(MBB, MBB.instr_begin(), DebugInstr->getDebugLoc(), - DebugInstr->getDesc(), false, MO, - DebugInstr->getDebugVariable(), - DebugInstr->getDebugExpression()); - } else { - MI = BuildMI(MBB, MBB.instr_begin(), DebugInstr->getDebugLoc(), - DebugInstr->getDesc(), DebugInstr->isIndirectDebugValue(), - DebugInstr->getOperand(0).getReg(), - DebugInstr->getDebugVariable(), - DebugInstr->getDebugExpression()); - if (DebugInstr->isIndirectDebugValue()) - MI->getOperand(1).setImm(DebugInstr->getOperand(1).getImm()); - } - LLVM_DEBUG(dbgs() << "Inserted: "; MI->dump();); + Pending.set(ID); ILS.set(ID); ++NumInserted; Changed = true; } + + // We may have lost locations by learning about a predecessor that either + // loses or moves a variable. Find any locations in ILS that are not in the + // new in-locations, and delete those. + VarLocSet Removed = ILS; + Removed.intersectWithComplement(InLocsT); + for (auto ID : Removed) { + Pending.reset(ID); + ILS.reset(ID); + ++NumRemoved; + Changed = true; + } + return Changed; } +void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs, + VarLocMap &VarLocIDs) { + // PendingInLocs records all locations propagated into blocks, which have + // not had DBG_VALUE insts created. Go through and create those insts now. + for (auto &Iter : PendingInLocs) { + // Map is keyed on a constant pointer, unwrap it so we can insert insts. + auto &MBB = const_cast<MachineBasicBlock &>(*Iter.first); + VarLocSet &Pending = Iter.second; + + for (unsigned ID : Pending) { + // The ID location is live-in to MBB -- work out what kind of machine + // location it is and create a DBG_VALUE. + const VarLoc &DiffIt = VarLocIDs[ID]; + MachineInstr *MI = DiffIt.BuildDbgValue(*MBB.getParent()); + MBB.insert(MBB.instr_begin(), MI); + + (void)MI; + LLVM_DEBUG(dbgs() << "Inserted: "; MI->dump();); + } + } +} + /// Calculate the liveness information for the given machine function and /// extend ranges across basic blocks. bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { @@ -1140,6 +1272,9 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { VarLocInMBB OutLocs; // Ranges that exist beyond bb. VarLocInMBB InLocs; // Ranges that are incoming after joining. TransferMap Transfers; // DBG_VALUEs associated with spills. 
+ VarLocInMBB PendingInLocs; // Ranges that are incoming after joining, but + // that we have deferred creating DBG_VALUE insts + // for immediately. VarToFragments SeenFragments; @@ -1156,8 +1291,6 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { std::greater<unsigned int>> Pending; - enum : bool { dontTransferChanges = false, transferChanges = true }; - // Besides parameter's modification, check whether a DBG_VALUE is inlined // in order to deduce whether the variable that it tracks comes from // a different function. If that is the case we can't track its entry value. @@ -1169,7 +1302,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); - unsigned FP = TRI->getFrameRegister(MF); + Register FP = TRI->getFrameRegister(MF); auto IsRegOtherThanSPAndFP = [&](const MachineOperand &Op) -> bool { return Op.isReg() && Op.getReg() != SP && Op.getReg() != FP; }; @@ -1195,23 +1328,14 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { !MI.getDebugExpression()->isFragment()) DebugEntryVals[MI.getDebugVariable()] = &MI; - // Initialize every mbb with OutLocs. - // We are not looking at any spill instructions during the initial pass - // over the BBs. The LiveDebugVariables pass has already created DBG_VALUE - // instructions for spills of registers that are known to be user variables - // within the BB in which the spill occurs. + // Initialize per-block structures and scan for fragment overlaps. for (auto &MBB : MF) { + PendingInLocs[&MBB] = VarLocSet(); + for (auto &MI : MBB) { - process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers, DebugEntryVals, - dontTransferChanges, OverlapFragments, SeenFragments); - } - // Add any entry DBG_VALUE instructions necessitated by parameter - // clobbering. - for (auto &TR : Transfers) { - MBB.insertAfter(MachineBasicBlock::iterator(*TR.TransferInst), - TR.DebugInst); + if (MI.isDebugValue()) + accumulateFragmentMap(MI, SeenFragments, OverlapFragments); } - Transfers.clear(); } auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool { @@ -1248,26 +1372,21 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { while (!Worklist.empty()) { MachineBasicBlock *MBB = OrderToBB[Worklist.top()]; Worklist.pop(); - MBBJoined = - join(*MBB, OutLocs, InLocs, VarLocIDs, Visited, ArtificialBlocks); - Visited.insert(MBB); + MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs, Visited, + ArtificialBlocks, PendingInLocs); + MBBJoined |= Visited.insert(MBB).second; if (MBBJoined) { MBBJoined = false; Changed = true; // Now that we have started to extend ranges across BBs we need to // examine spill instructions to see whether they spill registers that // correspond to user variables. + // First load any pending inlocs. + OpenRanges.insertFromLocSet(PendingInLocs[MBB], VarLocIDs); for (auto &MI : *MBB) - OLChanged |= process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers, - DebugEntryVals, transferChanges, OverlapFragments, - SeenFragments); - - // Add any DBG_VALUE instructions necessitated by spills. 
- for (auto &TR : Transfers) - MBB->insertAfter(MachineBasicBlock::iterator(*TR.TransferInst), - TR.DebugInst); - Transfers.clear(); + DebugEntryVals, OverlapFragments, SeenFragments); + OLChanged |= transferTerminator(MBB, OpenRanges, OutLocs, VarLocIDs); LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "OutLocs after propagating", dbgs())); @@ -1289,6 +1408,19 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { assert(Pending.empty() && "Pending should be empty"); } + // Add any DBG_VALUE instructions created by location transfers. + for (auto &TR : Transfers) { + MachineBasicBlock *MBB = TR.TransferInst->getParent(); + const VarLoc &VL = VarLocIDs[TR.LocationID]; + MachineInstr *MI = VL.BuildDbgValue(MF); + MBB->insertAfterBundle(TR.TransferInst->getIterator(), MI); + } + Transfers.clear(); + + // Deferred inlocs will not have had any DBG_VALUE insts created; do + // that now. + flushPendingLocs(PendingInLocs, VarLocIDs); + LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "Final OutLocs", dbgs())); LLVM_DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs, "Final InLocs", dbgs())); return Changed; @@ -1308,7 +1440,7 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); TFI = MF.getSubtarget().getFrameLowering(); TFI->determineCalleeSaves(MF, CalleeSavedRegs, - make_unique<RegScavenger>().get()); + std::make_unique<RegScavenger>().get()); LS.initialize(MF); bool Changed = ExtendRanges(MF); diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 656ec7d4bdfd..2dd462fc72b3 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -99,28 +99,27 @@ enum : unsigned { UndefLocNo = ~0U }; /// usage of the location. class DbgValueLocation { public: - DbgValueLocation(unsigned LocNo, bool WasIndirect) - : LocNo(LocNo), WasIndirect(WasIndirect) { + DbgValueLocation(unsigned LocNo) + : LocNo(LocNo) { static_assert(sizeof(*this) == sizeof(unsigned), "bad bitfield packing"); assert(locNo() == LocNo && "location truncation"); } - DbgValueLocation() : LocNo(0), WasIndirect(0) {} + DbgValueLocation() : LocNo(0) {} unsigned locNo() const { // Fix up the undef location number, which gets truncated. return LocNo == INT_MAX ? UndefLocNo : LocNo; } - bool wasIndirect() const { return WasIndirect; } bool isUndef() const { return locNo() == UndefLocNo; } DbgValueLocation changeLocNo(unsigned NewLocNo) const { - return DbgValueLocation(NewLocNo, WasIndirect); + return DbgValueLocation(NewLocNo); } friend inline bool operator==(const DbgValueLocation &LHS, const DbgValueLocation &RHS) { - return LHS.LocNo == RHS.LocNo && LHS.WasIndirect == RHS.WasIndirect; + return LHS.LocNo == RHS.LocNo; } friend inline bool operator!=(const DbgValueLocation &LHS, @@ -129,8 +128,7 @@ public: } private: - unsigned LocNo : 31; - unsigned WasIndirect : 1; + unsigned LocNo; }; /// Map of where a user value is live, and its location. @@ -144,22 +142,51 @@ namespace { class LDVImpl; +/// A UserValue is uniquely identified by the source variable it refers to +/// (Variable), the expression describing how to get the value (Expression) and +/// the specific usage (InlinedAt). InlinedAt differentiates both between +/// inline and non-inline functions, and multiple inlined instances in the same +/// scope. FIXME: The only part of the Expression which matters for UserValue +/// identification is the fragment part. +class UserValueIdentity { +private: + /// The debug info variable we are part of. 
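The rewrite above turns LiveDebugValues into a cleaner forward dataflow problem: `join` intersects predecessor out-locations, and DBG_VALUE creation is deferred (via `Pending`/`flushPendingLocs`) until the fixed point is reached. A minimal, self-contained sketch of the join rule the new comment describes — toy types only, not the pass's real `VarLocSet` machinery:

```cpp
#include <bitset>
#include <map>
#include <vector>

using LocSet = std::bitset<8>; // toy stand-in for VarLocSet

// Intersect the out-locations of all *visited* predecessors; an unvisited
// predecessor (a back-edge on the first pass) is treated as "all locations
// potentially valid", i.e. it does not constrain the result. Wrong guesses
// are pruned when the block is revisited, as the new join() comment explains.
LocSet joinPreds(const std::vector<int> &Preds,
                 const std::map<int, LocSet> &OutLocs,
                 const std::vector<bool> &Visited) {
  LocSet In;
  In.set(); // optimistic top element
  bool AnyVisited = false;
  for (int P : Preds) {
    if (!Visited[P])
      continue; // ignore unvisited back-edge
    In &= OutLocs.at(P);
    AnyVisited = true;
  }
  return AnyVisited ? In : LocSet{}; // entry block: nothing live-in
}
```

diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 656ec7d4bdfd..2dd462fc72b3 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -99,28 +99,27 @@ enum : unsigned { UndefLocNo = ~0U };
 /// usage of the location.
 class DbgValueLocation {
 public:
-  DbgValueLocation(unsigned LocNo, bool WasIndirect)
-      : LocNo(LocNo), WasIndirect(WasIndirect) {
+  DbgValueLocation(unsigned LocNo)
+      : LocNo(LocNo) {
     static_assert(sizeof(*this) == sizeof(unsigned), "bad bitfield packing");
     assert(locNo() == LocNo && "location truncation");
   }
 
-  DbgValueLocation() : LocNo(0), WasIndirect(0) {}
+  DbgValueLocation() : LocNo(0) {}
 
   unsigned locNo() const {
     // Fix up the undef location number, which gets truncated.
     return LocNo == INT_MAX ? UndefLocNo : LocNo;
   }
-  bool wasIndirect() const { return WasIndirect; }
   bool isUndef() const { return locNo() == UndefLocNo; }
 
   DbgValueLocation changeLocNo(unsigned NewLocNo) const {
-    return DbgValueLocation(NewLocNo, WasIndirect);
+    return DbgValueLocation(NewLocNo);
   }
 
   friend inline bool operator==(const DbgValueLocation &LHS,
                                 const DbgValueLocation &RHS) {
-    return LHS.LocNo == RHS.LocNo && LHS.WasIndirect == RHS.WasIndirect;
+    return LHS.LocNo == RHS.LocNo;
   }
 
   friend inline bool operator!=(const DbgValueLocation &LHS,
@@ -129,8 +128,7 @@ public:
   }
 
 private:
-  unsigned LocNo : 31;
-  unsigned WasIndirect : 1;
+  unsigned LocNo;
 };
 
 /// Map of where a user value is live, and its location.
@@ -144,22 +142,51 @@ namespace {
 
 class LDVImpl;
 
+/// A UserValue is uniquely identified by the source variable it refers to
+/// (Variable), the expression describing how to get the value (Expression) and
+/// the specific usage (InlinedAt). InlinedAt differentiates both between
+/// inline and non-inline functions, and multiple inlined instances in the same
+/// scope. FIXME: The only part of the Expression which matters for UserValue
+/// identification is the fragment part.
+class UserValueIdentity {
+private:
+  /// The debug info variable we are part of.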
+  const DILocalVariable *Variable;
+  /// Any complex address expression.
+  const DIExpression *Expression;
+  /// Function usage identification.
+  const DILocation *InlinedAt;
+
+public:
+  UserValueIdentity(const DILocalVariable *Var, const DIExpression *Expr,
+                    const DILocation *IA)
+      : Variable(Var), Expression(Expr), InlinedAt(IA) {}
+
+  bool match(const DILocalVariable *Var, const DIExpression *Expr,
+             const DILocation *IA) const {
+    // FIXME: The fragment should be part of the identity, but not
+    // other things in the expression like stack values.
+    return Var == Variable && Expr == Expression && IA == InlinedAt;
+  }
+
+  bool match(const UserValueIdentity &Other) const {
+    return match(Other.Variable, Other.Expression, Other.InlinedAt);
+  }
+
+  unsigned hash_value() const {
+    return hash_combine(Variable, Expression, InlinedAt);
+  }
+};
+
 /// A user value is a part of a debug info user variable.
 ///
 /// A DBG_VALUE instruction notes that (a sub-register of) a virtual register
 /// holds part of a user variable. The part is identified by a byte offset.
-///
-/// UserValues are grouped into equivalence classes for easier searching. Two
-/// user values are related if they refer to the same variable, or if they are
-/// held by the same virtual register. The equivalence class is the transitive
-/// closure of that relation.
 class UserValue {
   const DILocalVariable *Variable; ///< The debug info variable we are part of.
   const DIExpression *Expression;  ///< Any complex address expression.
   DebugLoc dl;            ///< The debug location for the variable. This is
                           ///< used by dwarf writer to find lexical scope.
-  UserValue *leader;      ///< Equivalence class leader.
-  UserValue *next = nullptr; ///< Next value in equivalence class, or null.
 
   /// Numbered locations referenced by locmap.
   SmallVector<MachineOperand, 4> locations;
@@ -180,49 +207,15 @@ class UserValue {
                         LiveIntervals &LIS);
 
 public:
+  UserValue(const UserValue &) = delete;
+
   /// Create a new UserValue.
   UserValue(const DILocalVariable *var, const DIExpression *expr, DebugLoc L,
             LocMap::Allocator &alloc)
-      : Variable(var), Expression(expr), dl(std::move(L)), leader(this),
-        locInts(alloc) {}
-
-  /// Get the leader of this value's equivalence class.
-  UserValue *getLeader() {
-    UserValue *l = leader;
-    while (l != l->leader)
-      l = l->leader;
-    return leader = l;
-  }
+      : Variable(var), Expression(expr), dl(std::move(L)), locInts(alloc) {}
 
-  /// Return the next UserValue in the equivalence class.
-  UserValue *getNext() const { return next; }
-
-  /// Does this UserValue match the parameters?
-  bool match(const DILocalVariable *Var, const DIExpression *Expr,
-             const DILocation *IA) const {
-    // FIXME: The fragment should be part of the equivalence class, but not
-    // other things in the expression like stack values.
-    return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA;
-  }
-
-  /// Merge equivalence classes.
-  static UserValue *merge(UserValue *L1, UserValue *L2) {
-    L2 = L2->getLeader();
-    if (!L1)
-      return L2;
-    L1 = L1->getLeader();
-    if (L1 == L2)
-      return L1;
-    // Splice L2 before L1's members.
-    UserValue *End = L2;
-    while (End->next) {
-      End->leader = L1;
-      End = End->next;
-    }
-    End->leader = L1;
-    End->next = L1->next;
-    L1->next = L2;
-    return L1;
+  UserValueIdentity getId() {
+    return UserValueIdentity(Variable, Expression, dl->getInlinedAt());
   }
 
   /// Return the location number that matches Loc.
@@ -261,8 +254,8 @@ public:
   void mapVirtRegs(LDVImpl *LDV);
 
   /// Add a definition point to this value.
-  void addDef(SlotIndex Idx, const MachineOperand &LocMO, bool IsIndirect) {
-    DbgValueLocation Loc(getLocationNo(LocMO), IsIndirect);
+  void addDef(SlotIndex Idx, const MachineOperand &LocMO) {
+    DbgValueLocation Loc(getLocationNo(LocMO));
     // Add a singular (Idx,Idx) -> Loc mapping.
     LocMap::iterator I = locInts.find(Idx);
     if (!I.valid() || I.start() != Idx)
@@ -297,11 +290,10 @@ public:
   ///
   /// \param LI      Scan for copies of the value in LI->reg.
   /// \param LocNo   Location number of LI->reg.
-  /// \param WasIndirect Indicates if the original use of LI->reg was indirect
   /// \param Kills   Points where the range of LocNo could be extended.
   /// \param [in,out] NewDefs Append (Idx, LocNo) of inserted defs here.
   void addDefsFromCopies(
-      LiveInterval *LI, unsigned LocNo, bool WasIndirect,
+      LiveInterval *LI, unsigned LocNo,
       const SmallVectorImpl<SlotIndex> &Kills,
       SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs,
       MachineRegisterInfo &MRI, LiveIntervals &LIS);
@@ -335,7 +327,29 @@ public:
   void print(raw_ostream &, const TargetRegisterInfo *);
 };
+} // namespace
 
+namespace llvm {
+template <> struct DenseMapInfo<UserValueIdentity> {
+  static UserValueIdentity getEmptyKey() {
+    auto Key = DenseMapInfo<DILocalVariable *>::getEmptyKey();
+    return UserValueIdentity(Key, nullptr, nullptr);
+  }
+  static UserValueIdentity getTombstoneKey() {
+    auto Key = DenseMapInfo<DILocalVariable *>::getTombstoneKey();
+    return UserValueIdentity(Key, nullptr, nullptr);
+  }
+  static unsigned getHashValue(const UserValueIdentity &Val) {
+    return Val.hash_value();
+  }
+  static bool isEqual(const UserValueIdentity &LHS,
+                      const UserValueIdentity &RHS) {
+    return LHS.match(RHS);
+  }
+};
+} // namespace llvm
+
+namespace {
 /// A user label is a part of a debug info user label.
 class UserLabel {
   const DILabel *Label; ///< The debug info label we are part of.
@@ -387,20 +401,20 @@ class LDVImpl {
   /// All allocated UserLabel instances.
   SmallVector<std::unique_ptr<UserLabel>, 2> userLabels;
 
-  /// Map virtual register to eq class leader.
-  using VRMap = DenseMap<unsigned, UserValue *>;
-  VRMap virtRegToEqClass;
+  /// Map virtual register to UserValues which use it.
+  using VRMap = DenseMap<unsigned, SmallVector<UserValue *, 4>>;
+  VRMap VirtRegToUserVals;
 
-  /// Map user variable to eq class leader.
-  using UVMap = DenseMap<const DILocalVariable *, UserValue *>;
-  UVMap userVarMap;
+  /// Map unique UserValue identity to UserValue.
+  using UVMap = DenseMap<UserValueIdentity, UserValue *>;
+  UVMap UserVarMap;
 
   /// Find or create a UserValue.
   UserValue *getUserValue(const DILocalVariable *Var, const DIExpression *Expr,
                           const DebugLoc &DL);
 
-  /// Find the EC leader for VirtReg or null.
-  UserValue *lookupVirtReg(unsigned VirtReg);
+  /// Find the UserValues for VirtReg or null.
+  SmallVectorImpl<UserValue *> *lookupVirtReg(unsigned VirtReg);
 
   /// Add DBG_VALUE instruction to our maps.
   ///
@@ -440,8 +454,8 @@ public:
     MF = nullptr;
     userValues.clear();
     userLabels.clear();
-    virtRegToEqClass.clear();
-    userVarMap.clear();
+    VirtRegToUserVals.clear();
+    UserVarMap.clear();
     // Make sure we call emitDebugValues if the machine function was modified.
     assert((!ModifiedMF || EmitDone) &&
            "Dbg values are not emitted in LDV");
     ModifiedMF = false;
   }
 
-  /// Map virtual register to an equivalence class.
-  void mapVirtReg(unsigned VirtReg, UserValue *EC);
+  /// Map virtual register to a UserValue.
+  void mapVirtReg(unsigned VirtReg, UserValue *UV);
 
   /// Replace all references to OldReg with NewRegs.
   void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs);
@@ -521,8 +535,6 @@ void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
       OS << "undef";
     else {
       OS << I.value().locNo();
-      if (I.value().wasIndirect())
-        OS << " ind";
     }
   }
   for (unsigned i = 0, e = locations.size(); i != e; ++i) {
@@ -554,37 +566,33 @@ void LDVImpl::print(raw_ostream &OS) {
 void UserValue::mapVirtRegs(LDVImpl *LDV) {
   for (unsigned i = 0, e = locations.size(); i != e; ++i)
     if (locations[i].isReg() &&
-        TargetRegisterInfo::isVirtualRegister(locations[i].getReg()))
+        Register::isVirtualRegister(locations[i].getReg()))
       LDV->mapVirtReg(locations[i].getReg(), this);
 }
 
 UserValue *LDVImpl::getUserValue(const DILocalVariable *Var,
                                  const DIExpression *Expr, const DebugLoc &DL) {
-  UserValue *&Leader = userVarMap[Var];
-  if (Leader) {
-    UserValue *UV = Leader->getLeader();
-    Leader = UV;
-    for (; UV; UV = UV->getNext())
-      if (UV->match(Var, Expr, DL->getInlinedAt()))
-        return UV;
-  }
+  auto Ident = UserValueIdentity(Var, Expr, DL->getInlinedAt());
+  UserValue *&UVEntry = UserVarMap[Ident];
 
-  userValues.push_back(
-      llvm::make_unique<UserValue>(Var, Expr, DL, allocator));
-  UserValue *UV = userValues.back().get();
-  Leader = UserValue::merge(Leader, UV);
-  return UV;
+  if (UVEntry)
+    return UVEntry;
+
+  userValues.push_back(std::make_unique<UserValue>(Var, Expr, DL, allocator));
+  return UVEntry = userValues.back().get();
 }
 
-void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) {
-  assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Only map VirtRegs");
-  UserValue *&Leader = virtRegToEqClass[VirtReg];
-  Leader = UserValue::merge(Leader, EC);
+void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *UV) {
+  assert(Register::isVirtualRegister(VirtReg) && "Only map VirtRegs");
+  assert(UserVarMap.find(UV->getId()) != UserVarMap.end() &&
+         "UserValue should exist in UserVarMap");
+  VirtRegToUserVals[VirtReg].push_back(UV);
 }
 
-UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
-  if (UserValue *UV = virtRegToEqClass.lookup(VirtReg))
-    return UV->getLeader();
+SmallVectorImpl<UserValue *> *LDVImpl::lookupVirtReg(unsigned VirtReg) {
+  VRMap::iterator Itr = VirtRegToUserVals.find(VirtReg);
+  if (Itr != VirtRegToUserVals.end())
+    return &Itr->getSecond();
   return nullptr;
 }
 
@@ -606,8 +614,8 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
   // could be removed or replaced by asserts.
   bool Discard = false;
   if (MI.getOperand(0).isReg() &&
-      TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg())) {
-    const unsigned Reg = MI.getOperand(0).getReg();
+      Register::isVirtualRegister(MI.getOperand(0).getReg())) {
+    const Register Reg = MI.getOperand(0).getReg();
     if (!LIS->hasInterval(Reg)) {
       // The DBG_VALUE is described by a virtual register that does not have a
      // live interval. Discard the DBG_VALUE.
@@ -631,19 +639,18 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
   }
 
   // Get or create the UserValue for (variable,offset) here.
-  bool IsIndirect = MI.getOperand(1).isImm();
-  if (IsIndirect)
-    assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset");
+  assert(!MI.getOperand(1).isImm() && "DBG_VALUE with indirect flag before "
+                                      "LiveDebugVariables");
   const DILocalVariable *Var = MI.getDebugVariable();
   const DIExpression *Expr = MI.getDebugExpression();
   UserValue *UV = getUserValue(Var, Expr, MI.getDebugLoc());
   if (!Discard)
-    UV->addDef(Idx, MI.getOperand(0), IsIndirect);
+    UV->addDef(Idx, MI.getOperand(0));
   else {
     MachineOperand MO = MachineOperand::CreateReg(0U, false);
     MO.setIsDebug();
-    UV->addDef(Idx, MO, false);
+    UV->addDef(Idx, MO);
   }
   return true;
 }
@@ -666,7 +673,7 @@ bool LDVImpl::handleDebugLabel(MachineInstr &MI, SlotIndex Idx) {
     }
   }
   if (!Found)
-    userLabels.push_back(llvm::make_unique<UserLabel>(Label, DL, Idx));
+    userLabels.push_back(std::make_unique<UserLabel>(Label, DL, Idx));
 
   return true;
 }
@@ -751,14 +758,14 @@ void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR,
 }
 
 void UserValue::addDefsFromCopies(
-    LiveInterval *LI, unsigned LocNo, bool WasIndirect,
+    LiveInterval *LI, unsigned LocNo,
     const SmallVectorImpl<SlotIndex> &Kills,
     SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs,
     MachineRegisterInfo &MRI, LiveIntervals &LIS) {
   if (Kills.empty())
     return;
   // Don't track copies from physregs, there are too many uses.
-  if (!TargetRegisterInfo::isVirtualRegister(LI->reg))
+  if (!Register::isVirtualRegister(LI->reg))
     return;
 
   // Collect all the (vreg, valno) pairs that are copies of LI.
@@ -768,13 +775,13 @@ void UserValue::addDefsFromCopies(
     // Copies of the full value.
     if (MO.getSubReg() || !MI->isCopy())
       continue;
-    unsigned DstReg = MI->getOperand(0).getReg();
+    Register DstReg = MI->getOperand(0).getReg();
 
     // Don't follow copies to physregs. These are usually setting up call
     // arguments, and the argument registers are always call clobbered. We are
     // better off in the source register which could be a callee-saved register,
     // or it could be spilled.
-    if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+    if (!Register::isVirtualRegister(DstReg))
       continue;
 
     // Is LocNo extended to reach this copy? If not, another def may be blocking
@@ -815,7 +822,7 @@ void UserValue::addDefsFromCopies(
       MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def);
       assert(CopyMI && CopyMI->isCopy() && "Bad copy value");
       unsigned LocNo = getLocationNo(CopyMI->getOperand(0));
-      DbgValueLocation NewLoc(LocNo, WasIndirect);
+      DbgValueLocation NewLoc(LocNo);
       I.insert(Idx, Idx.getNextSlot(), NewLoc);
       NewDefs.push_back(std::make_pair(Idx, NewLoc));
       break;
@@ -845,7 +852,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
     }
 
     // Register locations are constrained to where the register value is live.
-    if (TargetRegisterInfo::isVirtualRegister(LocMO.getReg())) {
+    if (Register::isVirtualRegister(LocMO.getReg())) {
       LiveInterval *LI = nullptr;
       const VNInfo *VNI = nullptr;
       if (LIS.hasInterval(LocMO.getReg())) {
@@ -863,8 +870,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
       // sub-register in that regclass). For now, simply skip handling copies if
       // a sub-register is involved.
       if (LI && !LocMO.getSubReg())
-        addDefsFromCopies(LI, Loc.locNo(), Loc.wasIndirect(), Kills, Defs, MRI,
-                          LIS);
+        addDefsFromCopies(LI, Loc.locNo(), Kills, Defs, MRI, LIS);
       continue;
     }
 
@@ -1123,16 +1129,18 @@ UserValue::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs,
 
 void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs) {
   bool DidChange = false;
-  for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext())
-    DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS);
+  if (auto *UserVals = lookupVirtReg(OldReg))
+    for (auto *UV : *UserVals)
+      DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS);
 
   if (!DidChange)
     return;
 
   // Map all of the new virtual registers.
-  UserValue *UV = lookupVirtReg(OldReg);
-  for (unsigned i = 0; i != NewRegs.size(); ++i)
-    mapVirtReg(NewRegs[i], UV);
+  if (auto *UserVals = lookupVirtReg(OldReg))
+    for (auto *UV : *UserVals)
+      for (unsigned i = 0; i != NewRegs.size(); ++i)
+        mapVirtReg(NewRegs[i], UV);
 }
 
 void LiveDebugVariables::
@@ -1161,10 +1169,10 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const MachineFunction &MF,
     MachineOperand Loc = locations[I];
     // Only virtual registers are rewritten.
     if (Loc.isReg() && Loc.getReg() &&
-        TargetRegisterInfo::isVirtualRegister(Loc.getReg())) {
-      unsigned VirtReg = Loc.getReg();
+        Register::isVirtualRegister(Loc.getReg())) {
+      Register VirtReg = Loc.getReg();
       if (VRM.isAssignedReg(VirtReg) &&
-          TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) {
+          Register::isPhysicalRegister(VRM.getPhys(VirtReg))) {
        // This can create a %noreg operand in rare cases when the sub-register
        // index is no longer available. That means the user value is in a
        // non-existent sub-register, and %noreg is exactly what we want.
@@ -1258,7 +1266,7 @@ findNextInsertLocation(MachineBasicBlock *MBB,
                        const TargetRegisterInfo &TRI) {
   if (!LocMO.isReg())
     return MBB->instr_end();
-  unsigned Reg = LocMO.getReg();
+  Register Reg = LocMO.getReg();
 
   // Find the next instruction in the MBB that define the register Reg.
   while (I != MBB->end() && !I->isTerminator()) {
@@ -1302,21 +1310,14 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
   // that the original virtual register was a pointer. Also, add the stack slot
   // offset for the spilled register to the expression.
   const DIExpression *Expr = Expression;
-  uint8_t DIExprFlags = DIExpression::ApplyOffset;
-  bool IsIndirect = Loc.wasIndirect();
-  if (Spilled) {
-    if (IsIndirect)
-      DIExprFlags |= DIExpression::DerefAfter;
-    Expr =
-        DIExpression::prepend(Expr, DIExprFlags, SpillOffset);
-    IsIndirect = true;
-  }
+  if (Spilled)
+    Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, SpillOffset);
 
   assert((!Spilled || MO.isFI()) && "a spilled location must be a frame index");
 
   do {
     BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE),
-            IsIndirect, MO, Variable, Expr);
+            Spilled, MO, Variable, Expr);
 
     // Continue and insert DBG_VALUES after every redefinition of register
     // associated with the debug value within the range
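The equivalence-class machinery (leader/next/merge) is gone from LiveDebugVariables: a UserValue is now found directly by its (Variable, Expression, InlinedAt) identity through a DenseMap keyed on UserValueIdentity. The same two idioms — a hashed structural key and the single-probe find-or-create that `getUserValue()` now uses — in portable standard C++ (all names below are illustrative, not LLVM API):

```cpp
#include <functional>
#include <memory>
#include <unordered_map>
#include <vector>

// Identity key: three opaque pointers, compared structurally.
struct ValueId {
  const void *Var, *Expr, *InlinedAt;
  bool operator==(const ValueId &O) const {
    return Var == O.Var && Expr == O.Expr && InlinedAt == O.InlinedAt;
  }
};

struct ValueIdHash {
  std::size_t operator()(const ValueId &Id) const {
    // Same spirit as hash_combine(Variable, Expression, InlinedAt).
    std::size_t H = std::hash<const void *>()(Id.Var);
    H = H * 31 + std::hash<const void *>()(Id.Expr);
    H = H * 31 + std::hash<const void *>()(Id.InlinedAt);
    return H;
  }
};

struct Value {}; // payload stand-in for UserValue

// Single-probe find-or-create: taking the map slot by reference means a
// newly created Value is stored without a second hash lookup -- the idiom
// getUserValue() uses with "UserValue *&UVEntry = UserVarMap[Ident]".
Value *getOrCreate(std::unordered_map<ValueId, Value *, ValueIdHash> &M,
                   const ValueId &Id,
                   std::vector<std::unique_ptr<Value>> &Owner) {
  Value *&Slot = M[Id];
  if (Slot)
    return Slot;
  Owner.push_back(std::make_unique<Value>());
  return Slot = Owner.back().get();
}
```

diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 70b2a77fe800..54ac46f2e7ce 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -886,7 +886,7 @@ static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR,
                                        const TargetRegisterInfo &TRI) {
   // Phys reg should not be tracked at subreg level.
   // Same for noreg (Reg == 0).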
-  if (!TargetRegisterInfo::isVirtualRegister(Reg) || !Reg)
+  if (!Register::isVirtualRegister(Reg) || !Reg)
     return;
   // Remove the values that don't define those lanes.
   SmallVector<VNInfo *, 8> ToBeRemoved;
@@ -917,7 +918,8 @@ static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR,
   for (VNInfo *VNI : ToBeRemoved)
     SR.removeValNo(VNI);
 
-  assert(!SR.empty() && "At least one value should be defined by this mask");
+  // If the subrange is empty at this point, the MIR is invalid. Do not assert
+  // and let the verifier catch this case.
 }
 
 void LiveInterval::refineSubRanges(
@@ -967,7 +968,7 @@ void LiveInterval::computeSubRangeUndefs(SmallVectorImpl<SlotIndex> &Undefs,
                                          LaneBitmask LaneMask,
                                          const MachineRegisterInfo &MRI,
                                          const SlotIndexes &Indexes) const {
-  assert(TargetRegisterInfo::isVirtualRegister(reg));
+  assert(Register::isVirtualRegister(reg));
   LaneBitmask VRegMask = MRI.getMaxLaneMaskForVReg(reg);
   assert((VRegMask & LaneMask).any());
   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
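The change running through this whole import — `TargetRegisterInfo::isVirtualRegister(Reg)` becoming `Register::isVirtualRegister(Reg)`, and `unsigned Reg` becoming `Register Reg` — moves the register-kind predicates onto the new `llvm::Register` value type. A toy model of the encoding idea, inferred from the predicates used in these hunks (simplified; the real class also reserves a range for stack slots, cf. `Register::index2StackSlot` in the LiveStacks hunk below):

```cpp
#include <cstdint>

// Toy register id space: 0 = no register, small positive ids = physical
// registers, ids with the top bit set = virtual registers.
class Reg {
  uint32_t Id;

public:
  static constexpr uint32_t VirtBit = 0x80000000u;
  explicit Reg(uint32_t Id) : Id(Id) {}
  static bool isVirtualRegister(uint32_t Id) { return (Id & VirtBit) != 0; }
  static bool isPhysicalRegister(uint32_t Id) {
    return Id != 0 && (Id & VirtBit) == 0;
  }
  static uint32_t index2VirtReg(uint32_t Index) { return Index | VirtBit; }
  uint32_t id() const { return Id; }
};
```

diff --git a/lib/CodeGen/LiveIntervals.cpp b/lib/CodeGen/LiveIntervals.cpp
index aa85569063b3..2989930ad093 100644
--- a/lib/CodeGen/LiveIntervals.cpp
+++ b/lib/CodeGen/LiveIntervals.cpp
@@ -14,7 +14,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/LiveIntervals.h"
-#include "LiveRangeCalc.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -22,6 +21,7 @@
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveRangeCalc.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -108,7 +108,7 @@ LiveIntervals::~LiveIntervals() {
 void LiveIntervals::releaseMemory() {
   // Free the live intervals themselves.
   for (unsigned i = 0, e = VirtRegIntervals.size(); i != e; ++i)
-    delete VirtRegIntervals[TargetRegisterInfo::index2VirtReg(i)];
+    delete VirtRegIntervals[Register::index2VirtReg(i)];
   VirtRegIntervals.clear();
   RegMaskSlots.clear();
   RegMaskBits.clear();
@@ -161,7 +161,7 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
 
   // Dump the virtregs.
   for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
-    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    unsigned Reg = Register::index2VirtReg(i);
     if (hasInterval(Reg))
       OS << getInterval(Reg) << '\n';
   }
@@ -186,7 +186,7 @@ LLVM_DUMP_METHOD void LiveIntervals::dumpInstrs() const {
 #endif
 
 LiveInterval* LiveIntervals::createInterval(unsigned reg) {
-  float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? huge_valf : 0.0F;
+  float Weight = Register::isPhysicalRegister(reg) ?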
                 huge_valf : 0.0F;
   return new LiveInterval(reg, Weight);
 }
 
@@ -201,7 +201,7 @@ void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
 
 void LiveIntervals::computeVirtRegs() {
   for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
-    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    unsigned Reg = Register::index2VirtReg(i);
     if (MRI->reg_nodbg_empty(Reg))
       continue;
     createAndComputeVirtRegInterval(Reg);
@@ -441,8 +441,8 @@ void LiveIntervals::extendSegmentsToUses(LiveRange &Segments,
 bool LiveIntervals::shrinkToUses(LiveInterval *li,
                                  SmallVectorImpl<MachineInstr*> *dead) {
   LLVM_DEBUG(dbgs() << "Shrink: " << *li << '\n');
-  assert(TargetRegisterInfo::isVirtualRegister(li->reg)
-         && "Can only shrink virtual registers");
+  assert(Register::isVirtualRegister(li->reg) &&
+         "Can only shrink virtual registers");
 
   // Shrink subregister live ranges.
   bool NeedsCleanup = false;
@@ -541,8 +541,8 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
 
 void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) {
   LLVM_DEBUG(dbgs() << "Shrink: " << SR << '\n');
-  assert(TargetRegisterInfo::isVirtualRegister(Reg)
-         && "Can only shrink virtual registers");
+  assert(Register::isVirtualRegister(Reg) &&
+         "Can only shrink virtual registers");
 
   // Find all the values used, including PHI kills.
   ShrinkToUsesWorkList WorkList;
@@ -688,7 +688,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
                         LiveRange::const_iterator>, 4> SRs;
 
   for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
-    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    unsigned Reg = Register::index2VirtReg(i);
     if (MRI->reg_nodbg_empty(Reg))
       continue;
     const LiveInterval &LI = getInterval(Reg);
@@ -986,10 +986,10 @@ public:
           MO.setIsKill(false);
       }
 
-      unsigned Reg = MO.getReg();
+      Register Reg = MO.getReg();
       if (!Reg)
         continue;
-      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+      if (Register::isVirtualRegister(Reg)) {
         LiveInterval &LI = LIS.getInterval(Reg);
         if (LI.hasSubRanges()) {
           unsigned SubReg = MO.getSubReg();
@@ -1023,7 +1023,7 @@ private:
       return;
     LLVM_DEBUG({
       dbgs() << "     ";
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
-      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+      if (Register::isVirtualRegister(Reg)) {
         dbgs() << printReg(Reg);
         if (LaneMask.any())
           dbgs() << " L" << PrintLaneMask(LaneMask);
@@ -1288,6 +1288,20 @@ private:
       const SlotIndex SplitPos = NewIdxDef;
       OldIdxVNI = OldIdxIn->valno;
 
+      SlotIndex NewDefEndPoint = std::next(NewIdxIn)->end;
+      LiveRange::iterator Prev = std::prev(OldIdxIn);
+      if (OldIdxIn != LR.begin() &&
+          SlotIndex::isEarlierInstr(NewIdx, Prev->end)) {
+        // If the segment before OldIdx read a value defined earlier than
+        // NewIdx, the moved instruction also reads and forwards that
+        // value. Extend the lifetime of the new def point.
+
+        // Extend to where the previous range started, unless there is
+        // another redef first.
+        NewDefEndPoint = std::min(OldIdxIn->start,
+                                  std::next(NewIdxOut)->start);
+      }
+
       // Merge the OldIdxIn and OldIdxOut segments into OldIdxOut.
       OldIdxOut->valno->def = OldIdxIn->start;
       *OldIdxOut = LiveRange::Segment(OldIdxIn->start, OldIdxOut->end,
@@ -1305,7 +1319,8 @@ private:
         // There is no gap between NewSegment and its predecessor.
         *NewSegment = LiveRange::Segment(Next->start, SplitPos,
                                          Next->valno);
-        *Next = LiveRange::Segment(SplitPos, Next->end, OldIdxVNI);
+
+        *Next = LiveRange::Segment(SplitPos, NewDefEndPoint, OldIdxVNI);
         Next->valno->def = SplitPos;
       } else {
         // There is a gap between NewSegment and its predecessor
@@ -1384,7 +1399,7 @@ private:
   // Return the last use of reg between NewIdx and OldIdx.
   SlotIndex findLastUseBefore(SlotIndex Before, unsigned Reg,
                               LaneBitmask LaneMask) {
-    if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+    if (Register::isVirtualRegister(Reg)) {
       SlotIndex LastUse = Before;
       for (MachineOperand &MO : MRI.use_nodbg_operands(Reg)) {
         if (MO.isUndef())
@@ -1429,7 +1444,7 @@ private:
         // Check if MII uses Reg.
         for (MIBundleOperands MO(*MII); MO.isValid(); ++MO)
           if (MO->isReg() && !MO->isUndef() &&
-              TargetRegisterInfo::isPhysicalRegister(MO->getReg()) &&
+              Register::isPhysicalRegister(MO->getReg()) &&
               TRI.hasRegUnit(MO->getReg(), Reg))
             return Idx.getRegSlot();
       }
@@ -1439,7 +1454,10 @@ private:
 };
 
 void LiveIntervals::handleMove(MachineInstr &MI, bool UpdateFlags) {
-  assert(!MI.isBundled() && "Can't handle bundled instructions yet.");
+  // It is fine to move a bundle as a whole, but not an individual instruction
+  // inside it.
+  assert((!MI.isBundled() || MI.getOpcode() == TargetOpcode::BUNDLE) &&
+         "Cannot move instruction in bundle");
   SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
   Indexes->removeMachineInstrFromMaps(MI);
   SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(MI);
@@ -1582,8 +1600,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
     for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
                                           MOE = MI.operands_end();
          MOI != MOE; ++MOI) {
-      if (MOI->isReg() &&
-          TargetRegisterInfo::isVirtualRegister(MOI->getReg()) &&
+      if (MOI->isReg() && Register::isVirtualRegister(MOI->getReg()) &&
           !hasInterval(MOI->getReg())) {
         createAndComputeVirtRegInterval(MOI->getReg());
       }
@@ -1591,7 +1608,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
   }
 
   for (unsigned Reg : OrigRegs) {
-    if (!TargetRegisterInfo::isVirtualRegister(Reg))
+    if (!Register::isVirtualRegister(Reg))
       continue;
 
     LiveInterval &LI = getInterval(Reg);
@@ -1642,7 +1659,7 @@ void LiveIntervals::splitSeparateComponents(LiveInterval &LI,
   unsigned Reg = LI.reg;
   const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
   for (unsigned I = 1; I < NumComp; ++I) {
-    unsigned NewVReg = MRI->createVirtualRegister(RegClass);
+    Register NewVReg = MRI->createVirtualRegister(RegClass);
     LiveInterval &NewLI = createEmptyInterval(NewVReg);
     SplitLIs.push_back(&NewLI);
   }
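The NewDefEndPoint hunk above is the delicate part of moving an instruction up: the segment at the old position is split at SplitPos, the head keeps its value number, and the tail becomes the moved def whose end must be chosen so it cannot run past the next redefinition. A loose, self-contained illustration over a toy segment list (a sketch of the shape of the operation only, not the HMEditor logic):

```cpp
#include <cstddef>
#include <vector>

struct Segment { int Start, End, ValNo; }; // half-open [Start, End)

// Split the segment containing SplitPos: the head keeps its value number,
// the tail is re-defined at SplitPos and ends at NewDefEndPoint (the
// analogue of capping against the next redef in the real code).
void splitAt(std::vector<Segment> &LR, int SplitPos, int MovedValNo,
             int NewDefEndPoint) {
  for (std::size_t I = 0, E = LR.size(); I != E; ++I) {
    if (LR[I].Start < SplitPos && SplitPos < LR[I].End) {
      Segment Tail{SplitPos, NewDefEndPoint, MovedValNo};
      LR[I].End = SplitPos;
      LR.insert(LR.begin() + I + 1, Tail);
      return;
    }
  }
}
```

diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp
index cd3d248ac878..c2a1cc7c6490 100644
--- a/lib/CodeGen/LivePhysRegs.cpp
+++ b/lib/CodeGen/LivePhysRegs.cpp
@@ -46,8 +46,8 @@ void LivePhysRegs::removeDefs(const MachineInstr &MI) {
     if (O->isReg()) {
       if (!O->isDef() || O->isDebug())
         continue;
-      unsigned Reg = O->getReg();
-      if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+      Register Reg = O->getReg();
+      if (!Register::isPhysicalRegister(Reg))
         continue;
       removeReg(Reg);
     } else if (O->isRegMask())
@@ -60,8 +60,8 @@ void LivePhysRegs::addUses(const MachineInstr &MI) {
   for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
     if (!O->isReg() || !O->readsReg() || O->isDebug())
       continue;
-    unsigned Reg = O->getReg();
-    if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+    Register Reg = O->getReg();
+    if (!Register::isPhysicalRegister(Reg))
       continue;
     addReg(Reg);
   }
@@ -86,8 +86,8 @@ void LivePhysRegs::stepForward(const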
MachineInstr &MI,
   // Remove killed registers from the set.
   for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
     if (O->isReg() && !O->isDebug()) {
-      unsigned Reg = O->getReg();
-      if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+      Register Reg = O->getReg();
+      if (!Register::isPhysicalRegister(Reg))
         continue;
       if (O->isDef()) {
         // Note, dead defs are still recorded. The caller should decide how to
@@ -292,10 +292,10 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {
       if (!MO->isReg() || !MO->isDef() || MO->isDebug())
         continue;
 
-      unsigned Reg = MO->getReg();
+      Register Reg = MO->getReg();
       if (Reg == 0)
         continue;
-      assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+      assert(Register::isPhysicalRegister(Reg));
 
       bool IsNotLive = LiveRegs.available(MRI, Reg);
       MO->setIsDead(IsNotLive);
@@ -309,10 +309,10 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {
       if (!MO->isReg() || !MO->readsReg() || MO->isDebug())
         continue;
 
-      unsigned Reg = MO->getReg();
+      Register Reg = MO->getReg();
       if (Reg == 0)
         continue;
-      assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+      assert(Register::isPhysicalRegister(Reg));
 
       bool IsNotLive = LiveRegs.available(MRI, Reg);
       MO->setIsKill(IsNotLive);
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index d670f28df6ba..24b57be0da00 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -10,7 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "LiveRangeCalc.h"
+#include "llvm/CodeGen/LiveRangeCalc.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
@@ -372,8 +372,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
       report_fatal_error("Use not jointly dominated by defs.");
     }
 
-    if (TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
-        !MBB->isLiveIn(PhysReg)) {
+    if (Register::isPhysicalRegister(PhysReg) && !MBB->isLiveIn(PhysReg)) {
       MBB->getParent()->verify();
       const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
       errs() << "The register " << printReg(PhysReg, TRI)
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
deleted file mode 100644
index 11aea5a3b016..000000000000
--- a/lib/CodeGen/LiveRangeCalc.h
+++ /dev/null
@@ -1,297 +0,0 @@
-//===- LiveRangeCalc.h - Calculate live ranges ------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// The LiveRangeCalc class can be used to compute live ranges from scratch.  It
-// caches information about values in the CFG to speed up repeated operations
-// on the same live range.  The cache can be shared by non-overlapping live
-// ranges.  SplitKit uses that when computing the live range of split products.
-//
-// A low-level interface is available to clients that know where a variable is
-// live, but don't know which value it has as every point.  LiveRangeCalc will
-// propagate values down the dominator tree, and even insert PHI-defs where
-// needed.  SplitKit uses this faster interface when possible.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_CODEGEN_LIVERANGECALC_H
-#define LLVM_LIB_CODEGEN_LIVERANGECALC_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/MC/LaneBitmask.h"
-#include <utility>
-
-namespace llvm {
-
-template <class NodeT> class DomTreeNodeBase;
-class MachineDominatorTree;
-class MachineFunction;
-class MachineRegisterInfo;
-
-using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>;
-
-class LiveRangeCalc {
-  const MachineFunction *MF = nullptr;
-  const MachineRegisterInfo *MRI = nullptr;
-  SlotIndexes *Indexes = nullptr;
-  MachineDominatorTree *DomTree = nullptr;
-  VNInfo::Allocator *Alloc = nullptr;
-
-  /// LiveOutPair - A value and the block that defined it.  The domtree node is
-  /// redundant, it can be computed as: MDT[Indexes.getMBBFromIndex(VNI->def)].
-  using LiveOutPair = std::pair<VNInfo *, MachineDomTreeNode *>;
-
-  /// LiveOutMap - Map basic blocks to the value leaving the block.
-  using LiveOutMap = IndexedMap<LiveOutPair, MBB2NumberFunctor>;
-
-  /// Bit vector of active entries in LiveOut, also used as a visited set by
-  /// findReachingDefs.  One entry per basic block, indexed by block number.
-  /// This is kept as a separate bit vector because it can be cleared quickly
-  /// when switching live ranges.
-  BitVector Seen;
-
-  /// Map LiveRange to sets of blocks (represented by bit vectors) that
-  /// in the live range are defined on entry and undefined on entry.
-  /// A block is defined on entry if there is a path from at least one of
-  /// the defs in the live range to the entry of the block, and conversely,
-  /// a block is undefined on entry, if there is no such path (i.e. no
-  /// definition reaches the entry of the block). A single LiveRangeCalc
-  /// object is used to track live-out information for multiple registers
-  /// in live range splitting (which is ok, since the live ranges of these
-  /// registers do not overlap), but the defined/undefined information must
-  /// be kept separate for each individual range.
-  /// By convention, EntryInfoMap[&LR] = { Defined, Undefined }.
-  using EntryInfoMap = DenseMap<LiveRange *, std::pair<BitVector, BitVector>>;
-  EntryInfoMap EntryInfos;
-
-  /// Map each basic block where a live range is live out to the live-out value
-  /// and its defining block.
-  ///
-  /// For every basic block, MBB, one of these conditions shall be true:
-  ///
-  ///  1. !Seen.count(MBB->getNumber())
-  ///     Blocks without a Seen bit are ignored.
-  ///  2. LiveOut[MBB].second.getNode() == MBB
-  ///     The live-out value is defined in MBB.
-  ///  3. forall P in preds(MBB): LiveOut[P] == LiveOut[MBB]
-  ///     The live-out value passses through MBB. All predecessors must carry
-  ///     the same value.
-  ///
-  /// The domtree node may be null, it can be computed.
-  ///
-  /// The map can be shared by multiple live ranges as long as no two are
-  /// live-out of the same block.
-  LiveOutMap Map;
-
-  /// LiveInBlock - Information about a basic block where a live range is known
-  /// to be live-in, but the value has not yet been determined.
-  struct LiveInBlock {
-    // The live range set that is live-in to this block.  The algorithms can
-    // handle multiple non-overlapping live ranges simultaneously.
-    LiveRange &LR;
-
-    // DomNode - Dominator tree node for the block.
-    // Cleared when the final value has been determined and LI has been updated.
-    MachineDomTreeNode *DomNode;
-
-    // Position in block where the live-in range ends, or SlotIndex() if the
-    // range passes through the block.  When the final value has been
-    // determined, the range from the block start to Kill will be added to LI.
-    SlotIndex Kill;
-
-    // Live-in value filled in by updateSSA once it is known.
-    VNInfo *Value = nullptr;
-
-    LiveInBlock(LiveRange &LR, MachineDomTreeNode *node, SlotIndex kill)
-      : LR(LR), DomNode(node), Kill(kill) {}
-  };
-
-  /// LiveIn - Work list of blocks where the live-in value has yet to be
-  /// determined.  This list is typically computed by findReachingDefs() and
-  /// used as a work list by updateSSA().  The low-level interface may also be
-  /// used to add entries directly.
-  SmallVector<LiveInBlock, 16> LiveIn;
-
-  /// Check if the entry to block @p MBB can be reached by any of the defs
-  /// in @p LR. Return true if none of the defs reach the entry to @p MBB.
-  bool isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs,
-                    MachineBasicBlock &MBB, BitVector &DefOnEntry,
-                    BitVector &UndefOnEntry);
-
-  /// Find the set of defs that can reach @p Kill. @p Kill must belong to
-  /// @p UseMBB.
-  ///
-  /// If exactly one def can reach @p UseMBB, and the def dominates @p Kill,
-  /// all paths from the def to @p UseMBB are added to @p LR, and the function
-  /// returns true.
-  ///
-  /// If multiple values can reach @p UseMBB, the blocks that need @p LR to be
-  /// live in are added to the LiveIn array, and the function returns false.
-  ///
-  /// The array @p Undef provides the locations where the range @p LR becomes
-  /// undefined by <def,read-undef> operands on other subranges. If @p Undef
-  /// is non-empty and @p Kill is jointly dominated only by the entries of
-  /// @p Undef, the function returns false.
-  ///
-  /// PhysReg, when set, is used to verify live-in lists on basic blocks.
-  bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
-                        SlotIndex Use, unsigned PhysReg,
-                        ArrayRef<SlotIndex> Undefs);
-
-  /// updateSSA - Compute the values that will be live in to all requested
-  /// blocks in LiveIn.  Create PHI-def values as required to preserve SSA form.
-  ///
-  /// Every live-in block must be jointly dominated by the added live-out
-  /// blocks.  No values are read from the live ranges.
-  void updateSSA();
-
-  /// Transfer information from the LiveIn vector to the live ranges and update
-  /// the given @p LiveOuts.
-  void updateFromLiveIns();
-
-  /// Extend the live range of @p LR to reach all uses of Reg.
-  ///
-  /// If @p LR is a main range, or if @p LI is null, then all uses must be
-  /// jointly dominated by the definitions from @p LR. If @p LR is a subrange
-  /// of the live interval @p LI, corresponding to lane mask @p LaneMask,
-  /// all uses must be jointly dominated by the definitions from @p LR
-  /// together with definitions of other lanes where @p LR becomes undefined
-  /// (via <def,read-undef> operands).
-  /// If @p LR is a main range, the @p LaneMask should be set to ~0, i.e.
-  /// LaneBitmask::getAll().
-  void extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask,
-                    LiveInterval *LI = nullptr);
-
-  /// Reset Map and Seen fields.
-  void resetLiveOutMap();
-
-public:
-  LiveRangeCalc() = default;
-
-  //===--------------------------------------------------------------------===//
-  // High-level interface.
-  //===--------------------------------------------------------------------===//
-  //
-  // Calculate live ranges from scratch.
-  //
-
-  /// reset - Prepare caches for a new set of non-overlapping live ranges.  The
-  /// caches must be reset before attempting calculations with a live range
-  /// that may overlap a previously computed live range, and before the first
-  /// live range in a function.  If live ranges are not known to be
-  /// non-overlapping, call reset before each.
-  void reset(const MachineFunction *mf, SlotIndexes *SI,
-             MachineDominatorTree *MDT, VNInfo::Allocator *VNIA);
-
-  //===--------------------------------------------------------------------===//
-  // Mid-level interface.
-  //===--------------------------------------------------------------------===//
-  //
-  // Modify existing live ranges.
-  //
-
-  /// Extend the live range of @p LR to reach @p Use.
-  ///
-  /// The existing values in @p LR must be live so they jointly dominate @p Use.
-  /// If @p Use is not dominated by a single existing value, PHI-defs are
-  /// inserted as required to preserve SSA form.
-  ///
-  /// PhysReg, when set, is used to verify live-in lists on basic blocks.
-  void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg,
-              ArrayRef<SlotIndex> Undefs);
-
-  /// createDeadDefs - Create a dead def in LI for every def operand of Reg.
-  /// Each instruction defining Reg gets a new VNInfo with a corresponding
-  /// minimal live range.
-  void createDeadDefs(LiveRange &LR, unsigned Reg);
-
-  /// Extend the live range of @p LR to reach all uses of Reg.
-  ///
-  /// All uses must be jointly dominated by existing liveness.  PHI-defs are
-  /// inserted as needed to preserve SSA form.
-  void extendToUses(LiveRange &LR, unsigned PhysReg) {
-    extendToUses(LR, PhysReg, LaneBitmask::getAll());
-  }
-
-  /// Calculates liveness for the register specified in live interval @p LI.
-  /// Creates subregister live ranges as needed if subreg liveness tracking is
-  /// enabled.
-  void calculate(LiveInterval &LI, bool TrackSubRegs);
-
-  /// For live interval \p LI with correct SubRanges construct matching
-  /// information for the main live range. Expects the main live range to not
-  /// have any segments or value numbers.
-  void constructMainRangeFromSubranges(LiveInterval &LI);
-
-  //===--------------------------------------------------------------------===//
-  // Low-level interface.
-  //===--------------------------------------------------------------------===//
-  //
-  // These functions can be used to compute live ranges where the live-in and
-  // live-out blocks are already known, but the SSA value in each block is
-  // unknown.
-  //
-  // After calling reset(), add known live-out values and known live-in blocks.
-  // Then call calculateValues() to compute the actual value that is
-  // live-in to each block, and add liveness to the live ranges.
-  //
-
-  /// setLiveOutValue - Indicate that VNI is live out from MBB.  The
-  /// calculateValues() function will not add liveness for MBB, the caller
-  /// should take care of that.
-  ///
-  /// VNI may be null only if MBB is a live-through block also passed to
-  /// addLiveInBlock().
-  void setLiveOutValue(MachineBasicBlock *MBB, VNInfo *VNI) {
-    Seen.set(MBB->getNumber());
-    Map[MBB] = LiveOutPair(VNI, nullptr);
-  }
-
-  /// addLiveInBlock - Add a block with an unknown live-in value.  This
-  /// function can only be called once per basic block.  Once the live-in value
-  /// has been determined, calculateValues() will add liveness to LI.
-  ///
-  /// @param LR      The live range that is live-in to the block.
-  /// @param DomNode The domtree node for the block.
-  /// @param Kill    Index in block where LI is killed.  If the value is
-  ///                live-through, set Kill = SLotIndex() and also call
-  ///                setLiveOutValue(MBB, 0).
-  void addLiveInBlock(LiveRange &LR,
-                      MachineDomTreeNode *DomNode,
-                      SlotIndex Kill = SlotIndex()) {
-    LiveIn.push_back(LiveInBlock(LR, DomNode, Kill));
-  }
-
-  /// calculateValues - Calculate the value that will be live-in to each block
-  /// added with addLiveInBlock.  Add PHI-def values as needed to preserve SSA
-  /// form.  Add liveness to all live-in blocks up to the Kill point, or the
-  /// whole block for live-through blocks.
-  ///
-  /// Every predecessor of a live-in block must have been given a value with
-  /// setLiveOutValue, the value may be null for live-trough blocks.
-  void calculateValues();
-
-  /// A diagnostic function to check if the end of the block @p MBB is
-  /// jointly dominated by the blocks corresponding to the slot indices
-  /// in @p Defs. This function is mainly for use in self-verification
-  /// checks.
-  LLVM_ATTRIBUTE_UNUSED
-  static bool isJointlyDominated(const MachineBasicBlock *MBB,
-                                 ArrayRef<SlotIndex> Defs,
-                                 const SlotIndexes &Indexes);
-};
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_CODEGEN_LIVERANGECALC_H
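LiveRangeCalc.h is deleted from lib/CodeGen only because it moved into the public include tree: the LiveIntervals.cpp and LiveRangeCalc.cpp hunks above now pull in "llvm/CodeGen/LiveRangeCalc.h". Going by the mid-level contract documented in the deleted header, a hypothetical client of the moved header would look like this (the wrapper function is illustrative, not from the source):

```cpp
#include "llvm/CodeGen/LiveRangeCalc.h"

// Extend LR so it reaches Use. Per the header's documented contract, the
// existing values in LR must jointly dominate Use; LiveRangeCalc inserts
// PHI-defs as required to preserve SSA form.
void extendToUse(llvm::LiveRangeCalc &Calc, llvm::LiveRange &LR,
                 llvm::SlotIndex Use) {
  Calc.extend(LR, Use, /*PhysReg=*/0, /*Undefs=*/{});
}
```

diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 882e562ba95c..34bac082bcd7 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -32,7 +32,7 @@ void LiveRangeEdit::Delegate::anchor() { }
 
 LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg,
                                                      bool createSubRanges) {
-  unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+  Register VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
   if (VRM)
     VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
 
@@ -52,7 +52,7 @@ LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg,
 }
 
 unsigned LiveRangeEdit::createFrom(unsigned OldReg) {
-  unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+  Register VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
   if (VRM) {
     VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
   }
@@ -114,7 +114,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
       continue;
 
     // We can't remat physreg uses, unless it is a constant.
-    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+    if (Register::isPhysicalRegister(MO.getReg())) {
       if (MRI.isConstantPhysReg(MO.getReg()))
         continue;
       return false;
@@ -232,7 +232,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
   LLVM_DEBUG(dbgs() << "                folded: " << *FoldMI);
   LIS.ReplaceMachineInstrInMaps(*UseMI, *FoldMI);
   if (UseMI->isCall())
-    UseMI->getMF()->updateCallSiteInfo(UseMI, FoldMI);
+    UseMI->getMF()->moveCallSiteInfo(UseMI, FoldMI);
   UseMI->eraseFromParent();
   DefMI->addRegisterDead(LI->reg, nullptr);
   Dead.push_back(DefMI);
@@ -308,8 +308,8 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
                                        MOE = MI->operands_end();
        MOI != MOE; ++MOI) {
     if (!MOI->isReg())
       continue;
-    unsigned Reg = MOI->getReg();
-    if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+    Register Reg = MOI->getReg();
+    if (!Register::isVirtualRegister(Reg)) {
       // Check if MI reads any unreserved physregs.
       if (Reg && MOI->readsReg() && !MRI.isReserved(Reg))
         ReadsPhysRegs = true;
@@ -349,7 +349,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
     // Remove all operands that aren't physregs.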
     for (unsigned i = MI->getNumOperands(); i; --i) {
       const MachineOperand &MO = MI->getOperand(i-1);
-      if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+      if (MO.isReg() && Register::isPhysicalRegister(MO.getReg()))
         continue;
       MI->RemoveOperand(i-1);
     }
diff --git a/lib/CodeGen/LiveRangeShrink.cpp b/lib/CodeGen/LiveRangeShrink.cpp
index 8818f1ce0ad9..cbf112ee2bd5 100644
--- a/lib/CodeGen/LiveRangeShrink.cpp
+++ b/lib/CodeGen/LiveRangeShrink.cpp
@@ -172,10 +172,10 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
       for (const MachineOperand &MO : MI.operands()) {
         if (!MO.isReg() || MO.isDead() || MO.isDebug())
           continue;
-        unsigned Reg = MO.getReg();
+        Register Reg = MO.getReg();
         // Do not move the instruction if it def/uses a physical register,
         // unless it is a constant physical register or a noreg.
-        if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+        if (!Register::isVirtualRegister(Reg)) {
           if (!Reg || MRI.isConstantPhysReg(Reg))
             continue;
           Insert = nullptr;
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
index ce99e5535c25..72c79e5f8a75 100644
--- a/lib/CodeGen/LiveRegMatrix.cpp
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -118,7 +118,7 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
 }
 
 void LiveRegMatrix::unassign(LiveInterval &VirtReg) {
-  unsigned PhysReg = VRM->getPhys(VirtReg.reg);
+  Register PhysReg = VRM->getPhys(VirtReg.reg);
   LLVM_DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg, TRI) << " from "
                     << printReg(PhysReg, TRI) << ':');
   VRM->clearVirt(VirtReg.reg);
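The operand-removal loop in eliminateDeadDef at the top of this hunk walks indices backwards (`for (unsigned i = MI->getNumOperands(); i; --i)`) precisely so that RemoveOperand does not shift the indices of operands still to be visited. The same idiom at the container level (toy predicate, illustrative only):

```cpp
#include <cstddef>
#include <vector>

// Erase elements while iterating: walking backwards keeps the indices of
// the not-yet-visited elements stable under erase().
void removeNonMatching(std::vector<int> &Ops) {
  for (std::size_t I = Ops.size(); I; --I)
    if (Ops[I - 1] < 0) // stand-in for "is not a physreg operand"
      Ops.erase(Ops.begin() + (I - 1));
}
```

diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp
index 6afb7fb7aa11..97763def1f40 100644
--- a/lib/CodeGen/LiveRegUnits.cpp
+++ b/lib/CodeGen/LiveRegUnits.cpp
@@ -47,8 +47,8 @@ void LiveRegUnits::stepBackward(const MachineInstr &MI) {
     if (O->isReg()) {
       if (!O->isDef() || O->isDebug())
         continue;
-      unsigned Reg = O->getReg();
-      if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+      Register Reg = O->getReg();
+      if (!Register::isPhysicalRegister(Reg))
         continue;
       removeReg(Reg);
     } else if (O->isRegMask())
@@ -59,8 +59,8 @@ void LiveRegUnits::stepBackward(const MachineInstr &MI) {
   for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
     if (!O->isReg() || !O->readsReg() || O->isDebug())
       continue;
-    unsigned Reg = O->getReg();
-    if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+    Register Reg = O->getReg();
+    if (!Register::isPhysicalRegister(Reg))
      continue;
     addReg(Reg);
   }
@@ -70,8 +70,8 @@ void LiveRegUnits::accumulate(const MachineInstr &MI) {
   // Add defs, uses and regmask clobbers to the set.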
   for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
     if (O->isReg()) {
-      unsigned Reg = O->getReg();
-      if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+      Register Reg = O->getReg();
+      if (!Register::isPhysicalRegister(Reg))
         continue;
       if (!O->isDef() && !O->readsReg())
         continue;
diff --git a/lib/CodeGen/LiveStacks.cpp b/lib/CodeGen/LiveStacks.cpp
index f55977d72723..8df84ebf4f06 100644
--- a/lib/CodeGen/LiveStacks.cpp
+++ b/lib/CodeGen/LiveStacks.cpp
@@ -58,9 +58,10 @@ LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
   assert(Slot >= 0 && "Spill slot indice must be >= 0");
   SS2IntervalMap::iterator I = S2IMap.find(Slot);
   if (I == S2IMap.end()) {
-    I = S2IMap.emplace(std::piecewise_construct, std::forward_as_tuple(Slot),
-                       std::forward_as_tuple(
-                           TargetRegisterInfo::index2StackSlot(Slot), 0.0F))
+    I = S2IMap
+            .emplace(
+                std::piecewise_construct, std::forward_as_tuple(Slot),
+                std::forward_as_tuple(Register::index2StackSlot(Slot), 0.0F))
             .first;
     S2RCMap.insert(std::make_pair(Slot, RC));
   } else {
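The reformatted emplace in LiveStacks::getOrCreateInterval uses std::piecewise_construct so that both the key and the mapped LiveInterval are constructed in place from argument tuples — necessary when the mapped type is not default-constructible or cheaply copyable. The pattern in isolation (toy Interval type, illustrative only):

```cpp
#include <map>
#include <tuple>
#include <utility>

struct Interval {
  unsigned Reg;
  float Weight;
  Interval(unsigned R, float W) : Reg(R), Weight(W) {}
};

int main() {
  std::map<int, Interval> M;
  // Key built in place from (3), value built in place from (42u, 0.0f).
  M.emplace(std::piecewise_construct, std::forward_as_tuple(3),
            std::forward_as_tuple(42u, 0.0f));
  return M.at(3).Reg == 42 ? 0 : 1;
}
```

diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index aaff982ef1b0..9bd55c6f750f 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -26,6 +26,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -82,7 +83,7 @@ LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const {
 
 /// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
 LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
-  assert(TargetRegisterInfo::isVirtualRegister(RegIdx) &&
+  assert(Register::isVirtualRegister(RegIdx) &&
          "getVarInfo: not a virtual register!");
   VirtRegInfo.grow(RegIdx);
   return VirtRegInfo[RegIdx];
@@ -214,7 +215,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
     MachineOperand &MO = LastDef->getOperand(i);
     if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
       continue;
-    unsigned DefReg = MO.getReg();
+    Register DefReg = MO.getReg();
     if (TRI->isSubRegister(Reg, DefReg)) {
       for (MCSubRegIterator SubRegs(DefReg, TRI, /*IncludeSelf=*/true);
            SubRegs.isValid(); ++SubRegs)
@@ -519,10 +520,9 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
     }
     if (!MO.isReg() || MO.getReg() == 0)
       continue;
-    unsigned MOReg = MO.getReg();
+    Register MOReg = MO.getReg();
     if (MO.isUse()) {
-      if (!(TargetRegisterInfo::isPhysicalRegister(MOReg) &&
-            MRI->isReserved(MOReg)))
+      if (!(Register::isPhysicalRegister(MOReg) && MRI->isReserved(MOReg)))
         MO.setIsKill(false);
       if (MO.readsReg())
         UseRegs.push_back(MOReg);
     } else {
       assert(MO.isDef());
       // FIXME: We should not remove any dead flags. However the MIPS RDDSP
       // instruction needs it at the moment: http://llvm.org/PR27116.
-      if (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
-          !MRI->isReserved(MOReg))
+      if (Register::isPhysicalRegister(MOReg) && !MRI->isReserved(MOReg))
         MO.setIsDead(false);
       DefRegs.push_back(MOReg);
     }
@@ -541,7 +540,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
 
   // Process all uses.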
   for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
     unsigned MOReg = UseRegs[i];
-    if (TargetRegisterInfo::isVirtualRegister(MOReg))
+    if (Register::isVirtualRegister(MOReg))
       HandleVirtRegUse(MOReg, MBB, MI);
     else if (!MRI->isReserved(MOReg))
       HandlePhysRegUse(MOReg, MI);
@@ -554,7 +553,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
   // Process all defs.
   for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
     unsigned MOReg = DefRegs[i];
-    if (TargetRegisterInfo::isVirtualRegister(MOReg))
+    if (Register::isVirtualRegister(MOReg))
       HandleVirtRegDef(MOReg, MI);
     else if (!MRI->isReserved(MOReg))
       HandlePhysRegDef(MOReg, &MI, Defs);
@@ -566,7 +565,7 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) {
   // Mark live-in registers as live-in.
   SmallVector<unsigned, 4> Defs;
   for (const auto &LI : MBB->liveins()) {
-    assert(TargetRegisterInfo::isPhysicalRegister(LI.PhysReg) &&
+    assert(Register::isPhysicalRegister(LI.PhysReg) &&
            "Cannot have a live-in virtual register!");
     HandlePhysRegDef(LI.PhysReg, nullptr, Defs);
   }
@@ -654,7 +653,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
   // Convert and transfer the dead / killed information we have gathered into
   // VirtRegInfo onto MI's.
   for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) {
-    const unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    const unsigned Reg = Register::index2VirtReg(i);
     for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j)
       if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg))
         VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI);
@@ -692,8 +691,8 @@ void LiveVariables::removeVirtualRegistersKilled(MachineInstr &MI) {
     MachineOperand &MO = MI.getOperand(i);
     if (MO.isReg() && MO.isKill()) {
       MO.setIsKill(false);
-      unsigned Reg = MO.getReg();
-      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+      Register Reg = MO.getReg();
+      if (Register::isVirtualRegister(Reg)) {
         bool removed = getVarInfo(Reg).removeKill(MI);
         assert(removed && "kill not in register's VarInfo?");
         (void)removed;
@@ -783,7 +782,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
   for (; BBI != BBE; ++BBI) {
     for (MachineInstr::mop_iterator I = BBI->operands_begin(),
          E = BBI->operands_end(); I != E; ++I) {
-      if (I->isReg() && TargetRegisterInfo::isVirtualRegister(I->getReg())) {
+      if (I->isReg() && Register::isVirtualRegister(I->getReg())) {
         if (I->isDef())
           Defs.insert(I->getReg());
         else if (I->isKill())
@@ -794,7 +793,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
 
   // Update info for all live variables
   for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
-    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    unsigned Reg = Register::index2VirtReg(i);
 
     // If the Defs is defined in the successor it can't be live in BB.
     if (Defs.count(Reg))
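runOnInstr above first gathers UseRegs and DefRegs, then handles all uses before all defs, so every use observes the liveness state as it was before the instruction even when the same register is also defined by it. The shape of that two-phase idiom in miniature (toy operand encoding, illustrative only):

```cpp
#include <cstdlib>
#include <vector>

// Toy operands: positive id = use of register id, negative id = def of -id.
void handleInstr(const std::vector<int> &Operands) {
  std::vector<int> UseRegs, DefRegs;
  // Phase 1: gather, so later processing cannot be confused by ordering.
  for (int Op : Operands)
    (Op > 0 ? UseRegs : DefRegs).push_back(std::abs(Op));
  // Phase 2: all uses first (pre-instruction state), then all defs.
  for (int R : UseRegs) { /* process use */ (void)R; }
  for (int R : DefRegs) { /* record new definition */ (void)R; }
}
```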
if (Defs.count(Reg)) diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index b14d76a585f7..2392d4d00b56 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -261,7 +261,7 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // Remember how big this blob of stack space is MFI.setLocalFrameSize(Offset); - MFI.setLocalFrameMaxAlign(MaxAlign); + MFI.setLocalFrameMaxAlign(assumeAligned(MaxAlign)); } static inline bool @@ -351,6 +351,14 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { assert(MFI.isObjectPreAllocated(FrameIdx) && "Only pre-allocated locals expected!"); + // We need to keep the references to the stack protector slot through frame + // index operands so that it gets resolved by PEI rather than this pass. + // This avoids accesses to the stack protector through virtual base + // registers, and forces PEI to address it using fp/sp/bp. + if (MFI.hasStackProtectorIndex() && + FrameIdx == MFI.getStackProtectorIndex()) + continue; + LLVM_DEBUG(dbgs() << "Considering: " << MI); unsigned idx = 0; diff --git a/lib/CodeGen/LowerEmuTLS.cpp b/lib/CodeGen/LowerEmuTLS.cpp index c8cf6abda4fc..ed48365b0102 100644 --- a/lib/CodeGen/LowerEmuTLS.cpp +++ b/lib/CodeGen/LowerEmuTLS.cpp @@ -142,7 +142,7 @@ bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) { assert(EmuTlsTmplVar && "Failed to create emualted TLS initializer"); EmuTlsTmplVar->setConstant(true); EmuTlsTmplVar->setInitializer(const_cast<Constant*>(InitValue)); - EmuTlsTmplVar->setAlignment(GVAlignment); + EmuTlsTmplVar->setAlignment(Align(GVAlignment)); copyLinkageVisibility(M, GV, EmuTlsTmplVar); } @@ -155,9 +155,8 @@ bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) { ArrayRef<Constant*> ElementValueArray(ElementValues, 4); EmuTlsVar->setInitializer( ConstantStruct::get(EmuTlsVarType, ElementValueArray)); - unsigned MaxAlignment = std::max( - DL.getABITypeAlignment(WordType), - DL.getABITypeAlignment(VoidPtrType)); + Align MaxAlignment(std::max(DL.getABITypeAlignment(WordType), + DL.getABITypeAlignment(VoidPtrType))); EmuTlsVar->setAlignment(MaxAlignment); return true; } diff --git a/lib/CodeGen/MIRCanonicalizerPass.cpp b/lib/CodeGen/MIRCanonicalizerPass.cpp index f49bc854e23f..c9bb5461aa3c 100644 --- a/lib/CodeGen/MIRCanonicalizerPass.cpp +++ b/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -23,12 +23,14 @@ // //===----------------------------------------------------------------------===// +#include "MIRVRegNamerUtils.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <queue> @@ -71,28 +73,6 @@ public: } // end anonymous namespace -enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate }; -class TypedVReg { - VRType type; - unsigned reg; - -public: - TypedVReg(unsigned reg) : type(RSE_Reg), reg(reg) {} - TypedVReg(VRType type) : type(type), reg(~0U) { - assert(type != RSE_Reg && "Expected a non-register type."); - } - - bool isReg() const { return type == RSE_Reg; } - bool isFrameIndex() const { return type == RSE_FrameIndex; } - bool isCandidate() const { return type == RSE_NewCandidate; } - - VRType getType() const { return type; } - unsigned getReg() const { - assert(this->isReg()
&& "Expected a virtual or physical register."); - return reg; - } -}; - char MIRCanonicalizer::ID; char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID; @@ -190,7 +170,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, if (!MO.isReg()) continue; - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (Register::isVirtualRegister(MO.getReg())) continue; if (!MO.isDef()) @@ -207,7 +187,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, continue; MachineOperand &MO = II->getOperand(0); - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) continue; if (!MO.isDef()) continue; @@ -220,7 +200,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, } if (II->getOperand(i).isReg()) { - if (!TargetRegisterInfo::isVirtualRegister(II->getOperand(i).getReg())) + if (!Register::isVirtualRegister(II->getOperand(i).getReg())) if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) == PhysRegDefs.end()) { continue; @@ -340,12 +320,12 @@ static bool propagateLocalCopies(MachineBasicBlock *MBB) { if (!MI->getOperand(1).isReg()) continue; - const unsigned Dst = MI->getOperand(0).getReg(); - const unsigned Src = MI->getOperand(1).getReg(); + const Register Dst = MI->getOperand(0).getReg(); + const Register Src = MI->getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Dst)) + if (!Register::isVirtualRegister(Dst)) continue; - if (!TargetRegisterInfo::isVirtualRegister(Src)) + if (!Register::isVirtualRegister(Src)) continue; // Not folding COPY instructions if regbankselect has not set the RCs. // Why are we only considering Register Classes? Because the verifier @@ -370,258 +350,6 @@ static bool propagateLocalCopies(MachineBasicBlock *MBB) { return Changed; } -/// Here we find our candidates. What makes an interesting candidate? -/// An candidate for a canonicalization tree root is normally any kind of -/// instruction that causes side effects such as a store to memory or a copy to -/// a physical register or a return instruction. We use these as an expression -/// tree root that we walk inorder to build a canonical walk which should result -/// in canoncal vreg renaming. 
-static std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) { - std::vector<MachineInstr *> Candidates; - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - - for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { - MachineInstr *MI = &*II; - - bool DoesMISideEffect = false; - - if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) { - const unsigned Dst = MI->getOperand(0).getReg(); - DoesMISideEffect |= !TargetRegisterInfo::isVirtualRegister(Dst); - - for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) { - if (DoesMISideEffect) - break; - DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent()); - } - } - - if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect) - continue; - - LLVM_DEBUG(dbgs() << "Found Candidate: "; MI->dump();); - Candidates.push_back(MI); - } - - return Candidates; -} - -static void doCandidateWalk(std::vector<TypedVReg> &VRegs, - std::queue<TypedVReg> &RegQueue, - std::vector<MachineInstr *> &VisitedMIs, - const MachineBasicBlock *MBB) { - - const MachineFunction &MF = *MBB->getParent(); - const MachineRegisterInfo &MRI = MF.getRegInfo(); - - while (!RegQueue.empty()) { - - auto TReg = RegQueue.front(); - RegQueue.pop(); - - if (TReg.isFrameIndex()) { - LLVM_DEBUG(dbgs() << "Popping frame index.\n";); - VRegs.push_back(TypedVReg(RSE_FrameIndex)); - continue; - } - - assert(TReg.isReg() && "Expected vreg or physreg."); - unsigned Reg = TReg.getReg(); - - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - LLVM_DEBUG({ - dbgs() << "Popping vreg "; - MRI.def_begin(Reg)->dump(); - dbgs() << "\n"; - }); - - if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) { - return TR.isReg() && TR.getReg() == Reg; - })) { - VRegs.push_back(TypedVReg(Reg)); - } - } else { - LLVM_DEBUG(dbgs() << "Popping physreg.\n";); - VRegs.push_back(TypedVReg(Reg)); - continue; - } - - for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) { - MachineInstr *Def = RI->getParent(); - - if (Def->getParent() != MBB) - continue; - - if (llvm::any_of(VisitedMIs, - [&](const MachineInstr *VMI) { return Def == VMI; })) { - break; - } - - LLVM_DEBUG({ - dbgs() << "\n========================\n"; - dbgs() << "Visited MI: "; - Def->dump(); - dbgs() << "BB Name: " << Def->getParent()->getName() << "\n"; - dbgs() << "\n========================\n"; - }); - VisitedMIs.push_back(Def); - for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) { - - MachineOperand &MO = Def->getOperand(I); - if (MO.isFI()) { - LLVM_DEBUG(dbgs() << "Pushing frame index.\n";); - RegQueue.push(TypedVReg(RSE_FrameIndex)); - } - - if (!MO.isReg()) - continue; - RegQueue.push(TypedVReg(MO.getReg())); - } - } - } -} - -namespace { -class NamedVRegCursor { - MachineRegisterInfo &MRI; - unsigned virtualVRegNumber; - -public: - NamedVRegCursor(MachineRegisterInfo &MRI) : MRI(MRI), virtualVRegNumber(0) {} - - void SkipVRegs() { - unsigned VRegGapIndex = 1; - if (!virtualVRegNumber) { - VRegGapIndex = 0; - virtualVRegNumber = MRI.createIncompleteVirtualRegister(); - } - const unsigned VR_GAP = (++VRegGapIndex * 1000); - - unsigned I = virtualVRegNumber; - const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP; - - virtualVRegNumber = E; - } - - unsigned getVirtualVReg() const { return virtualVRegNumber; } - - unsigned incrementVirtualVReg(unsigned incr = 1) { - virtualVRegNumber += incr; - return virtualVRegNumber; - } - - unsigned createVirtualRegister(unsigned VReg) { - if (!virtualVRegNumber) - SkipVRegs(); - std::string S; - raw_string_ostream 
OS(S); - OS << "namedVReg" << (virtualVRegNumber & ~0x80000000); - OS.flush(); - virtualVRegNumber++; - if (auto RC = MRI.getRegClassOrNull(VReg)) - return MRI.createVirtualRegister(RC, OS.str()); - return MRI.createGenericVirtualRegister(MRI.getType(VReg), OS.str()); - } -}; -} // namespace - -static std::map<unsigned, unsigned> -GetVRegRenameMap(const std::vector<TypedVReg> &VRegs, - const std::vector<unsigned> &renamedInOtherBB, - MachineRegisterInfo &MRI, NamedVRegCursor &NVC) { - std::map<unsigned, unsigned> VRegRenameMap; - bool FirstCandidate = true; - - for (auto &vreg : VRegs) { - if (vreg.isFrameIndex()) { - // We skip one vreg for any frame index because there is a good chance - // (especially when comparing SelectionDAG to GlobalISel generated MIR) - // that in the other file we are just getting an incoming vreg that comes - // from a copy from a frame index. So it's safe to skip by one. - unsigned LastRenameReg = NVC.incrementVirtualVReg(); - (void)LastRenameReg; - LLVM_DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";); - continue; - } else if (vreg.isCandidate()) { - - // After the first candidate, for every subsequent candidate, we skip mod - // 10 registers so that the candidates are more likely to start at the - // same vreg number making it more likely that the canonical walk from the - // candidate insruction. We don't need to skip from the first candidate of - // the BasicBlock because we already skip ahead several vregs for each BB. - unsigned LastRenameReg = NVC.getVirtualVReg(); - if (FirstCandidate) - NVC.incrementVirtualVReg(LastRenameReg % 10); - FirstCandidate = false; - continue; - } else if (!TargetRegisterInfo::isVirtualRegister(vreg.getReg())) { - unsigned LastRenameReg = NVC.incrementVirtualVReg(); - (void)LastRenameReg; - LLVM_DEBUG({ - dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n"; - }); - continue; - } - - auto Reg = vreg.getReg(); - if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) { - LLVM_DEBUG(dbgs() << "Vreg " << Reg - << " already renamed in other BB.\n";); - continue; - } - - auto Rename = NVC.createVirtualRegister(Reg); - - if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) { - LLVM_DEBUG(dbgs() << "Mapping vreg ";); - if (MRI.reg_begin(Reg) != MRI.reg_end()) { - LLVM_DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump();); - } else { - LLVM_DEBUG(dbgs() << Reg;); - } - LLVM_DEBUG(dbgs() << " to ";); - if (MRI.reg_begin(Rename) != MRI.reg_end()) { - LLVM_DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump();); - } else { - LLVM_DEBUG(dbgs() << Rename;); - } - LLVM_DEBUG(dbgs() << "\n";); - - VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename)); - } - } - - return VRegRenameMap; -} - -static bool doVRegRenaming(std::vector<unsigned> &RenamedInOtherBB, - const std::map<unsigned, unsigned> &VRegRenameMap, - MachineRegisterInfo &MRI) { - bool Changed = false; - for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) { - - auto VReg = I->first; - auto Rename = I->second; - - RenamedInOtherBB.push_back(Rename); - - std::vector<MachineOperand *> RenameMOs; - for (auto &MO : MRI.reg_operands(VReg)) { - RenameMOs.push_back(&MO); - } - - for (auto *MO : RenameMOs) { - Changed = true; - MO->setReg(Rename); - - if (!MO->isDef()) - MO->setIsKill(false); - } - } - - return Changed; -} - static bool doDefKillClear(MachineBasicBlock *MBB) { bool Changed = false; @@ -646,9 +374,7 @@ static bool doDefKillClear(MachineBasicBlock *MBB) { static bool runOnBasicBlock(MachineBasicBlock 
*MBB, std::vector<StringRef> &bbNames, - std::vector<unsigned> &renamedInOtherBB, - unsigned &basicBlockNum, unsigned &VRegGapIndex, - NamedVRegCursor &NVC) { + unsigned &basicBlockNum, NamedVRegCursor &NVC) { if (CanonicalizeBasicBlockNumber != ~0U) { if (CanonicalizeBasicBlockNumber != basicBlockNum++) @@ -687,74 +413,20 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB, Changed |= rescheduleCanonically(IdempotentInstCount, MBB); LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); - std::vector<MachineInstr *> Candidates = populateCandidates(MBB); - std::vector<MachineInstr *> VisitedMIs; - llvm::copy(Candidates, std::back_inserter(VisitedMIs)); - - std::vector<TypedVReg> VRegs; - for (auto candidate : Candidates) { - VRegs.push_back(TypedVReg(RSE_NewCandidate)); - - std::queue<TypedVReg> RegQueue; - - // Here we walk the vreg operands of a non-root node along our walk. - // The root nodes are the original candidates (stores normally). - // These are normally not the root nodes (except for the case of copies to - // physical registers). - for (unsigned i = 1; i < candidate->getNumOperands(); i++) { - if (candidate->mayStore() || candidate->isBranch()) - break; - - MachineOperand &MO = candidate->getOperand(i); - if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) - continue; - - LLVM_DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";); - RegQueue.push(TypedVReg(MO.getReg())); - } - - // Here we walk the root candidates. We start from the 0th operand because - // the root is normally a store to a vreg. - for (unsigned i = 0; i < candidate->getNumOperands(); i++) { - - if (!candidate->mayStore() && !candidate->isBranch()) - break; - - MachineOperand &MO = candidate->getOperand(i); - - // TODO: Do we want to only add vregs here? - if (!MO.isReg() && !MO.isFI()) - continue; - - LLVM_DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";); - - RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg()) - : TypedVReg(RSE_FrameIndex)); - } - - doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB); - } - - // If we have populated no vregs to rename then bail. - // The rest of this function does the vreg remaping. - if (VRegs.size() == 0) - return Changed; - - auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, NVC); - Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI); + Changed |= NVC.renameVRegs(MBB); // Here we renumber the def vregs for the idempotent instructions from the top // of the MachineBasicBlock so that they are named in the order that we sorted // them alphabetically. Eventually we wont need SkipVRegs because we will use // named vregs instead. 
if (IdempotentInstCount) - NVC.SkipVRegs(); + NVC.skipVRegs(); auto MII = MBB->begin(); for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) { MachineInstr &MI = *MII++; Changed = true; - unsigned vRegToRename = MI.getOperand(0).getReg(); + Register vRegToRename = MI.getOperand(0).getReg(); auto Rename = NVC.createVirtualRegister(vRegToRename); std::vector<MachineOperand *> RenameMOs; @@ -799,9 +471,7 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { << "\n\n================================================\n\n";); std::vector<StringRef> BBNames; - std::vector<unsigned> RenamedInOtherBB; - unsigned GapIdx = 0; unsigned BBNum = 0; bool Changed = false; @@ -809,8 +479,7 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); NamedVRegCursor NVC(MRI); for (auto MBB : RPOList) - Changed |= - runOnBasicBlock(MBB, BBNames, RenamedInOtherBB, BBNum, GapIdx, NVC); + Changed |= runOnBasicBlock(MBB, BBNames, BBNum, NVC); return Changed; } diff --git a/lib/CodeGen/MIRNamerPass.cpp b/lib/CodeGen/MIRNamerPass.cpp new file mode 100644 index 000000000000..9d719f3917ce --- /dev/null +++ b/lib/CodeGen/MIRNamerPass.cpp @@ -0,0 +1,77 @@ +//===----------------------- MIRNamer.cpp - MIR Namer ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The purpose of this pass is to rename virtual register operands with the goal +// of making it easier to author easy-to-read tests for MIR. This pass reuses +// the vreg renamer used by MIRCanonicalizerPass.
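Concretely, the shared renamer gives every rewritten vreg a string name derived from a counter, so tests see stable names instead of bare numbers. A condensed restatement of createVirtualRegister from MIRVRegNamerUtils.cpp further down in this diff, where Counter and RC stand in for the cursor state and the vreg's register class:

    std::string S;
    llvm::raw_string_ostream OS(S);
    OS << "namedVReg" << (Counter & ~0x80000000); // strip the virtual-reg tag bit
    unsigned NewVReg = MRI.createVirtualRegister(RC, OS.str());
    // MIR then prints something like %namedVReg1001:gpr64 instead of an
    // unstable bare number such as %2 (output shape illustrative).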
+// +// Basic Usage: +// +// llc -o - -run-pass mir-namer example.mir +// +//===----------------------------------------------------------------------===// + +#include "MIRVRegNamerUtils.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" + +using namespace llvm; + +namespace llvm { +extern char &MIRNamerID; +} // namespace llvm + +#define DEBUG_TYPE "mir-namer" + +namespace { + +class MIRNamer : public MachineFunctionPass { +public: + static char ID; + MIRNamer() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "Rename virtual register operands"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override { + bool Changed = false; + + if (MF.empty()) + return Changed; + + NamedVRegCursor NVC(MF.getRegInfo()); + + ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); + for (auto &MBB : RPOT) + Changed |= NVC.renameVRegs(MBB); + + return Changed; + } +}; + +} // end anonymous namespace + +char MIRNamer::ID; + +char &llvm::MIRNamerID = MIRNamer::ID; + +INITIALIZE_PASS_BEGIN(MIRNamer, "mir-namer", "Rename Register Operands", false, + false) + +INITIALIZE_PASS_END(MIRNamer, "mir-namer", "Rename Register Operands", false, + false) diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp index 4899bd3f5811..ad5c617623f2 100644 --- a/lib/CodeGen/MIRParser/MILexer.cpp +++ b/lib/CodeGen/MIRParser/MILexer.cpp @@ -249,6 +249,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("successors", MIToken::kw_successors) .Case("floatpred", MIToken::kw_floatpred) .Case("intpred", MIToken::kw_intpred) + .Case("shufflemask", MIToken::kw_shufflemask) .Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol) .Case("post-instr-symbol", MIToken::kw_post_instr_symbol) .Case("unknown-size", MIToken::kw_unknown_size) diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h index 0fe3f9f706db..200f9d026cc8 100644 --- a/lib/CodeGen/MIRParser/MILexer.h +++ b/lib/CodeGen/MIRParser/MILexer.h @@ -117,6 +117,7 @@ struct MIToken { kw_successors, kw_floatpred, kw_intpred, + kw_shufflemask, kw_pre_instr_symbol, kw_post_instr_symbol, kw_unknown_size, @@ -146,6 +147,7 @@ struct MIToken { IntegerLiteral, FloatingPointLiteral, HexLiteral, + VectorLiteral, VirtualRegister, ConstantPoolItem, JumpTableIndex, diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp index c0b800a0b870..6498acc9fa51 100644 --- a/lib/CodeGen/MIRParser/MIParser.cpp +++ b/lib/CodeGen/MIRParser/MIParser.cpp @@ -451,6 +451,7 @@ public: bool parseBlockAddressOperand(MachineOperand &Dest); bool parseIntrinsicOperand(MachineOperand &Dest); bool parsePredicateOperand(MachineOperand &Dest); + bool parseShuffleMaskOperand(MachineOperand &Dest); bool parseTargetIndexOperand(MachineOperand &Dest); bool parseCustomRegisterMaskOperand(MachineOperand &Dest); bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest); @@ -640,7 +641,7 @@ bool MIParser::parseBasicBlockDefinition( return error(Loc, Twine("redefinition of machine basic block with id #") + Twine(ID)); if (Alignment) - MBB->setAlignment(Alignment); + MBB->setAlignment(Align(Alignment)); if (HasAddressTaken) 
MBB->setHasAddressTaken(); MBB->setIsEHPad(IsLandingPad); @@ -1078,7 +1079,7 @@ static const char *printImplicitRegisterFlag(const MachineOperand &MO) { static std::string getRegisterName(const TargetRegisterInfo *TRI, unsigned Reg) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "expected phys reg"); + assert(Register::isPhysicalRegister(Reg) && "expected phys reg"); return StringRef(TRI->getName(Reg)).lower(); } @@ -1408,11 +1409,11 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest, if (Token.is(MIToken::dot)) { if (parseSubRegisterIndex(SubReg)) return true; - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return error("subregister index expects a virtual register"); } if (Token.is(MIToken::colon)) { - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return error("register class specification expects a virtual register"); lex(); if (parseRegisterClassOrBank(*RegInfo)) @@ -1436,12 +1437,13 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest, if (MRI.getType(Reg).isValid() && MRI.getType(Reg) != Ty) return error("inconsistent type for generic virtual register"); + MRI.setRegClassOrRegBank(Reg, static_cast<RegisterBank *>(nullptr)); MRI.setType(Reg, Ty); } } } else if (consumeIfPresent(MIToken::lparen)) { // Virtual registers may have a tpe with GlobalISel. - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return error("unexpected type on physical register"); LLT Ty; @@ -1454,8 +1456,9 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest, if (MRI.getType(Reg).isValid() && MRI.getType(Reg) != Ty) return error("inconsistent type for generic virtual register"); + MRI.setRegClassOrRegBank(Reg, static_cast<RegisterBank *>(nullptr)); MRI.setType(Reg, Ty); - } else if (TargetRegisterInfo::isVirtualRegister(Reg)) { + } else if (Register::isVirtualRegister(Reg)) { // Generic virtual registers must have a type. // If we end up here this means the type hasn't been specified and // this is bad! @@ -2285,6 +2288,49 @@ bool MIParser::parsePredicateOperand(MachineOperand &Dest) { return false; } +bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::kw_shufflemask)); + + lex(); + if (expectAndConsume(MIToken::lparen)) + return error("expected syntax shufflemask(<integer or undef>, ...)"); + + SmallVector<Constant *, 32> ShufMask; + LLVMContext &Ctx = MF.getFunction().getContext(); + Type *I32Ty = Type::getInt32Ty(Ctx); + + bool AllZero = true; + bool AllUndef = true; + + do { + if (Token.is(MIToken::kw_undef)) { + ShufMask.push_back(UndefValue::get(I32Ty)); + AllZero = false; + } else if (Token.is(MIToken::IntegerLiteral)) { + AllUndef = false; + const APSInt &Int = Token.integerValue(); + if (!Int.isNullValue()) + AllZero = false; + ShufMask.push_back(ConstantInt::get(I32Ty, Int.getExtValue())); + } else + return error("expected integer constant"); + + lex(); + } while (consumeIfPresent(MIToken::comma)); + + if (expectAndConsume(MIToken::rparen)) + return error("shufflemask should be terminated by ')'."); + + if (AllZero || AllUndef) { + VectorType *VT = VectorType::get(I32Ty, ShufMask.size()); + Constant *C = AllZero ? 
Constant::getNullValue(VT) : UndefValue::get(VT); + Dest = MachineOperand::CreateShuffleMask(C); + } else + Dest = MachineOperand::CreateShuffleMask(ConstantVector::get(ShufMask)); + + return false; +} + bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) { assert(Token.is(MIToken::kw_target_index)); lex(); @@ -2432,6 +2478,8 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, case MIToken::kw_floatpred: case MIToken::kw_intpred: return parsePredicateOperand(Dest); + case MIToken::kw_shufflemask: + return parseShuffleMaskOperand(Dest); case MIToken::Error: return true; case MIToken::Identifier: diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp index b242934def80..55fac93d8991 100644 --- a/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/lib/CodeGen/MIRParser/MIRParser.cpp @@ -216,7 +216,7 @@ std::unique_ptr<Module> MIRParserImpl::parseIRModule() { return nullptr; // Create an empty module when the MIR file is empty. NoMIRDocuments = true; - return llvm::make_unique<Module>(Filename, Context); + return std::make_unique<Module>(Filename, Context); } std::unique_ptr<Module> M; @@ -236,7 +236,7 @@ std::unique_ptr<Module> MIRParserImpl::parseIRModule() { NoMIRDocuments = true; } else { // Create an new, empty module. - M = llvm::make_unique<Module>(Filename, Context); + M = std::make_unique<Module>(Filename, Context); NoLLVMIR = true; } return M; @@ -306,7 +306,7 @@ bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) { static bool isSSA(const MachineFunction &MF) { const MachineRegisterInfo &MRI = MF.getRegInfo(); for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + unsigned Reg = Register::index2VirtReg(I); if (!MRI.hasOneDef(Reg) && !MRI.def_empty(Reg)) return false; } @@ -355,10 +355,10 @@ bool MIRParserImpl::initializeCallSiteInfo( if (MILoc.Offset >= CallB->size()) return error(Twine(MF.getName()) + Twine(" call instruction offset out of range.") + - "Unable to reference instruction at bb: " + + " Unable to reference instruction at bb: " + Twine(MILoc.BlockNum) + " at offset:" + Twine(MILoc.Offset)); - auto CallI = std::next(CallB->begin(), MILoc.Offset); - if (!CallI->isCall()) + auto CallI = std::next(CallB->instr_begin(), MILoc.Offset); + if (!CallI->isCall(MachineInstr::IgnoreBundle)) return error(Twine(MF.getName()) + Twine(" call site info should reference call " "instruction. 
Instruction at bb:") + @@ -393,7 +393,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, } if (YamlMF.Alignment) - MF.setAlignment(YamlMF.Alignment); + MF.setAlignment(Align(YamlMF.Alignment)); MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice); MF.setHasWinCFI(YamlMF.HasWinCFI); @@ -949,6 +949,6 @@ llvm::createMIRParser(std::unique_ptr<MemoryBuffer> Contents, "Can't read MIR with a Context that discards named Values"))); return nullptr; } - return llvm::make_unique<MIRParser>( - llvm::make_unique<MIRParserImpl>(std::move(Contents), Filename, Context)); + return std::make_unique<MIRParser>( + std::make_unique<MIRParserImpl>(std::move(Contents), Filename, Context)); } diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index 0a95a0ced0f5..1a4e21ac06a9 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -197,7 +197,7 @@ void MIRPrinter::print(const MachineFunction &MF) { yaml::MachineFunction YamlMF; YamlMF.Name = MF.getName(); - YamlMF.Alignment = MF.getAlignment(); + YamlMF.Alignment = MF.getAlignment().value(); YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice(); YamlMF.HasWinCFI = MF.hasWinCFI(); @@ -290,7 +290,7 @@ void MIRPrinter::convert(yaml::MachineFunction &MF, // Print the virtual register definitions. for (unsigned I = 0, E = RegInfo.getNumVirtRegs(); I < E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + unsigned Reg = Register::index2VirtReg(I); yaml::VirtualRegisterDefinition VReg; VReg.ID = I; if (RegInfo.getVRegName(Reg) != "") @@ -473,10 +473,11 @@ void MIRPrinter::convertCallSiteObjects(yaml::MachineFunction &YMF, yaml::CallSiteInfo::MachineInstrLoc CallLocation; // Prepare instruction position. - MachineBasicBlock::const_iterator CallI = CSInfo.first->getIterator(); + MachineBasicBlock::const_instr_iterator CallI = CSInfo.first->getIterator(); CallLocation.BlockNum = CallI->getParent()->getNumber(); // Get call instruction offset from the beginning of block. - CallLocation.Offset = std::distance(CallI->getParent()->begin(), CallI); + CallLocation.Offset = + std::distance(CallI->getParent()->instr_begin(), CallI); YmlCS.CallLocation = CallLocation; // Construct call arguments and theirs forwarding register info. for (auto ArgReg : CSInfo.second) { @@ -628,9 +629,9 @@ void MIPrinter::print(const MachineBasicBlock &MBB) { OS << "landing-pad"; HasAttributes = true; } - if (MBB.getAlignment()) { + if (MBB.getAlignment() != Align::None()) { OS << (HasAttributes ? ", " : " ("); - OS << "align " << MBB.getAlignment(); + OS << "align " << MBB.getAlignment().value(); HasAttributes = true; } if (HasAttributes) @@ -842,7 +843,8 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx, case MachineOperand::MO_CFIIndex: case MachineOperand::MO_IntrinsicID: case MachineOperand::MO_Predicate: - case MachineOperand::MO_BlockAddress: { + case MachineOperand::MO_BlockAddress: + case MachineOperand::MO_ShuffleMask: { unsigned TiedOperandIdx = 0; if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef()) TiedOperandIdx = Op.getParent()->findTiedOperandIdx(OpIdx); diff --git a/lib/CodeGen/MIRVRegNamerUtils.cpp b/lib/CodeGen/MIRVRegNamerUtils.cpp new file mode 100644 index 000000000000..6629000f468f --- /dev/null +++ b/lib/CodeGen/MIRVRegNamerUtils.cpp @@ -0,0 +1,348 @@ +//===---------- MIRVRegNamerUtils.cpp - MIR VReg Renaming Utilities -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MIRVRegNamerUtils.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "mir-vregnamer-utils" + +namespace { + +// TypedVReg and VRType are used to tell the renamer what to do at points in a +// sequence of values to be renamed. A TypedVReg can either contain +// an actual VReg, a FrameIndex, or it could just be a barrier for the next +// candidate (side-effecting instruction). This tells the renamer to increment +// to the next vreg name, or to skip modulo some skip-gap value. +enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate }; +class TypedVReg { + VRType Type; + Register Reg; + +public: + TypedVReg(Register Reg) : Type(RSE_Reg), Reg(Reg) {} + TypedVReg(VRType Type) : Type(Type), Reg(~0U) { + assert(Type != RSE_Reg && "Expected a non-Register Type."); + } + + bool isReg() const { return Type == RSE_Reg; } + bool isFrameIndex() const { return Type == RSE_FrameIndex; } + bool isCandidate() const { return Type == RSE_NewCandidate; } + + VRType getType() const { return Type; } + Register getReg() const { + assert(this->isReg() && "Expected a virtual or physical Register."); + return Reg; + } +}; + +/// Here we find our candidates. What makes an interesting candidate? +/// A candidate for a canonicalization tree root is normally any kind of +/// instruction that causes side effects such as a store to memory or a copy to +/// a physical register or a return instruction. We use these as an expression +/// tree root that we walk in order to build a canonical walk which should +/// result in canonical vreg renaming. 
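A usage sketch for the class above (hypothetical driver code): the walk emits one RSE_NewCandidate marker per root, then the vregs and frame indexes it visits, and the rename map is later built from that flat sequence:

    std::vector<TypedVReg> VRegs;
    VRegs.push_back(TypedVReg(RSE_NewCandidate));            // start of a new walk
    VRegs.push_back(TypedVReg(Register::index2VirtReg(0)));  // a visited vreg
    VRegs.push_back(TypedVReg(RSE_FrameIndex));              // a stack access stand-in
    for (const TypedVReg &TV : VRegs)
      if (TV.isReg() && Register::isVirtualRegister(TV.getReg()))
        /* only these entries get fresh names; markers adjust the cursor */;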
+std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) { + std::vector<MachineInstr *> Candidates; + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { + MachineInstr *MI = &*II; + + bool DoesMISideEffect = false; + + if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) { + const Register Dst = MI->getOperand(0).getReg(); + DoesMISideEffect |= !Register::isVirtualRegister(Dst); + + for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) { + if (DoesMISideEffect) + break; + DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent()); + } + } + + if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect) + continue; + + LLVM_DEBUG(dbgs() << "Found Candidate: "; MI->dump();); + Candidates.push_back(MI); + } + + return Candidates; +} + +void doCandidateWalk(std::vector<TypedVReg> &VRegs, + std::queue<TypedVReg> &RegQueue, + std::vector<MachineInstr *> &VisitedMIs, + const MachineBasicBlock *MBB) { + + const MachineFunction &MF = *MBB->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + while (!RegQueue.empty()) { + + auto TReg = RegQueue.front(); + RegQueue.pop(); + + if (TReg.isFrameIndex()) { + LLVM_DEBUG(dbgs() << "Popping frame index.\n";); + VRegs.push_back(TypedVReg(RSE_FrameIndex)); + continue; + } + + assert(TReg.isReg() && "Expected vreg or physreg."); + Register Reg = TReg.getReg(); + + if (Register::isVirtualRegister(Reg)) { + LLVM_DEBUG({ + dbgs() << "Popping vreg "; + MRI.def_begin(Reg)->dump(); + dbgs() << "\n"; + }); + + if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) { + return TR.isReg() && TR.getReg() == Reg; + })) { + VRegs.push_back(TypedVReg(Reg)); + } + } else { + LLVM_DEBUG(dbgs() << "Popping physreg.\n";); + VRegs.push_back(TypedVReg(Reg)); + continue; + } + + for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) { + MachineInstr *Def = RI->getParent(); + + if (Def->getParent() != MBB) + continue; + + if (llvm::any_of(VisitedMIs, + [&](const MachineInstr *VMI) { return Def == VMI; })) { + break; + } + + LLVM_DEBUG({ + dbgs() << "\n========================\n"; + dbgs() << "Visited MI: "; + Def->dump(); + dbgs() << "BB Name: " << Def->getParent()->getName() << "\n"; + dbgs() << "\n========================\n"; + }); + VisitedMIs.push_back(Def); + for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) { + + MachineOperand &MO = Def->getOperand(I); + if (MO.isFI()) { + LLVM_DEBUG(dbgs() << "Pushing frame index.\n";); + RegQueue.push(TypedVReg(RSE_FrameIndex)); + } + + if (!MO.isReg()) + continue; + RegQueue.push(TypedVReg(MO.getReg())); + } + } + } +} + +std::map<unsigned, unsigned> +getVRegRenameMap(const std::vector<TypedVReg> &VRegs, + const std::vector<Register> &renamedInOtherBB, + MachineRegisterInfo &MRI, NamedVRegCursor &NVC) { + std::map<unsigned, unsigned> VRegRenameMap; + bool FirstCandidate = true; + + for (auto &vreg : VRegs) { + if (vreg.isFrameIndex()) { + // We skip one vreg for any frame index because there is a good chance + // (especially when comparing SelectionDAG to GlobalISel generated MIR) + // that in the other file we are just getting an incoming vreg that comes + // from a copy from a frame index. So it's safe to skip by one. 
+ unsigned LastRenameReg = NVC.incrementVirtualVReg(); + (void)LastRenameReg; + LLVM_DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";); + continue; + } else if (vreg.isCandidate()) { + + // After the first candidate, for every subsequent candidate, we skip mod + // 10 registers so that the candidates are more likely to start at the + // same vreg number, making it more likely that the canonical walks from + // two matching candidate instructions will line up. We don't need to skip + // from the first candidate of the BasicBlock because we already skip ahead + // several vregs for each BB. + unsigned LastRenameReg = NVC.getVirtualVReg(); + if (FirstCandidate) + NVC.incrementVirtualVReg(LastRenameReg % 10); + FirstCandidate = false; + continue; + } else if (!Register::isVirtualRegister(vreg.getReg())) { + unsigned LastRenameReg = NVC.incrementVirtualVReg(); + (void)LastRenameReg; + LLVM_DEBUG({ + dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n"; + }); + continue; + } + + auto Reg = vreg.getReg(); + if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) { + LLVM_DEBUG(dbgs() << "Vreg " << Reg + << " already renamed in other BB.\n";); + continue; + } + + auto Rename = NVC.createVirtualRegister(Reg); + + if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) { + LLVM_DEBUG(dbgs() << "Mapping vreg ";); + if (MRI.reg_begin(Reg) != MRI.reg_end()) { + LLVM_DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump();); + } else { + LLVM_DEBUG(dbgs() << Reg;); + } + LLVM_DEBUG(dbgs() << " to ";); + if (MRI.reg_begin(Rename) != MRI.reg_end()) { + LLVM_DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump();); + } else { + LLVM_DEBUG(dbgs() << Rename;); + } + LLVM_DEBUG(dbgs() << "\n";); + + VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename)); + } + } + + return VRegRenameMap; +} + +bool doVRegRenaming(std::vector<Register> &renamedInOtherBB, + const std::map<unsigned, unsigned> &VRegRenameMap, + MachineRegisterInfo &MRI) { + bool Changed = false; + for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) { + + auto VReg = I->first; + auto Rename = I->second; + + renamedInOtherBB.push_back(Rename); + + std::vector<MachineOperand *> RenameMOs; + for (auto &MO : MRI.reg_operands(VReg)) { + RenameMOs.push_back(&MO); + } + + for (auto *MO : RenameMOs) { + Changed = true; + MO->setReg(Rename); + + if (!MO->isDef()) + MO->setIsKill(false); + } + } + + return Changed; +} + +bool renameVRegs(MachineBasicBlock *MBB, + std::vector<Register> &renamedInOtherBB, + NamedVRegCursor &NVC) { + bool Changed = false; + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + std::vector<MachineInstr *> Candidates = populateCandidates(MBB); + std::vector<MachineInstr *> VisitedMIs; + llvm::copy(Candidates, std::back_inserter(VisitedMIs)); + + std::vector<TypedVReg> VRegs; + for (auto candidate : Candidates) { + VRegs.push_back(TypedVReg(RSE_NewCandidate)); + + std::queue<TypedVReg> RegQueue; + + // Here we walk the vreg operands of a non-root node along our walk. + // The root nodes are the original candidates (stores normally). + // These are normally not the root nodes (except for the case of copies to + // physical registers).
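The two operand loops that follow implement different enqueue policies for roots and non-roots. A distilled, self-contained restatement of that policy (the Op struct is a stand-in for MachineOperand):

    #include <queue>
    #include <vector>

    struct Op { bool IsReg, IsFI, IsVirtual; unsigned Reg; };

    // Roots (stores/branches) feed every Reg/FI operand starting at index 0;
    // non-roots feed only their virtual-register operands, past the def at 0.
    void enqueueOperands(const std::vector<Op> &Ops, bool IsRoot,
                         std::queue<Op> &Work) {
      for (unsigned I = IsRoot ? 0 : 1; I < Ops.size(); ++I) {
        const Op &O = Ops[I];
        if (IsRoot ? (O.IsReg || O.IsFI) : (O.IsReg && O.IsVirtual))
          Work.push(O);
      }
    }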
+ for (unsigned i = 1; i < candidate->getNumOperands(); i++) { + if (candidate->mayStore() || candidate->isBranch()) + break; + + MachineOperand &MO = candidate->getOperand(i); + if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg()))) + continue; + + LLVM_DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";); + RegQueue.push(TypedVReg(MO.getReg())); + } + + // Here we walk the root candidates. We start from the 0th operand because + // the root is normally a store to a vreg. + for (unsigned i = 0; i < candidate->getNumOperands(); i++) { + + if (!candidate->mayStore() && !candidate->isBranch()) + break; + + MachineOperand &MO = candidate->getOperand(i); + + // TODO: Do we want to only add vregs here? + if (!MO.isReg() && !MO.isFI()) + continue; + + LLVM_DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";); + + RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg()) + : TypedVReg(RSE_FrameIndex)); + } + + doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB); + } + + // If we have populated no vregs to rename then bail. + // The rest of this function does the vreg remapping. + if (VRegs.size() == 0) + return Changed; + + auto VRegRenameMap = getVRegRenameMap(VRegs, renamedInOtherBB, MRI, NVC); + Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI); + return Changed; +} +} // anonymous namespace + +void NamedVRegCursor::skipVRegs() { + unsigned VRegGapIndex = 1; + if (!virtualVRegNumber) { + VRegGapIndex = 0; + virtualVRegNumber = MRI.createIncompleteVirtualRegister(); + } + const unsigned VR_GAP = (++VRegGapIndex * SkipGapSize); + + unsigned I = virtualVRegNumber; + const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP; + + virtualVRegNumber = E; +} + +unsigned NamedVRegCursor::createVirtualRegister(unsigned VReg) { + if (!virtualVRegNumber) + skipVRegs(); + std::string S; + raw_string_ostream OS(S); + OS << "namedVReg" << (virtualVRegNumber & ~0x80000000); + OS.flush(); + virtualVRegNumber++; + if (auto RC = MRI.getRegClassOrNull(VReg)) + return MRI.createVirtualRegister(RC, OS.str()); + return MRI.createGenericVirtualRegister(MRI.getType(VReg), OS.str()); +} + +bool NamedVRegCursor::renameVRegs(MachineBasicBlock *MBB) { + return ::renameVRegs(MBB, RenamedInOtherBB, *this); +} diff --git a/lib/CodeGen/MIRVRegNamerUtils.h b/lib/CodeGen/MIRVRegNamerUtils.h new file mode 100644 index 000000000000..c5b52a968538 --- /dev/null +++ b/lib/CodeGen/MIRVRegNamerUtils.h @@ -0,0 +1,91 @@ + +//===------------ MIRVRegNamerUtils.h - MIR VReg Renaming Utilities -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The purpose of these utilities is to abstract out parts of the MIRCanon pass +// that are responsible for renaming virtual registers with the purpose of +// sharing code with a MIRVRegNamer pass that could be the analog of the +// opt -instnamer pass.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_MIRVREGNAMERUTILS_H +#define LLVM_LIB_CODEGEN_MIRVREGNAMERUTILS_H + +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/raw_ostream.h" + +#include <queue> + +namespace llvm { + +/// NamedVRegCursor - The cursor is an object that keeps track of what the next +/// vreg name should be. It does bookkeeping to determine when to skip the +/// index value and by how much, or if the next vreg name should be an increment +/// from the previous. +class NamedVRegCursor { + MachineRegisterInfo &MRI; + + /// virtualVRegNumber - Bookkeeping of the last vreg position. + unsigned virtualVRegNumber; + + /// SkipGapSize - Used to calculate a modulo amount to skip by after every + /// sequence of instructions starting from a given side-effecting + /// MachineInstruction for a given MachineBasicBlock. The general idea is that + /// for a given program compiled with two different opt pipelines, there + /// shouldn't be greater than SkipGapSize difference in how many vregs are in + /// play between the two and for every def-use graph of vregs we rename we + /// will round up to the next SkipGapSize'th number so that we have a high + /// chance of landing on the same name for two given matching side-effects + /// for the two compilation outcomes. + const unsigned SkipGapSize; + + /// RenamedInOtherBB - VRegs that we already renamed: i.e. breadcrumbs. + std::vector<Register> RenamedInOtherBB; + +public: + NamedVRegCursor() = delete; + /// 1000 for the SkipGapSize was a good heuristic at the time of the writing + /// of the MIRCanonicalizerPass. Adjust as needed. + NamedVRegCursor(MachineRegisterInfo &MRI, unsigned SkipGapSize = 1000) + : MRI(MRI), virtualVRegNumber(0), SkipGapSize(SkipGapSize) {} + + /// skipVRegs - Skips modulo a gap value of indices. Indices are used to + /// produce the next vreg name. + void skipVRegs(); + + unsigned getVirtualVReg() const { return virtualVRegNumber; } + + /// incrementVirtualVReg - This increments an index value that is used to + /// create a new vreg name. This is not a Register. + unsigned incrementVirtualVReg(unsigned incr = 1) { + virtualVRegNumber += incr; + return virtualVRegNumber; + } + + /// createVirtualRegister - Given an existing vreg, create a named vreg to + /// take its place. + unsigned createVirtualRegister(unsigned VReg); + + /// renameVRegs - For a given MachineBasicBlock, scan for side-effecting + /// instructions, walk the def-use from each side-effecting root (in sorted + /// root order) and rename the encountered vregs in the def-use graph in a + /// canonical ordering. This method maintains bookkeeping for which vregs + /// were already renamed in RenamedInOtherBB.
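A worked example of the rounding that skipVRegs performs (see its definition in MIRVRegNamerUtils.cpp above; this ignores the virtual-register tag bit that the real counter carries): with the default SkipGapSize of 1000, the second call computes VR_GAP = 2000, so a cursor at 12345 jumps to 16000:

    #include <cassert>

    unsigned nextGap(unsigned I, unsigned VR_GAP) {
      return (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP; // round past the next gap
    }

    int main() {
      assert(nextGap(12345, 2000) == 16000); // (14345/2000 + 1) * 2000
      assert(nextGap(500, 1000) == 2000);    // first gap after a fresh cursor
      return 0;
    }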
+ // @return changed + bool renameVRegs(MachineBasicBlock *MBB); +}; + +} // namespace llvm + +#endif diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 4d29e883d879..854bef3aab05 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -39,6 +39,12 @@ using namespace llvm; #define DEBUG_TYPE "codegen" +static cl::opt<bool> PrintSlotIndexes( + "print-slotindexes", + cl::desc("When printing machine IR, annotate instructions and blocks with " + "SlotIndexes when available"), + cl::init(true), cl::Hidden); + MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B) : BB(B), Number(-1), xParent(&MF) { Insts.Parent = this; @@ -291,7 +297,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, return; } - if (Indexes) + if (Indexes && PrintSlotIndexes) OS << Indexes->getMBBStartIdx(this) << '\t'; OS << "bb." << getNumber(); @@ -320,9 +326,9 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "landing-pad"; HasAttributes = true; } - if (getAlignment()) { + if (getAlignment() != Align::None()) { OS << (HasAttributes ? ", " : " ("); - OS << "align " << getAlignment(); + OS << "align " << Log2(getAlignment()); HasAttributes = true; } if (HasAttributes) @@ -402,7 +408,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, bool IsInBundle = false; for (const MachineInstr &MI : instrs()) { - if (Indexes) { + if (Indexes && PrintSlotIndexes) { if (Indexes->hasIndex(MI)) OS << Indexes->getInstructionIndex(MI); OS << '\t'; @@ -484,9 +490,9 @@ void MachineBasicBlock::sortUniqueLiveIns() { } unsigned -MachineBasicBlock::addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC) { +MachineBasicBlock::addLiveIn(MCRegister PhysReg, const TargetRegisterClass *RC) { assert(getParent() && "MBB must be inserted in function"); - assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Expected physreg"); + assert(PhysReg.isPhysical() && "Expected physreg"); assert(RC && "Register class is required"); assert((isEHPad() || this == &getParent()->front()) && "Only the entry block and landing pads can have physreg live ins"); @@ -500,14 +506,14 @@ MachineBasicBlock::addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC) { if (LiveIn) for (;I != E && I->isCopy(); ++I) if (I->getOperand(1).getReg() == PhysReg) { - unsigned VirtReg = I->getOperand(0).getReg(); + Register VirtReg = I->getOperand(0).getReg(); if (!MRI.constrainRegClass(VirtReg, RC)) llvm_unreachable("Incompatible live-in register class."); return VirtReg; } // No luck, create a virtual register. - unsigned VirtReg = MRI.createVirtualRegister(RC); + Register VirtReg = MRI.createVirtualRegister(RC); BuildMI(*this, I, DebugLoc(), TII.get(TargetOpcode::COPY), VirtReg) .addReg(PhysReg, RegState::Kill); if (!LiveIn) @@ -772,7 +778,8 @@ void MachineBasicBlock::transferSuccessors(MachineBasicBlock *FromMBB) { while (!FromMBB->succ_empty()) { MachineBasicBlock *Succ = *FromMBB->succ_begin(); - // If probability list is empty it means we don't use it (disabled optimization). + // If probability list is empty it means we don't use it (disabled + // optimization). if (!FromMBB->Probs.empty()) { auto Prob = *FromMBB->Probs.begin(); addSuccessor(Succ, Prob); @@ -798,13 +805,7 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB) { FromMBB->removeSuccessor(Succ); // Fix up any PHI nodes in the successor. 
- for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(), - ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI) - for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) { - MachineOperand &MO = MI->getOperand(i); - if (MO.getMBB() == FromMBB) - MO.setMBB(this); - } + Succ->replacePhiUsesWith(FromMBB, this); } normalizeSuccProbs(); } @@ -907,8 +908,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, if (!OI->isReg() || OI->getReg() == 0 || !OI->isUse() || !OI->isKill() || OI->isUndef()) continue; - unsigned Reg = OI->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg) || + Register Reg = OI->getReg(); + if (Register::isPhysicalRegister(Reg) || LV->getVarInfo(Reg).removeKill(*MI)) { KilledRegs.push_back(Reg); LLVM_DEBUG(dbgs() << "Removing terminator kill: " << *MI); @@ -928,7 +929,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, if (!OI->isReg() || OI->getReg() == 0) continue; - unsigned Reg = OI->getReg(); + Register Reg = OI->getReg(); if (!is_contained(UsedRegs, Reg)) UsedRegs.push_back(Reg); } @@ -979,13 +980,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, } } - // Fix PHI nodes in Succ so they refer to NMBB instead of this - for (MachineBasicBlock::instr_iterator - i = Succ->instr_begin(),e = Succ->instr_end(); - i != e && i->isPHI(); ++i) - for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2) - if (i->getOperand(ni+1).getMBB() == this) - i->getOperand(ni+1).setMBB(NMBB); + // Fix PHI nodes in Succ so they refer to NMBB instead of this. + Succ->replacePhiUsesWith(this, NMBB); // Inherit live-ins from the successor for (const auto &LI : Succ->liveins()) @@ -1000,7 +996,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) { if (!(--I)->addRegisterKilled(Reg, TRI, /* AddIfNotFound= */ false)) continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) LV->getVarInfo(Reg).Kills.push_back(&*I); LLVM_DEBUG(dbgs() << "Restored terminator kill: " << *I); break; @@ -1033,7 +1029,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) { if (I->getOperand(ni+1).getMBB() == NMBB) { MachineOperand &MO = I->getOperand(ni); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); PHISrcRegs.insert(Reg); if (MO.isUndef()) continue; @@ -1049,7 +1045,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, MachineRegisterInfo *MRI = &getParent()->getRegInfo(); for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg)) continue; @@ -1217,6 +1213,16 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old, replaceSuccessor(Old, New); } +void MachineBasicBlock::replacePhiUsesWith(MachineBasicBlock *Old, + MachineBasicBlock *New) { + for (MachineInstr &MI : phis()) + for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) { + MachineOperand &MO = MI.getOperand(i); + if (MO.getMBB() == Old) + MO.setMBB(New); + } +} + /// Various pieces of code can cause excess edges in the CFG to be inserted. If /// we have proven that MBB can only branch to DestA and DestB, remove any other /// MBB successors from the CFG. 
DestA and DestB can be null. diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 639b588766a1..ac19bc0bd8ea 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -79,16 +79,17 @@ STATISTIC(CondBranchTakenFreq, STATISTIC(UncondBranchTakenFreq, "Potential frequency of taking unconditional branches"); -static cl::opt<unsigned> AlignAllBlock("align-all-blocks", - cl::desc("Force the alignment of all " - "blocks in the function."), - cl::init(0), cl::Hidden); +static cl::opt<unsigned> AlignAllBlock( + "align-all-blocks", + cl::desc("Force the alignment of all blocks in the function in log2 format " + "(e.g 4 means align on 16B boundaries)."), + cl::init(0), cl::Hidden); static cl::opt<unsigned> AlignAllNonFallThruBlocks( "align-all-nofallthru-blocks", - cl::desc("Force the alignment of all " - "blocks that have no fall-through predecessors (i.e. don't add " - "nops that are executed)."), + cl::desc("Force the alignment of all blocks that have no fall-through " + "predecessors (i.e. don't add nops that are executed). In log2 " + "format (e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden); // FIXME: Find a good default for this flag and remove the flag. @@ -2763,8 +2764,8 @@ void MachineBlockPlacement::alignBlocks() { if (!L) continue; - unsigned Align = TLI->getPrefLoopAlignment(L); - if (!Align) + const Align Align = TLI->getPrefLoopAlignment(L); + if (Align == 1) continue; // Don't care about loop alignment. // If the block is cold relative to the function entry don't waste space @@ -2981,7 +2982,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { F = &MF; MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); - MBFI = llvm::make_unique<BranchFolder::MBFIWrapper>( + MBFI = std::make_unique<BranchFolder::MBFIWrapper>( getAnalysis<MachineBlockFrequencyInfo>()); MLI = &getAnalysis<MachineLoopInfo>(); TII = MF.getSubtarget().getInstrInfo(); @@ -3038,8 +3039,9 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI, *MBPI, TailMergeSize); + auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), - getAnalysisIfAvailable<MachineModuleInfo>(), MLI, + MMIWP ? &MMIWP->getMMI() : nullptr, MLI, /*AfterPlacement=*/true)) { // Redo the layout if tail merging creates/removes/moves blocks. BlockToChain.clear(); @@ -3062,14 +3064,14 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (AlignAllBlock) // Align all of the blocks in the function to a specific alignment. for (MachineBasicBlock &MBB : MF) - MBB.setAlignment(AlignAllBlock); + MBB.setAlignment(Align(1ULL << AlignAllBlock)); else if (AlignAllNonFallThruBlocks) { // Align all of the blocks that have no fall-through predecessors to a // specific alignment. 
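Both alignment flags above are now interpreted as log2 values to match the new Align type: a flag value of 4 requests 1 << 4 = 16-byte boundaries, exactly as the updated help text says. A self-contained check of the conversion used in the loop that follows:

    #include <cassert>

    int main() {
      unsigned AlignAllBlock = 4;                           // hypothetical flag value
      unsigned long long Boundary = 1ULL << AlignAllBlock;  // what Align(1ULL << N) wraps
      assert(Boundary == 16);                               // 16B boundaries
      return 0;
    }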
for (auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) { auto LayoutPred = std::prev(MBI); if (!LayoutPred->isSuccessor(&*MBI)) - MBI->setAlignment(AlignAllNonFallThruBlocks); + MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks)); } } if (ViewBlockLayoutWithBFI != GVDT_None && diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 2df6d40d9293..d9bd32b2fbab 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -66,6 +67,7 @@ namespace { AliasAnalysis *AA; MachineDominatorTree *DT; MachineRegisterInfo *MRI; + MachineBlockFrequencyInfo *MBFI; public: static char ID; // Pass identification @@ -83,6 +85,8 @@ namespace { AU.addPreservedID(MachineLoopInfoID); AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); } void releaseMemory() override { @@ -133,6 +137,11 @@ namespace { bool isPRECandidate(MachineInstr *MI); bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB); bool PerformSimplePRE(MachineDominatorTree *DT); + /// Heuristics to see if it's profitable to move common computations of MBB + /// and MBB1 to CandidateBB. + bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB, + MachineBasicBlock *MBB, + MachineBasicBlock *MBB1); }; } // end anonymous namespace @@ -158,15 +167,15 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI, for (MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isUse()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg); MachineInstr *DefMI = MRI->getVRegDef(Reg); if (!DefMI->isCopy()) continue; - unsigned SrcReg = DefMI->getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + Register SrcReg = DefMI->getOperand(1).getReg(); + if (!Register::isVirtualRegister(SrcReg)) continue; if (DefMI->getOperand(0).getSubReg()) continue; @@ -189,14 +198,16 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI, LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI); LLVM_DEBUG(dbgs() << "*** to: " << *MI); - // Update matching debug values. - DefMI->changeDebugValuesDefReg(SrcReg); - // Propagate SrcReg of copies to MI. MO.setReg(SrcReg); MRI->clearKillFlags(SrcReg); // Coalesce single use copies. if (OnlyOneUse) { + // If (and only if) we've eliminated all uses of the copy, also + // copy-propagate to any debug-users of MI, or they'll be left using + // an undefined value. + DefMI->changeDebugValuesDefReg(SrcReg); + DefMI->eraseFromParent(); ++NumCoalesces; } @@ -271,10 +282,10 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) continue; // Reading either caller preserved or constant physregs is ok. 
if (!isCallerPreservedOrConstPhysReg(Reg, *MI->getMF(), *TRI)) @@ -290,10 +301,10 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, const MachineOperand &MO = MOP.value(); if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) continue; // Check against PhysRefs even if the def is "dead". if (PhysRefs.count(Reg)) @@ -367,8 +378,8 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, return false; if (!MO.isReg() || !MO.isDef()) continue; - unsigned MOReg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(MOReg)) + Register MOReg = MO.getReg(); + if (Register::isVirtualRegister(MOReg)) continue; if (PhysRefs.count(MOReg)) return false; @@ -424,8 +435,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, // If CSReg is used at all uses of Reg, CSE should not increase register // pressure of CSReg. bool MayIncreasePressure = true; - if (TargetRegisterInfo::isVirtualRegister(CSReg) && - TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(CSReg) && Register::isVirtualRegister(Reg)) { MayIncreasePressure = false; SmallPtrSet<MachineInstr*, 8> CSUses; for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) { @@ -453,8 +463,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, // of the redundant computation are copies, do not cse. bool HasVRegUse = false; for (const MachineOperand &MO : MI->operands()) { - if (MO.isReg() && MO.isUse() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (MO.isReg() && MO.isUse() && Register::isVirtualRegister(MO.getReg())) { HasVRegUse = true; break; } @@ -586,8 +595,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - unsigned OldReg = MO.getReg(); - unsigned NewReg = CSMI->getOperand(i).getReg(); + Register OldReg = MO.getReg(); + Register NewReg = CSMI->getOperand(i).getReg(); // Go through implicit defs of CSMI and MI, if a def is not dead at MI, // we should make sure it is not dead at CSMI. @@ -604,8 +613,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) { continue; } - assert(TargetRegisterInfo::isVirtualRegister(OldReg) && - TargetRegisterInfo::isVirtualRegister(NewReg) && + assert(Register::isVirtualRegister(OldReg) && + Register::isVirtualRegister(NewReg) && "Do not CSE physical register defs!"); if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) { @@ -769,11 +778,11 @@ bool MachineCSE::isPRECandidate(MachineInstr *MI) { return false; for (auto def : MI->defs()) - if (!TRI->isVirtualRegister(def.getReg())) + if (!Register::isVirtualRegister(def.getReg())) return false; for (auto use : MI->uses()) - if (use.isReg() && !TRI->isVirtualRegister(use.getReg())) + if (use.isReg() && !Register::isVirtualRegister(use.getReg())) return false; return true; @@ -802,6 +811,9 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT, if (!CMBB->isLegalToHoistInto()) continue; + if (!isProfitableToHoistInto(CMBB, MBB, MBB1)) + continue; + // Two instrs are partial redundant if their basic blocks are reachable // from one to another but one doesn't dominate another. 
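
The new isProfitableToHoistInto gate (its definition appears later in this file's diff) is a block-frequency heuristic: hoisting into CandidateBB only pays off when that block executes no more often than the two blocks currently doing the computation. A minimal sketch of the comparison, assuming MBFI and the three blocks are in scope:

  // BlockFrequency supports + and <= directly, so no manual scaling is needed.
  bool Profitable = MBFI->getBlockFreq(CandidateBB) <=
                    MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);

The partial-redundancy test that the comment above describes continues below.
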
if (CMBB != MBB1) { @@ -812,8 +824,8 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT, assert(MI->getOperand(0).isDef() && "First operand of instr with one explicit def must be this def"); - unsigned VReg = MI->getOperand(0).getReg(); - unsigned NewReg = MRI->cloneVirtualRegister(VReg); + Register VReg = MI->getOperand(0).getReg(); + Register NewReg = MRI->cloneVirtualRegister(VReg); if (!isProfitableToCSE(NewReg, VReg, CMBB, MI)) continue; MachineInstr &NewMI = @@ -854,6 +866,18 @@ bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) { return Changed; } +bool MachineCSE::isProfitableToHoistInto(MachineBasicBlock *CandidateBB, + MachineBasicBlock *MBB, + MachineBasicBlock *MBB1) { + if (CandidateBB->getParent()->getFunction().hasMinSize()) + return true; + assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB"); + assert(DT->dominates(CandidateBB, MBB1) && + "CandidateBB should dominate MBB1"); + return MBFI->getBlockFreq(CandidateBB) <= + MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1); +} + bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -863,6 +887,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); DT = &getAnalysis<MachineDominatorTree>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); LookAheadLimit = TII->getMachineCSELookAheadLimit(); bool ChangedPRE, ChangedCSE; ChangedPRE = PerformSimplePRE(DT); diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp index 0584ec0bd2b3..e9f462fd1b37 100644 --- a/lib/CodeGen/MachineCombiner.cpp +++ b/lib/CodeGen/MachineCombiner.cpp @@ -137,7 +137,7 @@ void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const { MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) { MachineInstr *DefInstr = nullptr; // We need a virtual register definition. - if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) DefInstr = MRI->getUniqueVRegDef(MO.getReg()); // PHI's have no depth etc. if (DefInstr && DefInstr->isPHI()) @@ -168,7 +168,7 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IDepth = 0; for (const MachineOperand &MO : InstrPtr->operands()) { // Check for virtual register operand. - if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) + if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg()))) continue; if (!MO.isUse()) continue; @@ -223,7 +223,7 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, for (const MachineOperand &MO : NewRoot->operands()) { // Check for virtual register operand. - if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) + if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg()))) continue; if (!MO.isDef()) continue; diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index 9fc12ac89e12..ebe76e31dca9 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -119,8 +119,8 @@ public: void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI) { assert(MI->isCopy() && "Tracking non-copy?"); - unsigned Def = MI->getOperand(0).getReg(); - unsigned Src = MI->getOperand(1).getReg(); + Register Def = MI->getOperand(0).getReg(); + Register Src = MI->getOperand(1).getReg(); // Remember Def is defined by the copy. 
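
Keying the tracker by register unit rather than by register lets aliasing registers invalidate each other for free. A minimal sketch of the bookkeeping the loop below performs, with a hypothetical map standing in for the tracker's real state:

  llvm::DenseMap<unsigned, MachineInstr *> CopyForUnit; // hypothetical tracker state
  for (MCRegUnitIterator RUI(Def, &TRI); RUI.isValid(); ++RUI)
    CopyForUnit[*RUI] = MI; // every unit of Def now maps to this COPY
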
for (MCRegUnitIterator RUI(Def, &TRI); RUI.isValid(); ++RUI) @@ -163,8 +163,8 @@ public: // Check that the available copy isn't clobbered by any regmasks between // itself and the destination. - unsigned AvailSrc = AvailCopy->getOperand(1).getReg(); - unsigned AvailDef = AvailCopy->getOperand(0).getReg(); + Register AvailSrc = AvailCopy->getOperand(1).getReg(); + Register AvailDef = AvailCopy->getOperand(0).getReg(); for (const MachineInstr &MI : make_range(AvailCopy->getIterator(), DestCopy.getIterator())) for (const MachineOperand &MO : MI.operands()) @@ -205,8 +205,11 @@ public: } private: + typedef enum { DebugUse = false, RegularUse = true } DebugType; + void ClobberRegister(unsigned Reg); - void ReadRegister(unsigned Reg); + void ReadRegister(unsigned Reg, MachineInstr &Reader, + DebugType DT); void CopyPropagateBlock(MachineBasicBlock &MBB); bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def); void forwardUses(MachineInstr &MI); @@ -217,6 +220,9 @@ private: /// Candidates for deletion. SmallSetVector<MachineInstr *, 8> MaybeDeadCopies; + /// Multimap tracking debug users in current BB + DenseMap<MachineInstr*, SmallVector<MachineInstr*, 2>> CopyDbgUsers; + CopyTracker Tracker; bool Changed; @@ -231,13 +237,19 @@ char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID; INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE, "Machine Copy Propagation Pass", false, false) -void MachineCopyPropagation::ReadRegister(unsigned Reg) { +void MachineCopyPropagation::ReadRegister(unsigned Reg, MachineInstr &Reader, + DebugType DT) { // If 'Reg' is defined by a copy, the copy is no longer a candidate - // for elimination. + // for elimination. If a copy is "read" by a debug user, record the user + // for propagation. for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) { if (MachineInstr *Copy = Tracker.findCopyForUnit(*RUI, *TRI)) { - LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: "; Copy->dump()); - MaybeDeadCopies.remove(Copy); + if (DT == RegularUse) { + LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: "; Copy->dump()); + MaybeDeadCopies.remove(Copy); + } else { + CopyDbgUsers[Copy].push_back(&Reader); + } } } } @@ -250,8 +262,8 @@ void MachineCopyPropagation::ReadRegister(unsigned Reg) { /// isNopCopy("ecx = COPY eax", AH, CL) == false static bool isNopCopy(const MachineInstr &PreviousCopy, unsigned Src, unsigned Def, const TargetRegisterInfo *TRI) { - unsigned PreviousSrc = PreviousCopy.getOperand(1).getReg(); - unsigned PreviousDef = PreviousCopy.getOperand(0).getReg(); + Register PreviousSrc = PreviousCopy.getOperand(1).getReg(); + Register PreviousDef = PreviousCopy.getOperand(0).getReg(); if (Src == PreviousSrc) { assert(Def == PreviousDef); return true; @@ -288,7 +300,7 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src, // Copy was redundantly redefining either Src or Def. Remove earlier kill // flags between Copy and PrevCopy because the value will be reused now. 
assert(Copy.isCopy()); - unsigned CopyDef = Copy.getOperand(0).getReg(); + Register CopyDef = Copy.getOperand(0).getReg(); assert(CopyDef == Src || CopyDef == Def); for (MachineInstr &MI : make_range(PrevCopy->getIterator(), Copy.getIterator())) @@ -307,7 +319,7 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) { - unsigned CopySrcReg = Copy.getOperand(1).getReg(); + Register CopySrcReg = Copy.getOperand(1).getReg(); // If the new register meets the opcode register constraints, then allow // forwarding. @@ -398,9 +410,9 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) { if (!Copy) continue; - unsigned CopyDstReg = Copy->getOperand(0).getReg(); + Register CopyDstReg = Copy->getOperand(0).getReg(); const MachineOperand &CopySrc = Copy->getOperand(1); - unsigned CopySrcReg = CopySrc.getReg(); + Register CopySrcReg = CopySrc.getReg(); // FIXME: Don't handle partial uses of wider COPYs yet. if (MOUse.getReg() != CopyDstReg) { @@ -456,11 +468,11 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // Analyze copies (which don't overlap themselves). if (MI->isCopy() && !TRI->regsOverlap(MI->getOperand(0).getReg(), MI->getOperand(1).getReg())) { - unsigned Def = MI->getOperand(0).getReg(); - unsigned Src = MI->getOperand(1).getReg(); + Register Def = MI->getOperand(0).getReg(); + Register Src = MI->getOperand(1).getReg(); - assert(!TargetRegisterInfo::isVirtualRegister(Def) && - !TargetRegisterInfo::isVirtualRegister(Src) && + assert(!Register::isVirtualRegister(Def) && + !Register::isVirtualRegister(Src) && "MachineCopyPropagation should be run after register allocation!"); // The two copies cancel out and the source of the first copy @@ -488,14 +500,14 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // If Src is defined by a previous copy, the previous copy cannot be // eliminated. - ReadRegister(Src); + ReadRegister(Src, *MI, RegularUse); for (const MachineOperand &MO : MI->implicit_operands()) { if (!MO.isReg() || !MO.readsReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - ReadRegister(Reg); + ReadRegister(Reg, *MI, RegularUse); } LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump()); @@ -515,7 +527,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { for (const MachineOperand &MO : MI->implicit_operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; Tracker.clobberRegister(Reg, *TRI); @@ -529,12 +541,12 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // Clobber any earlyclobber regs first. for (const MachineOperand &MO : MI->operands()) if (MO.isReg() && MO.isEarlyClobber()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // If we have a tied earlyclobber, that means it is also read by this // instruction, so we need to make sure we don't remove it as dead // later. 
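
The CopyDbgUsers map introduced above exists so that deleting a dead copy does not orphan its debug users: DBG_VALUEs reading the copy's def are recorded during the scan and retargeted to the copy's source when the copy is erased (see the updateDbgUsersToReg call later in this file's diff). A hedged before/after illustration in MIR-like notation:

  $ebx = COPY $eax        ; dead apart from its debug user
  DBG_VALUE $ebx, ...     ; recorded in CopyDbgUsers[copy]
  ; after the copy is erased:
  DBG_VALUE $eax, ...     ; user rewritten to read the copy source

The tied-earlyclobber handling that the comment above describes follows.
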
if (MO.isTied()) - ReadRegister(Reg); + ReadRegister(Reg, *MI, RegularUse); Tracker.clobberRegister(Reg, *TRI); } @@ -548,18 +560,18 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { RegMask = &MO; if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - assert(!TargetRegisterInfo::isVirtualRegister(Reg) && + assert(!Register::isVirtualRegister(Reg) && "MachineCopyPropagation should be run after register allocation!"); if (MO.isDef() && !MO.isEarlyClobber()) { Defs.push_back(Reg); continue; - } else if (!MO.isDebug() && MO.readsReg()) - ReadRegister(Reg); + } else if (MO.readsReg()) + ReadRegister(Reg, *MI, MO.isDebug() ? DebugUse : RegularUse); } // The instruction has a register mask operand which means that it clobbers @@ -571,7 +583,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { MaybeDeadCopies.begin(); DI != MaybeDeadCopies.end();) { MachineInstr *MaybeDead = *DI; - unsigned Reg = MaybeDead->getOperand(0).getReg(); + Register Reg = MaybeDead->getOperand(0).getReg(); assert(!MRI->isReserved(Reg)); if (!RegMask->clobbersPhysReg(Reg)) { @@ -609,9 +621,10 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { MaybeDead->dump()); assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg())); - // Update matching debug values. + // Update matching debug values, if any. assert(MaybeDead->isCopy()); - MaybeDead->changeDebugValuesDefReg(MaybeDead->getOperand(1).getReg()); + unsigned SrcReg = MaybeDead->getOperand(1).getReg(); + MRI->updateDbgUsersToReg(SrcReg, CopyDbgUsers[MaybeDead]); MaybeDead->eraseFromParent(); Changed = true; @@ -620,6 +633,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { } MaybeDeadCopies.clear(); + CopyDbgUsers.clear(); Tracker.clear(); } diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp index 1dfba8638c22..706c706d7527 100644 --- a/lib/CodeGen/MachineDominators.cpp +++ b/lib/CodeGen/MachineDominators.cpp @@ -18,12 +18,15 @@ using namespace llvm; +namespace llvm { // Always verify dominfo if expensive checking is enabled. 
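
The verifyAnalysis rewrite later in this file's diff drops the hand-rolled comparison against a freshly recomputed tree in favor of the dominator tree's own checker. A minimal sketch of the new verification path, assuming DT points at the pass's DomTreeBase<MachineBasicBlock>:

  if (VerifyMachineDomInfo &&
      !DT->verify(DomTreeBase<MachineBasicBlock>::VerificationLevel::Basic)) {
    errs() << "MachineDominatorTree verification failed\n";
    abort();
  }

The EXPENSIVE_CHECKS default that the comment above mentions follows.
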
#ifdef EXPENSIVE_CHECKS -static bool VerifyMachineDomInfo = true; +bool VerifyMachineDomInfo = true; #else -static bool VerifyMachineDomInfo = false; +bool VerifyMachineDomInfo = false; #endif +} // namespace llvm + static cl::opt<bool, true> VerifyMachineDomInfoX( "verify-machine-dom-info", cl::location(VerifyMachineDomInfo), cl::Hidden, cl::desc("Verify machine dominator info (time consuming)")); @@ -64,21 +67,11 @@ void MachineDominatorTree::releaseMemory() { } void MachineDominatorTree::verifyAnalysis() const { - if (DT && VerifyMachineDomInfo) { - MachineFunction &F = *getRoot()->getParent(); - - DomTreeBase<MachineBasicBlock> OtherDT; - OtherDT.recalculate(F); - if (getRootNode()->getBlock() != OtherDT.getRootNode()->getBlock() || - DT->compare(OtherDT)) { - errs() << "MachineDominatorTree for function " << F.getName() - << " is not up to date!\nComputed:\n"; - DT->print(errs()); - errs() << "\nActual:\n"; - OtherDT.print(errs()); + if (DT && VerifyMachineDomInfo) + if (!DT->verify(DomTreeT::VerificationLevel::Basic)) { + errs() << "MachineDominatorTree verification failed\n"; abort(); } - } } void MachineDominatorTree::print(raw_ostream &OS, const Module*) const { diff --git a/lib/CodeGen/MachineFrameInfo.cpp b/lib/CodeGen/MachineFrameInfo.cpp index bae3a4333bda..604f5145b1a0 100644 --- a/lib/CodeGen/MachineFrameInfo.cpp +++ b/lib/CodeGen/MachineFrameInfo.cpp @@ -28,25 +28,26 @@ using namespace llvm; -void MachineFrameInfo::ensureMaxAlignment(unsigned Align) { +void MachineFrameInfo::ensureMaxAlignment(Align Alignment) { if (!StackRealignable) - assert(Align <= StackAlignment && - "For targets without stack realignment, Align is out of limit!"); - if (MaxAlignment < Align) MaxAlignment = Align; + assert(Alignment <= StackAlignment && + "For targets without stack realignment, Alignment is out of limit!"); + if (MaxAlignment < Alignment) + MaxAlignment = Alignment; } /// Clamp the alignment if requested and emit a warning. 
-static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, - unsigned StackAlign) { - if (!ShouldClamp || Align <= StackAlign) - return Align; - LLVM_DEBUG(dbgs() << "Warning: requested alignment " << Align - << " exceeds the stack alignment " << StackAlign +static inline Align clampStackAlignment(bool ShouldClamp, Align Alignment, + Align StackAlignment) { + if (!ShouldClamp || Alignment <= StackAlignment) + return Alignment; + LLVM_DEBUG(dbgs() << "Warning: requested alignment " << Alignment.value() + << " exceeds the stack alignment " << StackAlignment.value() << " when stack realignment is off" << '\n'); - return StackAlign; + return StackAlignment; } -int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, +int MachineFrameInfo::CreateStackObject(uint64_t Size, Align Alignment, bool IsSpillSlot, const AllocaInst *Alloca, uint8_t StackID) { @@ -61,8 +62,7 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, return Index; } -int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, - unsigned Alignment) { +int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, Align Alignment) { Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); CreateStackObject(Size, Alignment, true); int Index = (int)Objects.size() - NumFixedObjects - 1; @@ -70,7 +70,7 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, return Index; } -int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, +int MachineFrameInfo::CreateVariableSizedObject(Align Alignment, const AllocaInst *Alloca) { HasVarSizedObjects = true; Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); @@ -88,7 +88,8 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, // object is 16-byte aligned. Note that unlike the non-fixed case, if the // stack needs realignment, we can't assume that the stack will in fact be // aligned. - unsigned Alignment = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); + Align Alignment = + commonAlignment(ForcedRealign ? Align::None() : StackAlignment, SPOffset); Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); Objects.insert(Objects.begin(), StackObject(Size, Alignment, SPOffset, IsImmutable, @@ -100,7 +101,8 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, bool IsImmutable) { - unsigned Alignment = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); + Align Alignment = + commonAlignment(ForcedRealign ? 
Align::None() : StackAlignment, SPOffset); Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); Objects.insert(Objects.begin(), StackObject(Size, Alignment, SPOffset, IsImmutable, @@ -232,7 +234,7 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ OS << "variable sized"; else OS << "size=" << SO.Size; - OS << ", align=" << SO.Alignment; + OS << ", align=" << SO.Alignment.value(); if (i < NumFixedObjects) OS << ", fixed"; diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 4df5ce2dcedc..7d2ee230ca9f 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -78,10 +78,11 @@ using namespace llvm; #define DEBUG_TYPE "codegen" -static cl::opt<unsigned> -AlignAllFunctions("align-all-functions", - cl::desc("Force the alignment of all functions."), - cl::init(0), cl::Hidden); +static cl::opt<unsigned> AlignAllFunctions( + "align-all-functions", + cl::desc("Force the alignment of all functions in log2 format (e.g. 4 " + "means align on 16B boundaries)."), + cl::init(0), cl::Hidden); static const char *getPropertyName(MachineFunctionProperties::Property Prop) { using P = MachineFunctionProperties::Property; @@ -181,7 +182,7 @@ void MachineFunction::init() { STI->getTargetLowering()->getPrefFunctionAlignment()); if (AlignAllFunctions) - Alignment = AlignAllFunctions; + Alignment = Align(1ULL << AlignAllFunctions); JumpTableInfo = nullptr; @@ -200,7 +201,7 @@ void MachineFunction::init() { "Target-incompatible DataLayout attached\n"); PSVManager = - llvm::make_unique<PseudoSourceValueManager>(*(getSubtarget(). + std::make_unique<PseudoSourceValueManager>(*(getSubtarget(). getInstrInfo())); } @@ -823,30 +824,47 @@ try_next:; return FilterID; } -void MachineFunction::addCodeViewHeapAllocSite(MachineInstr *I, MDNode *MD) { +void MachineFunction::addCodeViewHeapAllocSite(MachineInstr *I, + const MDNode *MD) { MCSymbol *BeginLabel = Ctx.createTempSymbol("heapallocsite", true); MCSymbol *EndLabel = Ctx.createTempSymbol("heapallocsite", true); I->setPreInstrSymbol(*this, BeginLabel); I->setPostInstrSymbol(*this, EndLabel); - DIType *DI = dyn_cast<DIType>(MD); + const DIType *DI = dyn_cast<DIType>(MD); CodeViewHeapAllocSites.push_back(std::make_tuple(BeginLabel, EndLabel, DI)); } -void MachineFunction::updateCallSiteInfo(const MachineInstr *Old, - const MachineInstr *New) { - if (!Target.Options.EnableDebugEntryValues || Old == New) - return; +void MachineFunction::moveCallSiteInfo(const MachineInstr *Old, + const MachineInstr *New) { + assert(New->isCall() && "Call site info refers only to call instructions!"); - assert(Old->isCall() && (!New || New->isCall()) && - "Call site info referes only to call instructions!"); - CallSiteInfoMap::iterator CSIt = CallSitesInfo.find(Old); + CallSiteInfoMap::iterator CSIt = getCallSiteInfo(Old); if (CSIt == CallSitesInfo.end()) return; + CallSiteInfo CSInfo = std::move(CSIt->second); CallSitesInfo.erase(CSIt); - if (New) - CallSitesInfo[New] = CSInfo; + CallSitesInfo[New] = CSInfo; +} + +void MachineFunction::eraseCallSiteInfo(const MachineInstr *MI) { + CallSiteInfoMap::iterator CSIt = getCallSiteInfo(MI); + if (CSIt == CallSitesInfo.end()) + return; + CallSitesInfo.erase(CSIt); +} + +void MachineFunction::copyCallSiteInfo(const MachineInstr *Old, + const MachineInstr *New) { + assert(New->isCall() && "Call site info refers only to call instructions!"); + + CallSiteInfoMap::iterator CSIt = getCallSiteInfo(Old); + if (CSIt == CallSitesInfo.end()) + 
return; + + CallSiteInfo CSInfo = CSIt->second; + CallSitesInfo[New] = CSInfo; } /// \} @@ -881,13 +899,13 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const { // alignment. switch (getEntryKind()) { case MachineJumpTableInfo::EK_BlockAddress: - return TD.getPointerABIAlignment(0); + return TD.getPointerABIAlignment(0).value(); case MachineJumpTableInfo::EK_GPRel64BlockAddress: - return TD.getABIIntegerTypeAlignment(64); + return TD.getABIIntegerTypeAlignment(64).value(); case MachineJumpTableInfo::EK_GPRel32BlockAddress: case MachineJumpTableInfo::EK_LabelDifference32: case MachineJumpTableInfo::EK_Custom32: - return TD.getABIIntegerTypeAlignment(32); + return TD.getABIIntegerTypeAlignment(32).value(); case MachineJumpTableInfo::EK_Inline: return 1; } diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp index 0da4cf3fc90c..03149aa7db4a 100644 --- a/lib/CodeGen/MachineFunctionPass.cpp +++ b/lib/CodeGen/MachineFunctionPass.cpp @@ -41,7 +41,7 @@ bool MachineFunctionPass::runOnFunction(Function &F) { if (F.hasAvailableExternallyLinkage()) return false; - MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>(); + MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); MachineFunction &MF = MMI.getOrCreateMachineFunction(F); MachineFunctionProperties &MFProps = MF.getProperties(); @@ -101,8 +101,8 @@ bool MachineFunctionPass::runOnFunction(Function &F) { } void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<MachineModuleInfo>(); - AU.addPreserved<MachineModuleInfo>(); + AU.addRequired<MachineModuleInfoWrapperPass>(); + AU.addPreserved<MachineModuleInfoWrapperPass>(); // MachineFunctionPass preserves all LLVM IR passes, but there's no // high-level way to express this. 
Instead, just list a bunch of diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index e5c398a2d10c..fec20b2b1a05 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -636,8 +636,8 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other, if (Check == IgnoreDefs) continue; else if (Check == IgnoreVRegDefs) { - if (!TargetRegisterInfo::isVirtualRegister(MO.getReg()) || - !TargetRegisterInfo::isVirtualRegister(OMO.getReg())) + if (!Register::isVirtualRegister(MO.getReg()) || + !Register::isVirtualRegister(OMO.getReg())) if (!MO.isIdenticalTo(OMO)) return false; } else { @@ -692,8 +692,8 @@ void MachineInstr::eraseFromParentAndMarkDBGValuesForRemoval() { for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Reg.isVirtual()) continue; MRI.markUsesInDebugValueAsUndef(Reg); } @@ -832,6 +832,10 @@ const DIExpression *MachineInstr::getDebugExpression() const { return cast<DIExpression>(getOperand(3).getMetadata()); } +bool MachineInstr::isDebugEntryValue() const { + return isDebugValue() && getDebugExpression()->isEntryValue(); +} + const TargetRegisterClass* MachineInstr::getRegClassConstraint(unsigned OpIdx, const TargetInstrInfo *TII, @@ -873,7 +877,7 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, } const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg( - unsigned Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII, + Register Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ExploreBundle) const { // Check every operands inside the bundle if we have // been asked to. @@ -890,7 +894,7 @@ const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg( } const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVRegImpl( - unsigned OpIdx, unsigned Reg, const TargetRegisterClass *CurRC, + unsigned OpIdx, Register Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const { assert(CurRC && "Invalid initial register class"); // Check if Reg is constrained by some of its use/def from MI. @@ -933,7 +937,7 @@ unsigned MachineInstr::getBundleSize() const { /// Returns true if the MachineInstr has an implicit-use operand of exactly /// the given register (not considering sub/super-registers). -bool MachineInstr::hasRegisterImplicitUseOperand(unsigned Reg) const { +bool MachineInstr::hasRegisterImplicitUseOperand(Register Reg) const { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == Reg) @@ -946,12 +950,12 @@ bool MachineInstr::hasRegisterImplicitUseOperand(unsigned Reg) const { /// the specific register or -1 if it is not found. It further tightens /// the search criteria to a use that kills the register if isKill is true. 
int MachineInstr::findRegisterUseOperandIdx( - unsigned Reg, bool isKill, const TargetRegisterInfo *TRI) const { + Register Reg, bool isKill, const TargetRegisterInfo *TRI) const { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; if (MOReg == Reg || (TRI && Reg && MOReg && TRI->regsOverlap(MOReg, Reg))) @@ -965,7 +969,7 @@ int MachineInstr::findRegisterUseOperandIdx( /// indicating if this instruction reads or writes Reg. This also considers /// partial defines. std::pair<bool,bool> -MachineInstr::readsWritesVirtualRegister(unsigned Reg, +MachineInstr::readsWritesVirtualRegister(Register Reg, SmallVectorImpl<unsigned> *Ops) const { bool PartDef = false; // Partial redefine. bool FullDef = false; // Full define. @@ -994,9 +998,9 @@ MachineInstr::readsWritesVirtualRegister(unsigned Reg, /// that are not dead are skipped. If TargetRegisterInfo is non-null, then it /// also checks if there is a def of a super-register. int -MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap, +MachineInstr::findRegisterDefOperandIdx(Register Reg, bool isDead, bool Overlap, const TargetRegisterInfo *TRI) const { - bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg); + bool isPhys = Register::isPhysicalRegister(Reg); for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); // Accept regmask operands when Overlap is set. @@ -1005,10 +1009,9 @@ MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap, return i; if (!MO.isReg() || !MO.isDef()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); bool Found = (MOReg == Reg); - if (!Found && TRI && isPhys && - TargetRegisterInfo::isPhysicalRegister(MOReg)) { + if (!Found && TRI && isPhys && Register::isPhysicalRegister(MOReg)) { if (Overlap) Found = TRI->regsOverlap(MOReg, Reg); else @@ -1142,10 +1145,10 @@ void MachineInstr::clearKillInfo() { } } -void MachineInstr::substituteRegister(unsigned FromReg, unsigned ToReg, +void MachineInstr::substituteRegister(Register FromReg, Register ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo) { - if (TargetRegisterInfo::isPhysicalRegister(ToReg)) { + if (Register::isPhysicalRegister(ToReg)) { if (SubIdx) ToReg = RegInfo.getSubReg(ToReg, SubIdx); for (MachineOperand &MO : operands()) { @@ -1165,7 +1168,7 @@ void MachineInstr::substituteRegister(unsigned FromReg, unsigned ToReg, /// isSafeToMove - Return true if it is safe to move this instruction. If /// SawStore is set to true, it means that there is a store (or call) between /// the instruction's location and its intended destination. -bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const { +bool MachineInstr::isSafeToMove(AAResults *AA, bool &SawStore) const { // Ignore stuff that we obviously can't move. // // Treat volatile loads as stores. 
This is not strictly necessary for @@ -1194,7 +1197,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const { return true; } -bool MachineInstr::mayAlias(AliasAnalysis *AA, const MachineInstr &Other, +bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other, bool UseTBAA) const { const MachineFunction *MF = getMF(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -1206,7 +1209,7 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, const MachineInstr &Other, return false; // Let the target decide if memory accesses cannot possibly overlap. - if (TII->areMemAccessesTriviallyDisjoint(*this, Other, AA)) + if (TII->areMemAccessesTriviallyDisjoint(*this, Other)) return false; // FIXME: Need to handle multiple memory operands to support all targets. @@ -1312,7 +1315,7 @@ bool MachineInstr::hasOrderedMemoryRef() const { /// isDereferenceableInvariantLoad - Return true if this instruction will never /// trap and is loading from a location whose value is invariant across a run of /// this function. -bool MachineInstr::isDereferenceableInvariantLoad(AliasAnalysis *AA) const { +bool MachineInstr::isDereferenceableInvariantLoad(AAResults *AA) const { // If the instruction doesn't load at all, it isn't an invariant load. if (!mayLoad()) return false; @@ -1364,7 +1367,7 @@ unsigned MachineInstr::isConstantValuePHI() const { assert(getNumOperands() >= 3 && "It's illegal to have a PHI without source operands"); - unsigned Reg = getOperand(1).getReg(); + Register Reg = getOperand(1).getReg(); for (unsigned i = 3, e = getNumOperands(); i < e; i += 2) if (getOperand(i).getReg() != Reg) return 0; @@ -1726,7 +1729,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, MFI = &MF->getFrameInfo(); Context = &MF->getFunction().getContext(); } else { - CtxPtr = llvm::make_unique<LLVMContext>(); + CtxPtr = std::make_unique<LLVMContext>(); Context = CtxPtr.get(); } @@ -1780,10 +1783,10 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << '\n'; } -bool MachineInstr::addRegisterKilled(unsigned IncomingReg, +bool MachineInstr::addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound) { - bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg); + bool isPhysReg = Register::isPhysicalRegister(IncomingReg); bool hasAliases = isPhysReg && MCRegAliasIterator(IncomingReg, RegInfo, false).isValid(); bool Found = false; @@ -1799,7 +1802,7 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, if (MO.isDebug()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; @@ -1814,8 +1817,7 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, MO.setIsKill(); Found = true; } - } else if (hasAliases && MO.isKill() && - TargetRegisterInfo::isPhysicalRegister(Reg)) { + } else if (hasAliases && MO.isKill() && Register::isPhysicalRegister(Reg)) { // A super-register kill already exists. 
if (RegInfo->isSuperRegister(IncomingReg, Reg)) return true; @@ -1847,23 +1849,23 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, return Found; } -void MachineInstr::clearRegisterKills(unsigned Reg, +void MachineInstr::clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo) { - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + if (!Register::isPhysicalRegister(Reg)) RegInfo = nullptr; for (MachineOperand &MO : operands()) { if (!MO.isReg() || !MO.isUse() || !MO.isKill()) continue; - unsigned OpReg = MO.getReg(); + Register OpReg = MO.getReg(); if ((RegInfo && RegInfo->regsOverlap(Reg, OpReg)) || Reg == OpReg) MO.setIsKill(false); } } -bool MachineInstr::addRegisterDead(unsigned Reg, +bool MachineInstr::addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound) { - bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(Reg); + bool isPhysReg = Register::isPhysicalRegister(Reg); bool hasAliases = isPhysReg && MCRegAliasIterator(Reg, RegInfo, false).isValid(); bool Found = false; @@ -1872,7 +1874,7 @@ bool MachineInstr::addRegisterDead(unsigned Reg, MachineOperand &MO = getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; @@ -1880,7 +1882,7 @@ bool MachineInstr::addRegisterDead(unsigned Reg, MO.setIsDead(); Found = true; } else if (hasAliases && MO.isDead() && - TargetRegisterInfo::isPhysicalRegister(MOReg)) { + Register::isPhysicalRegister(MOReg)) { // There exists a super-register that's marked dead. if (RegInfo->isSuperRegister(Reg, MOReg)) return true; @@ -1913,7 +1915,7 @@ bool MachineInstr::addRegisterDead(unsigned Reg, return true; } -void MachineInstr::clearRegisterDeads(unsigned Reg) { +void MachineInstr::clearRegisterDeads(Register Reg) { for (MachineOperand &MO : operands()) { if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg) continue; @@ -1921,7 +1923,7 @@ void MachineInstr::clearRegisterDeads(unsigned Reg) { } } -void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) { +void MachineInstr::setRegisterDefReadUndef(Register Reg, bool IsUndef) { for (MachineOperand &MO : operands()) { if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0) continue; @@ -1929,9 +1931,9 @@ void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) { } } -void MachineInstr::addRegisterDefined(unsigned Reg, +void MachineInstr::addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { MachineOperand *MO = findRegisterDefOperand(Reg, false, false, RegInfo); if (MO) return; @@ -1947,7 +1949,7 @@ void MachineInstr::addRegisterDefined(unsigned Reg, true /*IsImp*/)); } -void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs, +void MachineInstr::setPhysRegsDeadExcept(ArrayRef<Register> UsedRegs, const TargetRegisterInfo &TRI) { bool HasRegMask = false; for (MachineOperand &MO : operands()) { @@ -1956,18 +1958,19 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs, continue; } if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (!Reg.isPhysical()) + continue; // If there are no uses, including partial uses, the def is dead. 
if (llvm::none_of(UsedRegs, - [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); })) + [&](MCRegister Use) { return TRI.regsOverlap(Use, Reg); })) MO.setIsDead(); } // This is a call with a register mask operand. // Mask clobbers are always dead, so add defs for the non-dead defines. if (HasRegMask) - for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end(); + for (ArrayRef<Register>::iterator I = UsedRegs.begin(), E = UsedRegs.end(); I != E; ++I) addRegisterDefined(*I, &TRI); } @@ -1979,8 +1982,7 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { HashComponents.reserve(MI->getNumOperands() + 1); HashComponents.push_back(MI->getOpcode()); for (const MachineOperand &MO : MI->operands()) { - if (MO.isReg() && MO.isDef() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (MO.isReg() && MO.isDef() && Register::isVirtualRegister(MO.getReg())) continue; // Skip virtual register defs. HashComponents.push_back(hash_value(MO)); @@ -2012,7 +2014,7 @@ void MachineInstr::emitError(StringRef Msg) const { MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID, bool IsIndirect, - unsigned Reg, const MDNode *Variable, + Register Reg, const MDNode *Variable, const MDNode *Expr) { assert(isa<DILocalVariable>(Variable) && "not a variable"); assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); @@ -2048,7 +2050,7 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL, MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, const DebugLoc &DL, const MCInstrDesc &MCID, - bool IsIndirect, unsigned Reg, + bool IsIndirect, Register Reg, const MDNode *Variable, const MDNode *Expr) { MachineFunction &MF = *BB.getParent(); MachineInstr *MI = BuildMI(MF, DL, MCID, IsIndirect, Reg, Variable, Expr); @@ -2118,10 +2120,24 @@ void MachineInstr::collectDebugValues( } } -void MachineInstr::changeDebugValuesDefReg(unsigned Reg) { +void MachineInstr::changeDebugValuesDefReg(Register Reg) { // Collect matching debug values. SmallVector<MachineInstr *, 2> DbgValues; - collectDebugValues(DbgValues); + + if (!getOperand(0).isReg()) + return; + + unsigned DefReg = getOperand(0).getReg(); + auto *MRI = getRegInfo(); + for (auto &MO : MRI->use_operands(DefReg)) { + auto *DI = MO.getParent(); + if (!DI->isDebugValue()) + continue; + if (DI->getOperand(0).isReg() && + DI->getOperand(0).getReg() == DefReg){ + DbgValues.push_back(DI); + } + } // Propagate Reg to debug value instructions. 
for (auto *DBI : DbgValues) diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp index 32e266e9401e..feb849ced353 100644 --- a/lib/CodeGen/MachineInstrBundle.cpp +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -154,10 +154,10 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, continue; } - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + if (LocalDefSet.count(Reg)) { MO.setIsInternalRead(); if (MO.isKill()) @@ -177,7 +177,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, for (unsigned i = 0, e = Defs.size(); i != e; ++i) { MachineOperand &MO = *Defs[i]; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; @@ -194,7 +194,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, DeadDefSet.erase(Reg); } - if (!MO.isDead()) { + if (!MO.isDead() && Register::isPhysicalRegister(Reg)) { for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { unsigned SubReg = *SubRegs; if (LocalDefSet.insert(SubReg).second) @@ -316,7 +316,7 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, bool AllDefsDead = true; PhysRegInfo PRI = {false, false, false, false, false, false, false, false}; - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + assert(Register::isPhysicalRegister(Reg) && "analyzePhysReg not given a physical register!"); for (; isValid(); ++*this) { MachineOperand &MO = deref(); @@ -329,8 +329,8 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, if (!MO.isReg()) continue; - unsigned MOReg = MO.getReg(); - if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg)) + Register MOReg = MO.getReg(); + if (!MOReg || !Register::isPhysicalRegister(MOReg)) continue; if (!TRI->regsOverlap(MOReg, Reg)) diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 1107e609c258..6a898ff6ef88 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -153,7 +153,6 @@ namespace { AU.addRequired<MachineDominatorTree>(); AU.addRequired<AAResultsWrapperPass>(); AU.addPreserved<MachineLoopInfo>(); - AU.addPreserved<MachineDominatorTree>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -424,10 +423,10 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + assert(Register::isPhysicalRegister(Reg) && "Not expecting virtual register!"); if (!MO.isDef()) { @@ -526,7 +525,7 @@ void MachineLICMBase::HoistRegionPostRA() { for (const MachineOperand &MO : TI->operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) @@ -554,7 +553,7 @@ void MachineLICMBase::HoistRegionPostRA() { for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || MO.isDef() || !MO.getReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)) { // If it's using a non-loop-invariant register, then it's obviously @@ -852,8 +851,8 @@ MachineLICMBase::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.isImplicit()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; // FIXME: It 
seems bad to use RegSeen only for some of these calculations. @@ -922,12 +921,12 @@ static bool isInvariantStore(const MachineInstr &MI, // Check that all register operands are caller-preserved physical registers. for (const MachineOperand &MO : MI.operands()) { if (MO.isReg()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // If operand is a virtual register, check if it comes from a copy of a // physical register. - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) Reg = TRI->lookThruCopyLike(MO.getReg(), MRI); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return false; if (!TRI->isCallerPreservedPhysReg(Reg, *MI.getMF())) return false; @@ -955,17 +954,17 @@ static bool isCopyFeedingInvariantStore(const MachineInstr &MI, const MachineFunction *MF = MI.getMF(); // Check that we are copying a constant physical register. - unsigned CopySrcReg = MI.getOperand(1).getReg(); - if (TargetRegisterInfo::isVirtualRegister(CopySrcReg)) + Register CopySrcReg = MI.getOperand(1).getReg(); + if (Register::isVirtualRegister(CopySrcReg)) return false; if (!TRI->isCallerPreservedPhysReg(CopySrcReg, *MF)) return false; - unsigned CopyDstReg = MI.getOperand(0).getReg(); + Register CopyDstReg = MI.getOperand(0).getReg(); // Check if any of the uses of the copy are invariant stores. - assert (TargetRegisterInfo::isVirtualRegister(CopyDstReg) && - "copy dst is not a virtual reg"); + assert(Register::isVirtualRegister(CopyDstReg) && + "copy dst is not a virtual reg"); for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) { if (UseMI.mayStore() && isInvariantStore(UseMI, TRI, MRI)) @@ -1010,11 +1009,11 @@ bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; // Don't hoist an instruction that uses or defines a physical register. - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { if (MO.isUse()) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, @@ -1061,8 +1060,8 @@ bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const { for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { // A PHI may cause a copy to be inserted. 
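
The PHI check above matters because PHI elimination materializes a COPY for each incoming value, so hoisting a def that still feeds a PHI inside the loop tends to trade one loop instruction for a loop-resident copy. A self-contained sketch of the test this loop performs; the reduction to a single scan is illustrative, not the pass's exact logic:

  auto PhiUseInLoop = [&](Register Reg, const MachineLoop *CurLoop) {
    for (MachineInstr &UseMI : MRI->use_instructions(Reg))
      if (UseMI.isPHI() && CurLoop->contains(UseMI.getParent()))
        return true;
    return false;
  };
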
@@ -1104,7 +1103,7 @@ bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI, const MachineOperand &MO = UseMI.getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (MOReg != Reg) continue; @@ -1132,8 +1131,8 @@ bool MachineLICMBase::IsCheapInstruction(MachineInstr &MI) const { if (!DefMO.isReg() || !DefMO.isDef()) continue; --NumDefs; - unsigned Reg = DefMO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = DefMO.getReg(); + if (Register::isPhysicalRegister(Reg)) continue; if (!TII->hasLowDefLatency(SchedModel, MI, i)) @@ -1225,8 +1224,8 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || MO.isImplicit()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) { LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI); @@ -1304,7 +1303,7 @@ MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) { MachineFunction &MF = *MI->getMF(); const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF); // Ok, we're unfolding. Create a temporary register and do the unfold. - unsigned Reg = MRI->createVirtualRegister(RC); + Register Reg = MRI->createVirtualRegister(RC); SmallVector<MachineInstr *, 2> NewMIs; bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg, @@ -1378,20 +1377,20 @@ bool MachineLICMBase::EliminateCSE(MachineInstr *MI, // Physical registers may not differ here. assert((!MO.isReg() || MO.getReg() == 0 || - !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || + !Register::isPhysicalRegister(MO.getReg()) || MO.getReg() == Dup->getOperand(i).getReg()) && "Instructions with different phys regs are not identical!"); if (MO.isReg() && MO.isDef() && - !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + !Register::isPhysicalRegister(MO.getReg())) Defs.push_back(i); } SmallVector<const TargetRegisterClass*, 2> OrigRCs; for (unsigned i = 0, e = Defs.size(); i != e; ++i) { unsigned Idx = Defs[i]; - unsigned Reg = MI->getOperand(Idx).getReg(); - unsigned DupReg = Dup->getOperand(Idx).getReg(); + Register Reg = MI->getOperand(Idx).getReg(); + Register DupReg = Dup->getOperand(Idx).getReg(); OrigRCs.push_back(MRI->getRegClass(DupReg)); if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) { @@ -1403,8 +1402,8 @@ bool MachineLICMBase::EliminateCSE(MachineInstr *MI, } for (unsigned Idx : Defs) { - unsigned Reg = MI->getOperand(Idx).getReg(); - unsigned DupReg = Dup->getOperand(Idx).getReg(); + Register Reg = MI->getOperand(Idx).getReg(); + Register DupReg = Dup->getOperand(Idx).getReg(); MRI->replaceRegWith(Reg, DupReg); MRI->clearKillFlags(DupReg); } diff --git a/lib/CodeGen/MachineLoopUtils.cpp b/lib/CodeGen/MachineLoopUtils.cpp new file mode 100644 index 000000000000..e074b76082f0 --- /dev/null +++ b/lib/CodeGen/MachineLoopUtils.cpp @@ -0,0 +1,132 @@ +//=- MachineLoopUtils.cpp - Functions for manipulating loops ----------------=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineLoopUtils.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +using namespace llvm; + +namespace { +// MI's parent and BB are clones of each other. Find the equivalent copy of MI +// in BB. +MachineInstr &findEquivalentInstruction(MachineInstr &MI, + MachineBasicBlock *BB) { + MachineBasicBlock *PB = MI.getParent(); + unsigned Offset = std::distance(PB->instr_begin(), MachineBasicBlock::instr_iterator(MI)); + return *std::next(BB->instr_begin(), Offset); +} +} // namespace + +MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction, + MachineBasicBlock *Loop, + MachineRegisterInfo &MRI, + const TargetInstrInfo *TII) { + MachineFunction &MF = *Loop->getParent(); + MachineBasicBlock *Preheader = *Loop->pred_begin(); + if (Preheader == Loop) + Preheader = *std::next(Loop->pred_begin()); + MachineBasicBlock *Exit = *Loop->succ_begin(); + if (Exit == Loop) + Exit = *std::next(Loop->succ_begin()); + + MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(Loop->getBasicBlock()); + if (Direction == LPD_Front) + MF.insert(Loop->getIterator(), NewBB); + else + MF.insert(std::next(Loop->getIterator()), NewBB); + + // FIXME: Add DenseMapInfo trait for Register so we can use it as a key. + DenseMap<unsigned, Register> Remaps; + auto InsertPt = NewBB->end(); + for (MachineInstr &MI : *Loop) { + MachineInstr *NewMI = MF.CloneMachineInstr(&MI); + NewBB->insert(InsertPt, NewMI); + for (MachineOperand &MO : NewMI->defs()) { + Register OrigR = MO.getReg(); + if (OrigR.isPhysical()) + continue; + Register &R = Remaps[OrigR]; + R = MRI.createVirtualRegister(MRI.getRegClass(OrigR)); + MO.setReg(R); + + if (Direction == LPD_Back) { + // Replace all uses outside the original loop with the new register. + // FIXME: is the use_iterator stable enough to mutate register uses + // while iterating? + SmallVector<MachineOperand *, 4> Uses; + for (auto &Use : MRI.use_operands(OrigR)) + if (Use.getParent()->getParent() != Loop) + Uses.push_back(&Use); + for (auto *Use : Uses) { + MRI.constrainRegClass(R, MRI.getRegClass(Use->getReg())); + Use->setReg(R); + } + } + } + } + + for (auto I = NewBB->getFirstNonPHI(); I != NewBB->end(); ++I) + for (MachineOperand &MO : I->uses()) + if (MO.isReg() && Remaps.count(MO.getReg())) + MO.setReg(Remaps[MO.getReg()]); + + for (auto I = NewBB->begin(); I->isPHI(); ++I) { + MachineInstr &MI = *I; + unsigned LoopRegIdx = 3, InitRegIdx = 1; + if (MI.getOperand(2).getMBB() != Preheader) + std::swap(LoopRegIdx, InitRegIdx); + MachineInstr &OrigPhi = findEquivalentInstruction(MI, Loop); + assert(OrigPhi.isPHI()); + if (Direction == LPD_Front) { + // When peeling front, we are only left with the initial value from the + // preheader. + Register R = MI.getOperand(LoopRegIdx).getReg(); + if (Remaps.count(R)) + R = Remaps[R]; + OrigPhi.getOperand(InitRegIdx).setReg(R); + MI.RemoveOperand(LoopRegIdx + 1); + MI.RemoveOperand(LoopRegIdx + 0); + } else { + // When peeling back, the initial value is the loop-carried value from + // the original loop. 
+ Register LoopReg = OrigPhi.getOperand(LoopRegIdx).getReg(); + MI.getOperand(LoopRegIdx).setReg(LoopReg); + MI.RemoveOperand(InitRegIdx + 1); + MI.RemoveOperand(InitRegIdx + 0); + } + } + + DebugLoc DL; + if (Direction == LPD_Front) { + Preheader->replaceSuccessor(Loop, NewBB); + NewBB->addSuccessor(Loop); + Loop->replacePhiUsesWith(Preheader, NewBB); + if (TII->removeBranch(*Preheader) > 0) + TII->insertBranch(*Preheader, NewBB, nullptr, {}, DL); + TII->removeBranch(*NewBB); + TII->insertBranch(*NewBB, Loop, nullptr, {}, DL); + } else { + Loop->replaceSuccessor(Exit, NewBB); + Exit->replacePhiUsesWith(Loop, NewBB); + NewBB->addSuccessor(Exit); + + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector<MachineOperand, 4> Cond; + bool CanAnalyzeBr = !TII->analyzeBranch(*Loop, TBB, FBB, Cond); + (void)CanAnalyzeBr; + assert(CanAnalyzeBr && "Must be able to analyze the loop branch!"); + TII->removeBranch(*Loop); + TII->insertBranch(*Loop, TBB == Exit ? NewBB : TBB, + FBB == Exit ? NewBB : FBB, Cond, DL); + if (TII->removeBranch(*NewBB) > 0) + TII->insertBranch(*NewBB, Exit, nullptr, {}, DL); + } + + return NewBB; +} diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index aadcd7319799..e0b4e9cac229 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -36,11 +36,6 @@ using namespace llvm; using namespace llvm::dwarf; -// Handle the Pass registration stuff necessary to use DataLayout's. -INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo", - "Machine Module Information", false, false) -char MachineModuleInfo::ID = 0; - // Out of line virtual method. MachineModuleInfoImpl::~MachineModuleInfoImpl() = default; @@ -121,7 +116,7 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { BBCallbacks.back().setMap(this); Entry.Index = BBCallbacks.size() - 1; Entry.Fn = BB->getParent(); - Entry.Symbols.push_back(Context.createTempSymbol()); + Entry.Symbols.push_back(Context.createTempSymbol(!BB->hasAddressTaken())); return Entry.Symbols; } @@ -193,27 +188,15 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2)); } -MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM) - : ImmutablePass(ID), TM(*TM), - Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(), - TM->getObjFileLowering(), nullptr, false) { - initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); -} - -MachineModuleInfo::~MachineModuleInfo() = default; - -bool MachineModuleInfo::doInitialization(Module &M) { +void MachineModuleInfo::initialize() { ObjFileMMI = nullptr; CurCallSite = 0; UsesMSVCFloatingPoint = UsesMorestackAddr = false; HasSplitStack = HasNosplitStack = false; AddrLabelSymbols = nullptr; - TheModule = &M; - DbgInfoAvailable = !llvm::empty(M.debug_compile_units()); - return false; } -bool MachineModuleInfo::doFinalization(Module &M) { +void MachineModuleInfo::finalize() { Personalities.clear(); delete AddrLabelSymbols; @@ -223,10 +206,30 @@ bool MachineModuleInfo::doFinalization(Module &M) { delete ObjFileMMI; ObjFileMMI = nullptr; +} - return false; +MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI) + : TM(std::move(MMI.TM)), + Context(MMI.TM.getMCAsmInfo(), MMI.TM.getMCRegisterInfo(), + MMI.TM.getObjFileLowering(), nullptr, nullptr, false) { + ObjFileMMI = MMI.ObjFileMMI; + CurCallSite = MMI.CurCallSite; + UsesMSVCFloatingPoint = MMI.UsesMSVCFloatingPoint; + UsesMorestackAddr = 
MMI.UsesMorestackAddr; + HasSplitStack = MMI.HasSplitStack; + HasNosplitStack = MMI.HasNosplitStack; + AddrLabelSymbols = MMI.AddrLabelSymbols; + TheModule = MMI.TheModule; } +MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM) + : TM(*TM), Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(), + TM->getObjFileLowering(), nullptr, nullptr, false) { + initialize(); +} + +MachineModuleInfo::~MachineModuleInfo() { finalize(); } + //===- Address of Block Management ----------------------------------------===// ArrayRef<MCSymbol *> @@ -305,12 +308,13 @@ public: FreeMachineFunction() : FunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<MachineModuleInfo>(); - AU.addPreserved<MachineModuleInfo>(); + AU.addRequired<MachineModuleInfoWrapperPass>(); + AU.addPreserved<MachineModuleInfoWrapperPass>(); } bool runOnFunction(Function &F) override { - MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>(); + MachineModuleInfo &MMI = + getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); MMI.deleteMachineFunctionFor(F); return true; } @@ -327,3 +331,36 @@ char FreeMachineFunction::ID; FunctionPass *llvm::createFreeMachineFunctionPass() { return new FreeMachineFunction(); } + +MachineModuleInfoWrapperPass::MachineModuleInfoWrapperPass( + const LLVMTargetMachine *TM) + : ImmutablePass(ID), MMI(TM) { + initializeMachineModuleInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +// Handle the Pass registration stuff necessary to use DataLayout's. +INITIALIZE_PASS(MachineModuleInfoWrapperPass, "machinemoduleinfo", + "Machine Module Information", false, false) +char MachineModuleInfoWrapperPass::ID = 0; + +bool MachineModuleInfoWrapperPass::doInitialization(Module &M) { + MMI.initialize(); + MMI.TheModule = &M; + MMI.DbgInfoAvailable = !M.debug_compile_units().empty(); + return false; +} + +bool MachineModuleInfoWrapperPass::doFinalization(Module &M) { + MMI.finalize(); + return false; +} + +AnalysisKey MachineModuleAnalysis::Key; + +MachineModuleInfo MachineModuleAnalysis::run(Module &M, + ModuleAnalysisManager &) { + MachineModuleInfo MMI(TM); + MMI.TheModule = &M; + MMI.DbgInfoAvailable = !M.debug_compile_units().empty(); + return MMI; +} diff --git a/lib/CodeGen/MachineOperand.cpp b/lib/CodeGen/MachineOperand.cpp index 4fa4ea7f6cf5..8b19501ec3cf 100644 --- a/lib/CodeGen/MachineOperand.cpp +++ b/lib/CodeGen/MachineOperand.cpp @@ -49,7 +49,7 @@ static MachineFunction *getMFIfAvailable(MachineOperand &MO) { getMFIfAvailable(const_cast<const MachineOperand &>(MO))); } -void MachineOperand::setReg(unsigned Reg) { +void MachineOperand::setReg(Register Reg) { if (getReg() == Reg) return; // No change. 
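
setReg is more than a field write: when the operand belongs to a function, it unlinks the operand from the old register's use-def list and relinks it under the new register, which is what the early return above short-circuits. A minimal usage sketch, with NewReg an assumed replacement register:

  Register Old = MO.getReg();
  MO.setReg(NewReg); // MO leaves Old's use list and joins NewReg's
  assert(MO.getReg() == NewReg);
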
@@ -71,9 +71,9 @@ void MachineOperand::setReg(unsigned Reg) { SmallContents.RegNo = Reg; } -void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx, +void MachineOperand::substVirtReg(Register Reg, unsigned SubIdx, const TargetRegisterInfo &TRI) { - assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Reg.isVirtual()); if (SubIdx && getSubReg()) SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg()); setReg(Reg); @@ -81,8 +81,8 @@ void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx, setSubReg(SubIdx); } -void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); +void MachineOperand::substPhysReg(MCRegister Reg, const TargetRegisterInfo &TRI) { + assert(Reg.isPhysical()); if (getSubReg()) { Reg = TRI.getSubReg(Reg, getSubReg()); // Note that getSubReg() may return 0 if the sub-register doesn't exist. @@ -114,7 +114,7 @@ void MachineOperand::setIsDef(bool Val) { bool MachineOperand::isRenamable() const { assert(isReg() && "Wrong MachineOperand accessor"); - assert(TargetRegisterInfo::isPhysicalRegister(getReg()) && + assert(Register::isPhysicalRegister(getReg()) && "isRenamable should only be checked on physical registers"); if (!IsRenamable) return false; @@ -132,7 +132,7 @@ bool MachineOperand::isRenamable() const { void MachineOperand::setIsRenamable(bool Val) { assert(isReg() && "Wrong MachineOperand accessor"); - assert(TargetRegisterInfo::isPhysicalRegister(getReg()) && + assert(Register::isPhysicalRegister(getReg()) && "setIsRenamable should only be called on physical registers"); IsRenamable = Val; } @@ -169,7 +169,7 @@ void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) { } void MachineOperand::ChangeToES(const char *SymName, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into an external symbol"); @@ -182,7 +182,7 @@ void MachineOperand::ChangeToES(const char *SymName, } void MachineOperand::ChangeToGA(const GlobalValue *GV, int64_t Offset, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into a global address"); @@ -215,7 +215,7 @@ void MachineOperand::ChangeToFrameIndex(int Idx) { } void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into a FrameIndex"); @@ -230,7 +230,7 @@ void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset, /// ChangeToRegister - Replace this operand with a new register operand of /// the specified value. If an operand is known to be a register already, /// the setReg method should be used. 
-void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, +void MachineOperand::ChangeToRegister(Register Reg, bool isDef, bool isImp, bool isKill, bool isDead, bool isUndef, bool isDebug) { MachineRegisterInfo *RegInfo = nullptr; @@ -333,6 +333,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return getIntrinsicID() == Other.getIntrinsicID(); case MachineOperand::MO_Predicate: return getPredicate() == Other.getPredicate(); + case MachineOperand::MO_ShuffleMask: + return getShuffleMask() == Other.getShuffleMask(); } llvm_unreachable("Invalid machine operand type"); } @@ -381,6 +383,8 @@ hash_code llvm::hash_value(const MachineOperand &MO) { return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID()); case MachineOperand::MO_Predicate: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate()); + case MachineOperand::MO_ShuffleMask: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getShuffleMask()); } llvm_unreachable("Invalid machine operand type"); } @@ -425,12 +429,10 @@ static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS, return; } - int Reg = TRI->getLLVMRegNum(DwarfReg, true); - if (Reg == -1) { + if (Optional<unsigned> Reg = TRI->getLLVMRegNum(DwarfReg, true)) + OS << printReg(*Reg, TRI); + else OS << "<badreg>"; - return; - } - OS << printReg(Reg, TRI); } static void printIRBlockReference(raw_ostream &OS, const BasicBlock &BB, @@ -746,7 +748,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, printTargetFlags(OS, *this); switch (getType()) { case MachineOperand::MO_Register: { - unsigned Reg = getReg(); + Register Reg = getReg(); if (isImplicit()) OS << (isDef() ? "implicit-def " : "implicit "); else if (PrintDef && isDef()) @@ -762,13 +764,13 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "undef "; if (isEarlyClobber()) OS << "early-clobber "; - if (TargetRegisterInfo::isPhysicalRegister(getReg()) && isRenamable()) + if (Register::isPhysicalRegister(getReg()) && isRenamable()) OS << "renamable "; // isDebug() is exactly true for register operands of a DBG_VALUE. So we // simply infer it when parsing and do not need to print it. const MachineRegisterInfo *MRI = nullptr; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { if (const MachineFunction *MF = getMFIfAvailable(*this)) { MRI = &MF->getRegInfo(); } @@ -783,7 +785,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << ".subreg" << SubReg; } // Print the register class / bank. 
- if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { if (const MachineFunction *MF = getMFIfAvailable(*this)) { const MachineRegisterInfo &MRI = MF->getRegInfo(); if (IsStandalone || !PrintDef || MRI.def_empty(Reg)) { @@ -936,6 +938,20 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, << CmpInst::getPredicateName(Pred) << ')'; break; } + case MachineOperand::MO_ShuffleMask: + OS << "shufflemask("; + const Constant* C = getShuffleMask(); + const int NumElts = C->getType()->getVectorNumElements(); + + StringRef Separator; + for (int I = 0; I != NumElts; ++I) { + OS << Separator; + C->getAggregateElement(I)->printAsOperand(OS, false, MST); + Separator = ", "; + } + + OS << ')'; + break; } } @@ -963,7 +979,8 @@ bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C, return false; return isDereferenceableAndAlignedPointer( - BasePtr, 1, APInt(DL.getPointerSizeInBits(), Offset + Size), DL); + BasePtr, Align::None(), APInt(DL.getPointerSizeInBits(), Offset + Size), + DL); } /// getConstantPool - Return a MachinePointerInfo record that refers to the @@ -1049,17 +1066,6 @@ uint64_t MachineMemOperand::getAlignment() const { return MinAlign(getBaseAlignment(), getOffset()); } -void MachineMemOperand::print(raw_ostream &OS) const { - ModuleSlotTracker DummyMST(nullptr); - print(OS, DummyMST); -} - -void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const { - SmallVector<StringRef, 0> SSNs; - LLVMContext Ctx; - print(OS, MST, SSNs, Ctx, nullptr, nullptr); -} - void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, SmallVectorImpl<StringRef> &SSNs, const LLVMContext &Context, diff --git a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp index 27db9106b337..b82403ae1b85 100644 --- a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -76,7 +76,7 @@ bool MachineOptimizationRemarkEmitterPass::runOnMachineFunction( else MBFI = nullptr; - ORE = llvm::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI); + ORE = std::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI); return false; } diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp index 80a235aeaa5c..8cd66825a58a 100644 --- a/lib/CodeGen/MachineOutliner.cpp +++ b/lib/CodeGen/MachineOutliner.cpp @@ -846,8 +846,8 @@ struct MachineOutliner : public ModulePass { StringRef getPassName() const override { return "Machine Outliner"; } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<MachineModuleInfo>(); - AU.addPreserved<MachineModuleInfo>(); + AU.addRequired<MachineModuleInfoWrapperPass>(); + AU.addPreserved<MachineModuleInfoWrapperPass>(); AU.setPreservesAll(); ModulePass::getAnalysisUsage(AU); } @@ -1128,7 +1128,7 @@ MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF, IRBuilder<> Builder(EntryBB); Builder.CreateRetVoid(); - MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>(); + MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); MachineFunction &MF = MMI.getOrCreateMachineFunction(*F); MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock(); const TargetSubtargetInfo &STI = MF.getSubtarget(); @@ -1260,7 +1260,7 @@ bool MachineOutliner::outline(Module &M, true /* isImp = true */)); } if (MI.isCall()) - MI.getMF()->updateCallSiteInfo(&MI); + MI.getMF()->eraseCallSiteInfo(&MI); }; // Copy over the defs in the outlined range. 
// First inst in outlined range <-- Anything that's defined in this @@ -1303,6 +1303,12 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M, if (F.empty()) continue; + // Disable outlining from noreturn functions right now. Noreturn requires + // special handling for the case where what we are outlining could be a + // tail call. + if (F.hasFnAttribute(Attribute::NoReturn)) + continue; + // There's something in F. Check if it has a MachineFunction associated with // it. MachineFunction *MF = MMI.getMachineFunction(F); @@ -1421,7 +1427,7 @@ bool MachineOutliner::runOnModule(Module &M) { if (M.empty()) return false; - MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>(); + MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); // If the user passed -enable-machine-outliner=always or // -enable-machine-outliner, the pass will run on all functions in the module. diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp index 54df522d371a..89c9f6093a97 100644 --- a/lib/CodeGen/MachinePipeliner.cpp +++ b/lib/CodeGen/MachinePipeliner.cpp @@ -56,6 +56,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePipeliner.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ModuloSchedule.h" #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" @@ -153,6 +154,17 @@ static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden, static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false)); +static cl::opt<bool> EmitTestAnnotations( + "pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), + cl::desc("Instead of emitting the pipelined code, annotate instructions " + "with the generated schedule for feeding into the " + "-modulo-schedule-test pass")); + +static cl::opt<bool> ExperimentalCodeGen( + "pipeliner-experimental-cg", cl::Hidden, cl::init(false), + cl::desc( + "Use the experimental peeling code generator for software pipelining")); + namespace llvm { // A command line option to enable the CopyToPhi DAG mutation. @@ -314,7 +326,7 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) { LI.LoopInductionVar = nullptr; LI.LoopCompare = nullptr; - if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare)) { + if (!TII->analyzeLoopForPipelining(L.getTopBlock())) { LLVM_DEBUG( dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n"); NumFailLoop++; @@ -349,7 +361,7 @@ void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) { // If the operand uses a subregister, replace it with a new register // without subregisters, and generate a copy to the new register. - unsigned NewReg = MRI.createVirtualRegister(RC); + Register NewReg = MRI.createVirtualRegister(RC); MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB(); MachineBasicBlock::iterator At = PredB.getFirstTerminator(); const DebugLoc &DL = PredB.findDebugLoc(At); @@ -515,14 +527,49 @@ void SwingSchedulerDAG::schedule() { return; } - generatePipelinedLoop(Schedule); + // Generate the schedule as a ModuloSchedule. 
+ DenseMap<MachineInstr *, int> Cycles, Stages; + std::vector<MachineInstr *> OrderedInsts; + for (int Cycle = Schedule.getFirstCycle(); Cycle <= Schedule.getFinalCycle(); + ++Cycle) { + for (SUnit *SU : Schedule.getInstructions(Cycle)) { + OrderedInsts.push_back(SU->getInstr()); + Cycles[SU->getInstr()] = Cycle; + Stages[SU->getInstr()] = Schedule.stageScheduled(SU); + } + } + DenseMap<MachineInstr *, std::pair<unsigned, int64_t>> NewInstrChanges; + for (auto &KV : NewMIs) { + Cycles[KV.first] = Cycles[KV.second]; + Stages[KV.first] = Stages[KV.second]; + NewInstrChanges[KV.first] = InstrChanges[getSUnit(KV.first)]; + } + + ModuloSchedule MS(MF, &Loop, std::move(OrderedInsts), std::move(Cycles), + std::move(Stages)); + if (EmitTestAnnotations) { + assert(NewInstrChanges.empty() && + "Cannot serialize a schedule with InstrChanges!"); + ModuloScheduleTestAnnotater MSTI(MF, MS); + MSTI.annotate(); + return; + } + // The experimental code generator can't work if there are InstrChanges. + if (ExperimentalCodeGen && NewInstrChanges.empty()) { + PeelingModuloScheduleExpander MSE(MF, MS, &LIS); + MSE.expand(); + } else { + ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges)); + MSE.expand(); + MSE.cleanup(); + } ++NumPipelined; } /// Clean up after the software pipeliner runs. void SwingSchedulerDAG::finishBlock() { - for (MachineInstr *I : NewMIs) - MF.DeleteMachineInstr(I); + for (auto &KV : NewMIs) + MF.DeleteMachineInstr(KV.second); NewMIs.clear(); // Call the superclass. @@ -546,14 +593,6 @@ static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure."); } -/// Return the Phi register value that comes from the incoming block. -static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { - for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) - if (Phi.getOperand(i + 1).getMBB() != LoopBB) - return Phi.getOperand(i).getReg(); - return 0; -} - /// Return the Phi register value that comes from the loop block. static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) @@ -658,7 +697,7 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) { if (BaseOp1->isIdenticalTo(*BaseOp2) && (int)Offset1 < (int)Offset2) { - assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) && + assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) && "What happened to the chain edge?"); SDep Dep(Load, SDep::Barrier); Dep.setLatency(1); @@ -730,7 +769,7 @@ void SwingSchedulerDAG::updatePhiDependences() { MOI != MOE; ++MOI) { if (!MOI->isReg()) continue; - unsigned Reg = MOI->getReg(); + Register Reg = MOI->getReg(); if (MOI->isDef()) { // If the register is used by a Phi, then create an anti dependence. for (MachineRegisterInfo::use_instr_iterator @@ -809,7 +848,7 @@ void SwingSchedulerDAG::changeDependences() { continue; // Get the MI and SUnit for the instruction that defines the original base. 
- unsigned OrigBase = I.getInstr()->getOperand(BasePos).getReg(); + Register OrigBase = I.getInstr()->getOperand(BasePos).getReg(); MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase); if (!DefMI) continue; @@ -958,7 +997,7 @@ struct FuncUnitSorter { unsigned F1 = 0, F2 = 0; unsigned MFUs1 = minFuncUnits(IS1, F1); unsigned MFUs2 = minFuncUnits(IS2, F2); - if (MFUs1 == 1 && MFUs2 == 1) + if (MFUs1 == MFUs2) return Resources.lookup(F1) < Resources.lookup(F2); return MFUs1 > MFUs2; } @@ -1514,8 +1553,8 @@ static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, continue; for (const MachineOperand &MO : MI->operands()) if (MO.isReg() && MO.isUse()) { - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (Register::isVirtualRegister(Reg)) Uses.insert(Reg); else if (MRI.isAllocatable(Reg)) for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) @@ -1525,8 +1564,8 @@ static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, for (SUnit *SU : NS) for (const MachineOperand &MO : SU->getInstr()->operands()) if (MO.isReg() && MO.isDef() && !MO.isDead()) { - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isVirtualRegister(Reg)) { if (!Uses.count(Reg)) LiveOutRegs.push_back(RegisterMaskPair(Reg, LaneBitmask::getNone())); @@ -2012,836 +2051,6 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { return scheduleFound && Schedule.getMaxStageCount() > 0; } -/// Given a schedule for the loop, generate a new version of the loop, -/// and replace the old version. This function generates a prolog -/// that contains the initial iterations in the pipeline, the kernel -/// loop, and the epilog that contains the code for the final -/// iterations. -void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) { - // Create a new basic block for the kernel and add it to the CFG. - MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); - - unsigned MaxStageCount = Schedule.getMaxStageCount(); - - // Remember the registers that are used in different stages. The index is - // the iteration, or stage, that the instruction is scheduled in. This is - // a map between register names in the original block and the names created - // in each stage of the pipelined loop. - ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2]; - InstrMapTy InstrMap; - - SmallVector<MachineBasicBlock *, 4> PrologBBs; - - MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader(); - assert(PreheaderBB != nullptr && - "Need to add code to handle loops w/o preheader"); - // Generate the prolog instructions that set up the pipeline. - generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs); - MF.insert(BB->getIterator(), KernelBB); - - // Rearrange the instructions to generate the new, pipelined loop, - // and update register names as needed. - for (int Cycle = Schedule.getFirstCycle(), - LastCycle = Schedule.getFinalCycle(); - Cycle <= LastCycle; ++Cycle) { - std::deque<SUnit *> &CycleInstrs = Schedule.getInstructions(Cycle); - // This inner loop schedules each instruction in the cycle. 
- for (SUnit *CI : CycleInstrs) { - if (CI->getInstr()->isPHI()) - continue; - unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr())); - MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum); - updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap); - KernelBB->push_back(NewMI); - InstrMap[NewMI] = CI->getInstr(); - } - } - - // Copy any terminator instructions to the new kernel, and update - // names as needed. - for (MachineBasicBlock::iterator I = BB->getFirstTerminator(), - E = BB->instr_end(); - I != E; ++I) { - MachineInstr *NewMI = MF.CloneMachineInstr(&*I); - updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap); - KernelBB->push_back(NewMI); - InstrMap[NewMI] = &*I; - } - - KernelBB->transferSuccessors(BB); - KernelBB->replaceSuccessor(BB, KernelBB); - - generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, - VRMap, InstrMap, MaxStageCount, MaxStageCount, false); - generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap, - InstrMap, MaxStageCount, MaxStageCount, false); - - LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump();); - - SmallVector<MachineBasicBlock *, 4> EpilogBBs; - // Generate the epilog instructions to complete the pipeline. - generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs, - PrologBBs); - - // We need this step because the register allocation doesn't handle some - // situations well, so we insert copies to help out. - splitLifetimes(KernelBB, EpilogBBs, Schedule); - - // Remove dead instructions due to loop induction variables. - removeDeadInstructions(KernelBB, EpilogBBs); - - // Add branches between prolog and epilog blocks. - addBranches(*PreheaderBB, PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap); - - // Remove the original loop since it's no longer referenced. - for (auto &I : *BB) - LIS.RemoveMachineInstrFromMaps(I); - BB->clear(); - BB->eraseFromParent(); - - delete[] VRMap; -} - -/// Generate the pipeline prolog code. -void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage, - MachineBasicBlock *KernelBB, - ValueMapTy *VRMap, - MBBVectorTy &PrologBBs) { - MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader(); - assert(PreheaderBB != nullptr && - "Need to add code to handle loops w/o preheader"); - MachineBasicBlock *PredBB = PreheaderBB; - InstrMapTy InstrMap; - - // Generate a basic block for each stage, not including the last stage, - // which will be generated in the kernel. Each basic block may contain - // instructions from multiple stages/iterations. - for (unsigned i = 0; i < LastStage; ++i) { - // Create and insert the prolog basic block prior to the original loop - // basic block. The original loop is removed later. - MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); - PrologBBs.push_back(NewBB); - MF.insert(BB->getIterator(), NewBB); - NewBB->transferSuccessors(PredBB); - PredBB->addSuccessor(NewBB); - PredBB = NewBB; - - // Generate instructions for each appropriate stage. Process instructions - // in original program order. 
- for (int StageNum = i; StageNum >= 0; --StageNum) { - for (MachineBasicBlock::iterator BBI = BB->instr_begin(), - BBE = BB->getFirstTerminator(); - BBI != BBE; ++BBI) { - if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) { - if (BBI->isPHI()) - continue; - MachineInstr *NewMI = - cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule); - updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule, - VRMap); - NewBB->push_back(NewMI); - InstrMap[NewMI] = &*BBI; - } - } - } - rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap); - LLVM_DEBUG({ - dbgs() << "prolog:\n"; - NewBB->dump(); - }); - } - - PredBB->replaceSuccessor(BB, KernelBB); - - // Check if we need to remove the branch from the preheader to the original - // loop, and replace it with a branch to the new loop. - unsigned numBranches = TII->removeBranch(*PreheaderBB); - if (numBranches) { - SmallVector<MachineOperand, 0> Cond; - TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc()); - } -} - -/// Generate the pipeline epilog code. The epilog code finishes the iterations -/// that were started in either the prolog or the kernel. We create a basic -/// block for each stage that needs to complete. -void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage, - MachineBasicBlock *KernelBB, - ValueMapTy *VRMap, - MBBVectorTy &EpilogBBs, - MBBVectorTy &PrologBBs) { - // We need to change the branch from the kernel to the first epilog block, so - // this call to analyze branch uses the kernel rather than the original BB. - MachineBasicBlock *TBB = nullptr, *FBB = nullptr; - SmallVector<MachineOperand, 4> Cond; - bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond); - assert(!checkBranch && "generateEpilog must be able to analyze the branch"); - if (checkBranch) - return; - - MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin(); - if (*LoopExitI == KernelBB) - ++LoopExitI; - assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor"); - MachineBasicBlock *LoopExitBB = *LoopExitI; - - MachineBasicBlock *PredBB = KernelBB; - MachineBasicBlock *EpilogStart = LoopExitBB; - InstrMapTy InstrMap; - - // Generate a basic block for each stage, not including the last stage, - // which was generated for the kernel. Each basic block may contain - // instructions from multiple stages/iterations. - int EpilogStage = LastStage + 1; - for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) { - MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(); - EpilogBBs.push_back(NewBB); - MF.insert(BB->getIterator(), NewBB); - - PredBB->replaceSuccessor(LoopExitBB, NewBB); - NewBB->addSuccessor(LoopExitBB); - - if (EpilogStart == LoopExitBB) - EpilogStart = NewBB; - - // Add instructions to the epilog depending on the current block. - // Process instructions in original program order. - for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) { - for (auto &BBI : *BB) { - if (BBI.isPHI()) - continue; - MachineInstr *In = &BBI; - if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) { - // Instructions with memoperands in the epilog are updated with - // conservative values. 
- MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0); - updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap); - NewBB->push_back(NewMI); - InstrMap[NewMI] = In; - } - } - } - generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, - VRMap, InstrMap, LastStage, EpilogStage, i == 1); - generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap, - InstrMap, LastStage, EpilogStage, i == 1); - PredBB = NewBB; - - LLVM_DEBUG({ - dbgs() << "epilog:\n"; - NewBB->dump(); - }); - } - - // Fix any Phi nodes in the loop exit block. - for (MachineInstr &MI : *LoopExitBB) { - if (!MI.isPHI()) - break; - for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) { - MachineOperand &MO = MI.getOperand(i); - if (MO.getMBB() == BB) - MO.setMBB(PredBB); - } - } - - // Create a branch to the new epilog from the kernel. - // Remove the original branch and add a new branch to the epilog. - TII->removeBranch(*KernelBB); - TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc()); - // Add a branch to the loop exit. - if (EpilogBBs.size() > 0) { - MachineBasicBlock *LastEpilogBB = EpilogBBs.back(); - SmallVector<MachineOperand, 4> Cond1; - TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc()); - } -} - -/// Replace all uses of FromReg that appear outside the specified -/// basic block with ToReg. -static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, - MachineBasicBlock *MBB, - MachineRegisterInfo &MRI, - LiveIntervals &LIS) { - for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg), - E = MRI.use_end(); - I != E;) { - MachineOperand &O = *I; - ++I; - if (O.getParent()->getParent() != MBB) - O.setReg(ToReg); - } - if (!LIS.hasInterval(ToReg)) - LIS.createEmptyInterval(ToReg); -} - -/// Return true if the register has a use that occurs outside the -/// specified loop. -static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, - MachineRegisterInfo &MRI) { - for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg), - E = MRI.use_end(); - I != E; ++I) - if (I->getParent()->getParent() != BB) - return true; - return false; -} - -/// Generate Phis for the specified block in the generated pipelined code. -/// This function looks at the Phis from the original code to guide the -/// creation of new Phis. -void SwingSchedulerDAG::generateExistingPhis( - MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, - MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap, - InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum, - bool IsLast) { - // Compute the stage number for the initial value of the Phi, which - // comes from the prolog. The prolog to use depends on which kernel/ - // epilog we're adding the Phi to. - unsigned PrologStage = 0; - unsigned PrevStage = 0; - bool InKernel = (LastStageNum == CurStageNum); - if (InKernel) { - PrologStage = LastStageNum - 1; - PrevStage = CurStageNum; - } else { - PrologStage = LastStageNum - (CurStageNum - LastStageNum); - PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1; - } - - for (MachineBasicBlock::iterator BBI = BB->instr_begin(), - BBE = BB->getFirstNonPHI(); - BBI != BBE; ++BBI) { - unsigned Def = BBI->getOperand(0).getReg(); - - unsigned InitVal = 0; - unsigned LoopVal = 0; - getPhiRegs(*BBI, BB, InitVal, LoopVal); - - unsigned PhiOp1 = 0; - // The Phi value from the loop body typically is defined in the loop, but - // not always. So, we need to check if the value is defined in the loop. 
- unsigned PhiOp2 = LoopVal; - if (VRMap[LastStageNum].count(LoopVal)) - PhiOp2 = VRMap[LastStageNum][LoopVal]; - - int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI)); - int LoopValStage = - Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal))); - unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum); - if (NumStages == 0) { - // We don't need to generate a Phi anymore, but we need to rename any uses - // of the Phi value. - unsigned NewReg = VRMap[PrevStage][LoopVal]; - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI, - Def, InitVal, NewReg); - if (VRMap[CurStageNum].count(LoopVal)) - VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal]; - } - // Adjust the number of Phis needed depending on the number of prologs left, - // and the distance from where the Phi is first scheduled. The number of - // Phis cannot exceed the number of prolog stages. Each stage can - // potentially define two values. - unsigned MaxPhis = PrologStage + 2; - if (!InKernel && (int)PrologStage <= LoopValStage) - MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1); - unsigned NumPhis = std::min(NumStages, MaxPhis); - - unsigned NewReg = 0; - unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled; - // In the epilog, we may need to look back one stage to get the correct - // Phi name because the epilog and prolog blocks execute the same stage. - // The correct name is from the previous block only when the Phi has - // been completely scheduled prior to the epilog, and Phi value is not - // needed in multiple stages. - int StageDiff = 0; - if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 && - NumPhis == 1) - StageDiff = 1; - // Adjust the computations below when the phi and the loop definition - // are scheduled in different stages. - if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage) - StageDiff = StageScheduled - LoopValStage; - for (unsigned np = 0; np < NumPhis; ++np) { - // If the Phi hasn't been scheduled, then use the initial Phi operand - // value. Otherwise, use the scheduled version of the instruction. This - // is a little complicated when a Phi references another Phi. - if (np > PrologStage || StageScheduled >= (int)LastStageNum) - PhiOp1 = InitVal; - // Check if the Phi has already been scheduled in a prolog stage. - else if (PrologStage >= AccessStage + StageDiff + np && - VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0) - PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal]; - // Check if the Phi has already been scheduled, but the loop instruction - // is either another Phi, or doesn't occur in the loop. - else if (PrologStage >= AccessStage + StageDiff + np) { - // If the Phi references another Phi, we need to examine the other - // Phi to get the correct value. - PhiOp1 = LoopVal; - MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1); - int Indirects = 1; - while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) { - int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1)); - if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects) - PhiOp1 = getInitPhiReg(*InstOp1, BB); - else - PhiOp1 = getLoopPhiReg(*InstOp1, BB); - InstOp1 = MRI.getVRegDef(PhiOp1); - int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1)); - int StageAdj = (PhiOpStage != -1 ? 
PhiStage - PhiOpStage : 0); - if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np && - VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) { - PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1]; - break; - } - ++Indirects; - } - } else - PhiOp1 = InitVal; - // If this references a generated Phi in the kernel, get the Phi operand - // from the incoming block. - if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) - if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB) - PhiOp1 = getInitPhiReg(*InstOp1, KernelBB); - - MachineInstr *PhiInst = MRI.getVRegDef(LoopVal); - bool LoopDefIsPhi = PhiInst && PhiInst->isPHI(); - // In the epilog, a map lookup is needed to get the value from the kernel, - // or previous epilog block. How this is done depends on whether the - // instruction is scheduled in the previous block. - if (!InKernel) { - int StageDiffAdj = 0; - if (LoopValStage != -1 && StageScheduled > LoopValStage) - StageDiffAdj = StageScheduled - LoopValStage; - // Use the loop value defined in the kernel, unless the kernel - // contains the last definition of the Phi. - if (np == 0 && PrevStage == LastStageNum && - (StageScheduled != 0 || LoopValStage != 0) && - VRMap[PrevStage - StageDiffAdj].count(LoopVal)) - PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal]; - // Use the value defined by the Phi. We add one because we switch - // from looking at the loop value to the Phi definition. - else if (np > 0 && PrevStage == LastStageNum && - VRMap[PrevStage - np + 1].count(Def)) - PhiOp2 = VRMap[PrevStage - np + 1][Def]; - // Use the loop value defined in the kernel. - else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 && - VRMap[PrevStage - StageDiffAdj - np].count(LoopVal)) - PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal]; - // Use the value defined by the Phi, unless we're generating the first - // epilog and the Phi refers to a Phi in a different stage. - else if (VRMap[PrevStage - np].count(Def) && - (!LoopDefIsPhi || (PrevStage != LastStageNum) || (LoopValStage == StageScheduled))) - PhiOp2 = VRMap[PrevStage - np][Def]; - } - - // Check if we can reuse an existing Phi. This occurs when a Phi - // references another Phi, and the other Phi is scheduled in an - // earlier stage. We can try to reuse an existing Phi up until the last - // stage of the current Phi. - if (LoopDefIsPhi) { - if (static_cast<int>(PrologStage - np) >= StageScheduled) { - int LVNumStages = Schedule.getStagesForPhi(LoopVal); - int StageDiff = (StageScheduled - LoopValStage); - LVNumStages -= StageDiff; - // Make sure the loop value Phi has been processed already. - if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) { - NewReg = PhiOp2; - unsigned ReuseStage = CurStageNum; - if (Schedule.isLoopCarried(this, *PhiInst)) - ReuseStage -= LVNumStages; - // Check if the Phi to reuse has been generated yet. If not, then - // there is nothing to reuse. - if (VRMap[ReuseStage - np].count(LoopVal)) { - NewReg = VRMap[ReuseStage - np][LoopVal]; - - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, - &*BBI, Def, NewReg); - // Update the map with the new Phi name. 
- VRMap[CurStageNum - np][Def] = NewReg; - PhiOp2 = NewReg; - if (VRMap[LastStageNum - np - 1].count(LoopVal)) - PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal]; - - if (IsLast && np == NumPhis - 1) - replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); - continue; - } - } - } - if (InKernel && StageDiff > 0 && - VRMap[CurStageNum - StageDiff - np].count(LoopVal)) - PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal]; - } - - const TargetRegisterClass *RC = MRI.getRegClass(Def); - NewReg = MRI.createVirtualRegister(RC); - - MachineInstrBuilder NewPhi = - BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(), - TII->get(TargetOpcode::PHI), NewReg); - NewPhi.addReg(PhiOp1).addMBB(BB1); - NewPhi.addReg(PhiOp2).addMBB(BB2); - if (np == 0) - InstrMap[NewPhi] = &*BBI; - - // We define the Phis after creating the new pipelined code, so - // we need to rename the Phi values in scheduled instructions. - - unsigned PrevReg = 0; - if (InKernel && VRMap[PrevStage - np].count(LoopVal)) - PrevReg = VRMap[PrevStage - np][LoopVal]; - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI, - Def, NewReg, PrevReg); - // If the Phi has been scheduled, use the new name for rewriting. - if (VRMap[CurStageNum - np].count(Def)) { - unsigned R = VRMap[CurStageNum - np][Def]; - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI, - R, NewReg); - } - - // Check if we need to rename any uses that occur after the loop. The - // register to replace depends on whether the Phi is scheduled in the - // epilog. - if (IsLast && np == NumPhis - 1) - replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); - - // In the kernel, a dependent Phi uses the value from this Phi. - if (InKernel) - PhiOp2 = NewReg; - - // Update the map with the new Phi name. - VRMap[CurStageNum - np][Def] = NewReg; - } - - while (NumPhis++ < NumStages) { - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis, - &*BBI, Def, NewReg, 0); - } - - // Check if we need to rename a Phi that has been eliminated due to - // scheduling. - if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal)) - replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS); - } -} - -/// Generate Phis for the specified block in the generated pipelined code. -/// These are new Phis needed because the definition is scheduled after the -/// use in the pipelined sequence. -void SwingSchedulerDAG::generatePhis( - MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, - MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap, - InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum, - bool IsLast) { - // Compute the stage number that contains the initial Phi value, and - // the Phi from the previous stage. 
- unsigned PrologStage = 0; - unsigned PrevStage = 0; - unsigned StageDiff = CurStageNum - LastStageNum; - bool InKernel = (StageDiff == 0); - if (InKernel) { - PrologStage = LastStageNum - 1; - PrevStage = CurStageNum; - } else { - PrologStage = LastStageNum - StageDiff; - PrevStage = LastStageNum + StageDiff - 1; - } - - for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(), - BBE = BB->instr_end(); - BBI != BBE; ++BBI) { - for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = BBI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; - - int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI)); - assert(StageScheduled != -1 && "Expecting scheduled instruction."); - unsigned Def = MO.getReg(); - unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum); - // An instruction that is scheduled in stage 0 and used after the loop - // requires a phi in the epilog for the last definition from either - // the kernel or prolog. - if (!InKernel && NumPhis == 0 && StageScheduled == 0 && - hasUseAfterLoop(Def, BB, MRI)) - NumPhis = 1; - if (!InKernel && (unsigned)StageScheduled > PrologStage) - continue; - - unsigned PhiOp2 = VRMap[PrevStage][Def]; - if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2)) - if (InstOp2->isPHI() && InstOp2->getParent() == NewBB) - PhiOp2 = getLoopPhiReg(*InstOp2, BB2); - // The number of Phis can't exceed the number of prolog stages. The - // prolog stage number is zero based. - if (NumPhis > PrologStage + 1 - StageScheduled) - NumPhis = PrologStage + 1 - StageScheduled; - for (unsigned np = 0; np < NumPhis; ++np) { - unsigned PhiOp1 = VRMap[PrologStage][Def]; - if (np <= PrologStage) - PhiOp1 = VRMap[PrologStage - np][Def]; - if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) { - if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB) - PhiOp1 = getInitPhiReg(*InstOp1, KernelBB); - if (InstOp1->isPHI() && InstOp1->getParent() == NewBB) - PhiOp1 = getInitPhiReg(*InstOp1, NewBB); - } - if (!InKernel) - PhiOp2 = VRMap[PrevStage - np][Def]; - - const TargetRegisterClass *RC = MRI.getRegClass(Def); - unsigned NewReg = MRI.createVirtualRegister(RC); - - MachineInstrBuilder NewPhi = - BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(), - TII->get(TargetOpcode::PHI), NewReg); - NewPhi.addReg(PhiOp1).addMBB(BB1); - NewPhi.addReg(PhiOp2).addMBB(BB2); - if (np == 0) - InstrMap[NewPhi] = &*BBI; - - // Rewrite uses and update the map. The actions depend upon whether - // we are generating code for the kernel or epilog blocks. - if (InKernel) { - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, - &*BBI, PhiOp1, NewReg); - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, - &*BBI, PhiOp2, NewReg); - - PhiOp2 = NewReg; - VRMap[PrevStage - np - 1][Def] = NewReg; - } else { - VRMap[CurStageNum - np][Def] = NewReg; - if (np == NumPhis - 1) - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, - &*BBI, Def, NewReg); - } - if (IsLast && np == NumPhis - 1) - replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); - } - } - } -} - -/// Remove instructions that generate values with no uses. -/// Typically, these are induction variable operations that generate values -/// used in the loop itself. A dead instruction has a definition with -/// no uses, or uses that occur in the original loop only. 
-void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB, - MBBVectorTy &EpilogBBs) { - // For each epilog block, check that the value defined by each instruction - // is used. If not, delete it. - for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(), - MBE = EpilogBBs.rend(); - MBB != MBE; ++MBB) - for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(), - ME = (*MBB)->instr_rend(); - MI != ME;) { - // From DeadMachineInstructionElim. Don't delete inline assembly. - if (MI->isInlineAsm()) { - ++MI; - continue; - } - bool SawStore = false; - // Check if it's safe to remove the instruction due to side effects. - // We can, and want to, remove Phis here. - if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) { - ++MI; - continue; - } - bool used = true; - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); - MOI != MOE; ++MOI) { - if (!MOI->isReg() || !MOI->isDef()) - continue; - unsigned reg = MOI->getReg(); - // Assume physical registers are used, unless they are marked dead. - if (TargetRegisterInfo::isPhysicalRegister(reg)) { - used = !MOI->isDead(); - if (used) - break; - continue; - } - unsigned realUses = 0; - for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg), - EI = MRI.use_end(); - UI != EI; ++UI) { - // Check if there are any uses that occur only in the original - // loop. If so, that's not a real use. - if (UI->getParent()->getParent() != BB) { - realUses++; - used = true; - break; - } - } - if (realUses > 0) - break; - used = false; - } - if (!used) { - LIS.RemoveMachineInstrFromMaps(*MI); - MI++->eraseFromParent(); - continue; - } - ++MI; - } - // In the kernel block, check if we can remove a Phi that generates a value - // used in an instruction removed in the epilog block. - for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(), - BBE = KernelBB->getFirstNonPHI(); - BBI != BBE;) { - MachineInstr *MI = &*BBI; - ++BBI; - unsigned reg = MI->getOperand(0).getReg(); - if (MRI.use_begin(reg) == MRI.use_end()) { - LIS.RemoveMachineInstrFromMaps(*MI); - MI->eraseFromParent(); - } - } -} - -/// For loop carried definitions, we split the lifetime of a virtual register -/// that has uses past the definition in the next iteration. A copy with a new -/// virtual register is inserted before the definition, which helps with -/// generating a better register assignment. -/// -/// v1 = phi(a, v2) v1 = phi(a, v2) -/// v2 = phi(b, v3) v2 = phi(b, v3) -/// v3 = .. v4 = copy v1 -/// .. = V1 v3 = .. -/// .. = v4 -void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB, - MBBVectorTy &EpilogBBs, - SMSchedule &Schedule) { - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - for (auto &PHI : KernelBB->phis()) { - unsigned Def = PHI.getOperand(0).getReg(); - // Check for any Phi definition that is used as an operand of another Phi - // in the same block. - for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def), - E = MRI.use_instr_end(); - I != E; ++I) { - if (I->isPHI() && I->getParent() == KernelBB) { - // Get the loop carried definition. - unsigned LCDef = getLoopPhiReg(PHI, KernelBB); - if (!LCDef) - continue; - MachineInstr *MI = MRI.getVRegDef(LCDef); - if (!MI || MI->getParent() != KernelBB || MI->isPHI()) - continue; - // Search through the rest of the block looking for uses of the Phi - // definition. If one occurs, then split the lifetime. 
- unsigned SplitReg = 0; - for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI), - KernelBB->instr_end())) - if (BBJ.readsRegister(Def)) { - // We split the lifetime when we find the first use. - if (SplitReg == 0) { - SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def)); - BuildMI(*KernelBB, MI, MI->getDebugLoc(), - TII->get(TargetOpcode::COPY), SplitReg) - .addReg(Def); - } - BBJ.substituteRegister(Def, SplitReg, 0, *TRI); - } - if (!SplitReg) - continue; - // Search through each of the epilog blocks for any uses to be renamed. - for (auto &Epilog : EpilogBBs) - for (auto &I : *Epilog) - if (I.readsRegister(Def)) - I.substituteRegister(Def, SplitReg, 0, *TRI); - break; - } - } - } -} - -/// Remove the incoming block from the Phis in a basic block. -static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) { - for (MachineInstr &MI : *BB) { - if (!MI.isPHI()) - break; - for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) - if (MI.getOperand(i + 1).getMBB() == Incoming) { - MI.RemoveOperand(i + 1); - MI.RemoveOperand(i); - break; - } - } -} - -/// Create branches from each prolog basic block to the appropriate epilog -/// block. These edges are needed if the loop ends before reaching the -/// kernel. -void SwingSchedulerDAG::addBranches(MachineBasicBlock &PreheaderBB, - MBBVectorTy &PrologBBs, - MachineBasicBlock *KernelBB, - MBBVectorTy &EpilogBBs, - SMSchedule &Schedule, ValueMapTy *VRMap) { - assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch"); - MachineInstr *IndVar = Pass.LI.LoopInductionVar; - MachineInstr *Cmp = Pass.LI.LoopCompare; - MachineBasicBlock *LastPro = KernelBB; - MachineBasicBlock *LastEpi = KernelBB; - - // Start from the blocks connected to the kernel and work "out" - // to the first prolog and the last epilog blocks. - SmallVector<MachineInstr *, 4> PrevInsts; - unsigned MaxIter = PrologBBs.size() - 1; - unsigned LC = UINT_MAX; - unsigned LCMin = UINT_MAX; - for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) { - // Add branches to the prolog that go to the corresponding - // epilog, and the fall-thru prolog/kernel block. - MachineBasicBlock *Prolog = PrologBBs[j]; - MachineBasicBlock *Epilog = EpilogBBs[i]; - // We've executed one iteration, so decrement the loop count and check for - // the loop end. - SmallVector<MachineOperand, 4> Cond; - // Check if the LOOP0 has already been removed. If so, then there is no need - // to reduce the trip count. - if (LC != 0) - LC = TII->reduceLoopCount(*Prolog, PreheaderBB, IndVar, *Cmp, Cond, - PrevInsts, j, MaxIter); - - // Record the value of the first trip count, which is used to determine if - // branches and blocks can be removed for constant trip counts. - if (LCMin == UINT_MAX) - LCMin = LC; - - unsigned numAdded = 0; - if (TargetRegisterInfo::isVirtualRegister(LC)) { - Prolog->addSuccessor(Epilog); - numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc()); - } else if (j >= LCMin) { - Prolog->addSuccessor(Epilog); - Prolog->removeSuccessor(LastPro); - LastEpi->removeSuccessor(Epilog); - numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc()); - removePhis(Epilog, LastEpi); - // Remove the blocks that are no longer referenced. 
- if (LastPro != LastEpi) { - LastEpi->clear(); - LastEpi->eraseFromParent(); - } - LastPro->clear(); - LastPro->eraseFromParent(); - } else { - numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc()); - removePhis(Epilog, Prolog); - } - LastPro = Prolog; - LastEpi = Epilog; - for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(), - E = Prolog->instr_rend(); - I != E && numAdded > 0; ++I, --numAdded) - updateInstruction(&*I, false, j, 0, Schedule, VRMap); - } -} - /// Return true if we can compute the amount the instruction changes /// during each iteration. Set Delta to the amount of the change. bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) { @@ -2854,7 +2063,7 @@ bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) { if (!BaseOp->isReg()) return false; - unsigned BaseReg = BaseOp->getReg(); + Register BaseReg = BaseOp->getReg(); MachineRegisterInfo &MRI = MF.getRegInfo(); // Check if there is a Phi. If so, get the definition in the loop. @@ -2874,261 +2083,6 @@ bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) { return true; } -/// Update the memory operand with a new offset when the pipeliner -/// generates a new copy of the instruction that refers to a -/// different memory location. -void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI, - MachineInstr &OldMI, unsigned Num) { - if (Num == 0) - return; - // If the instruction has memory operands, then adjust the offset - // when the instruction appears in different stages. - if (NewMI.memoperands_empty()) - return; - SmallVector<MachineMemOperand *, 2> NewMMOs; - for (MachineMemOperand *MMO : NewMI.memoperands()) { - // TODO: Figure out whether isAtomic is really necessary (see D57601). - if (MMO->isVolatile() || MMO->isAtomic() || - (MMO->isInvariant() && MMO->isDereferenceable()) || - (!MMO->getValue())) { - NewMMOs.push_back(MMO); - continue; - } - unsigned Delta; - if (Num != UINT_MAX && computeDelta(OldMI, Delta)) { - int64_t AdjOffset = Delta * Num; - NewMMOs.push_back( - MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize())); - } else { - NewMMOs.push_back( - MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize)); - } - } - NewMI.setMemRefs(MF, NewMMOs); -} - -/// Clone the instruction for the new pipelined loop and update the -/// memory operands, if needed. -MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI, - unsigned CurStageNum, - unsigned InstStageNum) { - MachineInstr *NewMI = MF.CloneMachineInstr(OldMI); - // Check for tied operands in inline asm instructions. This should be handled - // elsewhere, but I'm not sure of the best solution. - if (OldMI->isInlineAsm()) - for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) { - const auto &MO = OldMI->getOperand(i); - if (MO.isReg() && MO.isUse()) - break; - unsigned UseIdx; - if (OldMI->isRegTiedToUseOperand(i, &UseIdx)) - NewMI->tieOperands(i, UseIdx); - } - updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum); - return NewMI; -} - -/// Clone the instruction for the new pipelined loop. If needed, this -/// function updates the instruction using the values saved in the -/// InstrChanges structure. 
-MachineInstr *SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr *OldMI, - unsigned CurStageNum, - unsigned InstStageNum, - SMSchedule &Schedule) { - MachineInstr *NewMI = MF.CloneMachineInstr(OldMI); - DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It = - InstrChanges.find(getSUnit(OldMI)); - if (It != InstrChanges.end()) { - std::pair<unsigned, int64_t> RegAndOffset = It->second; - unsigned BasePos, OffsetPos; - if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos)) - return nullptr; - int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm(); - MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first); - if (Schedule.stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum) - NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum); - NewMI->getOperand(OffsetPos).setImm(NewOffset); - } - updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum); - return NewMI; -} - -/// Update the machine instruction with new virtual registers. This -/// function may change the definitions and/or uses. -void SwingSchedulerDAG::updateInstruction(MachineInstr *NewMI, bool LastDef, - unsigned CurStageNum, - unsigned InstrStageNum, - SMSchedule &Schedule, - ValueMapTy *VRMap) { - for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = NewMI->getOperand(i); - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; - unsigned reg = MO.getReg(); - if (MO.isDef()) { - // Create a new virtual register for the definition. - const TargetRegisterClass *RC = MRI.getRegClass(reg); - unsigned NewReg = MRI.createVirtualRegister(RC); - MO.setReg(NewReg); - VRMap[CurStageNum][reg] = NewReg; - if (LastDef) - replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS); - } else if (MO.isUse()) { - MachineInstr *Def = MRI.getVRegDef(reg); - // Compute the stage that contains the last definition for the instruction. - int DefStageNum = Schedule.stageScheduled(getSUnit(Def)); - unsigned StageNum = CurStageNum; - if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) { - // Compute the difference in stages between the definition and the use. - unsigned StageDiff = (InstrStageNum - DefStageNum); - // Make an adjustment to get the last definition. - StageNum -= StageDiff; - } - if (VRMap[StageNum].count(reg)) - MO.setReg(VRMap[StageNum][reg]); - } - } -} - -/// Return the instruction in the loop that defines the register. -/// If the definition is a Phi, then follow the Phi operand to -/// the instruction in the loop. -MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) { - SmallPtrSet<MachineInstr *, 8> Visited; - MachineInstr *Def = MRI.getVRegDef(Reg); - while (Def->isPHI()) { - if (!Visited.insert(Def).second) - break; - for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) - if (Def->getOperand(i + 1).getMBB() == BB) { - Def = MRI.getVRegDef(Def->getOperand(i).getReg()); - break; - } - } - return Def; -} - -/// Return the new name for the value from the previous stage. -unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage, - unsigned LoopVal, unsigned LoopStage, - ValueMapTy *VRMap, - MachineBasicBlock *BB) { - unsigned PrevVal = 0; - if (StageNum > PhiStage) { - MachineInstr *LoopInst = MRI.getVRegDef(LoopVal); - if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal)) - // The name is defined in the previous stage. 
- PrevVal = VRMap[StageNum - 1][LoopVal]; - else if (VRMap[StageNum].count(LoopVal)) - // The previous name is defined in the current stage when the instruction - // order is swapped. - PrevVal = VRMap[StageNum][LoopVal]; - else if (!LoopInst->isPHI() || LoopInst->getParent() != BB) - // The loop value hasn't yet been scheduled. - PrevVal = LoopVal; - else if (StageNum == PhiStage + 1) - // The loop value is another phi, which has not been scheduled. - PrevVal = getInitPhiReg(*LoopInst, BB); - else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB) - // The loop value is another phi, which has been scheduled. - PrevVal = - getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB), - LoopStage, VRMap, BB); - } - return PrevVal; -} - -/// Rewrite the Phi values in the specified block to use the mappings -/// from the initial operand. Once the Phi is scheduled, we switch -/// to using the loop value instead of the Phi value, so those names -/// do not need to be rewritten. -void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB, - unsigned StageNum, - SMSchedule &Schedule, - ValueMapTy *VRMap, - InstrMapTy &InstrMap) { - for (auto &PHI : BB->phis()) { - unsigned InitVal = 0; - unsigned LoopVal = 0; - getPhiRegs(PHI, BB, InitVal, LoopVal); - unsigned PhiDef = PHI.getOperand(0).getReg(); - - unsigned PhiStage = - (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef))); - unsigned LoopStage = - (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal))); - unsigned NumPhis = Schedule.getStagesForPhi(PhiDef); - if (NumPhis > StageNum) - NumPhis = StageNum; - for (unsigned np = 0; np <= NumPhis; ++np) { - unsigned NewVal = - getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB); - if (!NewVal) - NewVal = InitVal; - rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &PHI, - PhiDef, NewVal); - } - } -} - -/// Rewrite a previously scheduled instruction to use the register value -/// from the new instruction. Make sure the instruction occurs in the -/// basic block, and we don't change the uses in the new instruction. -void SwingSchedulerDAG::rewriteScheduledInstr( - MachineBasicBlock *BB, SMSchedule &Schedule, InstrMapTy &InstrMap, - unsigned CurStageNum, unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, - unsigned NewReg, unsigned PrevReg) { - bool InProlog = (CurStageNum < Schedule.getMaxStageCount()); - int StagePhi = Schedule.stageScheduled(getSUnit(Phi)) + PhiNum; - // Rewrite uses that have been scheduled already to use the new - // Phi register. - for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg), - EI = MRI.use_end(); - UI != EI;) { - MachineOperand &UseOp = *UI; - MachineInstr *UseMI = UseOp.getParent(); - ++UI; - if (UseMI->getParent() != BB) - continue; - if (UseMI->isPHI()) { - if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg) - continue; - if (getLoopPhiReg(*UseMI, BB) != OldReg) - continue; - } - InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI); - assert(OrigInstr != InstrMap.end() && "Instruction not scheduled."); - SUnit *OrigMISU = getSUnit(OrigInstr->second); - int StageSched = Schedule.stageScheduled(OrigMISU); - int CycleSched = Schedule.cycleScheduled(OrigMISU); - unsigned ReplaceReg = 0; - // This is the stage for the scheduled instruction. 
- if (StagePhi == StageSched && Phi->isPHI()) { - int CyclePhi = Schedule.cycleScheduled(getSUnit(Phi)); - if (PrevReg && InProlog) - ReplaceReg = PrevReg; - else if (PrevReg && !Schedule.isLoopCarried(this, *Phi) && - (CyclePhi <= CycleSched || OrigMISU->getInstr()->isPHI())) - ReplaceReg = PrevReg; - else - ReplaceReg = NewReg; - } - // The scheduled instruction occurs before the scheduled Phi, and the - // Phi is not loop carried. - if (!InProlog && StagePhi + 1 == StageSched && - !Schedule.isLoopCarried(this, *Phi)) - ReplaceReg = NewReg; - if (StagePhi > StageSched && Phi->isPHI()) - ReplaceReg = NewReg; - if (!InProlog && !Phi->isPHI() && StagePhi < StageSched) - ReplaceReg = NewReg; - if (ReplaceReg) { - MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg)); - UseOp.setReg(ReplaceReg); - } - } -} - /// Check if we can change the instruction to use an offset value from the /// previous iteration. If so, return true and set the base and offset values /// so that we can rewrite the load, if necessary. @@ -3147,7 +2101,7 @@ bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI, unsigned BasePosLd, OffsetPosLd; if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd)) return false; - unsigned BaseReg = MI->getOperand(BasePosLd).getReg(); + Register BaseReg = MI->getOperand(BasePosLd).getReg(); // Look for the Phi instruction. MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); @@ -3202,7 +2156,7 @@ void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI, unsigned BasePos, OffsetPos; if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) return; - unsigned BaseReg = MI->getOperand(BasePos).getReg(); + Register BaseReg = MI->getOperand(BasePos).getReg(); MachineInstr *LoopDef = findDefInLoop(BaseReg); int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef)); int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef)); @@ -3221,11 +2175,29 @@ void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI, NewMI->getOperand(OffsetPos).setImm(NewOffset); SU->setInstr(NewMI); MISUnitMap[NewMI] = SU; - NewMIs.insert(NewMI); + NewMIs[MI] = NewMI; } } } +/// Return the instruction in the loop that defines the register. +/// If the definition is a Phi, then follow the Phi operand to +/// the instruction in the loop. +MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) { + SmallPtrSet<MachineInstr *, 8> Visited; + MachineInstr *Def = MRI.getVRegDef(Reg); + while (Def->isPHI()) { + if (!Visited.insert(Def).second) + break; + for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) + if (Def->getOperand(i + 1).getMBB() == BB) { + Def = MRI.getVRegDef(Def->getOperand(i).getReg()); + break; + } + } + return Def; +} + /// Return true for an order or output dependence that is potentially /// loop carried. A dependence is loop carried if the destination defines a value /// that may be used or defined by the source in a subsequent iteration.
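The findDefInLoop helper re-added above chases Phi operands until it reaches a non-Phi definition inside the loop; the Visited set is what stops cycles of Phis. Below is a minimal sketch of the same chase against invented toy types (ToyInstr, VRegDef, and the block ids are illustrative stand-ins for MachineInstr and MachineRegisterInfo, not the real API):

#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

// Toy model: every virtual register has exactly one defining instruction,
// and a PHI carries (incoming vreg, predecessor block) operand pairs.
struct ToyInstr {
  bool IsPhi = false;
  std::vector<std::pair<int, int>> PhiOps; // (incoming vreg, pred block id)
};

// Follow the PHI operand coming from the loop block until a non-PHI def is
// found. The Visited set breaks out of PHI cycles, as in the real helper.
ToyInstr *findDefInLoop(int Reg, int LoopBB,
                        std::unordered_map<int, ToyInstr *> &VRegDef) {
  std::unordered_set<ToyInstr *> Visited;
  ToyInstr *Def = VRegDef.at(Reg);
  while (Def->IsPhi) {
    if (!Visited.insert(Def).second)
      break; // Cycle of PHIs: settle for the PHI itself.
    for (const auto &Op : Def->PhiOps)
      if (Op.second == LoopBB) {
        Def = VRegDef.at(Op.first);
        break;
      }
  }
  return Def;
}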
@@ -3499,10 +2471,10 @@ void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, ++I, ++Pos) { for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); unsigned BasePos, OffsetPos; if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) if (MI->getOperand(BasePos).getReg() == Reg) @@ -3676,7 +2648,7 @@ bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) { assert(StageDef != -1 && "Instruction should have been scheduled."); for (auto &SI : SU.Succs) if (SI.isAssignedRegDep()) - if (ST.getRegisterInfo()->isPhysicalRegister(SI.getReg())) + if (Register::isPhysicalRegister(SI.getReg())) if (stageScheduled(SI.getSUnit()) != StageDef) return false; } @@ -3810,7 +2782,7 @@ void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) { NewMI->getOperand(OffsetPos).setImm(NewOffset); SU->setInstr(NewMI); MISUnitMap[NewMI] = SU; - NewMIs.insert(NewMI); + NewMIs[MI] = NewMI; } } OverlapReg = 0; @@ -3847,40 +2819,6 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) { ScheduledInstrs[cycle].push_front(*I); } } - // Iterate over the definitions in each instruction, and compute the - // stage difference for each use. Keep the maximum value. - for (auto &I : InstrToCycle) { - int DefStage = stageScheduled(I.first); - MachineInstr *MI = I.first->getInstr(); - for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { - MachineOperand &Op = MI->getOperand(i); - if (!Op.isReg() || !Op.isDef()) - continue; - - unsigned Reg = Op.getReg(); - unsigned MaxDiff = 0; - bool PhiIsSwapped = false; - for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg), - EI = MRI.use_end(); - UI != EI; ++UI) { - MachineOperand &UseOp = *UI; - MachineInstr *UseMI = UseOp.getParent(); - SUnit *SUnitUse = SSD->getSUnit(UseMI); - int UseStage = stageScheduled(SUnitUse); - unsigned Diff = 0; - if (UseStage != -1 && UseStage >= DefStage) - Diff = UseStage - DefStage; - if (MI->isPHI()) { - if (isLoopCarried(SSD, *MI)) - ++Diff; - else - PhiIsSwapped = true; - } - MaxDiff = std::max(Diff, MaxDiff); - } - RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped); - } - } // Erase all the elements in the later stages. Only one iteration should // remain in the scheduled list, and it contains all the instructions. 
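Most of the mechanical churn in these hunks replaces plain unsigned register ids and TargetRegisterInfo's static predicates with the Register value type. A rough sketch of the encoding idea those predicates rely on; the class shape and constant here are illustrative, the real definition lives in llvm/include/llvm/CodeGen/Register.h:

#include <cstdint>

// Sketch: one 32-bit id space, split so that physical registers are small
// positive ids and virtual registers have the high bit set. That makes the
// is-virtual / is-physical predicates trivial bit tests.
class ToyRegister {
  uint32_t Reg;
  static constexpr uint32_t FirstVirtual = 1u << 31;

public:
  constexpr ToyRegister(uint32_t R = 0) : Reg(R) {}
  static bool isVirtualRegister(uint32_t R) { return (R & FirstVirtual) != 0; }
  static bool isPhysicalRegister(uint32_t R) {
    return R != 0 && (R & FirstVirtual) == 0;
  }
  static ToyRegister index2VirtReg(uint32_t Index) {
    return ToyRegister(Index | FirstVirtual);
  }
  // Implicit conversion keeps old unsigned-based code compiling during the
  // migration, which is why hunks can change types without changing callers.
  constexpr operator uint32_t() const { return Reg; }
};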
@@ -4085,4 +3023,3 @@ void ResourceManager::clearResources() { return DFAResources->clearResources(); std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0); } - diff --git a/lib/CodeGen/MachinePostDominators.cpp b/lib/CodeGen/MachinePostDominators.cpp index 7f220ed1fd8f..f4daff667e86 100644 --- a/lib/CodeGen/MachinePostDominators.cpp +++ b/lib/CodeGen/MachinePostDominators.cpp @@ -17,7 +17,9 @@ using namespace llvm; namespace llvm { template class DominatorTreeBase<MachineBasicBlock, true>; // PostDomTreeBase -} + +extern bool VerifyMachineDomInfo; +} // namespace llvm char MachinePostDominatorTree::ID = 0; @@ -25,33 +27,52 @@ char MachinePostDominatorTree::ID = 0; INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree", "MachinePostDominator Tree Construction", true, true) -MachinePostDominatorTree::MachinePostDominatorTree() : MachineFunctionPass(ID) { +MachinePostDominatorTree::MachinePostDominatorTree() + : MachineFunctionPass(ID), PDT(nullptr) { initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry()); - DT = new PostDomTreeBase<MachineBasicBlock>(); } -FunctionPass * -MachinePostDominatorTree::createMachinePostDominatorTreePass() { +FunctionPass *MachinePostDominatorTree::createMachinePostDominatorTreePass() { return new MachinePostDominatorTree(); } -bool -MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) { - DT->recalculate(F); +bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) { + PDT = std::make_unique<PostDomTreeT>(); + PDT->recalculate(F); return false; } -MachinePostDominatorTree::~MachinePostDominatorTree() { - delete DT; -} - -void -MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { +void MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } -void -MachinePostDominatorTree::print(llvm::raw_ostream &OS, const Module *M) const { - DT->print(OS); +MachineBasicBlock *MachinePostDominatorTree::findNearestCommonDominator( + ArrayRef<MachineBasicBlock *> Blocks) const { + assert(!Blocks.empty()); + + MachineBasicBlock *NCD = Blocks.front(); + for (MachineBasicBlock *BB : Blocks.drop_front()) { + NCD = PDT->findNearestCommonDominator(NCD, BB); + + // Stop when the root is reached. 
+ if (PDT->isVirtualRoot(PDT->getNode(NCD))) + return nullptr; + } + + return NCD; +} + +void MachinePostDominatorTree::verifyAnalysis() const { + if (PDT && VerifyMachineDomInfo) + if (!PDT->verify(PostDomTreeT::VerificationLevel::Basic)) { + errs() << "MachinePostDominatorTree verification failed\n"; + + abort(); + } +} + +void MachinePostDominatorTree::print(llvm::raw_ostream &OS, + const Module *M) const { + PDT->print(OS); } diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index f0fd0405d69d..b88d4ea462ef 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -144,7 +144,7 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg) { } unsigned MachineRegisterInfo::createIncompleteVirtualRegister(StringRef Name) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs()); + unsigned Reg = Register::index2VirtReg(getNumVirtRegs()); VRegInfo.grow(Reg); RegAllocHints.grow(Reg); insertVRegByName(Name, Reg); @@ -202,7 +202,7 @@ void MachineRegisterInfo::clearVirtRegTypes() { VRegToType.clear(); } void MachineRegisterInfo::clearVirtRegs() { #ifndef NDEBUG for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (!VRegInfo[Reg].second) continue; verifyUseList(Reg); @@ -255,7 +255,7 @@ void MachineRegisterInfo::verifyUseList(unsigned Reg) const { void MachineRegisterInfo::verifyUseLists() const { #ifndef NDEBUG for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) - verifyUseList(TargetRegisterInfo::index2VirtReg(i)); + verifyUseList(Register::index2VirtReg(i)); for (unsigned i = 1, e = getTargetRegisterInfo()->getNumRegs(); i != e; ++i) verifyUseList(i); #endif @@ -386,7 +386,7 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) { MachineOperand &O = *I; ++I; - if (TargetRegisterInfo::isPhysicalRegister(ToReg)) { + if (Register::isPhysicalRegister(ToReg)) { O.substPhysReg(ToReg, *TRI); } else { O.setReg(ToReg); @@ -498,7 +498,7 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const { // Lane masks are only defined for vregs. 
- assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Register::isVirtualRegister(Reg)); const TargetRegisterClass &TRC = *getRegClass(Reg); return TRC.getLaneMask(); } @@ -517,7 +517,7 @@ void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) { } bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg) const { - assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); + assert(Register::isPhysicalRegister(PhysReg)); const TargetRegisterInfo *TRI = getTargetRegisterInfo(); if (TRI->isConstantPhysReg(PhysReg)) diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index e8b42047b49f..258a5f9e0482 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -95,7 +95,7 @@ unsigned LookForIdenticalPHI(MachineBasicBlock *BB, while (I != BB->end() && I->isPHI()) { bool Same = true; for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) { - unsigned SrcReg = I->getOperand(i).getReg(); + Register SrcReg = I->getOperand(i).getReg(); MachineBasicBlock *SrcBB = I->getOperand(i+1).getMBB(); if (AVals[SrcBB] != SrcReg) { Same = false; @@ -118,7 +118,7 @@ MachineInstrBuilder InsertNewDef(unsigned Opcode, const TargetRegisterClass *RC, MachineRegisterInfo *MRI, const TargetInstrInfo *TII) { - unsigned NewVR = MRI->createVirtualRegister(RC); + Register NewVR = MRI->createVirtualRegister(RC); return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR); } @@ -292,7 +292,7 @@ public: MachineSSAUpdater *Updater) { // Insert an implicit_def to represent an undef value. MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, - BB, BB->getFirstTerminator(), + BB, BB->getFirstNonPHI(), Updater->VRC, Updater->MRI, Updater->TII); return NewDef->getOperand(0).getReg(); diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index ae1170ad1be6..f0721ea3b76d 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -82,6 +82,10 @@ cl::opt<bool> DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout")); +cl::opt<bool> VerifyScheduling( + "verify-misched", cl::Hidden, + cl::desc("Verify machine instrs before and after machine scheduling")); + } // end namespace llvm #ifndef NDEBUG @@ -122,9 +126,6 @@ static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true)); -static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden, - cl::desc("Verify machine instrs before and after machine scheduling")); - // DAG subtrees must have at least this many nodes. 
static const unsigned MinSubtreeSize = 8; @@ -198,6 +199,7 @@ char &llvm::MachineSchedulerID = MachineScheduler::ID; INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) @@ -210,7 +212,7 @@ MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) { void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequiredID(MachineDominatorsID); + AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<TargetPassConfig>(); @@ -234,7 +236,7 @@ PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) { void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequiredID(MachineDominatorsID); + AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); AU.addRequired<TargetPassConfig>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -933,8 +935,8 @@ void ScheduleDAGMILive::collectVRegUses(SUnit &SU) { if (TrackLaneMasks && !MO.isUse()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; // Ignore re-defs. @@ -985,7 +987,7 @@ void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb, "ShouldTrackLaneMasks requires ShouldTrackPressure"); } -// Setup the register pressure trackers for the top scheduled top and bottom +// Setup the register pressure trackers for the top scheduled and bottom // scheduled regions. void ScheduleDAGMILive::initRegPressure() { VRegUses.clear(); @@ -1095,7 +1097,7 @@ void ScheduleDAGMILive::updatePressureDiffs( for (const RegisterMaskPair &P : LiveUses) { unsigned Reg = P.RegUnit; /// FIXME: Currently assuming single-use physregs. - if (!TRI->isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) continue; if (ShouldTrackLaneMasks) { @@ -1319,8 +1321,8 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { // Visit each live out vreg def to find def/use pairs that cross iterations. for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) { unsigned Reg = P.RegUnit; - if (!TRI->isVirtualRegister(Reg)) - continue; + if (!Register::isVirtualRegister(Reg)) + continue; const LiveInterval &LI = LIS->getInterval(Reg); const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); if (!DefVNI) @@ -1538,14 +1540,14 @@ namespace llvm { std::unique_ptr<ScheduleDAGMutation> createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI) + return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI) : nullptr; } std::unique_ptr<ScheduleDAGMutation> createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI) + return EnableMemOpCluster ? 
std::make_unique<StoreClusterMutation>(TII, TRI) : nullptr; } @@ -1657,7 +1659,7 @@ namespace llvm { std::unique_ptr<ScheduleDAGMutation> createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - return llvm::make_unique<CopyConstrain>(TII, TRI); + return std::make_unique<CopyConstrain>(TII, TRI); } } // end namespace llvm @@ -1687,13 +1689,13 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { // Check for pure vreg copies. const MachineOperand &SrcOp = Copy->getOperand(1); - unsigned SrcReg = SrcOp.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg()) + Register SrcReg = SrcOp.getReg(); + if (!Register::isVirtualRegister(SrcReg) || !SrcOp.readsReg()) return; const MachineOperand &DstOp = Copy->getOperand(0); - unsigned DstReg = DstOp.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead()) + Register DstReg = DstOp.getReg(); + if (!Register::isVirtualRegister(DstReg) || DstOp.isDead()) return; // Check if either the dest or source is local. If it's live across a back @@ -2914,14 +2916,12 @@ int biasPhysReg(const SUnit *SU, bool isTop) { unsigned UnscheduledOper = isTop ? 0 : 1; // If we have already scheduled the physreg produce/consumer, immediately // schedule the copy. - if (TargetRegisterInfo::isPhysicalRegister( - MI->getOperand(ScheduledOper).getReg())) + if (Register::isPhysicalRegister(MI->getOperand(ScheduledOper).getReg())) return 1; // If the physreg is at the boundary, defer it. Otherwise schedule it // immediately to free the dependent. We can hoist the copy later. bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft; - if (TargetRegisterInfo::isPhysicalRegister( - MI->getOperand(UnscheduledOper).getReg())) + if (Register::isPhysicalRegister(MI->getOperand(UnscheduledOper).getReg())) return AtBoundary ? -1 : 1; } @@ -2931,7 +2931,7 @@ int biasPhysReg(const SUnit *SU, bool isTop) { // physical registers. bool DoBias = true; for (const MachineOperand &Op : MI->defs()) { - if (Op.isReg() && !TargetRegisterInfo::isPhysicalRegister(Op.getReg())) { + if (Op.isReg() && !Register::isPhysicalRegister(Op.getReg())) { DoBias = false; break; } @@ -3259,7 +3259,8 @@ void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) { // Find already scheduled copies with a single physreg dependence and move // them just above the scheduled instruction. for (SDep &Dep : Deps) { - if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg())) + if (Dep.getKind() != SDep::Data || + !Register::isPhysicalRegister(Dep.getReg())) continue; SUnit *DepSU = Dep.getSUnit(); if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1) @@ -3298,7 +3299,7 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { /// default scheduler if the target does not set a default. ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) { ScheduleDAGMILive *DAG = - new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C)); + new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C)); // Register DAG post-processors. 
// // FIXME: extend the mutation API to allow earlier mutations to instantiate @@ -3450,7 +3451,7 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { } ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) { - return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C), + return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C), /*RemoveKillFlags=*/true); } @@ -3561,10 +3562,10 @@ public: } // end anonymous namespace static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { - return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true)); + return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true)); } static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { - return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false)); + return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false)); } static MachineSchedRegistry ILPMaxRegistry( @@ -3658,7 +3659,7 @@ static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { assert((TopDown || !ForceTopDown) && "-misched-topdown incompatible with -misched-bottomup"); return new ScheduleDAGMILive( - C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown)); + C, std::make_unique<InstructionShuffler>(Alternate, TopDown)); } static MachineSchedRegistry ShufflerRegistry( diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 41db2c88ce50..27a2e7023f22 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -36,8 +36,9 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" @@ -114,15 +115,12 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachinePostDominatorTree>(); AU.addRequired<MachineLoopInfo>(); AU.addRequired<MachineBranchProbabilityInfo>(); - AU.addPreserved<MachineDominatorTree>(); - AU.addPreserved<MachinePostDominatorTree>(); AU.addPreserved<MachineLoopInfo>(); if (UseBlockFreqInfo) AU.addRequired<MachineBlockFrequencyInfo>(); @@ -195,11 +193,10 @@ bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI, if (!MI.isCopy()) return false; - unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned DstReg = MI.getOperand(0).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || - !TargetRegisterInfo::isVirtualRegister(DstReg) || - !MRI->hasOneNonDBGUse(SrcReg)) + Register SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + if (!Register::isVirtualRegister(SrcReg) || + !Register::isVirtualRegister(DstReg) || !MRI->hasOneNonDBGUse(SrcReg)) return false; const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg); @@ -233,8 +230,7 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *DefMBB, bool &BreakPHIEdge, bool &LocalUse) const { - assert(TargetRegisterInfo::isVirtualRegister(Reg) && - "Only makes sense for vregs"); + assert(Register::isVirtualRegister(Reg) && "Only makes sense for vregs"); // Ignore debug uses because debug info 
doesn't affect the code. if (MRI->use_nodbg_empty(Reg)) @@ -416,13 +412,13 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI, const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; // We don't move live definitions of physical registers, // so sinking their uses won't enable any opportunities. - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) continue; // If this instruction is the only user of a virtual register, @@ -615,10 +611,10 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; // Ignore non-register operands. - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { if (MO.isUse()) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, @@ -817,8 +813,9 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { const MachineOperand &MO = MI.getOperand(I); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (Reg == 0 || !Register::isPhysicalRegister(Reg)) + continue; if (SuccToSinkTo->isLiveIn(Reg)) return false; } @@ -958,8 +955,9 @@ private: /// Track which register units have been modified and used. LiveRegUnits ModifiedRegUnits, UsedRegUnits; - /// Track DBG_VALUEs of (unmodified) register units. - DenseMap<unsigned, TinyPtrVector<MachineInstr*>> SeenDbgInstrs; + /// Track DBG_VALUEs of (unmodified) register units. Each DBG_VALUE has an + /// entry in this map for each unit it touches. + DenseMap<unsigned, TinyPtrVector<MachineInstr *>> SeenDbgInstrs; /// Sink Copy instructions unused in the same block close to their uses in /// successors. 
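The SeenDbgInstrs comment above captures the key change in this file: DBG_VALUEs are now recorded once per register unit they read rather than once per register, so a later def of any aliasing register can find the debug values it invalidates. A toy sketch of the record/lookup pattern, with an invented RegUnits table standing in for MCRegUnitIterator:

#include <map>
#include <set>
#include <vector>

using Unit = unsigned;

// Invented aliasing table: each register id maps to the units it covers
// (for example, AX and EAX would share a unit on x86).
std::map<unsigned, std::set<Unit>> RegUnits;

// One entry per unit a DBG_VALUE reads, mirroring SeenDbgInstrs.
std::map<Unit, std::vector<int>> SeenDbgInstrs;

void recordDebugUse(unsigned Reg, int DbgValueId) {
  for (Unit U : RegUnits[Reg])
    SeenDbgInstrs[U].push_back(DbgValueId);
}

// A def of DefReg must drag along every DBG_VALUE reading an overlapping
// unit; a set deduplicates DBG_VALUEs that touch several of its units.
std::set<int> debugValuesToSink(unsigned DefReg) {
  std::set<int> Result;
  for (Unit U : RegUnits[DefReg])
    for (int Id : SeenDbgInstrs[U])
      Result.insert(Id);
  return Result;
}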
@@ -1030,7 +1028,7 @@ static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB, const TargetRegisterInfo *TRI) { for (auto U : UsedOpsInCopy) { MachineOperand &MO = MI->getOperand(U); - unsigned SrcReg = MO.getReg(); + Register SrcReg = MO.getReg(); if (!UsedRegUnits.available(SrcReg)) { MachineBasicBlock::iterator NI = std::next(MI->getIterator()); for (MachineInstr &UI : make_range(NI, CurBB.end())) { @@ -1053,7 +1051,7 @@ static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB, for (MCSubRegIterator S(DefReg, TRI, true); S.isValid(); ++S) SuccBB->removeLiveIn(*S); for (auto U : UsedOpsInCopy) { - unsigned Reg = MI->getOperand(U).getReg(); + Register Reg = MI->getOperand(U).getReg(); if (!SuccBB->isLiveIn(Reg)) SuccBB->addLiveIn(Reg); } @@ -1069,7 +1067,7 @@ static bool hasRegisterDependency(MachineInstr *MI, MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (MO.isDef()) { @@ -1094,6 +1092,14 @@ static bool hasRegisterDependency(MachineInstr *MI, return HasRegDependency; } +static SmallSet<unsigned, 4> getRegUnits(unsigned Reg, + const TargetRegisterInfo *TRI) { + SmallSet<unsigned, 4> RegUnits; + for (auto RI = MCRegUnitIterator(Reg, TRI); RI.isValid(); ++RI) + RegUnits.insert(*RI); + return RegUnits; +} + bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, MachineFunction &MF, const TargetRegisterInfo *TRI, @@ -1130,15 +1136,17 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, // for DBG_VALUEs later, record them when they're encountered. if (MI->isDebugValue()) { auto &MO = MI->getOperand(0); - if (MO.isReg() && TRI->isPhysicalRegister(MO.getReg())) { + if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) { // Bail if we can already tell the sink would be rejected, rather // than needlessly accumulating lots of DBG_VALUEs. if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy, ModifiedRegUnits, UsedRegUnits)) continue; - // Record debug use of this register. - SeenDbgInstrs[MO.getReg()].push_back(MI); + // Record debug use of each reg unit. + SmallSet<unsigned, 4> Units = getRegUnits(MO.getReg(), TRI); + for (unsigned Reg : Units) + SeenDbgInstrs[Reg].push_back(MI); } continue; } @@ -1177,15 +1185,22 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) && "Unexpected predecessor"); - // Collect DBG_VALUEs that must sink with this copy. + // Collect DBG_VALUEs that must sink with this copy. We've previously + // recorded which reg units that DBG_VALUEs read, if this instruction + // writes any of those units then the corresponding DBG_VALUEs must sink. + SetVector<MachineInstr *> DbgValsToSinkSet; SmallVector<MachineInstr *, 4> DbgValsToSink; for (auto &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned reg = MO.getReg(); - for (auto *MI : SeenDbgInstrs.lookup(reg)) - DbgValsToSink.push_back(MI); + + SmallSet<unsigned, 4> Units = getRegUnits(MO.getReg(), TRI); + for (unsigned Reg : Units) + for (auto *MI : SeenDbgInstrs.lookup(Reg)) + DbgValsToSinkSet.insert(MI); } + DbgValsToSink.insert(DbgValsToSink.begin(), DbgValsToSinkSet.begin(), + DbgValsToSinkSet.end()); // Clear the kill flag if SrcReg is killed between MI and the end of the // block. 
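updateLiveIn in the hunk above keeps a successor's live-in list consistent once a copy is sunk into it. A simplified sketch of that bookkeeping, assuming live-ins are a flat set of register ids (the real code also erases every sub-register via MCSubRegIterator):

#include <set>
#include <vector>

struct ToyBlock {
  std::set<unsigned> LiveIns;
};

// Once the copy lives in Succ, its defs are produced locally and must leave
// the live-in list, while its uses now flow into Succ and must be added.
void updateLiveIn(ToyBlock &Succ, const std::vector<unsigned> &DefRegs,
                  const std::vector<unsigned> &UseRegs) {
  for (unsigned Def : DefRegs)
    Succ.LiveIns.erase(Def);
  for (unsigned Use : UseRegs)
    Succ.LiveIns.insert(Use);
}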
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index f9505df4e7f4..66a3bc2f8cc4 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -634,7 +634,7 @@ struct DataDep { /// Create a DataDep from an SSA form virtual register. DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp) : UseOp(UseOp) { - assert(TargetRegisterInfo::isVirtualRegister(VirtReg)); + assert(Register::isVirtualRegister(VirtReg)); MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg); assert(!DefI.atEnd() && "Register has no defs"); DefMI = DefI->getParent(); @@ -660,10 +660,10 @@ static bool getDataDeps(const MachineInstr &UseMI, const MachineOperand &MO = *I; if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { HasPhysRegs = true; continue; } @@ -687,7 +687,7 @@ static void getPHIDeps(const MachineInstr &UseMI, assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI"); for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) { if (UseMI.getOperand(i + 1).getMBB() == Pred) { - unsigned Reg = UseMI.getOperand(i).getReg(); + Register Reg = UseMI.getOperand(i).getReg(); Deps.push_back(DataDep(MRI, Reg, i)); return; } @@ -708,8 +708,8 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI, const MachineOperand &MO = *MI; if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; // Track live defs and kills for updating RegUnits. if (MO.isDef()) { @@ -765,7 +765,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { assert(TBI.HasValidInstrHeights && "Missing height info"); unsigned MaxLen = 0; for (const LiveInReg &LIR : TBI.LiveIns) { - if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg)) + if (!Register::isVirtualRegister(LIR.Reg)) continue; const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); // Ignore dependencies outside the current trace. @@ -902,8 +902,8 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height, const MachineOperand &MO = *MOI; if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; if (MO.readsReg()) ReadOps.push_back(MI.getOperandNo(MOI)); @@ -930,7 +930,7 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height, // Now we know the height of MI. Update any regunits read. for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) { - unsigned Reg = MI.getOperand(ReadOps[i]).getReg(); + Register Reg = MI.getOperand(ReadOps[i]).getReg(); for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { LiveRegUnit &LRU = RegUnits[*Units]; // Set the height to the highest reader of the unit. @@ -979,7 +979,7 @@ addLiveIns(const MachineInstr *DefMI, unsigned DefOp, ArrayRef<const MachineBasicBlock*> Trace) { assert(!Trace.empty() && "Trace should contain at least one block"); unsigned Reg = DefMI->getOperand(DefOp).getReg(); - assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Register::isVirtualRegister(Reg)); const MachineBasicBlock *DefMBB = DefMI->getParent(); // Reg is live-in to all blocks in Trace that follow DefMBB. 
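The dependence walks in this file (getDataDeps, getPHIDeps, the physreg updates) ultimately feed a longest-path computation over the trace. A compact sketch of the depth recurrence, assuming instructions are numbered in program order, Preds lists data predecessors, and Latency is per instruction (all names invented for illustration):

#include <algorithm>
#include <cstddef>
#include <vector>

// depth(i) = max over predecessors p of depth(p) + latency(p); the critical
// path is the largest depth + latency seen. Heights are the mirror image,
// computed bottom-up from the end of the trace.
unsigned computeDepths(const std::vector<std::vector<int>> &Preds,
                       const std::vector<unsigned> &Latency,
                       std::vector<unsigned> &Depth) {
  Depth.assign(Preds.size(), 0);
  unsigned CriticalPath = 0;
  for (std::size_t I = 0; I != Preds.size(); ++I) {
    unsigned D = 0;
    for (int P : Preds[I])
      D = std::max(D, Depth[P] + Latency[P]);
    Depth[I] = D;
    CriticalPath = std::max(CriticalPath, D + Latency[I]);
  }
  return CriticalPath;
}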
@@ -1026,7 +1026,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) { if (MBB) { TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; for (LiveInReg &LI : TBI.LiveIns) { - if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) { + if (Register::isVirtualRegister(LI.Reg)) { // For virtual registers, the def latency is included. unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)]; if (Height < LI.Height) diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 0ad792ac62cf..969743edca52 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -22,7 +22,6 @@ // the verifier errors. //===----------------------------------------------------------------------===// -#include "LiveRangeCalc.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -37,6 +36,7 @@ #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -122,7 +122,7 @@ namespace { // Add Reg and any sub-registers to RV void addRegWithSubRegs(RegVector &RV, unsigned Reg) { RV.push_back(Reg); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) RV.push_back(*SubRegs); } @@ -159,7 +159,7 @@ namespace { // Add register to vregsPassed if it belongs there. Return true if // anything changed. bool addPassed(unsigned Reg) { - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return false; if (regsKilled.count(Reg) || regsLiveOut.count(Reg)) return false; @@ -178,7 +178,7 @@ namespace { // Add register to vregsRequired if it belongs there. Return true if // anything changed. bool addRequired(unsigned Reg) { - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return false; if (regsLiveOut.count(Reg)) return false; @@ -552,7 +552,7 @@ void MachineVerifier::report_context_vreg(unsigned VReg) const { } void MachineVerifier::report_context_vreg_regunit(unsigned VRegOrUnit) const { - if (TargetRegisterInfo::isVirtualRegister(VRegOrUnit)) { + if (Register::isVirtualRegister(VRegOrUnit)) { report_context_vreg(VRegOrUnit); } else { errs() << "- regunit: " << printRegUnit(VRegOrUnit, TRI) << '\n'; @@ -797,7 +797,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { regsLive.clear(); if (MRI->tracksLiveness()) { for (const auto &LI : MBB->liveins()) { - if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) { + if (!Register::isPhysicalRegister(LI.PhysReg)) { report("MBB live-in list contains non-physical register", MBB); continue; } @@ -957,7 +957,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { // Generic opcodes must not have physical register operands. 
for (unsigned I = 0; I < MI->getNumOperands(); ++I) { const MachineOperand *MO = &MI->getOperand(I); - if (MO->isReg() && TargetRegisterInfo::isPhysicalRegister(MO->getReg())) + if (MO->isReg() && Register::isPhysicalRegister(MO->getReg())) report("Generic instruction cannot have physical register", MO, I); } @@ -1368,7 +1368,108 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } } + switch (IntrID) { + case Intrinsic::memcpy: + if (MI->getNumOperands() != 5) + report("Expected memcpy intrinsic to have 5 operands", MI); + break; + case Intrinsic::memmove: + if (MI->getNumOperands() != 5) + report("Expected memmove intrinsic to have 5 operands", MI); + break; + case Intrinsic::memset: + if (MI->getNumOperands() != 5) + report("Expected memset intrinsic to have 5 operands", MI); + break; + } + break; + } + case TargetOpcode::G_SEXT_INREG: { + if (!MI->getOperand(2).isImm()) { + report("G_SEXT_INREG expects an immediate operand #2", MI); + break; + } + + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); + verifyVectorElementMatch(DstTy, SrcTy, MI); + + int64_t Imm = MI->getOperand(2).getImm(); + if (Imm <= 0) + report("G_SEXT_INREG size must be >= 1", MI); + if (Imm >= SrcTy.getScalarSizeInBits()) + report("G_SEXT_INREG size must be less than source bit width", MI); + break; + } + case TargetOpcode::G_SHUFFLE_VECTOR: { + const MachineOperand &MaskOp = MI->getOperand(3); + if (!MaskOp.isShuffleMask()) { + report("Incorrect mask operand type for G_SHUFFLE_VECTOR", MI); + break; + } + + const Constant *Mask = MaskOp.getShuffleMask(); + auto *MaskVT = dyn_cast<VectorType>(Mask->getType()); + if (!MaskVT || !MaskVT->getElementType()->isIntegerTy(32)) { + report("Invalid shufflemask constant type", MI); + break; + } + + if (!Mask->getAggregateElement(0u)) { + report("Invalid shufflemask constant type", MI); + break; + } + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT Src0Ty = MRI->getType(MI->getOperand(1).getReg()); + LLT Src1Ty = MRI->getType(MI->getOperand(2).getReg()); + + if (Src0Ty != Src1Ty) + report("Source operands must be the same type", MI); + + if (Src0Ty.getScalarType() != DstTy.getScalarType()) + report("G_SHUFFLE_VECTOR cannot change element type", MI); + + // Don't check that all operands are vector because scalars are used in + // place of 1 element vectors. + int SrcNumElts = Src0Ty.isVector() ? Src0Ty.getNumElements() : 1; + int DstNumElts = DstTy.isVector() ? 
DstTy.getNumElements() : 1; + + SmallVector<int, 32> MaskIdxes; + ShuffleVectorInst::getShuffleMask(Mask, MaskIdxes); + + if (static_cast<int>(MaskIdxes.size()) != DstNumElts) + report("Wrong result type for shufflemask", MI); + + for (int Idx : MaskIdxes) { + if (Idx < 0) + continue; + + if (Idx >= 2 * SrcNumElts) + report("Out of bounds shuffle index", MI); + } + + break; + } + case TargetOpcode::G_DYN_STACKALLOC: { + const MachineOperand &DstOp = MI->getOperand(0); + const MachineOperand &AllocOp = MI->getOperand(1); + const MachineOperand &AlignOp = MI->getOperand(2); + + if (!DstOp.isReg() || !MRI->getType(DstOp.getReg()).isPointer()) { + report("dst operand 0 must be a pointer type", MI); + break; + } + + if (!AllocOp.isReg() || !MRI->getType(AllocOp.getReg()).isScalar()) { + report("src operand 1 must be a scalar reg type", MI); + break; + } + + if (!AlignOp.isImm()) { + report("src operand 2 must be an immediate type", MI); + break; + } break; } default: @@ -1525,11 +1626,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { report("Operand should be tied", MO, MONum); else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum)) report("Tied def doesn't match MCInstrDesc", MO, MONum); - else if (TargetRegisterInfo::isPhysicalRegister(MO->getReg())) { + else if (Register::isPhysicalRegister(MO->getReg())) { const MachineOperand &MOTied = MI->getOperand(TiedTo); if (!MOTied.isReg()) report("Tied counterpart must be a register", &MOTied, TiedTo); - else if (TargetRegisterInfo::isPhysicalRegister(MOTied.getReg()) && + else if (Register::isPhysicalRegister(MOTied.getReg()) && MO->getReg() != MOTied.getReg()) report("Tied physical registers must match.", &MOTied, TiedTo); } @@ -1543,7 +1644,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { switch (MO->getType()) { case MachineOperand::MO_Register: { - const unsigned Reg = MO->getReg(); + const Register Reg = MO->getReg(); if (!Reg) return; if (MRI->tracksLiveness() && !MI->isDebugValue()) @@ -1581,7 +1682,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // Check register classes. unsigned SubIdx = MO->getSubReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { if (SubIdx) { report("Illegal subregister index for physical register", MO, MONum); return; @@ -1817,7 +1918,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO, if (MO->isDead()) { LiveQueryResult LRQ = LR.Query(DefIdx); if (!LRQ.isDeadDef()) { - assert(TargetRegisterInfo::isVirtualRegister(VRegOrUnit) && + assert(Register::isVirtualRegister(VRegOrUnit) && "Expecting a virtual register."); // A dead subreg def only tells us that the specific subreg is dead. There // could be other non-dead defs of other subregs, or we could have other @@ -1845,8 +1946,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { addRegWithSubRegs(regsKilled, Reg); // Check that LiveVars knows this kill. 
- if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) && - MO->isKill()) { + if (LiveVars && Register::isVirtualRegister(Reg) && MO->isKill()) { LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); if (!is_contained(VI.Kills, MI)) report("Kill missing from LiveVariables", MO, MONum); @@ -1856,7 +1956,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (LiveInts && !LiveInts->isNotInMIMap(*MI)) { SlotIndex UseIdx = LiveInts->getInstructionIndex(*MI); // Check the cached regunit intervals. - if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) { + if (Register::isPhysicalRegister(Reg) && !isReserved(Reg)) { for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { if (MRI->isReservedRegUnit(*Units)) continue; @@ -1865,7 +1965,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { } } - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { if (LiveInts->hasInterval(Reg)) { // This is a virtual register interval. const LiveInterval &LI = LiveInts->getInterval(Reg); @@ -1900,7 +2000,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { // Use of a dead register. if (!regsLive.count(Reg)) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { // Reserved registers may be used even when 'dead'. bool Bad = !isReserved(Reg); // We are fine if just any subregister has a defined value. @@ -1922,7 +2022,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (!MOP.isReg() || !MOP.isImplicit()) continue; - if (!TargetRegisterInfo::isPhysicalRegister(MOP.getReg())) + if (!Register::isPhysicalRegister(MOP.getReg())) continue; for (MCSubRegIterator SubRegs(MOP.getReg(), TRI); SubRegs.isValid(); @@ -1960,7 +2060,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { addRegWithSubRegs(regsDefined, Reg); // Verify SSA form. 
- if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) && + if (MRI->isSSA() && Register::isVirtualRegister(Reg) && std::next(MRI->def_begin(Reg)) != MRI->def_end()) report("Multiple virtual register defs in SSA form", MO, MONum); @@ -1969,7 +2069,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { SlotIndex DefIdx = LiveInts->getInstructionIndex(*MI); DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber()); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { if (LiveInts->hasInterval(Reg)) { const LiveInterval &LI = LiveInts->getInterval(Reg); checkLivenessAtDef(MO, MONum, DefIdx, LI, Reg); @@ -2007,7 +2107,7 @@ void MachineVerifier::visitMachineBundleAfter(const MachineInstr *MI) { while (!regMasks.empty()) { const uint32_t *Mask = regMasks.pop_back_val(); for (RegSet::iterator I = regsLive.begin(), E = regsLive.end(); I != E; ++I) - if (TargetRegisterInfo::isPhysicalRegister(*I) && + if (Register::isPhysicalRegister(*I) && MachineOperand::clobbersPhysReg(Mask, *I)) regsDead.push_back(*I); } @@ -2119,8 +2219,8 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock &MBB) { if (MODef.isTied() || MODef.isImplicit() || MODef.isInternalRead() || MODef.isEarlyClobber() || MODef.isDebug()) report("Unexpected flag on PHI operand", &MODef, 0); - unsigned DefReg = MODef.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DefReg)) + Register DefReg = MODef.getReg(); + if (!Register::isVirtualRegister(DefReg)) report("Expected first PHI operand to be a virtual register", &MODef, 0); for (unsigned I = 1, E = Phi.getNumOperands(); I != E; I += 2) { @@ -2212,7 +2312,7 @@ void MachineVerifier::visitMachineFunctionAfter() { void MachineVerifier::verifyLiveVariables() { assert(LiveVars && "Don't call verifyLiveVariables without LiveVars"); for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); for (const auto &MBB : *MF) { BBInfo &MInfo = MBBInfoMap[&MBB]; @@ -2238,7 +2338,7 @@ void MachineVerifier::verifyLiveVariables() { void MachineVerifier::verifyLiveIntervals() { assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts"); for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); // Spilling and splitting may leave unused registers around. Skip them. if (MRI->reg_nodbg_empty(Reg)) @@ -2315,11 +2415,11 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) { if (!MOI->isReg() || !MOI->isDef()) continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { if (MOI->getReg() != Reg) continue; } else { - if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || + if (!Register::isPhysicalRegister(MOI->getReg()) || !TRI->hasRegUnit(MOI->getReg(), Reg)) continue; } @@ -2402,7 +2502,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, return; // RegUnit intervals are allowed dead phis. 
- if (!TargetRegisterInfo::isVirtualRegister(Reg) && VNI->isPHIDef() && + if (!Register::isVirtualRegister(Reg) && VNI->isPHIDef() && S.start == VNI->def && S.end == VNI->def.getDeadSlot()) return; @@ -2446,7 +2546,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // The following checks only apply to virtual registers. Physreg liveness // is too weird to check. - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { // A live segment can end with either a redefinition, a kill flag on a // use, or a dead flag on a def. bool hasRead = false; @@ -2519,8 +2619,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, while (true) { assert(LiveInts->isLiveInToMBB(LR, &*MFI)); // We don't know how to track physregs into a landing pad. - if (!TargetRegisterInfo::isVirtualRegister(Reg) && - MFI->isEHPad()) { + if (!Register::isVirtualRegister(Reg) && MFI->isEHPad()) { if (&*MFI == EndMBB) break; ++MFI; @@ -2580,7 +2679,7 @@ void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg, void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { unsigned Reg = LI.reg; - assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Register::isVirtualRegister(Reg)); verifyLiveRange(LI, Reg); LaneBitmask Mask; diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp index 2db1e86905a4..d21eae222af0 100644 --- a/lib/CodeGen/MacroFusion.cpp +++ b/lib/CodeGen/MacroFusion.cpp @@ -176,7 +176,7 @@ std::unique_ptr<ScheduleDAGMutation> llvm::createMacroFusionDAGMutation( ShouldSchedulePredTy shouldScheduleAdjacent) { if(EnableMacroFusion) - return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, true); + return std::make_unique<MacroFusion>(shouldScheduleAdjacent, true); return nullptr; } @@ -184,6 +184,6 @@ std::unique_ptr<ScheduleDAGMutation> llvm::createBranchMacroFusionDAGMutation( ShouldSchedulePredTy shouldScheduleAdjacent) { if(EnableMacroFusion) - return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, false); + return std::make_unique<MacroFusion>(shouldScheduleAdjacent, false); return nullptr; } diff --git a/lib/CodeGen/ModuloSchedule.cpp b/lib/CodeGen/ModuloSchedule.cpp new file mode 100644 index 000000000000..7ce3c5861801 --- /dev/null +++ b/lib/CodeGen/ModuloSchedule.cpp @@ -0,0 +1,2022 @@ +//===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ModuloSchedule.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopUtils.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "pipeliner" +using namespace llvm; + +void ModuloSchedule::print(raw_ostream &OS) { + for (MachineInstr *MI : ScheduledInstrs) + OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI; +} + +//===----------------------------------------------------------------------===// +// ModuloScheduleExpander implementation +//===----------------------------------------------------------------------===// + +/// Return the register values for the operands of a Phi instruction. +/// This function assumes the instruction is a Phi. +static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, + unsigned &InitVal, unsigned &LoopVal) { + assert(Phi.isPHI() && "Expecting a Phi."); + + InitVal = 0; + LoopVal = 0; + for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) + if (Phi.getOperand(i + 1).getMBB() != Loop) + InitVal = Phi.getOperand(i).getReg(); + else + LoopVal = Phi.getOperand(i).getReg(); + + assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure."); +} + +/// Return the Phi register value that comes from the incoming block. +static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { + for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) + if (Phi.getOperand(i + 1).getMBB() != LoopBB) + return Phi.getOperand(i).getReg(); + return 0; +} + +/// Return the Phi register value that comes from the loop block. +static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { + for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) + if (Phi.getOperand(i + 1).getMBB() == LoopBB) + return Phi.getOperand(i).getReg(); + return 0; +} + +void ModuloScheduleExpander::expand() { + BB = Schedule.getLoop()->getTopBlock(); + Preheader = *BB->pred_begin(); + if (Preheader == BB) + Preheader = *std::next(BB->pred_begin()); + + // Iterate over the definitions in each instruction, and compute the + // stage difference for each use. Keep the maximum value.
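// A worked example with made-up stage numbers: a value defined in stage 0
// whose furthest use is scheduled in stage 2 gets a difference of 2, so two
// extra copies of it must stay live across kernel iterations. For a Phi the
// difference is bumped by one when the dependence is loop carried, since the
// use reads the value produced by an earlier iteration; otherwise the Phi is
// marked as swapped, as the loop below records.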
+ for (MachineInstr *MI : Schedule.getInstructions()) { + int DefStage = Schedule.getStage(MI); + for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { + MachineOperand &Op = MI->getOperand(i); + if (!Op.isReg() || !Op.isDef()) + continue; + + Register Reg = Op.getReg(); + unsigned MaxDiff = 0; + bool PhiIsSwapped = false; + for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg), + EI = MRI.use_end(); + UI != EI; ++UI) { + MachineOperand &UseOp = *UI; + MachineInstr *UseMI = UseOp.getParent(); + int UseStage = Schedule.getStage(UseMI); + unsigned Diff = 0; + if (UseStage != -1 && UseStage >= DefStage) + Diff = UseStage - DefStage; + if (MI->isPHI()) { + if (isLoopCarried(*MI)) + ++Diff; + else + PhiIsSwapped = true; + } + MaxDiff = std::max(Diff, MaxDiff); + } + RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped); + } + } + + generatePipelinedLoop(); +} + +void ModuloScheduleExpander::generatePipelinedLoop() { + LoopInfo = TII->analyzeLoopForPipelining(BB); + assert(LoopInfo && "Must be able to analyze loop!"); + + // Create a new basic block for the kernel and add it to the CFG. + MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); + + unsigned MaxStageCount = Schedule.getNumStages() - 1; + + // Remember the registers that are used in different stages. The index is + // the iteration, or stage, that the instruction is scheduled in. This is + // a map between register names in the original block and the names created + // in each stage of the pipelined loop. + ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2]; + InstrMapTy InstrMap; + + SmallVector<MachineBasicBlock *, 4> PrologBBs; + + // Generate the prolog instructions that set up the pipeline. + generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs); + MF.insert(BB->getIterator(), KernelBB); + + // Rearrange the instructions to generate the new, pipelined loop, + // and update register names as needed. + for (MachineInstr *CI : Schedule.getInstructions()) { + if (CI->isPHI()) + continue; + unsigned StageNum = Schedule.getStage(CI); + MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum); + updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap); + KernelBB->push_back(NewMI); + InstrMap[NewMI] = CI; + } + + // Copy any terminator instructions to the new kernel, and update + // names as needed. + for (MachineBasicBlock::iterator I = BB->getFirstTerminator(), + E = BB->instr_end(); + I != E; ++I) { + MachineInstr *NewMI = MF.CloneMachineInstr(&*I); + updateInstruction(NewMI, false, MaxStageCount, 0, VRMap); + KernelBB->push_back(NewMI); + InstrMap[NewMI] = &*I; + } + + NewKernel = KernelBB; + KernelBB->transferSuccessors(BB); + KernelBB->replaceSuccessor(BB, KernelBB); + + generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, + InstrMap, MaxStageCount, MaxStageCount, false); + generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap, + MaxStageCount, MaxStageCount, false); + + LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump();); + + SmallVector<MachineBasicBlock *, 4> EpilogBBs; + // Generate the epilog instructions to complete the pipeline. + generateEpilog(MaxStageCount, KernelBB, VRMap, EpilogBBs, PrologBBs); + + // We need this step because the register allocation doesn't handle some + // situations well, so we insert copies to help out. + splitLifetimes(KernelBB, EpilogBBs); + + // Remove dead instructions due to loop induction variables. 
+ removeDeadInstructions(KernelBB, EpilogBBs); + + // Add branches between prolog and epilog blocks. + addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap); + + delete[] VRMap; +} + +void ModuloScheduleExpander::cleanup() { + // Remove the original loop since it's no longer referenced. + for (auto &I : *BB) + LIS.RemoveMachineInstrFromMaps(I); + BB->clear(); + BB->eraseFromParent(); +} + +/// Generate the pipeline prolog code. +void ModuloScheduleExpander::generateProlog(unsigned LastStage, + MachineBasicBlock *KernelBB, + ValueMapTy *VRMap, + MBBVectorTy &PrologBBs) { + MachineBasicBlock *PredBB = Preheader; + InstrMapTy InstrMap; + + // Generate a basic block for each stage, not including the last stage, + // which will be generated in the kernel. Each basic block may contain + // instructions from multiple stages/iterations. + for (unsigned i = 0; i < LastStage; ++i) { + // Create and insert the prolog basic block prior to the original loop + // basic block. The original loop is removed later. + MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); + PrologBBs.push_back(NewBB); + MF.insert(BB->getIterator(), NewBB); + NewBB->transferSuccessors(PredBB); + PredBB->addSuccessor(NewBB); + PredBB = NewBB; + + // Generate instructions for each appropriate stage. Process instructions + // in original program order. + for (int StageNum = i; StageNum >= 0; --StageNum) { + for (MachineBasicBlock::iterator BBI = BB->instr_begin(), + BBE = BB->getFirstTerminator(); + BBI != BBE; ++BBI) { + if (Schedule.getStage(&*BBI) == StageNum) { + if (BBI->isPHI()) + continue; + MachineInstr *NewMI = + cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum); + updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap); + NewBB->push_back(NewMI); + InstrMap[NewMI] = &*BBI; + } + } + } + rewritePhiValues(NewBB, i, VRMap, InstrMap); + LLVM_DEBUG({ + dbgs() << "prolog:\n"; + NewBB->dump(); + }); + } + + PredBB->replaceSuccessor(BB, KernelBB); + + // Check if we need to remove the branch from the preheader to the original + // loop, and replace it with a branch to the new loop. + unsigned numBranches = TII->removeBranch(*Preheader); + if (numBranches) { + SmallVector<MachineOperand, 0> Cond; + TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc()); + } +} + +/// Generate the pipeline epilog code. The epilog code finishes the iterations +/// that were started in either the prolog or the kernel. We create a basic +/// block for each stage that needs to complete. +void ModuloScheduleExpander::generateEpilog(unsigned LastStage, + MachineBasicBlock *KernelBB, + ValueMapTy *VRMap, + MBBVectorTy &EpilogBBs, + MBBVectorTy &PrologBBs) { + // We need to change the branch from the kernel to the first epilog block, so + // this call to analyze branch uses the kernel rather than the original BB. 
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector<MachineOperand, 4> Cond; + bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond); + assert(!checkBranch && "generateEpilog must be able to analyze the branch"); + if (checkBranch) + return; + + MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin(); + if (*LoopExitI == KernelBB) + ++LoopExitI; + assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor"); + MachineBasicBlock *LoopExitBB = *LoopExitI; + + MachineBasicBlock *PredBB = KernelBB; + MachineBasicBlock *EpilogStart = LoopExitBB; + InstrMapTy InstrMap; + + // Generate a basic block for each stage, not including the last stage, + // which was generated for the kernel. Each basic block may contain + // instructions from multiple stages/iterations. + int EpilogStage = LastStage + 1; + for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) { + MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(); + EpilogBBs.push_back(NewBB); + MF.insert(BB->getIterator(), NewBB); + + PredBB->replaceSuccessor(LoopExitBB, NewBB); + NewBB->addSuccessor(LoopExitBB); + + if (EpilogStart == LoopExitBB) + EpilogStart = NewBB; + + // Add instructions to the epilog depending on the current block. + // Process instructions in original program order. + for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) { + for (auto &BBI : *BB) { + if (BBI.isPHI()) + continue; + MachineInstr *In = &BBI; + if ((unsigned)Schedule.getStage(In) == StageNum) { + // Instructions with memoperands in the epilog are updated with + // conservative values. + MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0); + updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap); + NewBB->push_back(NewMI); + InstrMap[NewMI] = In; + } + } + } + generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, + InstrMap, LastStage, EpilogStage, i == 1); + generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap, + LastStage, EpilogStage, i == 1); + PredBB = NewBB; + + LLVM_DEBUG({ + dbgs() << "epilog:\n"; + NewBB->dump(); + }); + } + + // Fix any Phi nodes in the loop exit block. + LoopExitBB->replacePhiUsesWith(BB, PredBB); + + // Create a branch to the new epilog from the kernel. + // Remove the original branch and add a new branch to the epilog. + TII->removeBranch(*KernelBB); + TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc()); + // Add a branch to the loop exit. + if (EpilogBBs.size() > 0) { + MachineBasicBlock *LastEpilogBB = EpilogBBs.back(); + SmallVector<MachineOperand, 4> Cond1; + TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc()); + } +} + +/// Replace all uses of FromReg that appear outside the specified +/// basic block with ToReg. +static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, + MachineBasicBlock *MBB, + MachineRegisterInfo &MRI, + LiveIntervals &LIS) { + for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg), + E = MRI.use_end(); + I != E;) { + MachineOperand &O = *I; + ++I; + if (O.getParent()->getParent() != MBB) + O.setReg(ToReg); + } + if (!LIS.hasInterval(ToReg)) + LIS.createEmptyInterval(ToReg); +} + +/// Return true if the register has a use that occurs outside the +/// specified loop. 
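+/// This is used, for example, to decide whether a stage-0 definition still
+/// needs a Phi in the epilog (see generatePhis).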
+static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
+                            MachineRegisterInfo &MRI) {
+  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
+                                         E = MRI.use_end();
+       I != E; ++I)
+    if (I->getParent()->getParent() != BB)
+      return true;
+  return false;
+}
+
+/// Generate Phis for the specific block in the generated pipelined code.
+/// This function looks at the Phis from the original code to guide the
+/// creation of new Phis.
+void ModuloScheduleExpander::generateExistingPhis(
+    MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
+    MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
+    unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
+  // Compute the stage number for the initial value of the Phi, which
+  // comes from the prolog. The prolog to use depends on which kernel or
+  // epilog we're adding the Phi to.
+  unsigned PrologStage = 0;
+  unsigned PrevStage = 0;
+  bool InKernel = (LastStageNum == CurStageNum);
+  if (InKernel) {
+    PrologStage = LastStageNum - 1;
+    PrevStage = CurStageNum;
+  } else {
+    PrologStage = LastStageNum - (CurStageNum - LastStageNum);
+    PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
+  }
+
+  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
+                                   BBE = BB->getFirstNonPHI();
+       BBI != BBE; ++BBI) {
+    Register Def = BBI->getOperand(0).getReg();
+
+    unsigned InitVal = 0;
+    unsigned LoopVal = 0;
+    getPhiRegs(*BBI, BB, InitVal, LoopVal);
+
+    unsigned PhiOp1 = 0;
+    // The Phi value from the loop body typically is defined in the loop, but
+    // not always. So, we need to check if the value is defined in the loop.
+    unsigned PhiOp2 = LoopVal;
+    if (VRMap[LastStageNum].count(LoopVal))
+      PhiOp2 = VRMap[LastStageNum][LoopVal];
+
+    int StageScheduled = Schedule.getStage(&*BBI);
+    int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
+    unsigned NumStages = getStagesForReg(Def, CurStageNum);
+    if (NumStages == 0) {
+      // We don't need to generate a Phi anymore, but we need to rename any
+      // uses of the Phi value.
+      unsigned NewReg = VRMap[PrevStage][LoopVal];
+      rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
+                            InitVal, NewReg);
+      if (VRMap[CurStageNum].count(LoopVal))
+        VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
+    }
+    // Adjust the number of Phis needed depending on the number of prologs
+    // left, and the distance from where the Phi is first scheduled. The number
+    // of Phis cannot exceed the number of prolog stages. Each stage can
+    // potentially define two values.
+    unsigned MaxPhis = PrologStage + 2;
+    if (!InKernel && (int)PrologStage <= LoopValStage)
+      MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
+    unsigned NumPhis = std::min(NumStages, MaxPhis);
+
+    unsigned NewReg = 0;
+    unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
+    // In the epilog, we may need to look back one stage to get the correct
+    // Phi name, because the epilog and prolog blocks execute the same stage.
+    // The correct name is from the previous block only when the Phi has
+    // been completely scheduled prior to the epilog, and the Phi value is not
+    // needed in multiple stages.
+    int StageDiff = 0;
+    if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
+        NumPhis == 1)
+      StageDiff = 1;
+    // Adjust the computations below when the phi and the loop definition
+    // are scheduled in different stages.
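+    // (StageDiff then gives the number of stages the Phi lags behind the
+    // loop definition when indexing into VRMap.)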
+    if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
+      StageDiff = StageScheduled - LoopValStage;
+    for (unsigned np = 0; np < NumPhis; ++np) {
+      // If the Phi hasn't been scheduled, then use the initial Phi operand
+      // value. Otherwise, use the scheduled version of the instruction. This
+      // is a little complicated when a Phi references another Phi.
+      if (np > PrologStage || StageScheduled >= (int)LastStageNum)
+        PhiOp1 = InitVal;
+      // Check if the Phi has already been scheduled in a prolog stage.
+      else if (PrologStage >= AccessStage + StageDiff + np &&
+               VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
+        PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
+      // Check if the Phi has already been scheduled, but the loop instruction
+      // is either another Phi, or doesn't occur in the loop.
+      else if (PrologStage >= AccessStage + StageDiff + np) {
+        // If the Phi references another Phi, we need to examine the other
+        // Phi to get the correct value.
+        PhiOp1 = LoopVal;
+        MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
+        int Indirects = 1;
+        while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
+          int PhiStage = Schedule.getStage(InstOp1);
+          if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
+            PhiOp1 = getInitPhiReg(*InstOp1, BB);
+          else
+            PhiOp1 = getLoopPhiReg(*InstOp1, BB);
+          InstOp1 = MRI.getVRegDef(PhiOp1);
+          int PhiOpStage = Schedule.getStage(InstOp1);
+          int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
+          if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
+              VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
+            PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
+            break;
+          }
+          ++Indirects;
+        }
+      } else
+        PhiOp1 = InitVal;
+      // If this references a generated Phi in the kernel, get the Phi operand
+      // from the incoming block.
+      if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
+        if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
+          PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
+
+      MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
+      bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
+      // In the epilog, a map lookup is needed to get the value from the kernel,
+      // or previous epilog block. How this is done depends on whether the
+      // instruction is scheduled in the previous block.
+      if (!InKernel) {
+        int StageDiffAdj = 0;
+        if (LoopValStage != -1 && StageScheduled > LoopValStage)
+          StageDiffAdj = StageScheduled - LoopValStage;
+        // Use the loop value defined in the kernel, unless the kernel
+        // contains the last definition of the Phi.
+        if (np == 0 && PrevStage == LastStageNum &&
+            (StageScheduled != 0 || LoopValStage != 0) &&
+            VRMap[PrevStage - StageDiffAdj].count(LoopVal))
+          PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
+        // Use the value defined by the Phi. We add one because we switch
+        // from looking at the loop value to the Phi definition.
+        else if (np > 0 && PrevStage == LastStageNum &&
+                 VRMap[PrevStage - np + 1].count(Def))
+          PhiOp2 = VRMap[PrevStage - np + 1][Def];
+        // Use the loop value defined in the kernel.
+        else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
+                 VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
+          PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
+        // Use the value defined by the Phi, unless we're generating the first
+        // epilog and the Phi refers to a Phi in a different stage.
+        else if (VRMap[PrevStage - np].count(Def) &&
+                 (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
+                  (LoopValStage == StageScheduled)))
+          PhiOp2 = VRMap[PrevStage - np][Def];
+      }
+
+      // Check if we can reuse an existing Phi. This occurs when a Phi
+      // references another Phi, and the other Phi is scheduled in an
+      // earlier stage. We can try to reuse an existing Phi up until the last
+      // stage of the current Phi.
+      if (LoopDefIsPhi) {
+        if (static_cast<int>(PrologStage - np) >= StageScheduled) {
+          int LVNumStages = getStagesForPhi(LoopVal);
+          int StageDiff = (StageScheduled - LoopValStage);
+          LVNumStages -= StageDiff;
+          // Make sure the loop value Phi has been processed already.
+          if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
+            NewReg = PhiOp2;
+            unsigned ReuseStage = CurStageNum;
+            if (isLoopCarried(*PhiInst))
+              ReuseStage -= LVNumStages;
+            // Check if the Phi to reuse has been generated yet. If not, then
+            // there is nothing to reuse.
+            if (VRMap[ReuseStage - np].count(LoopVal)) {
+              NewReg = VRMap[ReuseStage - np][LoopVal];
+
+              rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
+                                    Def, NewReg);
+              // Update the map with the new Phi name.
+              VRMap[CurStageNum - np][Def] = NewReg;
+              PhiOp2 = NewReg;
+              if (VRMap[LastStageNum - np - 1].count(LoopVal))
+                PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
+
+              if (IsLast && np == NumPhis - 1)
+                replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+              continue;
+            }
+          }
+        }
+        if (InKernel && StageDiff > 0 &&
+            VRMap[CurStageNum - StageDiff - np].count(LoopVal))
+          PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
+      }
+
+      const TargetRegisterClass *RC = MRI.getRegClass(Def);
+      NewReg = MRI.createVirtualRegister(RC);
+
+      MachineInstrBuilder NewPhi =
+          BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
+                  TII->get(TargetOpcode::PHI), NewReg);
+      NewPhi.addReg(PhiOp1).addMBB(BB1);
+      NewPhi.addReg(PhiOp2).addMBB(BB2);
+      if (np == 0)
+        InstrMap[NewPhi] = &*BBI;
+
+      // We define the Phis after creating the new pipelined code, so
+      // we need to rename the Phi values in scheduled instructions.
+
+      unsigned PrevReg = 0;
+      if (InKernel && VRMap[PrevStage - np].count(LoopVal))
+        PrevReg = VRMap[PrevStage - np][LoopVal];
+      rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
+                            NewReg, PrevReg);
+      // If the Phi has been scheduled, use the new name for rewriting.
+      if (VRMap[CurStageNum - np].count(Def)) {
+        unsigned R = VRMap[CurStageNum - np][Def];
+        rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
+                              NewReg);
+      }
+
+      // Check if we need to rename any uses that occur after the loop. The
+      // register to replace depends on whether the Phi is scheduled in the
+      // epilog.
+      if (IsLast && np == NumPhis - 1)
+        replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+
+      // In the kernel, a dependent Phi uses the value from this Phi.
+      if (InKernel)
+        PhiOp2 = NewReg;
+
+      // Update the map with the new Phi name.
+      VRMap[CurStageNum - np][Def] = NewReg;
+    }
+
+    while (NumPhis++ < NumStages) {
+      rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
+                            NewReg, 0);
+    }
+
+    // Check if we need to rename a Phi that has been eliminated due to
+    // scheduling.
+    if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
+      replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
+  }
+}
+
+/// Generate Phis for the specified block in the generated pipelined code.
+/// These are new Phis needed because the definition is scheduled after the
+/// use in the pipelined sequence.
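+///
+/// For example (illustrative):
+///
+///   v1 = LD ...   ; scheduled in stage 0
+///   ..  = v1      ; scheduled in stage 2
+///
+/// In the pipelined loop the use must read the value loaded two iterations
+/// earlier, so a chain of two Phis is generated here to carry it across
+/// kernel iterations.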
+void ModuloScheduleExpander::generatePhis(
+    MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
+    MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
+    unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
+  // Compute the stage number that contains the initial Phi value, and
+  // the Phi from the previous stage.
+  unsigned PrologStage = 0;
+  unsigned PrevStage = 0;
+  unsigned StageDiff = CurStageNum - LastStageNum;
+  bool InKernel = (StageDiff == 0);
+  if (InKernel) {
+    PrologStage = LastStageNum - 1;
+    PrevStage = CurStageNum;
+  } else {
+    PrologStage = LastStageNum - StageDiff;
+    PrevStage = LastStageNum + StageDiff - 1;
+  }
+
+  for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
+                                   BBE = BB->instr_end();
+       BBI != BBE; ++BBI) {
+    for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = BBI->getOperand(i);
+      if (!MO.isReg() || !MO.isDef() ||
+          !Register::isVirtualRegister(MO.getReg()))
+        continue;
+
+      int StageScheduled = Schedule.getStage(&*BBI);
+      assert(StageScheduled != -1 && "Expecting scheduled instruction.");
+      Register Def = MO.getReg();
+      unsigned NumPhis = getStagesForReg(Def, CurStageNum);
+      // An instruction that is scheduled in stage 0 and used after the loop
+      // requires a phi in the epilog for the last definition from either
+      // the kernel or prolog.
+      if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
+          hasUseAfterLoop(Def, BB, MRI))
+        NumPhis = 1;
+      if (!InKernel && (unsigned)StageScheduled > PrologStage)
+        continue;
+
+      unsigned PhiOp2 = VRMap[PrevStage][Def];
+      if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
+        if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
+          PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
+      // The number of Phis can't exceed the number of prolog stages. The
+      // prolog stage number is zero based.
+      if (NumPhis > PrologStage + 1 - StageScheduled)
+        NumPhis = PrologStage + 1 - StageScheduled;
+      for (unsigned np = 0; np < NumPhis; ++np) {
+        unsigned PhiOp1 = VRMap[PrologStage][Def];
+        if (np <= PrologStage)
+          PhiOp1 = VRMap[PrologStage - np][Def];
+        if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
+          if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
+            PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
+          if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
+            PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
+        }
+        if (!InKernel)
+          PhiOp2 = VRMap[PrevStage - np][Def];
+
+        const TargetRegisterClass *RC = MRI.getRegClass(Def);
+        Register NewReg = MRI.createVirtualRegister(RC);
+
+        MachineInstrBuilder NewPhi =
+            BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
+                    TII->get(TargetOpcode::PHI), NewReg);
+        NewPhi.addReg(PhiOp1).addMBB(BB1);
+        NewPhi.addReg(PhiOp2).addMBB(BB2);
+        if (np == 0)
+          InstrMap[NewPhi] = &*BBI;
+
+        // Rewrite uses and update the map. The actions depend upon whether
+        // we are generating code for the kernel or epilog blocks.
+        if (InKernel) {
+          rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
+                                NewReg);
+          rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
+                                NewReg);
+
+          PhiOp2 = NewReg;
+          VRMap[PrevStage - np - 1][Def] = NewReg;
+        } else {
+          VRMap[CurStageNum - np][Def] = NewReg;
+          if (np == NumPhis - 1)
+            rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
+                                  NewReg);
+        }
+        if (IsLast && np == NumPhis - 1)
+          replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+      }
+    }
+  }
+}
+
+/// Remove instructions that generate values with no uses.
+/// Typically, these are induction variable operations that generate values
+/// used in the loop itself. A dead instruction has a definition with
+/// no uses, or uses that occur in the original loop only.
+void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
+                                                    MBBVectorTy &EpilogBBs) {
+  // For each epilog block, check that the value defined by each instruction
+  // is used. If not, delete it.
+  for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
+                                     MBE = EpilogBBs.rend();
+       MBB != MBE; ++MBB)
+    for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
+                                                   ME = (*MBB)->instr_rend();
+         MI != ME;) {
+      // From DeadMachineInstructionElim. Don't delete inline assembly.
+      if (MI->isInlineAsm()) {
+        ++MI;
+        continue;
+      }
+      bool SawStore = false;
+      // Check if it's safe to remove the instruction due to side effects.
+      // We can, and want to, remove Phis here.
+      if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
+        ++MI;
+        continue;
+      }
+      bool used = true;
+      for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+                                      MOE = MI->operands_end();
+           MOI != MOE; ++MOI) {
+        if (!MOI->isReg() || !MOI->isDef())
+          continue;
+        Register reg = MOI->getReg();
+        // Assume physical registers are used, unless they are marked dead.
+        if (Register::isPhysicalRegister(reg)) {
+          used = !MOI->isDead();
+          if (used)
+            break;
+          continue;
+        }
+        unsigned realUses = 0;
+        for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
+                                               EI = MRI.use_end();
+             UI != EI; ++UI) {
+          // Uses that occur only in the original loop block are not real
+          // uses, since the original loop is deleted once the pipelined
+          // code has been generated.
+          if (UI->getParent()->getParent() != BB) {
+            realUses++;
+            used = true;
+            break;
+          }
+        }
+        if (realUses > 0)
+          break;
+        used = false;
+      }
+      if (!used) {
+        LIS.RemoveMachineInstrFromMaps(*MI);
+        MI++->eraseFromParent();
+        continue;
+      }
+      ++MI;
+    }
+  // In the kernel block, check if we can remove a Phi that generates a value
+  // used in an instruction removed in the epilog block.
+  for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
+                                   BBE = KernelBB->getFirstNonPHI();
+       BBI != BBE;) {
+    MachineInstr *MI = &*BBI;
+    ++BBI;
+    Register reg = MI->getOperand(0).getReg();
+    if (MRI.use_begin(reg) == MRI.use_end()) {
+      LIS.RemoveMachineInstrFromMaps(*MI);
+      MI->eraseFromParent();
+    }
+  }
+}
+
+/// For loop carried definitions, we split the lifetime of a virtual register
+/// that has uses past the definition in the next iteration. A copy with a new
+/// virtual register is inserted before the definition, which helps with
+/// generating a better register assignment.
+///
+///   v1 = phi(a, v2)       v1 = phi(a, v2)
+///   v2 = phi(b, v3)       v2 = phi(b, v3)
+///   v3 = ..               v4 = copy v1
+///   .. = v1               v3 = ..
+///                         .. = v4
+void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
+                                            MBBVectorTy &EpilogBBs) {
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  for (auto &PHI : KernelBB->phis()) {
+    Register Def = PHI.getOperand(0).getReg();
+    // Check for any Phi definition that is used as an operand of another Phi
+    // in the same block.
+    for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
+                                                 E = MRI.use_instr_end();
+         I != E; ++I) {
+      if (I->isPHI() && I->getParent() == KernelBB) {
+        // Get the loop carried definition.
+ unsigned LCDef = getLoopPhiReg(PHI, KernelBB); + if (!LCDef) + continue; + MachineInstr *MI = MRI.getVRegDef(LCDef); + if (!MI || MI->getParent() != KernelBB || MI->isPHI()) + continue; + // Search through the rest of the block looking for uses of the Phi + // definition. If one occurs, then split the lifetime. + unsigned SplitReg = 0; + for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI), + KernelBB->instr_end())) + if (BBJ.readsRegister(Def)) { + // We split the lifetime when we find the first use. + if (SplitReg == 0) { + SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def)); + BuildMI(*KernelBB, MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), SplitReg) + .addReg(Def); + } + BBJ.substituteRegister(Def, SplitReg, 0, *TRI); + } + if (!SplitReg) + continue; + // Search through each of the epilog blocks for any uses to be renamed. + for (auto &Epilog : EpilogBBs) + for (auto &I : *Epilog) + if (I.readsRegister(Def)) + I.substituteRegister(Def, SplitReg, 0, *TRI); + break; + } + } + } +} + +/// Remove the incoming block from the Phis in a basic block. +static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) { + for (MachineInstr &MI : *BB) { + if (!MI.isPHI()) + break; + for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) + if (MI.getOperand(i + 1).getMBB() == Incoming) { + MI.RemoveOperand(i + 1); + MI.RemoveOperand(i); + break; + } + } +} + +/// Create branches from each prolog basic block to the appropriate epilog +/// block. These edges are needed if the loop ends before reaching the +/// kernel. +void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB, + MBBVectorTy &PrologBBs, + MachineBasicBlock *KernelBB, + MBBVectorTy &EpilogBBs, + ValueMapTy *VRMap) { + assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch"); + MachineBasicBlock *LastPro = KernelBB; + MachineBasicBlock *LastEpi = KernelBB; + + // Start from the blocks connected to the kernel and work "out" + // to the first prolog and the last epilog blocks. + SmallVector<MachineInstr *, 4> PrevInsts; + unsigned MaxIter = PrologBBs.size() - 1; + for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) { + // Add branches to the prolog that go to the corresponding + // epilog, and the fall-thru prolog/kernel block. + MachineBasicBlock *Prolog = PrologBBs[j]; + MachineBasicBlock *Epilog = EpilogBBs[i]; + + SmallVector<MachineOperand, 4> Cond; + Optional<bool> StaticallyGreater = + LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond); + unsigned numAdded = 0; + if (!StaticallyGreater.hasValue()) { + Prolog->addSuccessor(Epilog); + numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc()); + } else if (*StaticallyGreater == false) { + Prolog->addSuccessor(Epilog); + Prolog->removeSuccessor(LastPro); + LastEpi->removeSuccessor(Epilog); + numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc()); + removePhis(Epilog, LastEpi); + // Remove the blocks that are no longer referenced. 
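+      // (A statically-false trip count test means execution can never reach
+      // this prolog/kernel stage, so the corresponding blocks are dead.)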
+ if (LastPro != LastEpi) { + LastEpi->clear(); + LastEpi->eraseFromParent(); + } + if (LastPro == KernelBB) { + LoopInfo->disposed(); + NewKernel = nullptr; + } + LastPro->clear(); + LastPro->eraseFromParent(); + } else { + numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc()); + removePhis(Epilog, Prolog); + } + LastPro = Prolog; + LastEpi = Epilog; + for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(), + E = Prolog->instr_rend(); + I != E && numAdded > 0; ++I, --numAdded) + updateInstruction(&*I, false, j, 0, VRMap); + } + + if (NewKernel) { + LoopInfo->setPreheader(PrologBBs[MaxIter]); + LoopInfo->adjustTripCount(-(MaxIter + 1)); + } +} + +/// Return true if we can compute the amount the instruction changes +/// during each iteration. Set Delta to the amount of the change. +bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const MachineOperand *BaseOp; + int64_t Offset; + if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) + return false; + + if (!BaseOp->isReg()) + return false; + + Register BaseReg = BaseOp->getReg(); + + MachineRegisterInfo &MRI = MF.getRegInfo(); + // Check if there is a Phi. If so, get the definition in the loop. + MachineInstr *BaseDef = MRI.getVRegDef(BaseReg); + if (BaseDef && BaseDef->isPHI()) { + BaseReg = getLoopPhiReg(*BaseDef, MI.getParent()); + BaseDef = MRI.getVRegDef(BaseReg); + } + if (!BaseDef) + return false; + + int D = 0; + if (!TII->getIncrementValue(*BaseDef, D) && D >= 0) + return false; + + Delta = D; + return true; +} + +/// Update the memory operand with a new offset when the pipeliner +/// generates a new copy of the instruction that refers to a +/// different memory location. +void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI, + MachineInstr &OldMI, + unsigned Num) { + if (Num == 0) + return; + // If the instruction has memory operands, then adjust the offset + // when the instruction appears in different stages. + if (NewMI.memoperands_empty()) + return; + SmallVector<MachineMemOperand *, 2> NewMMOs; + for (MachineMemOperand *MMO : NewMI.memoperands()) { + // TODO: Figure out whether isAtomic is really necessary (see D57601). + if (MMO->isVolatile() || MMO->isAtomic() || + (MMO->isInvariant() && MMO->isDereferenceable()) || + (!MMO->getValue())) { + NewMMOs.push_back(MMO); + continue; + } + unsigned Delta; + if (Num != UINT_MAX && computeDelta(OldMI, Delta)) { + int64_t AdjOffset = Delta * Num; + NewMMOs.push_back( + MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize())); + } else { + NewMMOs.push_back( + MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize)); + } + } + NewMI.setMemRefs(MF, NewMMOs); +} + +/// Clone the instruction for the new pipelined loop and update the +/// memory operands, if needed. +MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI, + unsigned CurStageNum, + unsigned InstStageNum) { + MachineInstr *NewMI = MF.CloneMachineInstr(OldMI); + // Check for tied operands in inline asm instructions. This should be handled + // elsewhere, but I'm not sure of the best solution. 
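+  // (Inline asm ties are not encoded in the opcode description, so cloning
+  // may not preserve them; the loop below re-ties them manually.)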
+  if (OldMI->isInlineAsm())
+    for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
+      const auto &MO = OldMI->getOperand(i);
+      if (MO.isReg() && MO.isUse())
+        break;
+      unsigned UseIdx;
+      if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
+        NewMI->tieOperands(i, UseIdx);
+    }
+  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
+  return NewMI;
+}
+
+/// Clone the instruction for the new pipelined loop. If needed, this
+/// function updates the instruction using the values saved in the
+/// InstrChanges structure.
+MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
+    MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
+  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
+  auto It = InstrChanges.find(OldMI);
+  if (It != InstrChanges.end()) {
+    std::pair<unsigned, int64_t> RegAndOffset = It->second;
+    unsigned BasePos, OffsetPos;
+    if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
+      return nullptr;
+    int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
+    MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
+    if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
+      NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
+    NewMI->getOperand(OffsetPos).setImm(NewOffset);
+  }
+  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
+  return NewMI;
+}
+
+/// Update the machine instruction with new virtual registers. This
+/// function may change the definitions and/or uses.
+void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
+                                               bool LastDef,
+                                               unsigned CurStageNum,
+                                               unsigned InstrStageNum,
+                                               ValueMapTy *VRMap) {
+  for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = NewMI->getOperand(i);
+    if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
+      continue;
+    Register reg = MO.getReg();
+    if (MO.isDef()) {
+      // Create a new virtual register for the definition.
+      const TargetRegisterClass *RC = MRI.getRegClass(reg);
+      Register NewReg = MRI.createVirtualRegister(RC);
+      MO.setReg(NewReg);
+      VRMap[CurStageNum][reg] = NewReg;
+      if (LastDef)
+        replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
+    } else if (MO.isUse()) {
+      MachineInstr *Def = MRI.getVRegDef(reg);
+      // Compute the stage that contains the last definition for the
+      // instruction.
+      int DefStageNum = Schedule.getStage(Def);
+      unsigned StageNum = CurStageNum;
+      if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
+        // Compute the difference in stages between the definition and the use.
+        unsigned StageDiff = (InstrStageNum - DefStageNum);
+        // Make an adjustment to get the last definition.
+        StageNum -= StageDiff;
+      }
+      if (VRMap[StageNum].count(reg))
+        MO.setReg(VRMap[StageNum][reg]);
+    }
+  }
+}
+
+/// Return the instruction in the loop that defines the register.
+/// If the definition is a Phi, then follow the Phi operand to
+/// the instruction in the loop.
+MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
+  SmallPtrSet<MachineInstr *, 8> Visited;
+  MachineInstr *Def = MRI.getVRegDef(Reg);
+  while (Def->isPHI()) {
+    if (!Visited.insert(Def).second)
+      break;
+    for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
+      if (Def->getOperand(i + 1).getMBB() == BB) {
+        Def = MRI.getVRegDef(Def->getOperand(i).getReg());
+        break;
+      }
+  }
+  return Def;
+}
+
+/// Return the new name for the value from the previous stage.
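+/// Returns zero when no mapping exists, in which case the caller falls back
+/// to the Phi's initial value.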
+unsigned ModuloScheduleExpander::getPrevMapVal( + unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage, + ValueMapTy *VRMap, MachineBasicBlock *BB) { + unsigned PrevVal = 0; + if (StageNum > PhiStage) { + MachineInstr *LoopInst = MRI.getVRegDef(LoopVal); + if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal)) + // The name is defined in the previous stage. + PrevVal = VRMap[StageNum - 1][LoopVal]; + else if (VRMap[StageNum].count(LoopVal)) + // The previous name is defined in the current stage when the instruction + // order is swapped. + PrevVal = VRMap[StageNum][LoopVal]; + else if (!LoopInst->isPHI() || LoopInst->getParent() != BB) + // The loop value hasn't yet been scheduled. + PrevVal = LoopVal; + else if (StageNum == PhiStage + 1) + // The loop value is another phi, which has not been scheduled. + PrevVal = getInitPhiReg(*LoopInst, BB); + else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB) + // The loop value is another phi, which has been scheduled. + PrevVal = + getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB), + LoopStage, VRMap, BB); + } + return PrevVal; +} + +/// Rewrite the Phi values in the specified block to use the mappings +/// from the initial operand. Once the Phi is scheduled, we switch +/// to using the loop value instead of the Phi value, so those names +/// do not need to be rewritten. +void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB, + unsigned StageNum, + ValueMapTy *VRMap, + InstrMapTy &InstrMap) { + for (auto &PHI : BB->phis()) { + unsigned InitVal = 0; + unsigned LoopVal = 0; + getPhiRegs(PHI, BB, InitVal, LoopVal); + Register PhiDef = PHI.getOperand(0).getReg(); + + unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef)); + unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal)); + unsigned NumPhis = getStagesForPhi(PhiDef); + if (NumPhis > StageNum) + NumPhis = StageNum; + for (unsigned np = 0; np <= NumPhis; ++np) { + unsigned NewVal = + getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB); + if (!NewVal) + NewVal = InitVal; + rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef, + NewVal); + } + } +} + +/// Rewrite a previously scheduled instruction to use the register value +/// from the new instruction. Make sure the instruction occurs in the +/// basic block, and we don't change the uses in the new instruction. +void ModuloScheduleExpander::rewriteScheduledInstr( + MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum, + unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg, + unsigned PrevReg) { + bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1); + int StagePhi = Schedule.getStage(Phi) + PhiNum; + // Rewrite uses that have been scheduled already to use the new + // Phi register. 
+ for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg), + EI = MRI.use_end(); + UI != EI;) { + MachineOperand &UseOp = *UI; + MachineInstr *UseMI = UseOp.getParent(); + ++UI; + if (UseMI->getParent() != BB) + continue; + if (UseMI->isPHI()) { + if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg) + continue; + if (getLoopPhiReg(*UseMI, BB) != OldReg) + continue; + } + InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI); + assert(OrigInstr != InstrMap.end() && "Instruction not scheduled."); + MachineInstr *OrigMI = OrigInstr->second; + int StageSched = Schedule.getStage(OrigMI); + int CycleSched = Schedule.getCycle(OrigMI); + unsigned ReplaceReg = 0; + // This is the stage for the scheduled instruction. + if (StagePhi == StageSched && Phi->isPHI()) { + int CyclePhi = Schedule.getCycle(Phi); + if (PrevReg && InProlog) + ReplaceReg = PrevReg; + else if (PrevReg && !isLoopCarried(*Phi) && + (CyclePhi <= CycleSched || OrigMI->isPHI())) + ReplaceReg = PrevReg; + else + ReplaceReg = NewReg; + } + // The scheduled instruction occurs before the scheduled Phi, and the + // Phi is not loop carried. + if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi)) + ReplaceReg = NewReg; + if (StagePhi > StageSched && Phi->isPHI()) + ReplaceReg = NewReg; + if (!InProlog && !Phi->isPHI() && StagePhi < StageSched) + ReplaceReg = NewReg; + if (ReplaceReg) { + MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg)); + UseOp.setReg(ReplaceReg); + } + } +} + +bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) { + if (!Phi.isPHI()) + return false; + unsigned DefCycle = Schedule.getCycle(&Phi); + int DefStage = Schedule.getStage(&Phi); + + unsigned InitVal = 0; + unsigned LoopVal = 0; + getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal); + MachineInstr *Use = MRI.getVRegDef(LoopVal); + if (!Use || Use->isPHI()) + return true; + unsigned LoopCycle = Schedule.getCycle(Use); + int LoopStage = Schedule.getStage(Use); + return (LoopCycle > DefCycle) || (LoopStage <= DefStage); +} + +//===----------------------------------------------------------------------===// +// PeelingModuloScheduleExpander implementation +//===----------------------------------------------------------------------===// +// This is a reimplementation of ModuloScheduleExpander that works by creating +// a fully correct steady-state kernel and peeling off the prolog and epilogs. +//===----------------------------------------------------------------------===// + +namespace { +// Remove any dead phis in MBB. Dead phis either have only one block as input +// (in which case they are the identity) or have no uses. +void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI, + LiveIntervals *LIS) { + bool Changed = true; + while (Changed) { + Changed = false; + for (auto I = MBB->begin(); I != MBB->getFirstNonPHI();) { + MachineInstr &MI = *I++; + assert(MI.isPHI()); + if (MRI.use_empty(MI.getOperand(0).getReg())) { + if (LIS) + LIS->RemoveMachineInstrFromMaps(MI); + MI.eraseFromParent(); + Changed = true; + } else if (MI.getNumExplicitOperands() == 3) { + MRI.constrainRegClass(MI.getOperand(1).getReg(), + MRI.getRegClass(MI.getOperand(0).getReg())); + MRI.replaceRegWith(MI.getOperand(0).getReg(), + MI.getOperand(1).getReg()); + if (LIS) + LIS->RemoveMachineInstrFromMaps(MI); + MI.eraseFromParent(); + Changed = true; + } + } + } +} + +/// Rewrites the kernel block in-place to adhere to the given schedule. +/// KernelRewriter holds all of the state required to perform the rewriting. 
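+/// Unlike ModuloScheduleExpander above, no prolog or epilog blocks are built
+/// here; the peeling expander derives them afterwards by cloning the
+/// rewritten kernel.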
+class KernelRewriter { + ModuloSchedule &S; + MachineBasicBlock *BB; + MachineBasicBlock *PreheaderBB, *ExitBB; + MachineRegisterInfo &MRI; + const TargetInstrInfo *TII; + LiveIntervals *LIS; + + // Map from register class to canonical undef register for that class. + DenseMap<const TargetRegisterClass *, Register> Undefs; + // Map from <LoopReg, InitReg> to phi register for all created phis. Note that + // this map is only used when InitReg is non-undef. + DenseMap<std::pair<unsigned, unsigned>, Register> Phis; + // Map from LoopReg to phi register where the InitReg is undef. + DenseMap<Register, Register> UndefPhis; + + // Reg is used by MI. Return the new register MI should use to adhere to the + // schedule. Insert phis as necessary. + Register remapUse(Register Reg, MachineInstr &MI); + // Insert a phi that carries LoopReg from the loop body and InitReg otherwise. + // If InitReg is not given it is chosen arbitrarily. It will either be undef + // or will be chosen so as to share another phi. + Register phi(Register LoopReg, Optional<Register> InitReg = {}, + const TargetRegisterClass *RC = nullptr); + // Create an undef register of the given register class. + Register undef(const TargetRegisterClass *RC); + +public: + KernelRewriter(MachineLoop &L, ModuloSchedule &S, + LiveIntervals *LIS = nullptr); + void rewrite(); +}; +} // namespace + +KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S, + LiveIntervals *LIS) + : S(S), BB(L.getTopBlock()), PreheaderBB(L.getLoopPreheader()), + ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()), + TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) { + PreheaderBB = *BB->pred_begin(); + if (PreheaderBB == BB) + PreheaderBB = *std::next(BB->pred_begin()); +} + +void KernelRewriter::rewrite() { + // Rearrange the loop to be in schedule order. Note that the schedule may + // contain instructions that are not owned by the loop block (InstrChanges and + // friends), so we gracefully handle unowned instructions and delete any + // instructions that weren't in the schedule. + auto InsertPt = BB->getFirstTerminator(); + MachineInstr *FirstMI = nullptr; + for (MachineInstr *MI : S.getInstructions()) { + if (MI->isPHI()) + continue; + if (MI->getParent()) + MI->removeFromParent(); + BB->insert(InsertPt, MI); + if (!FirstMI) + FirstMI = MI; + } + assert(FirstMI && "Failed to find first MI in schedule"); + + // At this point all of the scheduled instructions are between FirstMI + // and the end of the block. Kill from the first non-phi to FirstMI. + for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) { + if (LIS) + LIS->RemoveMachineInstrFromMaps(*I); + (I++)->eraseFromParent(); + } + + // Now remap every instruction in the loop. + for (MachineInstr &MI : *BB) { + if (MI.isPHI() || MI.isTerminator()) + continue; + for (MachineOperand &MO : MI.uses()) { + if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit()) + continue; + Register Reg = remapUse(MO.getReg(), MI); + MO.setReg(Reg); + } + } + EliminateDeadPhis(BB, MRI, LIS); + + // Ensure a phi exists for all instructions that are either referenced by + // an illegal phi or by an instruction outside the loop. This allows us to + // treat remaps of these values the same as "normal" values that come from + // loop-carried phis. 
+  for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
+    if (MI->isPHI()) {
+      Register R = MI->getOperand(0).getReg();
+      phi(R);
+      continue;
+    }
+
+    for (MachineOperand &Def : MI->defs()) {
+      for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
+        if (MI.getParent() != BB) {
+          phi(Def.getReg());
+          break;
+        }
+      }
+    }
+  }
+}
+
+Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
+  MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
+  if (!Producer)
+    return Reg;
+
+  int ConsumerStage = S.getStage(&MI);
+  if (!Producer->isPHI()) {
+    // Non-phi producers are simple to remap. Insert as many phis as the
+    // difference between the consumer and producer stages.
+    if (Producer->getParent() != BB)
+      // Producer was not inside the loop. Use the register as-is.
+      return Reg;
+    int ProducerStage = S.getStage(Producer);
+    assert(ConsumerStage != -1 &&
+           "In-loop consumer should always be scheduled!");
+    assert(ConsumerStage >= ProducerStage);
+    unsigned StageDiff = ConsumerStage - ProducerStage;
+
+    for (unsigned I = 0; I < StageDiff; ++I)
+      Reg = phi(Reg);
+    return Reg;
+  }
+
+  // First, dive through the phi chain to find the defaults for the generated
+  // phis.
+  SmallVector<Optional<Register>, 4> Defaults;
+  Register LoopReg = Reg;
+  auto LoopProducer = Producer;
+  while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
+    LoopReg = getLoopPhiReg(*LoopProducer, BB);
+    Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
+    LoopProducer = MRI.getUniqueVRegDef(LoopReg);
+    assert(LoopProducer);
+  }
+  int LoopProducerStage = S.getStage(LoopProducer);
+
+  Optional<Register> IllegalPhiDefault;
+
+  if (LoopProducerStage == -1) {
+    // Do nothing.
+  } else if (LoopProducerStage > ConsumerStage) {
+    // This schedule is only representable if ProducerStage == ConsumerStage+1.
+    // In addition, Consumer's cycle must be scheduled after Producer in the
+    // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
+    // functions.
+#ifndef NDEBUG // Silence unused variables in non-asserts mode.
+    int LoopProducerCycle = S.getCycle(LoopProducer);
+    int ConsumerCycle = S.getCycle(&MI);
+#endif
+    assert(LoopProducerCycle <= ConsumerCycle);
+    assert(LoopProducerStage == ConsumerStage + 1);
+    // Peel off the first phi from Defaults and insert a phi between producer
+    // and consumer. This phi will not be at the front of the block so we
+    // consider it illegal. It will only exist during the rewrite process; it
+    // needs to exist while we peel off prologs because these could take the
+    // default value. After that we can replace all uses with the loop producer
+    // value.
+    IllegalPhiDefault = Defaults.front();
+    Defaults.erase(Defaults.begin());
+  } else {
+    assert(ConsumerStage >= LoopProducerStage);
+    int StageDiff = ConsumerStage - LoopProducerStage;
+    if (StageDiff > 0) {
+      LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
+                        << " to " << (Defaults.size() + StageDiff) << "\n");
+      // If we need more phis than we have defaults for, pad out the earliest
+      // phis, which are at the end of the defaults chain (the chain is in
+      // reverse order). The padding reuses the earliest default, or undef if
+      // there are no defaults at all.
+      Defaults.resize(Defaults.size() + StageDiff, Defaults.empty()
+                                                       ? Optional<Register>()
+                                                       : Defaults.back());
+    }
+  }
+
+  // Now we know the number of stages to jump back, insert the phi chain.
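+  // (Defaults is in reverse order, so it is walked backwards: the first phi
+  // created, nearest the producer, takes the earliest default.)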
+ auto DefaultI = Defaults.rbegin(); + while (DefaultI != Defaults.rend()) + LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg)); + + if (IllegalPhiDefault.hasValue()) { + // The consumer optionally consumes LoopProducer in the same iteration + // (because the producer is scheduled at an earlier cycle than the consumer) + // or the initial value. To facilitate this we create an illegal block here + // by embedding a phi in the middle of the block. We will fix this up + // immediately prior to pruning. + auto RC = MRI.getRegClass(Reg); + Register R = MRI.createVirtualRegister(RC); + BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R) + .addReg(IllegalPhiDefault.getValue()) + .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect. + .addReg(LoopReg) + .addMBB(BB); // Block choice is arbitrary and has no effect. + return R; + } + + return LoopReg; +} + +Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg, + const TargetRegisterClass *RC) { + // If the init register is not undef, try and find an existing phi. + if (InitReg.hasValue()) { + auto I = Phis.find({LoopReg, InitReg.getValue()}); + if (I != Phis.end()) + return I->second; + } else { + for (auto &KV : Phis) { + if (KV.first.first == LoopReg) + return KV.second; + } + } + + // InitReg is either undef or no existing phi takes InitReg as input. Try and + // find a phi that takes undef as input. + auto I = UndefPhis.find(LoopReg); + if (I != UndefPhis.end()) { + Register R = I->second; + if (!InitReg.hasValue()) + // Found a phi taking undef as input, and this input is undef so return + // without any more changes. + return R; + // Found a phi taking undef as input, so rewrite it to take InitReg. + MachineInstr *MI = MRI.getVRegDef(R); + MI->getOperand(1).setReg(InitReg.getValue()); + Phis.insert({{LoopReg, InitReg.getValue()}, R}); + MRI.constrainRegClass(R, MRI.getRegClass(InitReg.getValue())); + UndefPhis.erase(I); + return R; + } + + // Failed to find any existing phi to reuse, so create a new one. + if (!RC) + RC = MRI.getRegClass(LoopReg); + Register R = MRI.createVirtualRegister(RC); + if (InitReg.hasValue()) + MRI.constrainRegClass(R, MRI.getRegClass(*InitReg)); + BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R) + .addReg(InitReg.hasValue() ? *InitReg : undef(RC)) + .addMBB(PreheaderBB) + .addReg(LoopReg) + .addMBB(BB); + if (!InitReg.hasValue()) + UndefPhis[LoopReg] = R; + else + Phis[{LoopReg, *InitReg}] = R; + return R; +} + +Register KernelRewriter::undef(const TargetRegisterClass *RC) { + Register &R = Undefs[RC]; + if (R == 0) { + // Create an IMPLICIT_DEF that defines this register if we need it. + // All uses of this should be removed by the time we have finished unrolling + // prologs and epilogs. + R = MRI.createVirtualRegister(RC); + auto *InsertBB = &PreheaderBB->getParent()->front(); + BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), R); + } + return R; +} + +namespace { +/// Describes an operand in the kernel of a pipelined loop. Characteristics of +/// the operand are discovered, such as how many in-loop PHIs it has to jump +/// through and defaults for these phis. 
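+/// Two KernelOperandInfos compare equal when they jump through the same
+/// number of phis; the validation code below relies on this to compare
+/// kernels.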
+class KernelOperandInfo {
+  MachineBasicBlock *BB;
+  MachineRegisterInfo &MRI;
+  SmallVector<Register, 4> PhiDefaults;
+  MachineOperand *Source;
+  MachineOperand *Target;
+
+public:
+  KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
+                    const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
+      : MRI(MRI) {
+    Source = MO;
+    BB = MO->getParent()->getParent();
+    while (isRegInLoop(MO)) {
+      MachineInstr *MI = MRI.getVRegDef(MO->getReg());
+      if (MI->isFullCopy()) {
+        MO = &MI->getOperand(1);
+        continue;
+      }
+      if (!MI->isPHI())
+        break;
+      // If this is an illegal phi, don't count it in distance.
+      if (IllegalPhis.count(MI)) {
+        MO = &MI->getOperand(3);
+        continue;
+      }
+
+      Register Default = getInitPhiReg(*MI, BB);
+      MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
+                                            : &MI->getOperand(3);
+      PhiDefaults.push_back(Default);
+    }
+    Target = MO;
+  }
+
+  bool operator==(const KernelOperandInfo &Other) const {
+    return PhiDefaults.size() == Other.PhiDefaults.size();
+  }
+
+  void print(raw_ostream &OS) const {
+    OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
+       << *Source->getParent();
+  }
+
+private:
+  bool isRegInLoop(MachineOperand *MO) {
+    return MO->isReg() && MO->getReg().isVirtual() &&
+           MRI.getVRegDef(MO->getReg())->getParent() == BB;
+  }
+};
+} // namespace
+
+MachineBasicBlock *
+PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
+  MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
+  if (LPD == LPD_Front)
+    PeeledFront.push_back(NewBB);
+  else
+    PeeledBack.push_front(NewBB);
+  for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
+       ++I, ++NI) {
+    CanonicalMIs[&*I] = &*I;
+    CanonicalMIs[&*NI] = &*I;
+    BlockMIs[{NewBB, &*I}] = &*NI;
+    BlockMIs[{BB, &*I}] = &*I;
+  }
+  return NewBB;
+}
+
+void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
+  BitVector LS(Schedule.getNumStages(), true);
+  BitVector AS(Schedule.getNumStages(), true);
+  LiveStages[BB] = LS;
+  AvailableStages[BB] = AS;
+
+  // Peel out the prologs.
+  LS.reset();
+  for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
+    LS[I] = 1;
+    Prologs.push_back(peelKernel(LPD_Front));
+    LiveStages[Prologs.back()] = LS;
+    AvailableStages[Prologs.back()] = LS;
+  }
+
+  // Create a block that will end up as the new loop exiting block (dominated by
+  // all prologs and epilogs). It will only contain PHIs, in the same order as
+  // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
+  // that the exiting block is a (sub) clone of BB. This in turn gives us the
+  // property that any value defined in BB but used outside of BB is used by a
+  // PHI in the exiting block.
+  MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock();
+
+  // Push out the epilogs, again in reverse order.
+  // We can't assume anything about the minimum loop trip count at this point,
+  // so emit a fairly complex epilog:
+  //  K[0, 1, 2]     // Kernel runs stages 0, 1, 2
+  //  E0[2] <- P1    // Epilog runs stage 2 only, so the state after is [0].
+  //  E1[1, 2] <- P0 // Epilog 1 moves the last item from stage 0 to stage 2.
+  //
+  // This creates a single-successor single-predecessor sequence of blocks for
+  // each epilog, which are kept this way for simplicity at this stage and
+  // cleaned up by the optimizer later.
+ for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) { + Epilogs.push_back(nullptr); + for (int J = Schedule.getNumStages() - 1; J >= I; --J) { + LS.reset(); + LS[J] = 1; + Epilogs.back() = peelKernel(LPD_Back); + LiveStages[Epilogs.back()] = LS; + AvailableStages[Epilogs.back()] = AS; + } + } + + // Now we've defined all the prolog and epilog blocks as a fallthrough + // sequence, add the edges that will be followed if the loop trip count is + // lower than the number of stages (connecting prologs directly with epilogs). + auto PI = Prologs.begin(); + auto EI = Epilogs.begin(); + assert(Prologs.size() == Epilogs.size()); + for (; PI != Prologs.end(); ++PI, ++EI) { + MachineBasicBlock *Pred = *(*EI)->pred_begin(); + (*PI)->addSuccessor(*EI); + for (MachineInstr &MI : (*EI)->phis()) { + Register Reg = MI.getOperand(1).getReg(); + MachineInstr *Use = MRI.getUniqueVRegDef(Reg); + if (Use && Use->getParent() == Pred) + Reg = getEquivalentRegisterIn(Reg, *PI); + MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false)); + MI.addOperand(MachineOperand::CreateMBB(*PI)); + } + } + + // Create a list of all blocks in order. + SmallVector<MachineBasicBlock *, 8> Blocks; + llvm::copy(PeeledFront, std::back_inserter(Blocks)); + Blocks.push_back(BB); + llvm::copy(PeeledBack, std::back_inserter(Blocks)); + + // Iterate in reverse order over all instructions, remapping as we go. + for (MachineBasicBlock *B : reverse(Blocks)) { + for (auto I = B->getFirstInstrTerminator()->getReverseIterator(); + I != std::next(B->getFirstNonPHI()->getReverseIterator());) { + MachineInstr *MI = &*I++; + rewriteUsesOf(MI); + } + } + // Now all remapping has been done, we're free to optimize the generated code. + for (MachineBasicBlock *B : reverse(Blocks)) + EliminateDeadPhis(B, MRI, LIS); + EliminateDeadPhis(ExitingBB, MRI, LIS); +} + +MachineBasicBlock *PeelingModuloScheduleExpander::CreateLCSSAExitingBlock() { + MachineFunction &MF = *BB->getParent(); + MachineBasicBlock *Exit = *BB->succ_begin(); + if (Exit == BB) + Exit = *std::next(BB->succ_begin()); + + MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); + MF.insert(std::next(BB->getIterator()), NewBB); + + // Clone all phis in BB into NewBB and rewrite. + for (MachineInstr &MI : BB->phis()) { + auto RC = MRI.getRegClass(MI.getOperand(0).getReg()); + Register OldR = MI.getOperand(3).getReg(); + Register R = MRI.createVirtualRegister(RC); + SmallVector<MachineInstr *, 4> Uses; + for (MachineInstr &Use : MRI.use_instructions(OldR)) + if (Use.getParent() != BB) + Uses.push_back(&Use); + for (MachineInstr *Use : Uses) + Use->substituteRegister(OldR, R, /*SubIdx=*/0, + *MRI.getTargetRegisterInfo()); + MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R) + .addReg(OldR) + .addMBB(BB); + BlockMIs[{NewBB, &MI}] = NI; + CanonicalMIs[NI] = &MI; + } + BB->replaceSuccessor(Exit, NewBB); + Exit->replacePhiUsesWith(BB, NewBB); + NewBB->addSuccessor(Exit); + + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector<MachineOperand, 4> Cond; + bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond); + (void)CanAnalyzeBr; + assert(CanAnalyzeBr && "Must be able to analyze the loop branch!"); + TII->removeBranch(*BB); + TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? 
NewBB : FBB, + Cond, DebugLoc()); + TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc()); + return NewBB; +} + +Register +PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg, + MachineBasicBlock *BB) { + MachineInstr *MI = MRI.getUniqueVRegDef(Reg); + unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg); + return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg(); +} + +void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) { + if (MI->isPHI()) { + // This is an illegal PHI. The loop-carried (desired) value is operand 3, + // and it is produced by this block. + Register PhiR = MI->getOperand(0).getReg(); + Register R = MI->getOperand(3).getReg(); + int RMIStage = getStage(MRI.getUniqueVRegDef(R)); + if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage)) + R = MI->getOperand(1).getReg(); + MRI.setRegClass(R, MRI.getRegClass(PhiR)); + MRI.replaceRegWith(PhiR, R); + if (LIS) + LIS->RemoveMachineInstrFromMaps(*MI); + MI->eraseFromParent(); + return; + } + + int Stage = getStage(MI); + if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 || + LiveStages[MI->getParent()].test(Stage)) + // Instruction is live, no rewriting to do. + return; + + for (MachineOperand &DefMO : MI->defs()) { + SmallVector<std::pair<MachineInstr *, Register>, 4> Subs; + for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) { + // Only PHIs can use values from this block by construction. + // Match with the equivalent PHI in B. + assert(UseMI.isPHI()); + Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(), + MI->getParent()); + Subs.emplace_back(&UseMI, Reg); + } + for (auto &Sub : Subs) + Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0, + *MRI.getTargetRegisterInfo()); + } + if (LIS) + LIS->RemoveMachineInstrFromMaps(*MI); + MI->eraseFromParent(); +} + +void PeelingModuloScheduleExpander::fixupBranches() { + std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> Info = + TII->analyzeLoopForPipelining(BB); + assert(Info); + + // Work outwards from the kernel. + bool KernelDisposed = false; + int TC = Schedule.getNumStages() - 1; + for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend(); + ++PI, ++EI, --TC) { + MachineBasicBlock *Prolog = *PI; + MachineBasicBlock *Fallthrough = *Prolog->succ_begin(); + MachineBasicBlock *Epilog = *EI; + SmallVector<MachineOperand, 4> Cond; + TII->removeBranch(*Prolog); + Optional<bool> StaticallyGreater = + Info->createTripCountGreaterCondition(TC, *Prolog, Cond); + if (!StaticallyGreater.hasValue()) { + LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n"); + // Dynamically branch based on Cond. + TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc()); + } else if (*StaticallyGreater == false) { + LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n"); + // Prolog never falls through; branch to epilog and orphan interior + // blocks. Leave it to unreachable-block-elim to clean up. + Prolog->removeSuccessor(Fallthrough); + for (MachineInstr &P : Fallthrough->phis()) { + P.RemoveOperand(2); + P.RemoveOperand(1); + } + TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc()); + KernelDisposed = true; + } else { + LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n"); + // Prolog always falls through; remove incoming values in epilog. 
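+      // (Operands 3 and 4 are the register/block pair that was appended for
+      // the direct prolog->epilog edge in peelPrologAndEpilogs.)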
+      Prolog->removeSuccessor(Epilog);
+      for (MachineInstr &P : Epilog->phis()) {
+        P.RemoveOperand(4);
+        P.RemoveOperand(3);
+      }
+    }
+  }
+
+  if (!KernelDisposed) {
+    Info->adjustTripCount(-(Schedule.getNumStages() - 1));
+    Info->setPreheader(Prologs.back());
+  } else {
+    Info->disposed();
+  }
+}
+
+void PeelingModuloScheduleExpander::rewriteKernel() {
+  KernelRewriter KR(*Schedule.getLoop(), Schedule);
+  KR.rewrite();
+}
+
+void PeelingModuloScheduleExpander::expand() {
+  BB = Schedule.getLoop()->getTopBlock();
+  Preheader = Schedule.getLoop()->getLoopPreheader();
+  LLVM_DEBUG(Schedule.dump());
+
+  rewriteKernel();
+  peelPrologAndEpilogs();
+  fixupBranches();
+}
+
+void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() {
+  BB = Schedule.getLoop()->getTopBlock();
+  Preheader = Schedule.getLoop()->getLoopPreheader();
+
+  // Dump the schedule before we invalidate and remap all its instructions.
+  // Stash it in a string so we can print it if we find an error.
+  std::string ScheduleDump;
+  raw_string_ostream OS(ScheduleDump);
+  Schedule.print(OS);
+  OS.flush();
+
+  // First, run the normal ModuloScheduleExpander. We don't support any
+  // InstrChanges.
+  assert(LIS && "Requires LiveIntervals!");
+  ModuloScheduleExpander MSE(MF, Schedule, *LIS,
+                             ModuloScheduleExpander::InstrChangesTy());
+  MSE.expand();
+  MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
+  if (!ExpandedKernel) {
+    // The expander optimized away the kernel. We can't do any useful checking.
+    MSE.cleanup();
+    return;
+  }
+  // Before running the KernelRewriter, re-add BB into the CFG.
+  Preheader->addSuccessor(BB);
+
+  // Now run the new expansion algorithm.
+  KernelRewriter KR(*Schedule.getLoop(), Schedule);
+  KR.rewrite();
+  peelPrologAndEpilogs();
+
+  // Collect all illegal phis that the new algorithm created. We'll give these
+  // to KernelOperandInfo.
+  SmallPtrSet<MachineInstr *, 4> IllegalPhis;
+  for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
+    if (NI->isPHI())
+      IllegalPhis.insert(&*NI);
+  }
+
+  // Co-iterate across both kernels. We expect them to be identical apart from
+  // phis and full COPYs (we look through both).
+  SmallVector<std::pair<KernelOperandInfo, KernelOperandInfo>, 8> KOIs;
+  auto OI = ExpandedKernel->begin();
+  auto NI = BB->begin();
+  for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
+    while (OI->isPHI() || OI->isFullCopy())
+      ++OI;
+    while (NI->isPHI() || NI->isFullCopy())
+      ++NI;
+    assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
+    // Analyze every operand separately.
+    for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
+         OOpI != OI->operands_end(); ++OOpI, ++NOpI)
+      KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
+                        KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
+  }
+
+  bool Failed = false;
+  for (auto &OldAndNew : KOIs) {
+    if (OldAndNew.first == OldAndNew.second)
+      continue;
+    Failed = true;
+    errs() << "Modulo kernel validation error: [\n";
+    errs() << " [golden] ";
+    OldAndNew.first.print(errs());
+    errs() << "          ";
+    OldAndNew.second.print(errs());
+    errs() << "]\n";
+  }
+
+  if (Failed) {
+    errs() << "Golden reference kernel:\n";
+    ExpandedKernel->print(errs());
+    errs() << "New kernel:\n";
+    BB->print(errs());
+    errs() << ScheduleDump;
+    report_fatal_error(
+        "Modulo kernel validation (-pipeliner-experimental-cg) failed");
+  }
+
+  // Cleanup by removing BB from the CFG again as the original
+  // ModuloScheduleExpander intended.
+ Preheader->removeSuccessor(BB); + MSE.cleanup(); +} + +//===----------------------------------------------------------------------===// +// ModuloScheduleTestPass implementation +//===----------------------------------------------------------------------===// +// This pass constructs a ModuloSchedule from an annotated machine function +// and runs ModuloScheduleExpander. +// +// The function is expected to contain a single-block analyzable loop. +// The total order of instructions is taken from the loop as-is. +// Instructions are expected to be annotated with a PostInstrSymbol. +// This PostInstrSymbol must have the following format: +// "Stage-%d_Cycle-%d" (the form produced by ModuloScheduleTestAnnotater). +//===----------------------------------------------------------------------===// + +namespace { +class ModuloScheduleTest : public MachineFunctionPass { +public: + static char ID; + + ModuloScheduleTest() : MachineFunctionPass(ID) { + initializeModuloScheduleTestPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + void runOnLoop(MachineFunction &MF, MachineLoop &L); + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineLoopInfo>(); + AU.addRequired<LiveIntervals>(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; +} // namespace + +char ModuloScheduleTest::ID = 0; + +INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test", + "Modulo Schedule test pass", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test", + "Modulo Schedule test pass", false, false) + +bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) { + MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); + for (auto *L : MLI) { + if (L->getTopBlock() != L->getBottomBlock()) + continue; + runOnLoop(MF, *L); + return false; + } + return false; +} + +static void parseSymbolString(StringRef S, int &Cycle, int &Stage) { + std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_"); + std::pair<StringRef, StringRef> StageTokenAndValue = + getToken(StageAndCycle.first, "-"); + std::pair<StringRef, StringRef> CycleTokenAndValue = + getToken(StageAndCycle.second, "-"); + if (StageTokenAndValue.first != "Stage" || + CycleTokenAndValue.first != "_Cycle") { + llvm_unreachable( + "Bad post-instr symbol syntax: see comment in ModuloScheduleTest"); + return; + } + + StageTokenAndValue.second.drop_front().getAsInteger(10, Stage); + CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle); + + dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n"; +} + +void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) { + LiveIntervals &LIS = getAnalysis<LiveIntervals>(); + MachineBasicBlock *BB = L.getTopBlock(); + dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n"; + + DenseMap<MachineInstr *, int> Cycle, Stage; + std::vector<MachineInstr *> Instrs; + for (MachineInstr &MI : *BB) { + if (MI.isTerminator()) + continue; + Instrs.push_back(&MI); + if (MCSymbol *Sym = MI.getPostInstrSymbol()) { + dbgs() << "Parsing post-instr symbol for " << MI; + parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]); + } + } + + ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle), + std::move(Stage)); + ModuloScheduleExpander MSE( + MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy()); + MSE.expand(); + MSE.cleanup(); +} + +//===----------------------------------------------------------------------===// +// 
ModuloScheduleTestAnnotater implementation +//===----------------------------------------------------------------------===// + +void ModuloScheduleTestAnnotater::annotate() { + for (MachineInstr *MI : S.getInstructions()) { + SmallVector<char, 16> SV; + raw_svector_ostream OS(SV); + OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI); + MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str()); + MI->setPostInstrSymbol(MF, Sym); + } +} diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index c70b62252139..1a493964e678 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -97,7 +97,7 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg, InstrSet &PHIsInCycle) { assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction"); - unsigned DstReg = MI->getOperand(0).getReg(); + Register DstReg = MI->getOperand(0).getReg(); // See if we already saw this register. if (!PHIsInCycle.insert(MI).second) @@ -109,16 +109,15 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, // Scan the PHI operands. for (unsigned i = 1; i != MI->getNumOperands(); i += 2) { - unsigned SrcReg = MI->getOperand(i).getReg(); + Register SrcReg = MI->getOperand(i).getReg(); if (SrcReg == DstReg) continue; MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); // Skip over register-to-register moves. - if (SrcMI && SrcMI->isCopy() && - !SrcMI->getOperand(0).getSubReg() && + if (SrcMI && SrcMI->isCopy() && !SrcMI->getOperand(0).getSubReg() && !SrcMI->getOperand(1).getSubReg() && - TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg())) { + Register::isVirtualRegister(SrcMI->getOperand(1).getReg())) { SrcReg = SrcMI->getOperand(1).getReg(); SrcMI = MRI->getVRegDef(SrcReg); } @@ -142,8 +141,8 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, /// other PHIs in a cycle. bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) { assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction"); - unsigned DstReg = MI->getOperand(0).getReg(); - assert(TargetRegisterInfo::isVirtualRegister(DstReg) && + Register DstReg = MI->getOperand(0).getReg(); + assert(Register::isVirtualRegister(DstReg) && "PHI destination is not a virtual register"); // See if we already saw this register. @@ -177,7 +176,7 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) { InstrSet PHIsInCycle; if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) && SingleValReg != 0) { - unsigned OldReg = MI->getOperand(0).getReg(); + Register OldReg = MI->getOperand(0).getReg(); if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg))) continue; diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 948a5835438c..4dd4c4b1084e 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -31,7 +31,9 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Pass.h" @@ -168,7 +170,7 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { // Remove dead IMPLICIT_DEF instructions. 
for (MachineInstr *DefMI : ImpDefs) { - unsigned DefReg = DefMI->getOperand(0).getReg(); + Register DefReg = DefMI->getOperand(0).getReg(); if (MRI->use_nodbg_empty(DefReg)) { if (LIS) LIS->RemoveMachineInstrFromMaps(*DefMI); @@ -183,6 +185,11 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { MF.DeleteMachineInstr(I.first); } + // TODO: we should use the incremental DomTree updater here. + if (Changed) + if (auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>()) + MDT->getBase().recalculate(MF); + LoweredPHIs.clear(); ImpDefs.clear(); VRegPHIUseCount.clear(); @@ -240,7 +247,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, MachineInstr *MPhi = MBB.remove(&*MBB.begin()); unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2; - unsigned DestReg = MPhi->getOperand(0).getReg(); + Register DestReg = MPhi->getOperand(0).getReg(); assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs"); bool isDead = MPhi->getOperand(0).isDead(); @@ -252,11 +259,12 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // Insert a register to register copy at the top of the current block (but // after any remaining phi nodes) which copies the new incoming register // into the phi node destination. + MachineInstr *PHICopy = nullptr; const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); if (allPhiOperandsUndefined(*MPhi, *MRI)) // If all sources of a PHI node are implicit_def or undef uses, just emit an // implicit_def instead of a copy. - BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), + PHICopy = BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), DestReg); else { // Can we reuse an earlier PHI node? This only happens for critical edges, @@ -273,15 +281,13 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg); entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC); } - BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), - TII->get(TargetOpcode::COPY), DestReg) - .addReg(IncomingReg); + // Give the target the possibility to handle special cases; fall through otherwise. + PHICopy = TII->createPHIDestinationCopy(MBB, AfterPHIsIt, MPhi->getDebugLoc(), + IncomingReg, DestReg); } // Update live variable information if there is any. if (LV) { - MachineInstr &PHICopy = *std::prev(AfterPHIsIt); - if (IncomingReg) { LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg); @@ -302,7 +308,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // killed. Note that because the value is defined in several places (once // each for each incoming block), the "def" block and instruction fields // for the VarInfo is not filled in. - LV->addVirtualRegisterKilled(IncomingReg, PHICopy); + LV->addVirtualRegisterKilled(IncomingReg, *PHICopy); } // Since we are going to be deleting the PHI node, if it is the last use of @@ -312,15 +318,14 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // If the result is dead, update LV. if (isDead) { - LV->addVirtualRegisterDead(DestReg, PHICopy); + LV->addVirtualRegisterDead(DestReg, *PHICopy); LV->removeVirtualRegisterDead(DestReg, *MPhi); } } // Update LiveIntervals for the new copy or implicit def. 
if (LIS) { - SlotIndex DestCopyIndex = - LIS->InsertMachineInstrInMaps(*std::prev(AfterPHIsIt)); + SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(*PHICopy); SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB); if (IncomingReg) { @@ -368,11 +373,11 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // IncomingReg register in the corresponding predecessor basic block. SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto; for (int i = NumSrcs - 1; i >= 0; --i) { - unsigned SrcReg = MPhi->getOperand(i*2+1).getReg(); + Register SrcReg = MPhi->getOperand(i * 2 + 1).getReg(); unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg(); bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() || isImplicitlyDefined(SrcReg, *MRI); - assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && + assert(Register::isVirtualRegister(SrcReg) && "Machine PHI Operands must all be virtual registers!"); // Get the MachineBasicBlock equivalent of the BasicBlock that is the source @@ -406,9 +411,9 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, if (DefMI->isImplicitDef()) ImpDefs.insert(DefMI); } else { - NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), - TII->get(TargetOpcode::COPY), IncomingReg) - .addReg(SrcReg, 0, SrcSubReg); + NewSrcInstr = + TII->createPHISourceCopy(opBlock, InsertPos, MPhi->getDebugLoc(), + SrcReg, SrcSubReg, IncomingReg); } } @@ -457,7 +462,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, } } else { // We just inserted this copy. - KillInst = std::prev(InsertPos); + KillInst = NewSrcInstr; } } assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction"); @@ -567,7 +572,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end(); BBI != BBE && BBI->isPHI(); ++BBI) { for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { - unsigned Reg = BBI->getOperand(i).getReg(); + Register Reg = BBI->getOperand(i).getReg(); MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB(); // Is there a critical edge from PreMBB to MBB? 
if (PreMBB->succ_size() == 1) diff --git a/lib/CodeGen/PatchableFunction.cpp b/lib/CodeGen/PatchableFunction.cpp index a3fa1b0ad8ed..529fde84e39a 100644 --- a/lib/CodeGen/PatchableFunction.cpp +++ b/lib/CodeGen/PatchableFunction.cpp @@ -78,7 +78,7 @@ bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) { MIB.add(MO); FirstActualI->eraseFromParent(); - MF.ensureAlignment(4); + MF.ensureAlignment(Align(16)); return true; } diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index b918396aa8c5..54f1d38ed106 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -418,7 +418,7 @@ namespace { const MachineRegisterInfo &MRI, const TargetInstrInfo *TII = nullptr) : DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) { - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (!Register::isPhysicalRegister(Reg)) { Def = MRI.getVRegDef(Reg); DefIdx = MRI.def_begin(Reg).getOperandNo(); } @@ -460,8 +460,8 @@ optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB, if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx)) return false; - if (TargetRegisterInfo::isPhysicalRegister(DstReg) || - TargetRegisterInfo::isPhysicalRegister(SrcReg)) + if (Register::isPhysicalRegister(DstReg) || + Register::isPhysicalRegister(SrcReg)) return false; if (MRI->hasOneNonDBGUse(SrcReg)) @@ -581,7 +581,7 @@ optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB, MRI->constrainRegClass(DstReg, DstRC); } - unsigned NewVR = MRI->createVirtualRegister(RC); + Register NewVR = MRI->createVirtualRegister(RC); MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(DstReg, 0, SubIdx); @@ -609,8 +609,8 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) { unsigned SrcReg, SrcReg2; int CmpMask, CmpValue; if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) || - TargetRegisterInfo::isPhysicalRegister(SrcReg) || - (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2))) + Register::isPhysicalRegister(SrcReg) || + (SrcReg2 != 0 && Register::isPhysicalRegister(SrcReg2))) return false; // Attempt to optimize the comparison instruction. @@ -663,7 +663,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg, // Thus, instead of maintaining untested code, we will revisit that if // that changes at some point. unsigned Reg = RegSubReg.Reg; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return false; const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); @@ -675,7 +675,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg, do { CurSrcPair = SrcToLook.pop_back_val(); // As explained above, do not handle physical registers - if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg)) + if (Register::isPhysicalRegister(CurSrcPair.Reg)) return false; ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII); @@ -723,7 +723,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg, // constraints to the register allocator. Moreover, if we want to extend // the live-range of a physical register, unlike SSA virtual register, // we will have to check that they aren't redefine before the related use. - if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg)) + if (Register::isPhysicalRegister(CurSrcPair.Reg)) return false; // Keep following the chain if the value isn't any better yet. 
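The change running through all of these hunks is the migration from plain unsigned register numbers, classified via TargetRegisterInfo's static helpers, to the Register value type. A minimal sketch of the encoding such a type can use, assuming the virtual/physical distinction is carried in a high tag bit as the virtReg2Index/index2VirtReg pairs above suggest; the real class lives in llvm/include/llvm/CodeGen/Register.h, so this is illustrative only:

#include <cassert>

// Illustrative stand-in for llvm::Register; the tag-bit choice is an
// assumption for the sketch, not a quote of the real header.
class RegisterSketch {
  unsigned Reg;
  static constexpr unsigned VirtualFlag = 1u << 31; // assumed tag bit

public:
  constexpr RegisterSketch(unsigned R = 0) : Reg(R) {}

  static bool isVirtualRegister(unsigned R) { return (R & VirtualFlag) != 0; }
  static bool isPhysicalRegister(unsigned R) {
    return R != 0 && (R & VirtualFlag) == 0; // 0 means "no register"
  }
  static unsigned virtReg2Index(unsigned R) {
    assert(isVirtualRegister(R) && "not a virtual register");
    return R & ~VirtualFlag;
  }
  static unsigned index2VirtReg(unsigned Index) { return Index | VirtualFlag; }

  // Implicit conversion keeps 'Register Reg = MO.getReg()' interchangeable
  // with the old 'unsigned Reg' spelling used throughout these hunks.
  constexpr operator unsigned() const { return Reg; }
};

With an encoding like this, each Register::isVirtualRegister(Reg) call above is a single bit test, which is why the rewrite across so many files can be purely mechanical.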
@@ -761,7 +761,7 @@ insertPHI(MachineRegisterInfo &MRI, const TargetInstrInfo &TII, // NewRC is only correct if no subregisters are involved. findNextSource() // should have rejected those cases already. assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand"); - unsigned NewVR = MRI.createVirtualRegister(NewRC); + Register NewVR = MRI.createVirtualRegister(NewRC); MachineBasicBlock *MBB = OrigPHI.getParent(); MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(), TII.get(TargetOpcode::PHI), NewVR); @@ -1170,7 +1170,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) { "Coalescer can understand multiple defs?!"); const MachineOperand &MODef = MI.getOperand(0); // Do not rewrite physical definitions. - if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) + if (Register::isPhysicalRegister(MODef.getReg())) return false; bool Changed = false; @@ -1221,7 +1221,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) { MachineInstr & PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike, RegSubRegPair Def, RewriteMapTy &RewriteMap) { - assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && + assert(!Register::isPhysicalRegister(Def.Reg) && "We do not rewrite physical registers"); // Find the new source to use in the COPY rewrite. @@ -1229,7 +1229,7 @@ PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike, // Insert the COPY. const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg); - unsigned NewVReg = MRI->createVirtualRegister(DefRC); + Register NewVReg = MRI->createVirtualRegister(DefRC); MachineInstr *NewCopy = BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(), @@ -1280,7 +1280,7 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy( while (CpyRewriter.getNextRewritableSource(Src, Def)) { // If a physical register is here, this is probably for a good reason. // Do not rewrite that. - if (TargetRegisterInfo::isPhysicalRegister(Def.Reg)) + if (Register::isPhysicalRegister(Def.Reg)) return false; // If we do not know how to rewrite this definition, there is no point @@ -1315,12 +1315,11 @@ bool PeepholeOptimizer::isLoadFoldable( if (MCID.getNumDefs() != 1) return false; - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); // To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting // loads. It should be checked when processing uses of the load, since // uses can be removed during peephole. - if (!MI.getOperand(0).getSubReg() && - TargetRegisterInfo::isVirtualRegister(Reg) && + if (!MI.getOperand(0).getSubReg() && Register::isVirtualRegister(Reg) && MRI->hasOneNonDBGUser(Reg)) { FoldAsLoadDefCandidates.insert(Reg); return true; @@ -1336,8 +1335,8 @@ bool PeepholeOptimizer::isMoveImmediate( return false; if (MCID.getNumDefs() != 1) return false; - unsigned Reg = MI.getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + Register Reg = MI.getOperand(0).getReg(); + if (Register::isVirtualRegister(Reg)) { ImmDefMIs.insert(std::make_pair(Reg, &MI)); ImmDefRegs.insert(Reg); return true; @@ -1359,8 +1358,8 @@ bool PeepholeOptimizer::foldImmediate(MachineInstr &MI, // Ignore dead implicit defs. 
if (MO.isImplicit() && MO.isDead()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; if (ImmDefRegs.count(Reg) == 0) continue; @@ -1393,12 +1392,12 @@ bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI, DenseMap<unsigned, MachineInstr *> &CopyMIs) { assert(MI.isCopy() && "expected a COPY machine instruction"); - unsigned SrcReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + Register SrcReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(SrcReg)) return false; - unsigned DstReg = MI.getOperand(0).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DstReg)) + Register DstReg = MI.getOperand(0).getReg(); + if (!Register::isVirtualRegister(DstReg)) return false; if (CopySrcRegs.insert(SrcReg).second) { @@ -1416,7 +1415,7 @@ bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI, if (SrcSubReg != PrevSrcSubReg) return false; - unsigned PrevDstReg = PrevCopy->getOperand(0).getReg(); + Register PrevDstReg = PrevCopy->getOperand(0).getReg(); // Only replace if the copy register class is the same. // @@ -1433,8 +1432,7 @@ bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI, } bool PeepholeOptimizer::isNAPhysCopy(unsigned Reg) { - return TargetRegisterInfo::isPhysicalRegister(Reg) && - !MRI->isAllocatable(Reg); + return Register::isPhysicalRegister(Reg) && !MRI->isAllocatable(Reg); } bool PeepholeOptimizer::foldRedundantNAPhysCopy( @@ -1444,9 +1442,9 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( if (DisableNAPhysCopyOpt) return false; - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); - if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + if (isNAPhysCopy(SrcReg) && Register::isVirtualRegister(DstReg)) { // %vreg = COPY %physreg // Avoid using a datastructure which can track multiple live non-allocatable // phys->virt copies since LLVM doesn't seem to do this. @@ -1454,7 +1452,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( return false; } - if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg))) + if (!(Register::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg))) return false; // %physreg = COPY %vreg @@ -1467,7 +1465,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( return false; } - unsigned PrevDstReg = PrevCopy->second->getOperand(0).getReg(); + Register PrevDstReg = PrevCopy->second->getOperand(0).getReg(); if (PrevDstReg == SrcReg) { // Remove the virt->phys copy: we saw the virtual register definition, and // the non-allocatable physical register's state hasn't changed since then. @@ -1489,7 +1487,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( static bool isVirtualRegisterOperand(MachineOperand &MO) { if (!MO.isReg()) return false; - return TargetRegisterInfo::isVirtualRegister(MO.getReg()); + return Register::isVirtualRegister(MO.getReg()); } bool PeepholeOptimizer::findTargetRecurrence( @@ -1662,7 +1660,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { for (const MachineOperand &MO : MI->operands()) { // Visit all operands: definitions can be implicit or explicit. 
if (MO.isReg()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (MO.isDef() && isNAPhysCopy(Reg)) { const auto &Def = NAPhysToVirtMIs.find(Reg); if (Def != NAPhysToVirtMIs.end()) { @@ -1778,7 +1776,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { LocalMIs.erase(DefMI); LocalMIs.insert(FoldMI); if (MI->isCall()) - MI->getMF()->updateCallSiteInfo(MI, FoldMI); + MI->getMF()->moveCallSiteInfo(MI, FoldMI); MI->eraseFromParent(); DefMI->eraseFromParent(); MRI->markUsesInDebugValueAsUndef(FoldedReg); @@ -1810,7 +1808,11 @@ ValueTrackerResult ValueTracker::getNextSourceFromCopy() { assert(Def->isCopy() && "Invalid definition"); // Copy instruction are supposed to be: Def = Src. // If someone breaks this assumption, bad things will happen everywhere. - assert(Def->getNumOperands() == 2 && "Invalid number of operands"); + // There may be implicit uses preventing the copy from being moved across + // some target-specific register definitions. + assert(Def->getNumOperands() - Def->getNumImplicitOperands() == 2 && + "Invalid number of operands"); + assert(!Def->hasImplicitDef() && "Only implicit uses are allowed"); if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) // If we look for a different subreg, it means we want a subreg of src. @@ -1855,6 +1857,11 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() { SrcIdx = OpIdx; } + // In some rare cases, Def has no input and SrcIdx is out of bounds; + // getOperand(SrcIdx) would fail below. + if (SrcIdx >= Def->getNumOperands()) + return ValueTrackerResult(); + // Stop when any user of the bitcast is a SUBREG_TO_REG, replacing with a COPY // will break the assumed guarantees for the upper bits. for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) { @@ -2087,7 +2094,7 @@ ValueTrackerResult ValueTracker::getNextSource() { // If we can still move up in the use-def chain, move to the next // definition. - if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) { + if (!Register::isPhysicalRegister(Reg) && OneRegSrc) { MachineRegisterInfo::def_iterator DI = MRI.def_begin(Reg); if (DI != MRI.def_end()) { Def = DI->getParent(); diff --git a/lib/CodeGen/PreISelIntrinsicLowering.cpp b/lib/CodeGen/PreISelIntrinsicLowering.cpp index 2752e186875c..0d2f6f99ca96 100644 --- a/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -76,7 +76,7 @@ static bool lowerObjCCall(Function &F, const char *NewFn, } for (auto I = F.use_begin(), E = F.use_end(); I != E;) { - auto *CI = dyn_cast<CallInst>(I->getUser()); + auto *CI = cast<CallInst>(I->getUser()); assert(CI->getCalledFunction() && "Cannot lower an indirect call!"); ++I; diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index b38987ad1c90..11bff45f9ad5 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -73,9 +73,9 @@ bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) { void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { LLVM_DEBUG(dbgs() << "Processing " << *MI); - unsigned Reg = MI->getOperand(0).getReg(); + Register Reg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { // For virtual registers, mark all uses as <undef>, and convert users to // implicit-def when possible. 
for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { @@ -100,8 +100,8 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { for (MachineOperand &MO : UserMI->operands()) { if (!MO.isReg()) continue; - unsigned UserReg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(UserReg) || + Register UserReg = MO.getReg(); + if (!Register::isPhysicalRegister(UserReg) || !TRI->regsOverlap(Reg, UserReg)) continue; // UserMI uses or redefines Reg. Set <undef> flags on all uses. diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index d463bee67595..729f06dda62b 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -898,7 +898,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // frame index registers. Functions which don't want/need this optimization // will continue to use the existing code path. if (MFI.getUseLocalStackAllocationBlock()) { - unsigned Align = MFI.getLocalFrameMaxAlign(); + unsigned Align = MFI.getLocalFrameMaxAlign().value(); // Adjust to alignment boundary. Offset = alignTo(Offset, Align, Skew); diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index da3ef4b771f3..74e721dbd138 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -129,7 +129,7 @@ const PseudoSourceValue * PseudoSourceValueManager::getFixedStack(int FI) { std::unique_ptr<FixedStackPseudoSourceValue> &V = FSValues[FI]; if (!V) - V = llvm::make_unique<FixedStackPseudoSourceValue>(FI, TII); + V = std::make_unique<FixedStackPseudoSourceValue>(FI, TII); return V.get(); } @@ -138,7 +138,7 @@ PseudoSourceValueManager::getGlobalValueCallEntry(const GlobalValue *GV) { std::unique_ptr<const GlobalValuePseudoSourceValue> &E = GlobalCallEntries[GV]; if (!E) - E = llvm::make_unique<GlobalValuePseudoSourceValue>(GV, TII); + E = std::make_unique<GlobalValuePseudoSourceValue>(GV, TII); return E.get(); } @@ -147,6 +147,6 @@ PseudoSourceValueManager::getExternalSymbolCallEntry(const char *ES) { std::unique_ptr<const ExternalSymbolPseudoSourceValue> &E = ExternalCallEntries[ES]; if (!E) - E = llvm::make_unique<ExternalSymbolPseudoSourceValue>(ES, TII); + E = std::make_unique<ExternalSymbolPseudoSourceValue>(ES, TII); return E.get(); } diff --git a/lib/CodeGen/ReachingDefAnalysis.cpp b/lib/CodeGen/ReachingDefAnalysis.cpp index f05c97ad621e..2850033e6419 100644 --- a/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/lib/CodeGen/ReachingDefAnalysis.cpp @@ -9,6 +9,7 @@ #include "llvm/CodeGen/ReachingDefAnalysis.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp index 1cbe75c27d13..156daaa03bb5 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -73,7 +73,7 @@ void RegAllocBase::seedLiveRegs() { NamedRegionTimer T("seed", "Seed Live Regs", TimerGroupName, TimerGroupDescription, TimePassesIsEnabled); for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; enqueue(&LIS->getInterval(Reg)); @@ -154,7 +154,7 @@ void RegAllocBase::allocatePhysRegs() { continue; } LLVM_DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); - assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && + 
assert(Register::isVirtualRegister(SplitVirtReg->reg) && "expect split value in virtual register"); enqueue(SplitVirtReg); ++NumNewQueued; diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 2ffa5e389f89..44d0233604e7 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -90,7 +90,7 @@ namespace { explicit LiveReg(unsigned VirtReg) : VirtReg(VirtReg) {} unsigned getSparseSetIndex() const { - return TargetRegisterInfo::virtReg2Index(VirtReg); + return Register::virtReg2Index(VirtReg); } }; @@ -200,11 +200,11 @@ namespace { void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg); LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) { - return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); + return LiveVirtRegs.find(Register::virtReg2Index(VirtReg)); } LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const { - return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); + return LiveVirtRegs.find(Register::virtReg2Index(VirtReg)); } void allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint); @@ -264,7 +264,7 @@ int RegAllocFast::getStackSpaceFor(unsigned VirtReg) { /// Returns false if \p VirtReg is known to not live out of the current block. bool RegAllocFast::mayLiveOut(unsigned VirtReg) { - if (MayLiveAcrossBlocks.test(TargetRegisterInfo::virtReg2Index(VirtReg))) { + if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) { // Cannot be live-out if there are no successors. return !MBB->succ_empty(); } @@ -272,7 +272,7 @@ bool RegAllocFast::mayLiveOut(unsigned VirtReg) { // If this block loops back to itself, it would be necessary to check whether // the use comes after the def. if (MBB->isSuccessor(MBB)) { - MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg)); + MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); return true; } @@ -282,7 +282,7 @@ bool RegAllocFast::mayLiveOut(unsigned VirtReg) { unsigned C = 0; for (const MachineInstr &UseInst : MRI->reg_nodbg_instructions(VirtReg)) { if (UseInst.getParent() != MBB || ++C >= Limit) { - MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg)); + MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); // Cannot be live-out if there are no successors. return !MBB->succ_empty(); } @@ -293,7 +293,7 @@ bool RegAllocFast::mayLiveOut(unsigned VirtReg) { /// Returns false if \p VirtReg is known to not be live into the current block. bool RegAllocFast::mayLiveIn(unsigned VirtReg) { - if (MayLiveAcrossBlocks.test(TargetRegisterInfo::virtReg2Index(VirtReg))) + if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) return !MBB->pred_empty(); // See if the first \p Limit def of the register are all in the current block. @@ -301,7 +301,7 @@ bool RegAllocFast::mayLiveIn(unsigned VirtReg) { unsigned C = 0; for (const MachineInstr &DefInst : MRI->def_instructions(VirtReg)) { if (DefInst.getParent() != MBB || ++C >= Limit) { - MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg)); + MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); return !MBB->pred_empty(); } } @@ -394,7 +394,7 @@ void RegAllocFast::killVirtReg(LiveReg &LR) { /// Mark virtreg as no longer available. 
void RegAllocFast::killVirtReg(unsigned VirtReg) { - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + assert(Register::isVirtualRegister(VirtReg) && "killVirtReg needs a virtual register"); LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); if (LRI != LiveVirtRegs.end() && LRI->PhysReg) @@ -405,7 +405,7 @@ void RegAllocFast::killVirtReg(unsigned VirtReg) { /// stack slot if needed. void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) { - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + assert(Register::isVirtualRegister(VirtReg) && "Spilling a physical register is illegal!"); LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && @@ -455,9 +455,8 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) { if (MO.isUndef()) return; - unsigned PhysReg = MO.getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && - "Bad usePhysReg operand"); + Register PhysReg = MO.getReg(); + assert(Register::isPhysicalRegister(PhysReg) && "Bad usePhysReg operand"); markRegUsedInInstr(PhysReg); switch (PhysRegState[PhysReg]) { @@ -626,9 +625,9 @@ unsigned RegAllocFast::traceCopyChain(unsigned Reg) const { static const unsigned ChainLengthLimit = 3; unsigned C = 0; do { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return Reg; - assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Register::isVirtualRegister(Reg)); MachineInstr *VRegDef = MRI->getUniqueVRegDef(Reg); if (!VRegDef || !isCoalescable(*VRegDef)) @@ -646,7 +645,7 @@ unsigned RegAllocFast::traceCopies(unsigned VirtReg) const { unsigned C = 0; for (const MachineInstr &MI : MRI->def_instructions(VirtReg)) { if (isCoalescable(MI)) { - unsigned Reg = MI.getOperand(1).getReg(); + Register Reg = MI.getOperand(1).getReg(); Reg = traceCopyChain(Reg); if (Reg != 0) return Reg; @@ -662,7 +661,7 @@ unsigned RegAllocFast::traceCopies(unsigned VirtReg) const { void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) { const unsigned VirtReg = LR.VirtReg; - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + assert(Register::isVirtualRegister(VirtReg) && "Can only allocate virtual registers"); const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); @@ -671,8 +670,8 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) { << " with hint " << printReg(Hint0, TRI) << '\n'); // Take hint when possible. - if (TargetRegisterInfo::isPhysicalRegister(Hint0) && - MRI->isAllocatable(Hint0) && RC.contains(Hint0)) { + if (Register::isPhysicalRegister(Hint0) && MRI->isAllocatable(Hint0) && + RC.contains(Hint0)) { // Ignore the hint if we would have to spill a dirty register. unsigned Cost = calcSpillCost(Hint0); if (Cost < spillDirty) { @@ -692,9 +691,8 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) { // Try other hint. unsigned Hint1 = traceCopies(VirtReg); - if (TargetRegisterInfo::isPhysicalRegister(Hint1) && - MRI->isAllocatable(Hint1) && RC.contains(Hint1) && - !isRegUsedInInstr(Hint1)) { + if (Register::isPhysicalRegister(Hint1) && MRI->isAllocatable(Hint1) && + RC.contains(Hint1) && !isRegUsedInInstr(Hint1)) { // Ignore the hint if we would have to spill a dirty register. 
unsigned Cost = calcSpillCost(Hint1); if (Cost < spillDirty) { @@ -752,8 +750,8 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) { void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) { assert(MO.isUndef() && "expected undef use"); - unsigned VirtReg = MO.getReg(); - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Expected virtreg"); + Register VirtReg = MO.getReg(); + assert(Register::isVirtualRegister(VirtReg) && "Expected virtreg"); LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); MCPhysReg PhysReg; @@ -778,14 +776,13 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) { /// Allocates a register for VirtReg and mark it as dirty. MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg, unsigned Hint) { - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "Not a virtual register"); + assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); if (!LRI->PhysReg) { // If there is no hint, peek at the only use of this register. - if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) && + if ((!Hint || !Register::isPhysicalRegister(Hint)) && MRI->hasOneNonDBGUse(VirtReg)) { const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg); // It's a copy, use the destination register as a hint. @@ -812,8 +809,7 @@ RegAllocFast::LiveReg &RegAllocFast::reloadVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg, unsigned Hint) { - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "Not a virtual register"); + assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); @@ -866,7 +862,7 @@ bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO, } // Handle subregister index. - MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0); + MO.setReg(PhysReg ? 
TRI->getSubReg(PhysReg, MO.getSubReg()) : Register()); MO.setIsRenamable(true); MO.setSubReg(0); @@ -893,8 +889,8 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, SmallSet<unsigned, 8> ThroughRegs; for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) || (MO.getSubReg() && MI.readsVirtualRegister(Reg))) { @@ -908,8 +904,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, LLVM_DEBUG(dbgs() << "\nChecking for physdef collisions.\n"); for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (!Reg || !Register::isPhysicalRegister(Reg)) + continue; markRegUsedInInstr(Reg); for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { if (ThroughRegs.count(PhysRegState[*AI])) @@ -922,8 +919,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { MachineOperand &MO = MI.getOperand(I); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) + continue; if (MO.isUse()) { if (!MO.isTied()) continue; LLVM_DEBUG(dbgs() << "Operand " << I << "(" << MO @@ -947,8 +945,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { const MachineOperand &MO = MI.getOperand(I); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) + continue; if (!MO.isEarlyClobber()) continue; // Note: defineVirtReg may invalidate MO. 
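For reference while reading the allocVirtReg hunks above: the two hint checks follow one policy, namely that a copy-related physical register is taken as the assignment only when it is usable and cheaper than evicting a dirty value. A compact sketch under assumed stand-ins (IsUsableHint bundles the isPhysicalRegister/isAllocatable/contains tests; CalcSpillCost and SpillDirtyCost mirror the allocator's internal cost model):

#include <functional>

// Returns the hinted physreg if it should be assigned, or 0 to make the
// caller fall back to scanning the allocation order. All callbacks are
// assumed wrappers over the real RegAllocFast/MachineRegisterInfo queries.
unsigned takeHintIfCheap(unsigned Hint, unsigned SpillDirtyCost,
                         const std::function<bool(unsigned)> &IsUsableHint,
                         const std::function<unsigned(unsigned)> &CalcSpillCost) {
  if (Hint != 0 && IsUsableHint(Hint) &&
      CalcSpillCost(Hint) < SpillDirtyCost)
    return Hint; // hint is free or only costs a clean register
  return 0;      // ignore the hint rather than spill a dirty register
}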
@@ -961,8 +960,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, UsedInInstr.clear(); for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; - unsigned Reg = MO.getReg(); - if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (!Reg || !Register::isPhysicalRegister(Reg)) + continue; LLVM_DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI) << " as used in instr\n"); markRegUsedInInstr(Reg); @@ -1002,10 +1002,8 @@ void RegAllocFast::dumpState() { e = LiveVirtRegs.end(); i != e; ++i) { if (!i->PhysReg) continue; - assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) && - "Bad map key"); - assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) && - "Bad map value"); + assert(Register::isVirtualRegister(i->VirtReg) && "Bad map key"); + assert(Register::isPhysicalRegister(i->PhysReg) && "Bad map value"); assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map"); } } @@ -1045,9 +1043,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { continue; } if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { VirtOpEnd = i+1; if (MO.isUse()) { hasTiedOps = hasTiedOps || @@ -1096,8 +1094,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { for (unsigned I = 0; I != VirtOpEnd; ++I) { MachineOperand &MO = MI.getOperand(I); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) + continue; if (MO.isUse()) { if (MO.isUndef()) { HasUndefUse = true; @@ -1124,8 +1123,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { for (MachineOperand &MO : MI.uses()) { if (!MO.isReg() || !MO.isUse()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; assert(MO.isUndef() && "Should only have undef virtreg uses left"); @@ -1139,8 +1138,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { if (hasEarlyClobbers) { for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (!Reg || !Register::isPhysicalRegister(Reg)) + continue; // Look for physreg defs and tied uses. if (!MO.isDef() && !MO.isTied()) continue; markRegUsedInInstr(Reg); @@ -1166,10 +1166,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { const MachineOperand &MO = MI.getOperand(I); if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); - if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg) || - !MRI->isAllocatable(Reg)) + if (!Reg || !Register::isPhysicalRegister(Reg) || !MRI->isAllocatable(Reg)) continue; definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); } @@ -1180,10 +1179,10 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { const MachineOperand &MO = MI.getOperand(I); if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // We have already dealt with phys regs in the previous scan. 
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) continue; MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg); if (setPhysReg(MI, MI.getOperand(I), PhysReg)) { @@ -1215,8 +1214,8 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) { // mostly constants and frame indices. if (!MO.isReg()) return; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) return; // See if this virtual register has already been allocated to a physical diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 771fc46415db..d27db678f02a 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -685,7 +685,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // The queue holds (size, reg) pairs. const unsigned Size = LI->getSize(); const unsigned Reg = LI->reg; - assert(TargetRegisterInfo::isVirtualRegister(Reg) && + assert(Register::isVirtualRegister(Reg) && "Can only enqueue virtual registers"); unsigned Prio; @@ -899,7 +899,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, // Check if any interfering live range is heavier than MaxWeight. for (unsigned i = Q.interferingVRegs().size(); i; --i) { LiveInterval *Intf = Q.interferingVRegs()[i - 1]; - assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) && + assert(Register::isVirtualRegister(Intf->reg) && "Only expecting virtual register interference from query"); // Do not allow eviction of a virtual register if we are in the middle @@ -984,7 +984,7 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg, continue; // Cannot evict non virtual reg interference. - if (!TargetRegisterInfo::isVirtualRegister(Intf->reg)) + if (!Register::isVirtualRegister(Intf->reg)) return false; // Never evict spill products. They cannot split or spill. if (getStage(*Intf) == RS_Done) @@ -2881,7 +2881,7 @@ void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) { continue; } // Get the current assignment. - Register OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg) + Register OtherPhysReg = Register::isPhysicalRegister(OtherReg) ? OtherReg : VRM->getPhys(OtherReg); // Push the collected information. @@ -2919,7 +2919,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { SmallVector<unsigned, 2> RecoloringCandidates; HintsInfo Info; unsigned Reg = VirtReg.reg; - unsigned PhysReg = VRM->getPhys(Reg); + Register PhysReg = VRM->getPhys(Reg); // Start the recoloring algorithm from the input live-interval, then // it will propagate to the ones that are copy-related with it. Visited.insert(Reg); @@ -2932,7 +2932,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { Reg = RecoloringCandidates.pop_back_val(); // We cannot recolor physical register. - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) continue; assert(VRM->hasPhys(Reg) && "We have unallocated variable!!"); @@ -2940,7 +2940,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { // Get the live interval mapped with this virtual register to be able // to check for the interference with the new color. LiveInterval &LI = LIS->getInterval(Reg); - unsigned CurrPhys = VRM->getPhys(Reg); + Register CurrPhys = VRM->getPhys(Reg); // Check that the new color matches the register class constraints and // that it is free for this live range. 
if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) || @@ -3021,7 +3021,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { /// getting rid of 2 copies. void RAGreedy::tryHintsRecoloring() { for (LiveInterval *LI : SetOfBrokenHints) { - assert(TargetRegisterInfo::isVirtualRegister(LI->reg) && + assert(Register::isVirtualRegister(LI->reg) && "Recoloring is possible only for virtual registers"); // Some dead defs may be around (e.g., because of debug uses). // Ignore those. diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 7a5a6c148ed4..3c4a46b12f99 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -558,7 +558,7 @@ void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF, // Iterate over all live ranges. for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + unsigned Reg = Register::index2VirtReg(I); if (MRI.reg_nodbg_empty(Reg)) continue; VRegsToAlloc.insert(Reg); @@ -824,11 +824,11 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { if (!VRegsToAlloc.empty()) { const TargetSubtargetInfo &Subtarget = MF.getSubtarget(); std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot = - llvm::make_unique<PBQPRAConstraintList>(); - ConstraintsRoot->addConstraint(llvm::make_unique<SpillCosts>()); - ConstraintsRoot->addConstraint(llvm::make_unique<Interference>()); + std::make_unique<PBQPRAConstraintList>(); + ConstraintsRoot->addConstraint(std::make_unique<SpillCosts>()); + ConstraintsRoot->addConstraint(std::make_unique<Interference>()); if (PBQPCoalescing) - ConstraintsRoot->addConstraint(llvm::make_unique<Coalescing>()); + ConstraintsRoot->addConstraint(std::make_unique<Coalescing>()); ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints()); bool PBQPAllocComplete = false; @@ -848,7 +848,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { std::string GraphFileName = FullyQualifiedName + "." + RS.str() + ".pbqpgraph"; std::error_code EC; - raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text); + raw_fd_ostream OS(GraphFileName, EC, sys::fs::OF_Text); LLVM_DEBUG(dbgs() << "Dumping graph for round " << Round << " to \"" << GraphFileName << "\"\n"); G.dump(OS); diff --git a/lib/CodeGen/RegUsageInfoCollector.cpp b/lib/CodeGen/RegUsageInfoCollector.cpp index b37dfada7101..757ff0e44953 100644 --- a/lib/CodeGen/RegUsageInfoCollector.cpp +++ b/lib/CodeGen/RegUsageInfoCollector.cpp @@ -142,6 +142,13 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { auto SetRegAsDefined = [&RegMask] (unsigned Reg) { RegMask[Reg / 32] &= ~(1u << Reg % 32); }; + + // Some targets can clobber registers "inside" a call, typically in + // linker-generated code. + for (const MCPhysReg Reg : TRI->getIntraCallClobberedRegs(&MF)) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + SetRegAsDefined(*AI); + // Scan all the physical registers. When a register is defined in the current // function set it and all the aliasing registers as defined in the regmask. // FIXME: Rewrite to use regunits. 
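The SetRegAsDefined lambda above leans on the register-mask convention used for call-preserved masks: one bit per register, packed into 32-bit words, with a set bit meaning the register survives the call. A self-contained illustration of the same bit arithmetic:

#include <cstdint>
#include <vector>

// A set bit means "preserved across the call"; marking a register as
// defined (clobbered) clears its bit, as the lambda above does.
static void setRegAsDefined(std::vector<uint32_t> &RegMask, unsigned Reg) {
  RegMask[Reg / 32] &= ~(1u << (Reg % 32));
}

static bool isRegPreserved(const std::vector<uint32_t> &RegMask, unsigned Reg) {
  return (RegMask[Reg / 32] >> (Reg % 32)) & 1u;
}

This is also why the new getIntraCallClobberedRegs loop walks an MCRegAliasIterator: clearing only the named register would leave its overlapping aliases marked as preserved.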
@@ -164,7 +171,8 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { SetRegAsDefined(PReg); } - if (TargetFrameLowering::isSafeForNoCSROpt(F)) { + if (TargetFrameLowering::isSafeForNoCSROpt(F) && + MF.getSubtarget().getFrameLowering()->isProfitableForNoCSROpt(F)) { ++NumCSROpt; LLVM_DEBUG(dbgs() << MF.getName() << " function optimized for not having CSR.\n"); diff --git a/lib/CodeGen/RegUsageInfoPropagate.cpp b/lib/CodeGen/RegUsageInfoPropagate.cpp index fc4be82d215e..0205e6193741 100644 --- a/lib/CodeGen/RegUsageInfoPropagate.cpp +++ b/lib/CodeGen/RegUsageInfoPropagate.cpp @@ -130,7 +130,11 @@ bool RegUsageInfoPropagation::runOnMachineFunction(MachineFunction &MF) { }; if (const Function *F = findCalledFunction(M, MI)) { - UpdateRegMask(*F); + if (F->isDefinitionExact()) { + UpdateRegMask(*F); + } else { + LLVM_DEBUG(dbgs() << "Function definition is not exact\n"); + } } else { LLVM_DEBUG(dbgs() << "Failed to find call target function\n"); } diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 2db6ab454cea..6ff5ddbc023d 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -406,8 +406,8 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { Partial = SrcSub || DstSub; // If one register is a physreg, it must be Dst. - if (TargetRegisterInfo::isPhysicalRegister(Src)) { - if (TargetRegisterInfo::isPhysicalRegister(Dst)) + if (Register::isPhysicalRegister(Src)) { + if (Register::isPhysicalRegister(Dst)) return false; std::swap(Src, Dst); std::swap(SrcSub, DstSub); @@ -416,7 +416,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); - if (TargetRegisterInfo::isPhysicalRegister(Dst)) { + if (Register::isPhysicalRegister(Dst)) { // Eliminate DstSub on a physreg. if (DstSub) { Dst = TRI.getSubReg(Dst, DstSub); @@ -474,8 +474,8 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { CrossClass = NewRC != DstRC || NewRC != SrcRC; } // Check our invariants - assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual"); - assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) && + assert(Register::isVirtualRegister(Src) && "Src must be virtual"); + assert(!(Register::isPhysicalRegister(Dst) && DstSub) && "Cannot have a physical SubIdx"); SrcReg = Src; DstReg = Dst; @@ -483,7 +483,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { } bool CoalescerPair::flip() { - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + if (Register::isPhysicalRegister(DstReg)) return false; std::swap(SrcReg, DstReg); std::swap(SrcIdx, DstIdx); @@ -507,8 +507,8 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { } // Now check that Dst matches DstReg. - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { - if (!TargetRegisterInfo::isPhysicalRegister(Dst)) + if (Register::isPhysicalRegister(DstReg)) { + if (!Register::isPhysicalRegister(Dst)) return false; assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state."); // DstSub could be set for a physreg from INSERT_SUBREG. 
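The setRegisters/flip/isCoalescable hunks above all preserve one invariant worth stating once: if the copy involves a physical register at all, it is kept on the Dst side, and a copy between two physical registers is never coalesced. A condensed sketch using plain register numbers, with isPhys standing in for Register::isPhysicalRegister:

#include <utility>

// Returns false for the phys-to-phys case, which the coalescer rejects
// outright; otherwise leaves any physical register on the Dst side.
static bool canonicalizeCopyRegs(unsigned &Src, unsigned &Dst,
                                 bool (*isPhys)(unsigned)) {
  if (isPhys(Src)) {
    if (isPhys(Dst))
      return false;      // never coalesce phys-to-phys copies
    std::swap(Src, Dst); // keep the physical register on the Dst side
  }
  return true;           // Src is now guaranteed virtual
}

This canonical shape is what later asserts in the same function, such as "Src must be virtual", rely on.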
@@ -802,7 +802,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, return { false, false }; MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); - unsigned NewReg = NewDstMO.getReg(); + Register NewReg = NewDstMO.getReg(); if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill()) return { false, false }; @@ -835,8 +835,8 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx); if (!NewMI) return { false, false }; - if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && - TargetRegisterInfo::isVirtualRegister(IntB.reg) && + if (Register::isVirtualRegister(IntA.reg) && + Register::isVirtualRegister(IntB.reg) && !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg))) return { false, false }; if (NewMI != DefMI) { @@ -877,7 +877,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, continue; // Kill flags are no longer accurate. They are recomputed after RA. UseMO.setIsKill(false); - if (TargetRegisterInfo::isPhysicalRegister(NewReg)) + if (Register::isPhysicalRegister(NewReg)) UseMO.substPhysReg(NewReg, *TRI); else UseMO.setReg(NewReg); @@ -1188,7 +1188,7 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, /// Returns true if @p MI defines the full vreg @p Reg, as opposed to just /// defining a subregister. static bool definesFullReg(const MachineInstr &MI, unsigned Reg) { - assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && + assert(!Register::isPhysicalRegister(Reg) && "This code cannot handle physreg aliasing"); for (const MachineOperand &Op : MI.operands()) { if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) @@ -1209,7 +1209,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx(); unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg(); unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx(); - if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) + if (Register::isPhysicalRegister(SrcReg)) return false; LiveInterval &SrcInt = LIS->getInterval(SrcReg); @@ -1240,7 +1240,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, return false; // Only support subregister destinations when the def is read-undef. MachineOperand &DstOperand = CopyMI->getOperand(0); - unsigned CopyDstReg = DstOperand.getReg(); + Register CopyDstReg = DstOperand.getReg(); if (DstOperand.getSubReg() && !DstOperand.isUndef()) return false; @@ -1254,7 +1254,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF); if (!DefMI->isImplicitDef()) { - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { + if (Register::isPhysicalRegister(DstReg)) { unsigned NewDstReg = DstReg; unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), @@ -1269,7 +1269,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, } else { // Theoretically, some stack frame reference could exist. Just make sure // it hasn't actually happened. - assert(TargetRegisterInfo::isVirtualRegister(DstReg) && + assert(Register::isVirtualRegister(DstReg) && "Only expect to deal with virtual or physical registers"); } } @@ -1317,7 +1317,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, if (MO.isReg()) { assert(MO.isImplicit() && "No explicit operands after implicit operands."); // Discard VReg implicit defs. 
- if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + if (Register::isPhysicalRegister(MO.getReg())) ImplicitOps.push_back(MO); } } @@ -1336,12 +1336,12 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, MachineOperand &MO = NewMI.getOperand(i); if (MO.isReg() && MO.isDef()) { assert(MO.isImplicit() && MO.isDead() && - TargetRegisterInfo::isPhysicalRegister(MO.getReg())); + Register::isPhysicalRegister(MO.getReg())); NewMIImplDefs.push_back(MO.getReg()); } } - if (TargetRegisterInfo::isVirtualRegister(DstReg)) { + if (Register::isVirtualRegister(DstReg)) { unsigned NewIdx = NewMI.getOperand(0).getSubReg(); if (DefRC != nullptr) { @@ -1428,7 +1428,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, } else if (NewMI.getOperand(0).getReg() != CopyDstReg) { // The New instruction may be defining a sub-register of what's actually // been asked for. If so it must implicitly define the whole thing. - assert(TargetRegisterInfo::isPhysicalRegister(DstReg) && + assert(Register::isPhysicalRegister(DstReg) && "Only expect virtual or physical registers in remat"); NewMI.getOperand(0).setIsDead(true); NewMI.addOperand(MachineOperand::CreateReg( @@ -1480,7 +1480,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) { MachineInstr *UseMI = UseMO.getParent(); if (UseMI->isDebugValue()) { - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + if (Register::isPhysicalRegister(DstReg)) UseMO.substPhysReg(DstReg, *TRI); else UseMO.setReg(DstReg); @@ -1651,7 +1651,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx) { - bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + bool DstIsPhys = Register::isPhysicalRegister(DstReg); LiveInterval *DstInt = DstIsPhys ? 
nullptr : &LIS->getInterval(DstReg); if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) { @@ -2411,8 +2411,8 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain( assert(MI && "No defining instruction"); if (!MI->isFullCopy()) return std::make_pair(VNI, TrackReg); - unsigned SrcReg = MI->getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + Register SrcReg = MI->getOperand(1).getReg(); + if (!Register::isVirtualRegister(SrcReg)) return std::make_pair(VNI, TrackReg); const LiveInterval &LI = LIS->getInterval(SrcReg); @@ -3189,9 +3189,9 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, MachineInstr *MI = Indexes->getInstructionFromIndex(Def); assert(MI && "No instruction to erase"); if (MI->isCopy()) { - unsigned Reg = MI->getOperand(1).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg) && - Reg != CP.getSrcReg() && Reg != CP.getDstReg()) + Register Reg = MI->getOperand(1).getReg(); + if (Register::isVirtualRegister(Reg) && Reg != CP.getSrcReg() && + Reg != CP.getDstReg()) ShrinkRegs.push_back(Reg); } ErasedInstrs.insert(MI); @@ -3463,10 +3463,10 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) { if (Copy->getOperand(1).isUndef()) return false; - unsigned SrcReg = Copy->getOperand(1).getReg(); - unsigned DstReg = Copy->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(SrcReg) - || TargetRegisterInfo::isPhysicalRegister(DstReg)) + Register SrcReg = Copy->getOperand(1).getReg(); + Register DstReg = Copy->getOperand(0).getReg(); + if (Register::isPhysicalRegister(SrcReg) || + Register::isPhysicalRegister(DstReg)) return false; return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg)) @@ -3526,12 +3526,11 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg)) return false; // Check if the destination of this copy has any other affinity. - if (TargetRegisterInfo::isPhysicalRegister(DstReg) || + if (Register::isPhysicalRegister(DstReg) || // If SrcReg is a physical register, the copy won't be coalesced. // Ignoring it may have other side effect (like missing // rematerialization). So keep it. - TargetRegisterInfo::isPhysicalRegister(SrcReg) || - !isTerminalReg(DstReg, Copy, MRI)) + Register::isPhysicalRegister(SrcReg) || !isTerminalReg(DstReg, Copy, MRI)) return false; // DstReg is a terminal node. Check if it interferes with any other @@ -3554,7 +3553,7 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { if (OtherReg == SrcReg) OtherReg = OtherSrcReg; // Check if OtherReg is a non-terminal. - if (TargetRegisterInfo::isPhysicalRegister(OtherReg) || + if (Register::isPhysicalRegister(OtherReg) || isTerminalReg(OtherReg, MI, MRI)) continue; // Check that OtherReg interfere with DstReg. 
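Most of the RegisterCoalescer churn above is the same mechanical migration seen throughout this import: unsigned Reg becomes Register Reg, and the static TargetRegisterInfo::isVirtualRegister / isPhysicalRegister predicates become Register::isVirtualRegister / Register::isPhysicalRegister. The sketch below is a simplified standalone model of the encoding those predicates assume (virtual register numbers carry the top bit); the class name Reg and its layout are illustrative only, the real definition lives in llvm/CodeGen/Register.h.

#include <cassert>

class Reg {
  unsigned Val;
  static constexpr unsigned VirtualFlag = 1u << 31;

public:
  explicit Reg(unsigned V) : Val(V) {}
  operator unsigned() const { return Val; } // keeps old unsigned-based APIs working

  static bool isVirtualRegister(unsigned R) { return (R & VirtualFlag) != 0; }
  static bool isPhysicalRegister(unsigned R) {
    return R != 0 && (R & VirtualFlag) == 0; // 0 means "no register"
  }
  // Map between a virtual register number and its dense 0-based index.
  static unsigned virtReg2Index(unsigned R) {
    assert(isVirtualRegister(R) && "not a virtual register");
    return R & ~VirtualFlag;
  }
  static unsigned index2VirtReg(unsigned Index) { return Index | VirtualFlag; }
};

int main() {
  Reg V(Reg::index2VirtReg(42));
  assert(Reg::isVirtualRegister(V) && Reg::virtReg2Index(V) == 42);
  assert(Reg::isPhysicalRegister(5) && !Reg::isVirtualRegister(5));
}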
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 7d9b3aa9b2d7..bf192d1c530d 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -134,6 +134,22 @@ void PressureDiff::dump(const TargetRegisterInfo &TRI) const { } dbgs() << '\n'; } + +LLVM_DUMP_METHOD +void PressureChange::dump() const { + dbgs() << "[" << getPSetOrMax() << ", " << getUnitInc() << "]\n"; +} + +void RegPressureDelta::dump() const { + dbgs() << "[Excess="; + Excess.dump(); + dbgs() << ", CriticalMax="; + CriticalMax.dump(); + dbgs() << ", CurrentMax="; + CurrentMax.dump(); + dbgs() << "]\n"; +} + #endif void RegPressureTracker::increaseRegPressure(unsigned RegUnit, @@ -219,7 +235,7 @@ void LiveRegSet::clear() { } static const LiveRange *getLiveRange(const LiveIntervals &LIS, unsigned Reg) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return &LIS.getInterval(Reg); return LIS.getCachedRegUnit(Reg); } @@ -345,7 +361,7 @@ void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) { assert(isBottomClosed() && "need bottom-up tracking to intialize."); for (const RegisterMaskPair &Pair : P.LiveOutRegs) { unsigned RegUnit = Pair.RegUnit; - if (TargetRegisterInfo::isVirtualRegister(RegUnit) + if (Register::isVirtualRegister(RegUnit) && !RPTracker.hasUntiedDef(RegUnit)) increaseSetPressure(LiveThruPressure, *MRI, RegUnit, LaneBitmask::getNone(), Pair.LaneMask); @@ -406,7 +422,7 @@ static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, bool TrackLaneMasks, unsigned RegUnit, SlotIndex Pos, LaneBitmask SafeDefault, bool(*Property)(const LiveRange &LR, SlotIndex Pos)) { - if (TargetRegisterInfo::isVirtualRegister(RegUnit)) { + if (Register::isVirtualRegister(RegUnit)) { const LiveInterval &LI = LIS.getInterval(RegUnit); LaneBitmask Result; if (TrackLaneMasks && LI.hasSubRanges()) { @@ -483,7 +499,7 @@ class RegisterOperandsCollector { void collectOperand(const MachineOperand &MO) const { if (!MO.isReg() || !MO.getReg()) return; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (MO.isUse()) { if (!MO.isUndef() && !MO.isInternalRead()) pushReg(Reg, RegOpers.Uses); @@ -503,7 +519,7 @@ class RegisterOperandsCollector { void pushReg(unsigned Reg, SmallVectorImpl<RegisterMaskPair> &RegUnits) const { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneBitmask::getAll())); } else if (MRI.isAllocatable(Reg)) { for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) @@ -514,7 +530,7 @@ class RegisterOperandsCollector { void collectOperandLanes(const MachineOperand &MO) const { if (!MO.isReg() || !MO.getReg()) return; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); unsigned SubRegIdx = MO.getSubReg(); if (MO.isUse()) { if (!MO.isUndef() && !MO.isInternalRead()) @@ -535,7 +551,7 @@ class RegisterOperandsCollector { void pushRegLanes(unsigned Reg, unsigned SubRegIdx, SmallVectorImpl<RegisterMaskPair> &RegUnits) const { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { LaneBitmask LaneMask = SubRegIdx != 0 ? TRI.getSubRegIndexLaneMask(SubRegIdx) : MRI.getMaxLaneMaskForVReg(Reg); @@ -590,7 +606,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, // If the def is all that is live after the instruction, then in case // of a subregister def we need a read-undef flag. 
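The read-undef handling here is lane-set algebra: a def operand may take the flag only when nothing outside the lanes it writes is still live after the instruction, which is exactly the (LiveAfter & ~LaneMask).none() test in the hunk. A self-contained sketch, with a toy lane mask standing in for llvm::LaneBitmask:

#include <cassert>
#include <cstdint>

struct LaneMask { // toy stand-in for llvm::LaneBitmask
  uint32_t Bits;
  LaneMask operator&(LaneMask O) const { return {Bits & O.Bits}; }
  LaneMask operator~() const { return {~Bits}; }
  bool none() const { return Bits == 0; }
};

int main() {
  LaneMask LiveAfter{0b0011}; // lanes 0 and 1 live after the instruction
  LaneMask FullDef{0b0011};   // operand writes exactly lanes 0 and 1
  // Nothing is live beyond what this def writes: read-undef is safe.
  assert((LiveAfter & ~FullDef).none());

  LaneMask PartialDef{0b0001}; // operand writes only lane 0
  // Lane 1 stays live from before the instruction: read-undef would be wrong.
  assert(!(LiveAfter & ~PartialDef).none());
}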
unsigned RegUnit = I->RegUnit; - if (TargetRegisterInfo::isVirtualRegister(RegUnit) && + if (Register::isVirtualRegister(RegUnit) && AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask).none()) AddFlagsMI->setRegisterDefReadUndef(RegUnit); @@ -616,7 +632,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, if (AddFlagsMI != nullptr) { for (const RegisterMaskPair &P : DeadDefs) { unsigned RegUnit = P.RegUnit; - if (!TargetRegisterInfo::isVirtualRegister(RegUnit)) + if (!Register::isVirtualRegister(RegUnit)) continue; LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, RegUnit, Pos.getDeadSlot()); @@ -825,7 +841,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers, if (TrackUntiedDefs) { for (const RegisterMaskPair &Def : RegOpers.Defs) { unsigned RegUnit = Def.RegUnit; - if (TargetRegisterInfo::isVirtualRegister(RegUnit) && + if (Register::isVirtualRegister(RegUnit) && (LiveRegs.contains(RegUnit) & Def.LaneMask).none()) UntiedDefs.insert(RegUnit); } diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index bb19110e6d70..ec0868acab38 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -49,7 +49,7 @@ using namespace llvm; STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); -void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) { +void RegScavenger::setRegUsed(Register Reg, LaneBitmask LaneMask) { LiveUnits.addRegMasked(Reg, LaneMask); } @@ -96,12 +96,12 @@ void RegScavenger::enterBasicBlockEnd(MachineBasicBlock &MBB) { } } -void RegScavenger::addRegUnits(BitVector &BV, unsigned Reg) { +void RegScavenger::addRegUnits(BitVector &BV, Register Reg) { for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) BV.set(*RUI); } -void RegScavenger::removeRegUnits(BitVector &BV, unsigned Reg) { +void RegScavenger::removeRegUnits(BitVector &BV, Register Reg) { for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) BV.reset(*RUI); } @@ -133,8 +133,8 @@ void RegScavenger::determineKillsAndDefs() { } if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg) || isReserved(Reg)) + Register Reg = MO.getReg(); + if (!Register::isPhysicalRegister(Reg) || isReserved(Reg)) continue; if (MO.isUse()) { @@ -204,8 +204,8 @@ void RegScavenger::forward() { for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg) || isReserved(Reg)) + Register Reg = MO.getReg(); + if (!Register::isPhysicalRegister(Reg) || isReserved(Reg)) continue; if (MO.isUse()) { if (MO.isUndef()) @@ -278,14 +278,14 @@ void RegScavenger::backward() { --MBBI; } -bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const { +bool RegScavenger::isRegUsed(Register Reg, bool includeReserved) const { if (isReserved(Reg)) return includeReserved; return !LiveUnits.available(Reg); } -unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { - for (unsigned Reg : *RC) { +Register RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { + for (Register Reg : *RC) { if (!isRegUsed(Reg)) { LLVM_DEBUG(dbgs() << "Scavenger found unused reg: " << printReg(Reg, TRI) << "\n"); @@ -297,13 +297,13 @@ unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) { BitVector Mask(TRI->getNumRegs()); - for (unsigned Reg : *RC) + for (Register Reg : *RC) if 
(!isRegUsed(Reg)) Mask.set(Reg); return Mask; } -unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, +Register RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, BitVector &Candidates, unsigned InstrLimit, MachineBasicBlock::iterator &UseMI) { @@ -329,7 +329,7 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, Candidates.clearBitsNotInMask(MO.getRegMask()); if (!MO.isReg() || MO.isUndef() || !MO.getReg()) continue; - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (Register::isVirtualRegister(MO.getReg())) { if (MO.isDef()) isVirtDefInsn = true; else if (MO.isKill()) @@ -430,7 +430,7 @@ findSurvivorBackwards(const MachineRegisterInfo &MRI, // be usefull for this other vreg as well later. bool FoundVReg = false; for (const MachineOperand &MO : MI.operands()) { - if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) { FoundVReg = true; break; } @@ -457,7 +457,7 @@ static unsigned getFrameIndexOperandNum(MachineInstr &MI) { } RegScavenger::ScavengedInfo & -RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj, +RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj, MachineBasicBlock::iterator Before, MachineBasicBlock::iterator &UseMI) { // Find an available scavenging slot with size and alignment matching @@ -531,7 +531,7 @@ RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj, return Scavenged[SI]; } -unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, +Register RegScavenger::scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj, bool AllowSpill) { MachineInstr &MI = *I; @@ -542,7 +542,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // Exclude all the registers being used by the instruction. for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) && - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + !Register::isVirtualRegister(MO.getReg())) for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) Candidates.reset(*AI); } @@ -556,7 +556,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // Find the register whose use is furthest away. MachineBasicBlock::iterator UseMI; - unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI); + Register SReg = findSurvivorReg(I, Candidates, 25, UseMI); // If we found an unused register there is no reason to spill it. if (!isRegUsed(SReg)) { @@ -576,7 +576,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, return SReg; } -unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, +Register RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill) { @@ -620,8 +620,8 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, /// \p ReserveAfter controls whether the scavenged register needs to be reserved /// after the current instruction, otherwise it will only be reserved before the /// current instruction. 
-static unsigned scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS, - unsigned VReg, bool ReserveAfter) { +static Register scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS, + Register VReg, bool ReserveAfter) { const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); #ifndef NDEBUG // Verify that all definitions and uses are in the same basic block. @@ -664,7 +664,7 @@ static unsigned scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS, // spill/reload if necessary. int SPAdj = 0; const TargetRegisterClass &RC = *MRI.getRegClass(VReg); - unsigned SReg = RS.scavengeRegisterBackwards(RC, DefMI.getIterator(), + Register SReg = RS.scavengeRegisterBackwards(RC, DefMI.getIterator(), ReserveAfter, SPAdj); MRI.replaceRegWith(VReg, SReg); ++NumScavengedRegs; @@ -694,17 +694,17 @@ static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI, for (const MachineOperand &MO : NMI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // We only care about virtual registers and ignore virtual registers // created by the target callbacks in the process (those will be handled // in a scavenging round). - if (!TargetRegisterInfo::isVirtualRegister(Reg) || - TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs) + if (!Register::isVirtualRegister(Reg) || + Register::virtReg2Index(Reg) >= InitialNumVirtRegs) continue; if (!MO.readsReg()) continue; - unsigned SReg = scavengeVReg(MRI, RS, Reg, true); + Register SReg = scavengeVReg(MRI, RS, Reg, true); N->addRegisterKilled(SReg, &TRI, false); RS.setRegUsed(SReg); } @@ -716,10 +716,10 @@ static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI, for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // Only vregs, no newly created vregs (see above). - if (!TargetRegisterInfo::isVirtualRegister(Reg) || - TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs) + if (!Register::isVirtualRegister(Reg) || + Register::virtReg2Index(Reg) >= InitialNumVirtRegs) continue; // We have to look at all operands anyway so we can precalculate here // whether there is a reading operand. 
This allows use to skip the use @@ -730,14 +730,14 @@ static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI, NextInstructionReadsVReg = true; } if (MO.isDef()) { - unsigned SReg = scavengeVReg(MRI, RS, Reg, false); + Register SReg = scavengeVReg(MRI, RS, Reg, false); I->addRegisterDead(SReg, &TRI, false); } } } #ifndef NDEBUG for (const MachineOperand &MO : MBB.front().operands()) { - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) continue; assert(!MO.isInternalRead() && "Cannot assign inside bundles"); assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses"); diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp index 22cff48c3051..e3f5abb6301f 100644 --- a/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/lib/CodeGen/RenameIndependentSubregs.cpp @@ -138,7 +138,7 @@ bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const { LLVM_DEBUG(dbgs() << printReg(Reg) << ": Splitting into newly created:"); for (unsigned I = 1, NumClasses = Classes.getNumClasses(); I < NumClasses; ++I) { - unsigned NewVReg = MRI->createVirtualRegister(RegClass); + Register NewVReg = MRI->createVirtualRegister(RegClass); LiveInterval &NewLI = LIS->createEmptyInterval(NewVReg); Intervals.push_back(&NewLI); LLVM_DEBUG(dbgs() << ' ' << printReg(NewVReg)); @@ -390,7 +390,7 @@ bool RenameIndependentSubregs::runOnMachineFunction(MachineFunction &MF) { // there can't be any further splitting. bool Changed = false; for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + unsigned Reg = Register::index2VirtReg(I); if (!LIS->hasInterval(Reg)) continue; LiveInterval &LI = LIS->getInterval(Reg); diff --git a/lib/CodeGen/SafeStack.cpp b/lib/CodeGen/SafeStack.cpp index a6bc7330e2cc..ddbbd0f8d6e9 100644 --- a/lib/CodeGen/SafeStack.cpp +++ b/lib/CodeGen/SafeStack.cpp @@ -871,7 +871,7 @@ public: report_fatal_error("TargetLowering instance is required"); auto *DL = &F.getParent()->getDataLayout(); - auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); auto &ACT = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); // Compute DT and LI only for functions that have the attribute. diff --git a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp index 7776dffb4e9c..b4037499d7d1 100644 --- a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp +++ b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp @@ -173,15 +173,30 @@ static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) { return; } + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. 
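Every ScalarizeMaskedMemIntrin hunk in this file applies one rewrite, restated in the added code that follows: instead of extracting each i1 element of the mask, bitcast the mask to an N-bit integer once and test a single bit per lane. A standalone model (plain C++, no LLVM types) of why the two forms always agree:

#include <cassert>
#include <cstdint>

// Old form: extractelement <16 x i1> %mask, i32 Idx
static bool laneEnabledByExtract(const bool *MaskVec, unsigned Idx) {
  return MaskVec[Idx];
}

// New form: bitcast the mask to i16 once, then and + icmp ne per lane.
static bool laneEnabledByBitTest(uint16_t ScalarMask, unsigned Idx) {
  return (ScalarMask & (1u << Idx)) != 0;
}

int main() {
  bool MaskVec[16] = {};
  MaskVec[3] = MaskVec[7] = true;

  uint16_t ScalarMask = 0; // models the bitcast <16 x i1> -> i16
  for (unsigned I = 0; I < 16; ++I)
    ScalarMask |= uint16_t(MaskVec[I] ? 1u << I : 0u);

  for (unsigned I = 0; I < 16; ++I)
    assert(laneEnabledByExtract(MaskVec, I) == laneEnabledByBitTest(ScalarMask, I));
}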
+ Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] - // %mask_1 = extractelement <16 x i1> %mask, i32 Idx + // %mask_1 = and i16 %scalar_mask, i32 1 << Idx + // %cond = icmp ne i16 %mask_1, 0 // br i1 %mask_1, label %cond.load, label %else // - - Value *Predicate = Builder.CreateExtractElement(Mask, Idx); + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx); + } // Create "cond" block // @@ -290,13 +305,29 @@ static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) { return; } + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. + Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // - // %mask_1 = extractelement <16 x i1> %mask, i32 Idx + // %mask_1 = and i16 %scalar_mask, i32 1 << Idx + // %cond = icmp ne i16 %mask_1, 0 // br i1 %mask_1, label %cond.store, label %else // - Value *Predicate = Builder.CreateExtractElement(Mask, Idx); + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx); + } // Create "cond" block // @@ -392,15 +423,30 @@ static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) { return; } + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. + Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // - // %Mask1 = extractelement <16 x i1> %Mask, i32 1 + // %Mask1 = and i16 %scalar_mask, i32 1 << Idx + // %cond = icmp ne i16 %mask_1, 0 // br i1 %Mask1, label %cond.load, label %else // - Value *Predicate = - Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); + } // Create "cond" block // @@ -499,14 +545,29 @@ static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) { return; } + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. 
+ Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // - // %Mask1 = extractelement <16 x i1> %Mask, i32 Idx + // %Mask1 = and i16 %scalar_mask, i32 1 << Idx + // %cond = icmp ne i16 %mask_1, 0 // br i1 %Mask1, label %cond.store, label %else // - Value *Predicate = - Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); + } // Create "cond" block // @@ -555,6 +616,32 @@ static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) { // The result vector Value *VResult = PassThru; + // Shorten the way if the mask is a vector of constants. + if (isConstantIntVector(Mask)) { + unsigned MemIndex = 0; + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) + continue; + Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex); + LoadInst *Load = + Builder.CreateAlignedLoad(EltTy, NewPtr, 1, "Load" + Twine(Idx)); + VResult = + Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx)); + ++MemIndex; + } + CI->replaceAllUsesWith(VResult); + CI->eraseFromParent(); + return; + } + + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. + Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // @@ -563,8 +650,14 @@ static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) { // br i1 %mask_1, label %cond.load, label %else // - Value *Predicate = - Builder.CreateExtractElement(Mask, Idx); + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); + } // Create "cond" block // @@ -633,13 +726,44 @@ static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) { unsigned VectorWidth = VecType->getNumElements(); + // Shorten the way if the mask is a vector of constants. + if (isConstantIntVector(Mask)) { + unsigned MemIndex = 0; + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) + continue; + Value *OneElt = + Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); + Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex); + Builder.CreateAlignedStore(OneElt, NewPtr, 1); + ++MemIndex; + } + CI->eraseFromParent(); + return; + } + + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. 
+ Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // // %mask_1 = extractelement <16 x i1> %mask, i32 Idx // br i1 %mask_1, label %cond.store, label %else // - Value *Predicate = Builder.CreateExtractElement(Mask, Idx); + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); + } // Create "cond" block // @@ -727,17 +851,24 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI, switch (II->getIntrinsicID()) { default: break; - case Intrinsic::masked_load: + case Intrinsic::masked_load: { // Scalarize unsupported vector masked load - if (TTI->isLegalMaskedLoad(CI->getType())) + unsigned Alignment = + cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); + if (TTI->isLegalMaskedLoad(CI->getType(), MaybeAlign(Alignment))) return false; scalarizeMaskedLoad(CI, ModifiedDT); return true; - case Intrinsic::masked_store: - if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) + } + case Intrinsic::masked_store: { + unsigned Alignment = + cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); + if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(), + MaybeAlign(Alignment))) return false; scalarizeMaskedStore(CI, ModifiedDT); return true; + } case Intrinsic::masked_gather: if (TTI->isLegalMaskedGather(CI->getType())) return false; diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index d5ad7e92299d..96a1f86c3e04 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -18,7 +18,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseSet.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LivePhysRegs.h" @@ -205,10 +204,10 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { if (ExitMI) { for (const MachineOperand &MO : ExitMI->operands()) { if (!MO.isReg() || MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) { Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg)); - } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) { + } else if (Register::isVirtualRegister(Reg) && MO.readsReg()) { addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO)); } } @@ -285,7 +284,7 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { MachineInstr *MI = SU->getInstr(); MachineOperand &MO = MI->getOperand(OperIdx); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // We do not need to track any dependencies for constant registers. if (MRI.isConstantPhysReg(Reg)) return; @@ -361,7 +360,7 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // No point in tracking lanemasks if we don't have interesting subregisters. 
const TargetRegisterClass &RC = *MRI.getRegClass(Reg); if (!RC.HasDisjunctSubRegs) @@ -373,6 +372,13 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const return TRI->getSubRegIndexLaneMask(SubReg); } +bool ScheduleDAGInstrs::deadDefHasNoUse(const MachineOperand &MO) { + auto RegUse = CurrentVRegUses.find(MO.getReg()); + if (RegUse == CurrentVRegUses.end()) + return true; + return (RegUse->LaneMask & getLaneMaskForMO(MO)).none(); +} + /// Adds register output and data dependencies from this SUnit to instructions /// that occur later in the same scheduling region if they read from or write to /// the virtual register defined at OperIdx. @@ -382,7 +388,7 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { MachineInstr *MI = SU->getInstr(); MachineOperand &MO = MI->getOperand(OperIdx); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); LaneBitmask DefLaneMask; LaneBitmask KillLaneMask; @@ -393,6 +399,18 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { // earlier instruction. KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask; + if (MO.getSubReg() != 0 && MO.isUndef()) { + // There may be other subregister defs on the same instruction of the same + // register in later operands. The lanes of other defs will now be live + // after this instruction, so these should not be treated as killed by the + // instruction even though they appear to be killed in this one operand. + for (int I = OperIdx + 1, E = MI->getNumOperands(); I != E; ++I) { + const MachineOperand &OtherMO = MI->getOperand(I); + if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg) + KillLaneMask &= ~getLaneMaskForMO(OtherMO); + } + } + // Clear undef flag, we'll re-add it later once we know which subregister // Def is first. MO.setIsUndef(false); @@ -402,8 +420,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { } if (MO.isDead()) { - assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() && - "Dead defs should have no uses"); + assert(deadDefHasNoUse(MO) && "Dead defs should have no uses"); } else { // Add data dependence to all uses we found so far. const TargetSubtargetInfo &ST = MF.getSubtarget(); @@ -491,7 +508,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { const MachineInstr *MI = SU->getInstr(); const MachineOperand &MO = MI->getOperand(OperIdx); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // Remember the use. Data dependencies will be added when we find the def. LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) @@ -514,7 +531,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { /// Returns true if MI is an instruction we are unable to reason about /// (like a call or something with unmodeled side effects). 
-static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { +static inline bool isGlobalMemoryObject(AAResults *AA, MachineInstr *MI) { return MI->isCall() || MI->hasUnmodeledSideEffects() || (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA)); } @@ -701,7 +718,7 @@ void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) { map.reComputeSize(); } -void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, +void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker, PressureDiffs *PDiffs, LiveIntervals *LIS, @@ -821,10 +838,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, const MachineOperand &MO = MI.getOperand(j); if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) { addPhysRegDeps(SU, j); - } else if (TargetRegisterInfo::isVirtualRegister(Reg)) { + } else if (Register::isVirtualRegister(Reg)) { HasVRegDef = true; addVRegDefDeps(SU, j); } @@ -838,10 +855,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // additional use dependencies. if (!MO.isReg() || !MO.isUse()) continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) { addPhysRegDeps(SU, j); - } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) { + } else if (Register::isVirtualRegister(Reg) && MO.readsReg()) { addVRegUseDeps(SU, j); } } @@ -1071,7 +1088,7 @@ static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, for (MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.readsReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; @@ -1102,7 +1119,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) { if (MO.isReg()) { if (!MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; LiveRegs.removeReg(Reg); diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 49c922f560fa..e8950b58d42d 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -24,7 +24,6 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -111,10 +110,20 @@ static cl::opt<bool> MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads")); +static cl::opt<bool> + EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), + cl::desc("DAG combiner enable merging multiple stores " + "into a wider store")); + static cl::opt<unsigned> TokenFactorInlineLimit( "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors")); +static cl::opt<unsigned> StoreMergeDependenceLimit( + "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), + cl::desc("Limit the number of times for the same StoreNode and RootNode " + "to bail out in store merging dependence check")); + namespace { class DAGCombiner { @@ -152,6 +161,14 @@ namespace { /// which have not yet been combined to the worklist. 
SmallPtrSet<SDNode *, 32> CombinedNodes; + /// Map from candidate StoreNode to the pair of RootNode and count. + /// The count is used to track how many times we have seen the StoreNode + /// with the same RootNode bail out in dependence check. If we have seen + /// the bail out for the same pair many times over a limit, we won't + /// consider the StoreNode with the same RootNode as store merging + /// candidate again. + DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap; + // AA - Used for DAG load/store alias analysis. AliasAnalysis *AA; @@ -236,6 +253,7 @@ namespace { void removeFromWorklist(SDNode *N) { CombinedNodes.erase(N); PruningList.remove(N); + StoreRootCountMap.erase(N); auto It = WorklistMap.find(N); if (It == WorklistMap.end()) @@ -361,6 +379,7 @@ namespace { SDValue visitSUBE(SDNode *N); SDValue visitSUBCARRY(SDNode *N); SDValue visitMUL(SDNode *N); + SDValue visitMULFIX(SDNode *N); SDValue useDivRem(SDNode *N); SDValue visitSDIV(SDNode *N); SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N); @@ -421,7 +440,6 @@ namespace { SDValue visitFP_TO_SINT(SDNode *N); SDValue visitFP_TO_UINT(SDNode *N); SDValue visitFP_ROUND(SDNode *N); - SDValue visitFP_ROUND_INREG(SDNode *N); SDValue visitFP_EXTEND(SDNode *N); SDValue visitFNEG(SDNode *N); SDValue visitFABS(SDNode *N); @@ -470,7 +488,7 @@ namespace { SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1, SDNodeFlags Flags); - SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); + SDValue visitShiftByConstant(SDNode *N); SDValue foldSelectOfConstants(SDNode *N); SDValue foldVSelectOfConstants(SDNode *N); @@ -497,6 +515,7 @@ namespace { bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, SDValue &CC) const; bool isOneUseSetCC(SDValue N) const; + bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y); SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); @@ -510,7 +529,7 @@ namespace { SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); SDValue BuildLogBase2(SDValue V, const SDLoc &DL); - SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags); + SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags); SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags); SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip); @@ -521,11 +540,11 @@ namespace { SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); - SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, + SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL); - SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); + SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); SDValue MatchLoadCombine(SDNode *N); SDValue MatchStoreCombine(StoreSDNode *N); SDValue ReduceLoadWidth(SDNode *N); @@ -742,6 +761,11 @@ CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); } +bool TargetLowering::DAGCombinerInfo:: +recursivelyDeleteUnusedNodes(SDNode *N) { + return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N); +} + void TargetLowering::DAGCombinerInfo:: CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); @@ -766,195 +790,6 
@@ void DAGCombiner::deleteAndRecombine(SDNode *N) { DAG.DeleteNode(N); } -/// Return 1 if we can compute the negated form of the specified expression for -/// the same cost as the expression itself, or 2 if we can compute the negated -/// form more cheaply than the expression itself. -static char isNegatibleForFree(SDValue Op, bool LegalOperations, - const TargetLowering &TLI, - const TargetOptions *Options, - bool ForCodeSize, - unsigned Depth = 0) { - // fneg is removable even if it has multiple uses. - if (Op.getOpcode() == ISD::FNEG) - return 2; - - // Don't allow anything with multiple uses unless we know it is free. - EVT VT = Op.getValueType(); - const SDNodeFlags Flags = Op->getFlags(); - if (!Op.hasOneUse() && - !(Op.getOpcode() == ISD::FP_EXTEND && - TLI.isFPExtFree(VT, Op.getOperand(0).getValueType()))) - return 0; - - // Don't recurse exponentially. - if (Depth > 6) - return 0; - - switch (Op.getOpcode()) { - default: return false; - case ISD::ConstantFP: { - if (!LegalOperations) - return 1; - - // Don't invert constant FP values after legalization unless the target says - // the negated constant is legal. - return TLI.isOperationLegal(ISD::ConstantFP, VT) || - TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT, - ForCodeSize); - } - case ISD::BUILD_VECTOR: { - // Only permit BUILD_VECTOR of constants. - if (llvm::any_of(Op->op_values(), [&](SDValue N) { - return !N.isUndef() && !isa<ConstantFPSDNode>(N); - })) - return 0; - if (!LegalOperations) - return 1; - if (TLI.isOperationLegal(ISD::ConstantFP, VT) && - TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) - return 1; - return llvm::all_of(Op->op_values(), [&](SDValue N) { - return N.isUndef() || - TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT, - ForCodeSize); - }); - } - case ISD::FADD: - if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros()) - return 0; - - // After operation legalization, it might not be legal to create new FSUBs. - if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) - return 0; - - // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) - if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, - Options, ForCodeSize, Depth + 1)) - return V; - // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, - ForCodeSize, Depth + 1); - case ISD::FSUB: - // We can't turn -(A-B) into B-A when we honor signed zeros. - if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) - return 0; - - // fold (fneg (fsub A, B)) -> (fsub B, A) - return 1; - - case ISD::FMUL: - case ISD::FDIV: - // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) - if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, - Options, ForCodeSize, Depth + 1)) - return V; - - return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, - ForCodeSize, Depth + 1); - - case ISD::FP_EXTEND: - case ISD::FP_ROUND: - case ISD::FSIN: - return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, - ForCodeSize, Depth + 1); - } -} - -/// If isNegatibleForFree returns true, return the newly negated expression. -static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, - bool LegalOperations, bool ForCodeSize, - unsigned Depth = 0) { - // fneg is removable even if it has multiple uses. 
- if (Op.getOpcode() == ISD::FNEG) - return Op.getOperand(0); - - assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); - const TargetOptions &Options = DAG.getTarget().Options; - const SDNodeFlags Flags = Op->getFlags(); - - switch (Op.getOpcode()) { - default: llvm_unreachable("Unknown code"); - case ISD::ConstantFP: { - APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); - V.changeSign(); - return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); - } - case ISD::BUILD_VECTOR: { - SmallVector<SDValue, 4> Ops; - for (SDValue C : Op->op_values()) { - if (C.isUndef()) { - Ops.push_back(C); - continue; - } - APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF(); - V.changeSign(); - Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType())); - } - return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops); - } - case ISD::FADD: - assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros()); - - // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) - if (isNegatibleForFree(Op.getOperand(0), LegalOperations, - DAG.getTargetLoweringInfo(), &Options, ForCodeSize, - Depth + 1)) - return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - GetNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(1), Flags); - // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - GetNegatedExpression(Op.getOperand(1), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(0), Flags); - case ISD::FSUB: - // fold (fneg (fsub 0, B)) -> B - if (ConstantFPSDNode *N0CFP = - isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true)) - if (N0CFP->isZero()) - return Op.getOperand(1); - - // fold (fneg (fsub A, B)) -> (fsub B, A) - return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - Op.getOperand(1), Op.getOperand(0), Flags); - - case ISD::FMUL: - case ISD::FDIV: - // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) - if (isNegatibleForFree(Op.getOperand(0), LegalOperations, - DAG.getTargetLoweringInfo(), &Options, ForCodeSize, - Depth + 1)) - return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), - GetNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(1), Flags); - - // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) - return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), - Op.getOperand(0), - GetNegatedExpression(Op.getOperand(1), DAG, - LegalOperations, ForCodeSize, - Depth + 1), Flags); - - case ISD::FP_EXTEND: - case ISD::FSIN: - return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), - GetNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1)); - case ISD::FP_ROUND: - return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), - GetNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(1)); - } -} - // APInts must be the same size for most operations, this helper // function zero extends the shorter of the pair so that they match. // We provide an Offset so that we can create bitwidths that won't overflow. 
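The two helpers removed above encoded ordinary fneg algebra: -(A+B) can be built as (-A)-B or (-B)-A, -(A*B) by negating either factor, and -(A-B) as B-A, with the last rewrite allowed only under no-signed-zeros, which is why the deleted code gated it on NoSignedZerosFPMath or the per-node flag. Both facts, checked in plain C++:

#include <cassert>
#include <cmath>

int main() {
  double A = 3.0, B = 5.0;
  assert(-(A + B) == (-A) - B && -(A + B) == (-B) - A);
  assert(-(A * B) == (-A) * B && -(A * B) == A * (-B));
  assert(-(A - B) == B - A); // fine here...

  double Z = 0.0;
  // ...but -(Z - Z) is -0.0 while Z - Z is +0.0, so the rewrite is not
  // sign-preserving without the no-signed-zeros guarantee.
  assert(std::signbit(-(Z - Z)) && !std::signbit(Z - Z));
}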
@@ -1124,7 +959,6 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); if (!OpNode.getNode()) return SDValue(); - AddToWorklist(OpNode.getNode()); return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); } } @@ -1438,7 +1272,6 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { SDValue RV = DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1)); - AddToWorklist(N0.getNode()); if (Replace) ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); @@ -1591,8 +1424,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) { bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); for (SDNode *LN : UpdatedNodes) { - AddToWorklist(LN); AddUsersToWorklist(LN); + AddToWorklist(LN); } if (!NIsValid) continue; @@ -1673,6 +1506,10 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ADDCARRY: return visitADDCARRY(N); case ISD::SUBE: return visitSUBE(N); case ISD::SUBCARRY: return visitSUBCARRY(N); + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: + case ISD::UMULFIXSAT: return visitMULFIX(N); case ISD::MUL: return visitMUL(N); case ISD::SDIV: return visitSDIV(N); case ISD::UDIV: return visitUDIV(N); @@ -1736,7 +1573,6 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); case ISD::FP_ROUND: return visitFP_ROUND(N); - case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N); case ISD::FP_EXTEND: return visitFP_EXTEND(N); case ISD::FNEG: return visitFNEG(N); case ISD::FABS: return visitFABS(N); @@ -3308,6 +3144,18 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } } + if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) { + // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry) + if (SDValue Carry = getAsCarry(TLI, N0)) { + SDValue X = N1; + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X); + return DAG.getNode(ISD::ADDCARRY, DL, + DAG.getVTList(VT, Carry.getValueType()), NegX, Zero, + Carry); + } + } + return SDValue(); } @@ -3442,6 +3290,30 @@ SDValue DAGCombiner::visitSUBCARRY(SDNode *N) { return SDValue(); } +// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and +// UMULFIXSAT here. 
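A note on semantics before the new visitMULFIX below: ignoring the saturating variants, a fixed-point multiply is a double-width integer multiply followed by a shift right by the scale operand, which is what makes folds like (mulfix x, 0, scale) -> 0 sound. A scalar model (helper name illustrative, not LLVM API):

#include <cassert>
#include <cstdint>

static int32_t smulfix(int32_t A, int32_t B, unsigned Scale) {
  return (int32_t)(((int64_t)A * (int64_t)B) >> Scale);
}

int main() {
  const unsigned Scale = 16;    // Q16.16 fixed point
  int32_t OneAndHalf = 3 << 15; // 1.5
  int32_t Two = 2 << 16;        // 2.0
  assert(smulfix(OneAndHalf, Two, Scale) == 3 << 16); // 1.5 * 2.0 == 3.0
  assert(smulfix(OneAndHalf, 0, Scale) == 0);         // the mulfix-by-zero fold
}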
+SDValue DAGCombiner::visitMULFIX(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue Scale = N->getOperand(2); + EVT VT = N0.getValueType(); + + // fold (mulfix x, undef, scale) -> 0 + if (N0.isUndef() || N1.isUndef()) + return DAG.getConstant(0, SDLoc(N), VT); + + // Canonicalize constant to RHS (vector doesn't have to splat) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale); + + // fold (mulfix x, 0, scale) -> 0 + if (isNullConstant(N1)) + return DAG.getConstant(0, SDLoc(N), VT); + + return SDValue(); +} + SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -3537,7 +3409,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // x * 15 --> (x << 4) - x // x * -33 --> -((x << 5) + x) // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4) - if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) { + if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) { // TODO: We could handle more general decomposition of any constant by // having the target set a limit on number of ops and making a // callback to determine that sequence (similar to sqrt expansion). @@ -4083,10 +3955,10 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { if (VT.isVector()) { // fold (mulhs x, 0) -> 0 - if (ISD::isBuildVectorAllZeros(N1.getNode())) - return N1; - if (ISD::isBuildVectorAllZeros(N0.getNode())) - return N0; + // do not return N0/N1, because undef node may exist. + if (ISD::isBuildVectorAllZeros(N0.getNode()) || + ISD::isBuildVectorAllZeros(N1.getNode())) + return DAG.getConstant(0, DL, VT); } // fold (mulhs x, 0) -> 0 @@ -4095,7 +3967,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { // fold (mulhs x, 1) -> (sra x, size(x)-1) if (isOneConstant(N1)) return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, - DAG.getConstant(N0.getValueSizeInBits() - 1, DL, + DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); // fold (mulhs x, undef) -> 0 @@ -4130,10 +4002,10 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { if (VT.isVector()) { // fold (mulhu x, 0) -> 0 - if (ISD::isBuildVectorAllZeros(N1.getNode())) - return N1; - if (ISD::isBuildVectorAllZeros(N0.getNode())) - return N0; + // do not return N0/N1, because undef node may exist. + if (ISD::isBuildVectorAllZeros(N0.getNode()) || + ISD::isBuildVectorAllZeros(N1.getNode())) + return DAG.getConstant(0, DL, VT); } // fold (mulhu x, 0) -> 0 @@ -4265,6 +4137,18 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); + // (umul_lohi N0, 0) -> (0, 0) + if (isNullConstant(N->getOperand(1))) { + SDValue Zero = DAG.getConstant(0, DL, VT); + return CombineTo(N, Zero, Zero); + } + + // (umul_lohi N0, 1) -> (N0, 0) + if (isOneConstant(N->getOperand(1))) { + SDValue Zero = DAG.getConstant(0, DL, VT); + return CombineTo(N, N->getOperand(0), Zero); + } + // If the type is twice as wide is legal, transform the mulhu to a wider // multiply plus a shift. if (VT.isSimple() && !VT.isVector()) { @@ -4290,13 +4174,29 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { } SDValue DAGCombiner::visitMULO(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); bool IsSigned = (ISD::SMULO == N->getOpcode()); + EVT CarryVT = N->getValueType(1); + SDLoc DL(N); + + // canonicalize constant to RHS. 
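The canonicalization just below puts constants on the RHS so the visitMULO folds only have to inspect N1; one of them rewrites (mulo x, 2) as (addo x, x). The two really are interchangeable, overflow flag included; a quick check using the GCC/Clang overflow builtins:

#include <cassert>
#include <cstdint>

int main() {
  int32_t Vals[] = {0, 1, -1, 1 << 30, -(1 << 30), INT32_MAX, INT32_MIN};
  for (int32_t X : Vals) {
    int32_t M = 0, A = 0;
    bool MulOverflowed = __builtin_mul_overflow(X, 2, &M);
    bool AddOverflowed = __builtin_add_overflow(X, X, &A);
    assert(MulOverflowed == AddOverflowed && M == A); // same flag, same wrapped value
  }
}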
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0); + + // fold (mulo x, 0) -> 0 + no carry out + if (isNullOrNullSplat(N1)) + return CombineTo(N, DAG.getConstant(0, DL, VT), + DAG.getConstant(0, DL, CarryVT)); + // (mulo x, 2) -> (addo x, x) - if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1))) + if (ConstantSDNode *C2 = isConstOrConstSplat(N1)) if (C2->getAPIntValue() == 2) - return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N), - N->getVTList(), N->getOperand(0), N->getOperand(0)); + return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL, + N->getVTList(), N0, N0); return SDValue(); } @@ -4444,7 +4344,9 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) { if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) && Level <= AfterLegalizeTypes) { // Input types must be integer and the same. - if (XVT.isInteger() && XVT == Y.getValueType()) { + if (XVT.isInteger() && XVT == Y.getValueType() && + !(VT.isVector() && TLI.isTypeLegal(VT) && + !XVT.isVector() && !TLI.isTypeLegal(XVT))) { SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y); return DAG.getNode(HandOpcode, DL, VT, Logic); } @@ -4770,8 +4672,8 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, return true; } - // Do not change the width of a volatile load. - if (LoadN->isVolatile()) + // Do not change the width of a volatile or atomic loads. + if (!LoadN->isSimple()) return false; // Do not generate loads of non-round integer types since these can @@ -4803,15 +4705,15 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST, if (!MemVT.isRound()) return false; - // Don't change the width of a volatile load. - if (LDST->isVolatile()) + // Don't change the width of a volatile or atomic loads. + if (!LDST->isSimple()) return false; // Verify that we are actually reducing a load width here. if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits()) return false; - // Ensure that this isn't going to produce an unsupported unaligned access. + // Ensure that this isn't going to produce an unsupported memory access. if (ShAmt && !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, LDST->getAddressSpace(), ShAmt / 8, @@ -5076,6 +4978,59 @@ SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) { return T1; } +/// Try to replace shift/logic that tests if a bit is clear with mask + setcc. +/// For a target with a bit test, this is expected to become test + set and save +/// at least 1 instruction. +static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) { + assert(And->getOpcode() == ISD::AND && "Expected an 'and' op"); + + // This is probably not worthwhile without a supported type. + EVT VT = And->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isTypeLegal(VT)) + return SDValue(); + + // Look through an optional extension and find a 'not'. + // TODO: Should we favor test+set even without the 'not' op? + SDValue Not = And->getOperand(0), And1 = And->getOperand(1); + if (Not.getOpcode() == ISD::ANY_EXTEND) + Not = Not.getOperand(0); + if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1)) + return SDValue(); + + // Look though an optional truncation. The source operand may not be the same + // type as the original 'and', but that is ok because we are masking off + // everything but the low bit. 
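combineShiftAnd1ToBitTest rests on the identity spelled out in the comment a few lines below: and (not (srl X, C)), 1 is 1 exactly when bit C of X is clear, i.e. (X & (1 << C)) == 0. Checked exhaustively over the shift amounts:

#include <cassert>
#include <cstdint>

// and (not (srl X, C)), 1
static uint32_t shiftForm(uint32_t X, unsigned C) { return (~(X >> C)) & 1u; }

// (and X, 1<<C) == 0, zero-extended back to an integer
static uint32_t bitTestForm(uint32_t X, unsigned C) {
  return (X & (1u << C)) == 0 ? 1u : 0u;
}

int main() {
  for (unsigned C = 0; C < 32; ++C)
    for (uint32_t X : {0u, 1u, 0xAAAAAAAAu, 0xFFFFFFFFu, 0x12345678u})
      assert(shiftForm(X, C) == bitTestForm(X, C));
}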
+ SDValue Srl = Not.getOperand(0); + if (Srl.getOpcode() == ISD::TRUNCATE) + Srl = Srl.getOperand(0); + + // Match a shift-right by constant. + if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() || + !isa<ConstantSDNode>(Srl.getOperand(1))) + return SDValue(); + + // We might have looked through casts that make this transform invalid. + // TODO: If the source type is wider than the result type, do the mask and + // compare in the source type. + const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1); + unsigned VTBitWidth = VT.getSizeInBits(); + if (ShiftAmt.uge(VTBitWidth)) + return SDValue(); + + // Turn this into a bit-test pattern using mask op + setcc: + // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0 + SDLoc DL(And); + SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT); + EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue Mask = DAG.getConstant( + APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT); + SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask); + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ); + return DAG.getZExtOrTrunc(Setcc, DL, VT); +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -5163,6 +5118,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } } + // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must // already be zero by virtue of the width of the base type of the load. @@ -5337,7 +5293,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { unsigned MemBitSize = MemVT.getScalarSizeInBits(); APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize); if (DAG.MaskedValueIsZero(N1, ExtBits) && - ((!LegalOperations && !LN0->isVolatile()) || + ((!LegalOperations && LN0->isSimple()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), @@ -5358,6 +5314,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N)) return Shifts; + if (TLI.hasBitTest(N0, N1)) + if (SDValue V = combineShiftAnd1ToBitTest(N, DAG)) + return V; + return SDValue(); } @@ -5564,6 +5524,23 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { return true; } +// Match 2 elements of a packed halfword bswap. +static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) { + if (N.getOpcode() == ISD::OR) + return isBSwapHWordElement(N.getOperand(0), Parts) && + isBSwapHWordElement(N.getOperand(1), Parts); + + if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) { + ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1)); + if (!C || C->getAPIntValue() != 16) + return false; + Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode(); + return true; + } + + return false; +} + /// Match a 32-bit packed halfword bswap. 
That is /// ((x & 0x000000ff) << 8) | /// ((x & 0x0000ff00) >> 8) | @@ -5581,43 +5558,26 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { return SDValue(); // Look for either - // (or (or (and), (and)), (or (and), (and))) - // (or (or (or (and), (and)), (and)), (and)) - if (N0.getOpcode() != ISD::OR) - return SDValue(); - SDValue N00 = N0.getOperand(0); - SDValue N01 = N0.getOperand(1); + // (or (bswaphpair), (bswaphpair)) + // (or (or (bswaphpair), (and)), (and)) + // (or (or (and), (bswaphpair)), (and)) SDNode *Parts[4] = {}; - if (N1.getOpcode() == ISD::OR && - N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { + if (isBSwapHWordPair(N0, Parts)) { // (or (or (and), (and)), (or (and), (and))) - if (!isBSwapHWordElement(N00, Parts)) + if (!isBSwapHWordPair(N1, Parts)) return SDValue(); - - if (!isBSwapHWordElement(N01, Parts)) - return SDValue(); - SDValue N10 = N1.getOperand(0); - if (!isBSwapHWordElement(N10, Parts)) - return SDValue(); - SDValue N11 = N1.getOperand(1); - if (!isBSwapHWordElement(N11, Parts)) - return SDValue(); - } else { + } else if (N0.getOpcode() == ISD::OR) { // (or (or (or (and), (and)), (and)), (and)) if (!isBSwapHWordElement(N1, Parts)) return SDValue(); - if (!isBSwapHWordElement(N01, Parts)) - return SDValue(); - if (N00.getOpcode() != ISD::OR) - return SDValue(); - SDValue N000 = N00.getOperand(0); - if (!isBSwapHWordElement(N000, Parts)) - return SDValue(); - SDValue N001 = N00.getOperand(1); - if (!isBSwapHWordElement(N001, Parts)) + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) && + !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts))) return SDValue(); - } + } else + return SDValue(); // Make sure the parts are all coming from the same node. if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) @@ -5791,15 +5751,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) { SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0); SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0); - bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT); - if (!LegalMask) { - std::swap(NewLHS, NewRHS); - ShuffleVectorSDNode::commuteMask(Mask); - LegalMask = TLI.isShuffleMaskLegal(Mask, VT); - } - - if (LegalMask) - return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask); + SDValue LegalShuffle = + TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, + Mask, DAG); + if (LegalShuffle) + return LegalShuffle; } } } @@ -5867,8 +5823,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return V; // See if this is some rotate idiom. - if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) - return SDValue(Rot, 0); + if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N))) + return Rot; if (SDValue Load = MatchLoadCombine(N)) return Load; @@ -5914,6 +5870,9 @@ static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift, /// Otherwise, returns an expansion of \p ExtractFrom based on the following /// patterns: /// +/// (or (add v v) (shrl v bitwidth-1)): +/// expands (add v v) -> (shl v 1) +/// /// (or (mul v c0) (shrl (mul v c1) c2)): /// expands (mul v c0) -> (shl (mul v c1) c3) /// @@ -5936,6 +5895,23 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, "Existing shift must be valid as a rotate half"); ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask); + + // Value and Type of the shift. 
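// Editorial sketch (not part of the upstream patch): the arithmetic fact
// behind the (add v v) case handled just below. Doubling is a left shift by
// one (modulo 2^32 here), so or-ing it with (srl v, bitwidth-1) forms a
// rotate left by 1. Assumes a 32-bit unsigned value.
#include <cassert>
#include <cstdint>
static void checkAddAsShlForRotate(uint32_t V) {
  uint32_t FromAdd = (V + V) | (V >> 31); // or (add v v) (srl v 31)
  uint32_t Rotl1 = (V << 1) | (V >> 31);  // rotl v, 1
  assert(FromAdd == Rotl1);
}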
+ SDValue OppShiftLHS = OppShift.getOperand(0); + EVT ShiftedVT = OppShiftLHS.getValueType(); + + // Amount of the existing shift. + ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1)); + + // (add v v) -> (shl v 1) + if (OppShift.getOpcode() == ISD::SRL && OppShiftCst && + ExtractFrom.getOpcode() == ISD::ADD && + ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) && + ExtractFrom.getOperand(0) == OppShiftLHS && + OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1) + return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS, + DAG.getShiftAmountConstant(1, ShiftedVT, DL)); + // Preconditions: // (or (op0 v c0) (shiftl/r (op0 v c1) c2)) // @@ -5959,15 +5935,11 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, // op0 must be the same opcode on both sides, have the same LHS argument, // and produce the same value type. - SDValue OppShiftLHS = OppShift.getOperand(0); - EVT ShiftedVT = OppShiftLHS.getValueType(); if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() || OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) || ShiftedVT != ExtractFrom.getValueType()) return SDValue(); - // Amount of the existing shift. - ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1)); // Constant mul/udiv/shift amount from the RHS of the shift's LHS op. ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1)); // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op. @@ -6137,7 +6109,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the // former being preferred if supported. InnerPos and InnerNeg are Pos and // Neg with outer conversions stripped away. -SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, +SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL) { @@ -6152,32 +6124,33 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) { bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, - HasPos ? Pos : Neg).getNode(); + HasPos ? Pos : Neg); } - return nullptr; + return SDValue(); } // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. -SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { +SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // Must be a legal type. Expanded 'n promoted things won't work with rotates. EVT VT = LHS.getValueType(); - if (!TLI.isTypeLegal(VT)) return nullptr; + if (!TLI.isTypeLegal(VT)) + return SDValue(); // The target must have at least one rotate flavor. bool HasROTL = hasOperation(ISD::ROTL, VT); bool HasROTR = hasOperation(ISD::ROTR, VT); - if (!HasROTL && !HasROTR) return nullptr; + if (!HasROTL && !HasROTR) + return SDValue(); // Check for truncated rotate. 
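// Editorial note (not part of the upstream patch): truncation distributes
// over 'or', so (or (trunc x), (trunc y)) equals trunc (or x, y); the block
// below therefore matches the rotate on the wide operands first and then
// truncates the rotated result.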
if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE && LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) { assert(LHS.getValueType() == RHS.getValueType()); - if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) { - return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), - SDValue(Rot, 0)).getNode(); + if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) { + return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot); } } @@ -6192,7 +6165,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // If neither side matched a rotate half, bail if (!LHSShift && !RHSShift) - return nullptr; + return SDValue(); // InstCombine may have combined a constant shl, srl, mul, or udiv with one // side of the rotate, so try to handle that here. In all cases we need to @@ -6215,15 +6188,15 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // If a side is still missing, nothing else we can do. if (!RHSShift || !LHSShift) - return nullptr; + return SDValue(); // At this point we've matched or extracted a shift op on each side. if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) - return nullptr; // Not shifting the same value. + return SDValue(); // Not shifting the same value. if (LHSShift.getOpcode() == RHSShift.getOpcode()) - return nullptr; // Shifts must disagree. + return SDValue(); // Shifts must disagree. // Canonicalize shl to left side in a shl/srl pair. if (RHSShift.getOpcode() == ISD::SHL) { @@ -6267,13 +6240,13 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask); } - return Rot.getNode(); + return Rot; } // If there is a mask here, and we have a variable shift, we can't be sure // that we're masking out the right stuff. if (LHSMask.getNode() || RHSMask.getNode()) - return nullptr; + return SDValue(); // If the shift amount is sign/zext/any-extended just peel it off. 
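// Editorial note (not part of the upstream patch): peeling the extension off
// the shift amount is safe because a valid amount is always smaller than the
// bit width, so sign-, zero-, or any-extending it cannot change its value.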
SDValue LExtOp0 = LHSShiftAmt; @@ -6290,17 +6263,17 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { RExtOp0 = RHSShiftAmt.getOperand(0); } - SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, + SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL); if (TryL) return TryL; - SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, + SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL); if (TryR) return TryR; - return nullptr; + return SDValue(); } namespace { @@ -6415,7 +6388,7 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, Depth + 1); case ISD::LOAD: { auto L = cast<LoadSDNode>(Op.getNode()); - if (L->isVolatile() || L->isIndexed()) + if (!L->isSimple() || L->isIndexed()) return None; unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits(); @@ -6504,8 +6477,9 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { SDValue Chain; SmallVector<StoreSDNode *, 8> Stores; for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) { + // TODO: Allow unordered atomics when wider type is legal (see D66309) if (Store->getMemoryVT() != MVT::i8 || - Store->isVolatile() || Store->isIndexed()) + !Store->isSimple() || Store->isIndexed()) return SDValue(); Stores.push_back(Store); Chain = Store->getChain(); @@ -6716,7 +6690,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { return SDValue(); LoadSDNode *L = P->Load; - assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() && + assert(L->hasNUsesOfValue(1, 0) && L->isSimple() && + !L->isIndexed() && "Must be enforced by calculateByteProvider"); assert(L->getOffset().isUndef() && "Unindexed load must have undef offset"); @@ -6958,25 +6933,25 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() && (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) { - SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); - if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { + SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1); + if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) { unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND; - LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS - RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS - AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); - return DAG.getNode(NewOpcode, DL, VT, LHS, RHS); + N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00 + N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01 + AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode()); + return DAG.getNode(NewOpcode, DL, VT, N00, N01); } } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants if (isAllOnesConstant(N1) && N0.hasOneUse() && (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) { - SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); - if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { + SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1); + if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) { unsigned NewOpcode = N0Opcode == ISD::AND ? 
ISD::OR : ISD::AND; - LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS - RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS - AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); - return DAG.getNode(NewOpcode, DL, VT, LHS, RHS); + N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00 + N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01 + AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode()); + return DAG.getNode(NewOpcode, DL, VT, N00, N01); } } @@ -7079,26 +7054,103 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return SDValue(); } +/// If we have a shift-by-constant of a bitwise logic op that itself has a +/// shift-by-constant operand with identical opcode, we may be able to convert +/// that into 2 independent shifts followed by the logic op. This is a +/// throughput improvement. +static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) { + // Match a one-use bitwise logic op. + SDValue LogicOp = Shift->getOperand(0); + if (!LogicOp.hasOneUse()) + return SDValue(); + + unsigned LogicOpcode = LogicOp.getOpcode(); + if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR && + LogicOpcode != ISD::XOR) + return SDValue(); + + // Find a matching one-use shift by constant. + unsigned ShiftOpcode = Shift->getOpcode(); + SDValue C1 = Shift->getOperand(1); + ConstantSDNode *C1Node = isConstOrConstSplat(C1); + assert(C1Node && "Expected a shift with constant operand"); + const APInt &C1Val = C1Node->getAPIntValue(); + auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp, + const APInt *&ShiftAmtVal) { + if (V.getOpcode() != ShiftOpcode || !V.hasOneUse()) + return false; + + ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1)); + if (!ShiftCNode) + return false; + + // Capture the shifted operand and shift amount value. + ShiftOp = V.getOperand(0); + ShiftAmtVal = &ShiftCNode->getAPIntValue(); + + // Shift amount types do not have to match their operand type, so check that + // the constants are the same width. + if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth()) + return false; + + // The fold is not valid if the sum of the shift values exceeds bitwidth. + if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits())) + return false; + + return true; + }; + + // Logic ops are commutative, so check each operand for a match. + SDValue X, Y; + const APInt *C0Val; + if (matchFirstShift(LogicOp.getOperand(0), X, C0Val)) + Y = LogicOp.getOperand(1); + else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val)) + Y = LogicOp.getOperand(0); + else + return SDValue(); + + // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1) + SDLoc DL(Shift); + EVT VT = Shift->getValueType(0); + EVT ShiftAmtVT = Shift->getOperand(1).getValueType(); + SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT); + SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC); + SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1); + return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2); +} + /// Handle transforms common to the three shifts, when the shift amount is a /// constant. 
/// We are looking for: (shift being one of shl/sra/srl) /// shift (binop X, C0), C1 /// And want to transform into: /// binop (shift X, C1), (shift C0, C1) -SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { +SDValue DAGCombiner::visitShiftByConstant(SDNode *N) { + assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand"); + // Do not turn a 'not' into a regular xor. if (isBitwiseNot(N->getOperand(0))) return SDValue(); // The inner binop must be one-use, since we want to replace it. - SDNode *LHS = N->getOperand(0).getNode(); - if (!LHS->hasOneUse()) return SDValue(); + SDValue LHS = N->getOperand(0); + if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level)) + return SDValue(); + + // TODO: This is limited to early combining because it may reveal regressions + // otherwise. But since we just checked a target hook to see if this is + // desirable, that should have filtered out cases where this interferes + // with some other pattern matching. + if (!LegalTypes) + if (SDValue R = combineShiftOfShiftedLogic(N, DAG)) + return R; // We want to pull some binops through shifts, so that we have (and (shift)) // instead of (shift (and)), likewise for add, or, xor, etc. This sort of // thing happens with address calculations, so it's important to canonicalize // it. - switch (LHS->getOpcode()) { + switch (LHS.getOpcode()) { default: return SDValue(); case ISD::OR: @@ -7112,14 +7164,14 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { } // We require the RHS of the binop to be a constant and not opaque as well. - ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1)); + ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1)); if (!BinOpCst) return SDValue(); // FIXME: disable this unless the input to the binop is a shift by a constant // or is copy/select. Enable this in other cases when figure out it's exactly // profitable. - SDValue BinOpLHSVal = LHS->getOperand(0); + SDValue BinOpLHSVal = LHS.getOperand(0); bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL || BinOpLHSVal.getOpcode() == ISD::SRA || BinOpLHSVal.getOpcode() == ISD::SRL) && @@ -7133,24 +7185,16 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { if (IsCopyOrSelect && N->hasOneUse()) return SDValue(); - EVT VT = N->getValueType(0); - - if (!TLI.isDesirableToCommuteWithShift(N, Level)) - return SDValue(); - // Fold the constants, shifting the binop RHS by the shift amount. - SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), - N->getValueType(0), - LHS->getOperand(1), N->getOperand(1)); + SDLoc DL(N); + EVT VT = N->getValueType(0); + SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1), + N->getOperand(1)); assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!"); - // Create the new shift. - SDValue NewShift = DAG.getNode(N->getOpcode(), - SDLoc(LHS->getOperand(0)), - VT, LHS->getOperand(0), N->getOperand(1)); - - // Create the new binop. 
- return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS); + SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0), + N->getOperand(1)); + return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS); } SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { @@ -7478,7 +7522,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } if (N1C && !N1C->isOpaque()) - if (SDValue NewSHL = visitShiftByConstant(N, N1C)) + if (SDValue NewSHL = visitShiftByConstant(N)) return NewSHL; return SDValue(); @@ -7597,6 +7641,37 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { } } + // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper. + // sra (add (shl X, N1C), AddC), N1C --> + // sext (add (trunc X to (width - N1C)), AddC') + if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C && + N0.getOperand(0).getOpcode() == ISD::SHL && + N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) { + if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) { + SDValue Shl = N0.getOperand(0); + // Determine what the truncate's type would be and ask the target if that + // is a free operation. + LLVMContext &Ctx = *DAG.getContext(); + unsigned ShiftAmt = N1C->getZExtValue(); + EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt); + if (VT.isVector()) + TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements()); + + // TODO: The simple type check probably belongs in the default hook + // implementation and/or target-specific overrides (because + // non-simple types likely require masking when legalized), but that + // restriction may conflict with other transforms. + if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) { + SDLoc DL(N); + SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT); + SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt). + trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT); + SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC); + return DAG.getSExtOrTrunc(Add, DL, VT); + } + } + } + // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { @@ -7638,7 +7713,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); if (N1C && !N1C->isOpaque()) - if (SDValue NewSRA = visitShiftByConstant(N, N1C)) + if (SDValue NewSRA = visitShiftByConstant(N)) return NewSRA; return SDValue(); @@ -7819,7 +7894,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return SDValue(N, 0); if (N1C && !N1C->isOpaque()) - if (SDValue NewSRL = visitShiftByConstant(N, N1C)) + if (SDValue NewSRL = visitShiftByConstant(N)) return NewSRL; // Attempt to convert a srl of a load into a narrower zero-extending load. @@ -8100,6 +8175,43 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, } } +/// If a (v)select has a condition value that is a sign-bit test, try to smear +/// the condition operand sign-bit across the value width and use it as a mask. 
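// Editorial sketch (not part of the upstream patch): the scalar identities
// behind the two folds below, assuming a 32-bit signed value and an
// arithmetic right shift of negative values (true on targets LLVM supports).
#include <cassert>
#include <cstdint>
static void checkSignSmearSelect(int32_t X, int32_t C1) {
  int32_t Smear = X >> 31;                    // all-ones if X < 0, else zero
  assert((X > -1 ? C1 : -1) == (Smear | C1)); // X > -1 ? C1 : -1
  assert((X < 0 ? C1 : 0) == (Smear & C1));   // X < 0  ? C1 : 0
}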
+static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) { + SDValue Cond = N->getOperand(0); + SDValue C1 = N->getOperand(1); + SDValue C2 = N->getOperand(2); + assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) && + "Expected select-of-constants"); + + EVT VT = N->getValueType(0); + if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() || + VT != Cond.getOperand(0).getValueType()) + return SDValue(); + + // The inverted-condition + commuted-select variants of these patterns are + // canonicalized to these forms in IR. + SDValue X = Cond.getOperand(0); + SDValue CondC = Cond.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); + if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) && + isAllOnesOrAllOnesSplat(C2)) { + // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1 + SDLoc DL(N); + SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC); + return DAG.getNode(ISD::OR, DL, VT, Sra, C1); + } + if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) { + // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1 + SDLoc DL(N); + SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC); + return DAG.getNode(ISD::AND, DL, VT, Sra, C1); + } + return SDValue(); +} + SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { SDValue Cond = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -8148,22 +8260,36 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { return Cond; } - // For any constants that differ by 1, we can transform the select into an - // extend and add. Use a target hook because some targets may prefer to - // transform in the other direction. + // Use a target hook because some targets may prefer to transform in the + // other direction. if (TLI.convertSelectOfConstantsToMath(VT)) { - if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) { + // For any constants that differ by 1, we can transform the select into an + // extend and add. + const APInt &C1Val = C1->getAPIntValue(); + const APInt &C2Val = C2->getAPIntValue(); + if (C1Val - 1 == C2Val) { // select Cond, C1, C1-1 --> add (zext Cond), C1-1 if (VT != MVT::i1) Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); } - if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) { + if (C1Val + 1 == C2Val) { // select Cond, C1, C1+1 --> add (sext Cond), C1+1 if (VT != MVT::i1) Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); } + + // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) + if (C1Val.isPowerOf2() && C2Val.isNullValue()) { + if (VT != MVT::i1) + Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); + SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC); + } + + if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) + return V; } return SDValue(); @@ -8381,23 +8507,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return SDValue(); } -static -std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { - SDLoc DL(N); - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - - // Split the inputs. 
- SDValue Lo, Hi, LL, LH, RL, RH; - std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); - std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); - - Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); - Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); - - return std::make_pair(Lo, Hi); -} - // This function assumes all the vselect's arguments are CONCAT_VECTOR // nodes and that the condition is a BV of ConstantSDNodes (or undefs). static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { @@ -8456,7 +8565,6 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { SDValue DAGCombiner::visitMSCATTER(SDNode *N) { MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); SDValue Mask = MSC->getMask(); - SDValue Data = MSC->getValue(); SDValue Chain = MSC->getChain(); SDLoc DL(N); @@ -8464,123 +8572,19 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return Chain; - if (Level >= AfterLegalizeTypes) - return SDValue(); - - // If the MSCATTER data type requires splitting and the mask is provided by a - // SETCC, then split both nodes and its operands before legalization. This - // prevents the type legalizer from unrolling SETCC into scalar comparisons - // and enables future optimizations (e.g. min/max pattern matching on X86). - if (Mask.getOpcode() != ISD::SETCC) - return SDValue(); - - // Check if any splitting is required. - if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != - TargetLowering::TypeSplitVector) - return SDValue(); - SDValue MaskLo, MaskHi; - std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0)); - - EVT MemoryVT = MSC->getMemoryVT(); - unsigned Alignment = MSC->getOriginalAlignment(); - - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - - SDValue DataLo, DataHi; - std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); - - SDValue Scale = MSC->getScale(); - SDValue BasePtr = MSC->getBasePtr(); - SDValue IndexLo, IndexHi; - std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL); - - MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MSC->getPointerInfo(), - MachineMemOperand::MOStore, LoMemVT.getStoreSize(), - Alignment, MSC->getAAInfo(), MSC->getRanges()); - - SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale }; - SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), - DataLo.getValueType(), DL, OpsLo, MMO); - - // The order of the Scatter operation after split is well defined. The "Hi" - // part comes after the "Lo". So these two operations should be chained one - // after another. - SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale }; - return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), - DL, OpsHi, MMO); + return SDValue(); } SDValue DAGCombiner::visitMSTORE(SDNode *N) { MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); SDValue Mask = MST->getMask(); - SDValue Data = MST->getValue(); SDValue Chain = MST->getChain(); - EVT VT = Data.getValueType(); SDLoc DL(N); // Zap masked stores with a zero mask. if (ISD::isBuildVectorAllZeros(Mask.getNode())) return Chain; - if (Level >= AfterLegalizeTypes) - return SDValue(); - - // If the MSTORE data type requires splitting and the mask is provided by a - // SETCC, then split both nodes and its operands before legalization. 
This - // prevents the type legalizer from unrolling SETCC into scalar comparisons - // and enables future optimizations (e.g. min/max pattern matching on X86). - if (Mask.getOpcode() == ISD::SETCC) { - // Check if any splitting is required. - if (TLI.getTypeAction(*DAG.getContext(), VT) != - TargetLowering::TypeSplitVector) - return SDValue(); - - SDValue MaskLo, MaskHi, Lo, Hi; - std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - - SDValue Ptr = MST->getBasePtr(); - - EVT MemoryVT = MST->getMemoryVT(); - unsigned Alignment = MST->getOriginalAlignment(); - - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - - SDValue DataLo, DataHi; - std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); - - MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MST->getPointerInfo(), - MachineMemOperand::MOStore, LoMemVT.getStoreSize(), - Alignment, MST->getAAInfo(), MST->getRanges()); - - Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, - MST->isTruncatingStore(), - MST->isCompressingStore()); - - Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, - MST->isCompressingStore()); - unsigned HiOffset = LoMemVT.getStoreSize(); - - MMO = DAG.getMachineFunction().getMachineMemOperand( - MST->getPointerInfo().getWithOffset(HiOffset), - MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment, - MST->getAAInfo(), MST->getRanges()); - - Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, - MST->isTruncatingStore(), - MST->isCompressingStore()); - - AddToWorklist(Lo.getNode()); - AddToWorklist(Hi.getNode()); - - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); - } return SDValue(); } @@ -8593,76 +8597,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return CombineTo(N, MGT->getPassThru(), MGT->getChain()); - if (Level >= AfterLegalizeTypes) - return SDValue(); - - // If the MGATHER result requires splitting and the mask is provided by a - // SETCC, then split both nodes and its operands before legalization. This - // prevents the type legalizer from unrolling SETCC into scalar comparisons - // and enables future optimizations (e.g. min/max pattern matching on X86). - - if (Mask.getOpcode() != ISD::SETCC) - return SDValue(); - - EVT VT = N->getValueType(0); - - // Check if any splitting is required. - if (TLI.getTypeAction(*DAG.getContext(), VT) != - TargetLowering::TypeSplitVector) - return SDValue(); - - SDValue MaskLo, MaskHi, Lo, Hi; - std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - - SDValue PassThru = MGT->getPassThru(); - SDValue PassThruLo, PassThruHi; - std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL); - - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); - - SDValue Chain = MGT->getChain(); - EVT MemoryVT = MGT->getMemoryVT(); - unsigned Alignment = MGT->getOriginalAlignment(); - - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - - SDValue Scale = MGT->getScale(); - SDValue BasePtr = MGT->getBasePtr(); - SDValue Index = MGT->getIndex(); - SDValue IndexLo, IndexHi; - std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); - - MachineMemOperand *MMO = DAG.getMachineFunction(). 
- getMachineMemOperand(MGT->getPointerInfo(), - MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), - Alignment, MGT->getAAInfo(), MGT->getRanges()); - - SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale }; - Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo, - MMO); - - SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale }; - Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi, - MMO); - - AddToWorklist(Lo.getNode()); - AddToWorklist(Hi.getNode()); - - // Build a factor node to remember that this load is independent of the - // other one. - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Legalized the chain result - switch anything that used the old chain to - // use the new one. - DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain); - - SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); - - SDValue RetOps[] = { GatherRes, Chain }; - return DAG.getMergeValues(RetOps, DL); + return SDValue(); } SDValue DAGCombiner::visitMLOAD(SDNode *N) { @@ -8674,76 +8609,6 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return CombineTo(N, MLD->getPassThru(), MLD->getChain()); - if (Level >= AfterLegalizeTypes) - return SDValue(); - - // If the MLOAD result requires splitting and the mask is provided by a - // SETCC, then split both nodes and its operands before legalization. This - // prevents the type legalizer from unrolling SETCC into scalar comparisons - // and enables future optimizations (e.g. min/max pattern matching on X86). - if (Mask.getOpcode() == ISD::SETCC) { - EVT VT = N->getValueType(0); - - // Check if any splitting is required. - if (TLI.getTypeAction(*DAG.getContext(), VT) != - TargetLowering::TypeSplitVector) - return SDValue(); - - SDValue MaskLo, MaskHi, Lo, Hi; - std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - - SDValue PassThru = MLD->getPassThru(); - SDValue PassThruLo, PassThruHi; - std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL); - - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); - - SDValue Chain = MLD->getChain(); - SDValue Ptr = MLD->getBasePtr(); - EVT MemoryVT = MLD->getMemoryVT(); - unsigned Alignment = MLD->getOriginalAlignment(); - - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - - MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MLD->getPointerInfo(), - MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), - Alignment, MLD->getAAInfo(), MLD->getRanges()); - - Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT, - MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad()); - - Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, - MLD->isExpandingLoad()); - unsigned HiOffset = LoMemVT.getStoreSize(); - - MMO = DAG.getMachineFunction().getMachineMemOperand( - MLD->getPointerInfo().getWithOffset(HiOffset), - MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment, - MLD->getAAInfo(), MLD->getRanges()); - - Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT, - MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad()); - - AddToWorklist(Lo.getNode()); - AddToWorklist(Hi.getNode()); - - // Build a factor node to remember that this load is independent of the - // other one. 
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Legalized the chain result - switch anything that used the old chain to - // use the new one. - DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain); - - SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); - - SDValue RetOps[] = { LoadRes, Chain }; - return DAG.getMergeValues(RetOps, DL); - } return SDValue(); } @@ -8791,6 +8656,18 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2); } + // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C) + APInt Pow2C; + if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() && + isNullOrNullSplat(N2)) { + SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT); + SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC); + } + + if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) + return V; + // The general case for select-of-constants: // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2 // ...but that only makes sense if a vselect is slower than 2 logic ops, so @@ -8832,13 +8709,12 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); if (isAbs) { - EVT VT = LHS.getValueType(); if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) return DAG.getNode(ISD::ABS, DL, VT, LHS); - SDValue Shift = DAG.getNode( - ISD::SRA, DL, VT, LHS, - DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT)); + SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS, + DAG.getConstant(VT.getScalarSizeInBits() - 1, + DL, getShiftAmountTy(VT))); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); @@ -8851,10 +8727,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { // This is OK if we don't care about what happens if either operand is a // NaN. 
// - if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0), - N0.getOperand(1), TLI)) { - if (SDValue FMinMax = combineMinNumMaxNum( - DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG)) + if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) { + if (SDValue FMinMax = + combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG)) return FMinMax; } @@ -9209,8 +9084,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) || - !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() || - !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) + !N0.hasOneUse() || !LN0->isSimple() || + !DstVT.isVector() || !DstVT.isPow2VectorType() || + !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) return SDValue(); SmallVector<SDNode *, 4> SetCCs; @@ -9411,7 +9287,8 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT MemVT = LN0->getMemoryVT(); - if ((LegalOperations || LN0->isVolatile() || VT.isVector()) && + if ((LegalOperations || !LN0->isSimple() || + VT.isVector()) && !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT)) return SDValue(); @@ -9436,7 +9313,7 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()) || ((LegalOperations || VT.isVector() || - cast<LoadSDNode>(N0)->isVolatile()) && + !cast<LoadSDNode>(N0)->isSimple()) && !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))) return {}; @@ -9468,6 +9345,35 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, return SDValue(N, 0); // Return N so it doesn't get rechecked! } +static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, + const TargetLowering &TLI, EVT VT, + SDNode *N, SDValue N0, + ISD::LoadExtType ExtLoadType, + ISD::NodeType ExtOpc) { + if (!N0.hasOneUse()) + return SDValue(); + + MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0); + if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD) + return SDValue(); + + if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0))) + return SDValue(); + + if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0))) + return SDValue(); + + SDLoc dl(Ld); + SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru()); + SDValue NewLoad = DAG.getMaskedLoad(VT, dl, Ld->getChain(), + Ld->getBasePtr(), Ld->getMask(), + PassThru, Ld->getMemoryVT(), + Ld->getMemOperand(), ExtLoadType, + Ld->isExpandingLoad()); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1)); + return NewLoad; +} + static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations) { assert((N->getOpcode() == ISD::SIGN_EXTEND || @@ -9568,6 +9474,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { ISD::SEXTLOAD, ISD::SIGN_EXTEND)) return foldedExt; + if (SDValue foldedExt = + tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD, + ISD::SIGN_EXTEND)) + return foldedExt; + // fold (sext (load x)) to multiple smaller sextloads. // Only on illegal but splittable vectors. if (SDValue ExtLoad = CombineExtLoad(N)) @@ -9856,6 +9767,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { ISD::ZEXTLOAD, ISD::ZERO_EXTEND)) return foldedExt; + if (SDValue foldedExt = + tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD, + ISD::ZERO_EXTEND)) + return foldedExt; + // fold (zext (load x)) to multiple smaller zextloads. 
// Only on illegal but splittable vectors. if (SDValue ExtLoad = CombineExtLoad(N)) @@ -10340,7 +10256,10 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { return SDValue(); LoadSDNode *LN0 = cast<LoadSDNode>(N0); - if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) + // Reducing the width of a volatile load is illegal. For atomics, we may be + // able to reduce the width provided we never widen again. (see D66309) + if (!LN0->isSimple() || + !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) return SDValue(); auto AdjustBigEndianShift = [&](unsigned ShAmt) { @@ -10369,11 +10288,11 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue Load; if (ExtType == ISD::NON_EXTLOAD) - Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, + Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); else - Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr, + Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT, NewAlign, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); @@ -10392,7 +10311,6 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // no larger than the source) then the useful bits of the result are // zero; we can't simply return the shortened shift, because the result // of that operation is undefined. - SDLoc DL(N0); if (ShLeftAmt >= VT.getSizeInBits()) Result = DAG.getConstant(0, DL, VT); else @@ -10513,7 +10431,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && EVT == cast<LoadSDNode>(N0)->getMemoryVT() && - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() && + ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() && N0.hasOneUse()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); @@ -10530,7 +10448,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse() && EVT == cast<LoadSDNode>(N0)->getMemoryVT() && - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) && TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, @@ -10757,7 +10675,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // after truncation. if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - if (!LN0->isVolatile() && + if (LN0->isSimple() && LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) { SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), @@ -11051,7 +10969,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // memory accesses. We don't care if the original type was legal or not // as we assume software couldn't rely on the number of accesses of an // illegal type. - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) || TLI.isOperationLegal(ISD::LOAD, VT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); @@ -11237,15 +11155,10 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { for (int i = 0; i != MaskScale; ++i) NewMask.push_back(M < 0 ? 
-1 : M * MaskScale + i); - bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); - if (!LegalMask) { - std::swap(SV0, SV1); - ShuffleVectorSDNode::commuteMask(NewMask); - LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); - } - - if (LegalMask) - return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask); + SDValue LegalShuffle = + TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG); + if (LegalShuffle) + return LegalShuffle; } return SDValue(); @@ -11998,7 +11911,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math) ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true); if (N1C && N1C->isZero()) - if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros()) + if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) return N0; if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -12006,17 +11919,17 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2) - return DAG.getNode(ISD::FSUB, DL, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), Flags); + TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize) == 2) + return DAG.getNode( + ISD::FSUB, DL, VT, N0, + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2) - return DAG.getNode(ISD::FSUB, DL, VT, N1, - GetNegatedExpression(N0, DAG, LegalOperations, - ForCodeSize), Flags); + TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize) == 2) + return DAG.getNode( + ISD::FSUB, DL, VT, N1, + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), Flags); auto isFMulNegTwo = [](SDValue FMul) { if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL) @@ -12056,7 +11969,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // If 'unsafe math' or reassoc and nsz, fold lots of things. 
// TODO: break out portions of the transformations below for which Unsafe is // considered and which do not require both nsz and reassoc - if ((Options.UnsafeFPMath || + if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) || (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && AllowNewConst) { // fadd (fadd x, c1), c2 -> fadd x, c1 + c2 @@ -12175,7 +12088,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // (fsub A, 0) -> A if (N1CFP && N1CFP->isZero()) { - if (!N1CFP->isNegative() || Options.UnsafeFPMath || + if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) { return N0; } @@ -12195,16 +12108,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N0CFP && N0CFP->isZero()) { if (N0CFP->isNegative() || (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { - if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize)) - return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize)) + return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); } } - if ((Options.UnsafeFPMath || - (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) - && N1.getOpcode() == ISD::FADD) { + if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) || + (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && + N1.getOpcode() == ISD::FADD) { // X - (X + Y) -> -Y if (N0 == N1->getOperand(0)) return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags); @@ -12214,10 +12127,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // fold (fsub A, (fneg B)) -> (fadd A, B) - if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize)) - return DAG.getNode(ISD::FADD, DL, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), Flags); + if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize)) + return DAG.getNode( + ISD::FADD, DL, VT, N0, + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); // FSUB -> FMA combines: if (SDValue Fused = visitFSUBForFMACombine(N)) { @@ -12228,6 +12141,21 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { return SDValue(); } +/// Return true if both inputs are at least as cheap in negated form and at +/// least one input is strictly cheaper in negated form. +bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) { + if (char LHSNeg = + TLI.isNegatibleForFree(X, DAG, LegalOperations, ForCodeSize)) + if (char RHSNeg = + TLI.isNegatibleForFree(Y, DAG, LegalOperations, ForCodeSize)) + // Both negated operands are at least as cheap as their counterparts. + // Check to see if at least one is cheaper negated. 
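// Editorial note (not part of the upstream patch): isNegatibleForFree returns
// 0 when negation is not free, 1 when the negated form costs the same, and 2
// when it is strictly cheaper, so requiring a 2 on at least one side
// guarantees a net win rather than a cost-neutral flip-flop.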
+ if (LHSNeg == 2 || RHSNeg == 2) + return true; + + return false; +} + SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -12254,10 +12182,6 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags); - // fold (fmul A, 1.0) -> A - if (N1CFP && N1CFP->isExactlyValue(1.0)) - return N0; - if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -12302,21 +12226,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, N0); - // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options, - ForCodeSize)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options, - ForCodeSize)) { - // Both can be negated for free, check to see if at least one is cheaper - // negated. - if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FMUL, DL, VT, - GetNegatedExpression(N0, DAG, LegalOperations, - ForCodeSize), - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), - Flags); - } + // -N0 * -N1 --> N0 * N1 + if (isCheaperToUseNegatedFPOps(N0, N1)) { + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags); } // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X)) @@ -12395,6 +12311,15 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2); } + // (-N0 * -N1) + N2 --> (N0 * N1) + N2 + if (isCheaperToUseNegatedFPOps(N0, N1)) { + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags); + } + if (UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) return N2; @@ -12602,9 +12527,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction. if (N1.getOpcode() == ISD::FSQRT) { - if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); - } } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), @@ -12645,28 +12569,16 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } // Fold into a reciprocal estimate and multiply instead of a real divide. - if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) { - AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); - } + if (SDValue RV = BuildDivEstimate(N0, N1, Flags)) + return RV; } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options, - ForCodeSize)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options, - ForCodeSize)) { - // Both can be negated for free, check to see if at least one is cheaper - // negated. 
- if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FDIV, SDLoc(N), VT, - GetNegatedExpression(N0, DAG, LegalOperations, - ForCodeSize), - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), - Flags); - } - } + if (isCheaperToUseNegatedFPOps(N0, N1)) + return DAG.getNode( + ISD::FDIV, SDLoc(N), VT, + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); return SDValue(); } @@ -13112,22 +13024,6 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { - SDValue N0 = N->getOperand(0); - EVT VT = N->getValueType(0); - EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - - // fold (fp_round_inreg c1fp) -> c1fp - if (N0CFP && isTypeLegal(EVT)) { - SDLoc DL(N); - SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT); - return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round); - } - - return SDValue(); -} - SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -13236,9 +13132,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); - if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), - &DAG.getTarget().Options, ForCodeSize)) - return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize)) + return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading // constant pool values. @@ -14004,11 +13899,12 @@ bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) { } SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { - if (OptLevel == CodeGenOpt::None || LD->isVolatile()) + if (OptLevel == CodeGenOpt::None || !LD->isSimple()) return SDValue(); SDValue Chain = LD->getOperand(0); StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode()); - if (!ST || ST->isVolatile()) + // TODO: Relax this restriction for unordered atomics (see D66309) + if (!ST || !ST->isSimple()) return SDValue(); EVT LDType = LD->getValueType(0); @@ -14107,7 +14003,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // If load is not volatile and there are no uses of the loaded value (and // the updated indexed value in case of indexed loads), change uses of the // chain value into uses of the chain input (i.e. delete the dead load). - if (!LD->isVolatile()) { + // TODO: Allow this for unordered atomics (see D66309) + if (LD->isSimple()) { if (N->getValueType(1) == MVT::Other) { // Unindexed loads. if (!N->hasAnyUseOfValue(0)) { @@ -14241,7 +14138,7 @@ struct LoadedSlice { /// Helper structure used to compute the cost of a slice. struct Cost { /// Are we optimizing for code size. - bool ForCodeSize; + bool ForCodeSize = false; /// Various cost. unsigned Loads = 0; @@ -14250,10 +14147,10 @@ struct LoadedSlice { unsigned ZExts = 0; unsigned Shift = 0; - Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {} + explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {} /// Get the cost of one isolated slice. 
- Cost(const LoadedSlice &LS, bool ForCodeSize = false) + Cost(const LoadedSlice &LS, bool ForCodeSize) : ForCodeSize(ForCodeSize), Loads(1) { EVT TruncType = LS.Inst->getValueType(0); EVT LoadedType = LS.getLoadedType(); @@ -14678,7 +14575,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { return false; LoadSDNode *LD = cast<LoadSDNode>(N); - if (LD->isVolatile() || !ISD::isNormalLoad(LD) || + if (!LD->isSimple() || !ISD::isNormalLoad(LD) || !LD->getValueType(0).isInteger()) return false; @@ -14829,13 +14726,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { else if (Chain->getOpcode() == ISD::TokenFactor && SDValue(LD, 1).hasOneUse()) { // LD has only 1 chain use so they are no indirect dependencies. - bool isOk = false; - for (const SDValue &ChainOp : Chain->op_values()) - if (ChainOp.getNode() == LD) { - isOk = true; - break; - } - if (!isOk) + if (!LD->isOperandOf(Chain.getNode())) return Result; } else return Result; // Fail. @@ -14848,7 +14739,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { /// Check to see if IVal is something that provides a value as specified by /// MaskInfo. If so, replace the specified store with a narrower store of /// truncated IVal. -static SDNode * +static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC) { @@ -14860,14 +14751,19 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, // that uses this. If not, this is not a replacement. APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), ByteShift*8, (ByteShift+NumBytes)*8); - if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr; + if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue(); // Check that it is legal on the target to do this. It is legal if the new // VT we're shrinking to (i8/i16/i32) is legal or we're still before type - // legalization. - MVT VT = MVT::getIntegerVT(NumBytes*8); + // legalization (and the target doesn't explicitly think this is a bad idea). + MVT VT = MVT::getIntegerVT(NumBytes * 8); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!DC->isTypeLegal(VT)) - return nullptr; + return SDValue(); + if (St->getMemOperand() && + !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + *St->getMemOperand())) + return SDValue(); // Okay, we can do this! Replace the 'St' store with a store of IVal that is // shifted by ByteShift and truncated down to NumBytes. @@ -14901,8 +14797,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, ++OpsNarrowed; return DAG .getStore(St->getChain(), SDLoc(St), IVal, Ptr, - St->getPointerInfo().getWithOffset(StOffset), NewAlign) - .getNode(); + St->getPointerInfo().getWithOffset(StOffset), NewAlign); } /// Look for sequence of load / op / store where op is one of 'or', 'xor', and @@ -14911,7 +14806,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, /// or code size. 
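// Editorial sketch (not part of the upstream patch): the source-level shape
// ReduceLoadOpStoreWidth targets, written out for a little-endian machine.
// Only one byte of the loaded value changes, so the 32-bit load / or / store
// may be narrowed to a single byte store.
#include <cstdint>
static void setByteOne(uint32_t *P) {
  *P |= 0x0000FF00u; // 32-bit load / or / 32-bit store ...
  // ... can become: reinterpret_cast<uint8_t *>(P)[1] = 0xFF;
}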
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { StoreSDNode *ST = cast<StoreSDNode>(N); - if (ST->isVolatile()) + if (!ST->isSimple()) return SDValue(); SDValue Chain = ST->getChain(); @@ -14933,16 +14828,16 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { std::pair<unsigned, unsigned> MaskedLoad; MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); if (MaskedLoad.first) - if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, Value.getOperand(1), ST,this)) - return SDValue(NewST, 0); + return NewST; // Or is commutative, so try swapping X and Y. MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); if (MaskedLoad.first) - if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, Value.getOperand(0), ST,this)) - return SDValue(NewST, 0); + return NewST; } if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || @@ -15367,14 +15262,16 @@ void DAGCombiner::getStoreMergeCandidates( // Loads must only have one use. if (!Ld->hasNUsesOfValue(1, 0)) return; - // The memory operands must not be volatile/indexed. - if (Ld->isVolatile() || Ld->isIndexed()) + // The memory operands must not be volatile/indexed/atomic. + // TODO: May be able to relax for unordered atomics (see D66309) + if (!Ld->isSimple() || Ld->isIndexed()) return; } auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr, int64_t &Offset) -> bool { - // The memory operands must not be volatile/indexed. - if (Other->isVolatile() || Other->isIndexed()) + // The memory operands must not be volatile/indexed/atomic. + // TODO: May be able to relax for unordered atomics (see D66309) + if (!Other->isSimple() || Other->isIndexed()) return false; // Don't mix temporal stores with non-temporal stores. if (St->isNonTemporal() != Other->isNonTemporal()) @@ -15394,8 +15291,10 @@ void DAGCombiner::getStoreMergeCandidates( // Loads must only have one use. if (!OtherLd->hasNUsesOfValue(1, 0)) return false; - // The memory operands must not be volatile/indexed. - if (OtherLd->isVolatile() || OtherLd->isIndexed()) + // The memory operands must not be volatile/indexed/atomic. + // TODO: May be able to relax for unordered atomics (see D66309) + if (!OtherLd->isSimple() || + OtherLd->isIndexed()) return false; // Don't mix temporal loads with non-temporal loads. if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal()) @@ -15425,6 +15324,18 @@ void DAGCombiner::getStoreMergeCandidates( return (BasePtr.equalBaseIndex(Ptr, DAG, Offset)); }; + // Check if the pair of StoreNode and the RootNode already bail out many + // times which is over the limit in dependence check. + auto OverLimitInDependenceCheck = [&](SDNode *StoreNode, + SDNode *RootNode) -> bool { + auto RootCount = StoreRootCountMap.find(StoreNode); + if (RootCount != StoreRootCountMap.end() && + RootCount->second.first == RootNode && + RootCount->second.second > StoreMergeDependenceLimit) + return true; + return false; + }; + // We looking for a root node which is an ancestor to all mergable // stores. We search up through a load, to our root and then down // through all children. 
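[Editor's note] The new `StoreRootCountMap` / `OverLimitInDependenceCheck` pair is a memoized bail-out: each time the expensive dependence walk for a store gives up against a given root node, a counter is bumped, and once it passes `StoreMergeDependenceLimit` that store is no longer offered as a merge candidate under the same root. A compact standalone model of just the bookkeeping, with stand-in `Node` types in place of `SDNode`:

```c++
#include <cassert>
#include <unordered_map>
#include <utility>

struct Node {}; // stand-in for an SDNode

// Map: store -> (last root it bailed against, bail-out count).
std::unordered_map<const Node *, std::pair<const Node *, unsigned>> RootCount;
constexpr unsigned DependenceLimit = 10;

void recordBailout(const Node *Store, const Node *Root) {
  auto &Entry = RootCount[Store];
  if (Entry.first == Root)
    ++Entry.second;
  else
    Entry = {Root, 1}; // a new root starts the counter over
}

bool overLimit(const Node *Store, const Node *Root) {
  auto It = RootCount.find(Store);
  return It != RootCount.end() && It->second.first == Root &&
         It->second.second > DependenceLimit;
}

int main() {
  Node S, R, R2;
  for (unsigned i = 0; i <= DependenceLimit; ++i)
    recordBailout(&S, &R);
  assert(overLimit(&S, &R));   // candidate now skipped under this root
  assert(!overLimit(&S, &R2)); // a different root starts fresh
}
```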
For instance we will find Store{1,2,3} if @@ -15454,7 +15365,8 @@ void DAGCombiner::getStoreMergeCandidates( if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) { BaseIndexOffset Ptr; int64_t PtrDiff; - if (CandidateMatch(OtherST, Ptr, PtrDiff)) + if (CandidateMatch(OtherST, Ptr, PtrDiff) && + !OverLimitInDependenceCheck(OtherST, RootNode)) StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); } } else @@ -15464,7 +15376,8 @@ void DAGCombiner::getStoreMergeCandidates( if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) { BaseIndexOffset Ptr; int64_t PtrDiff; - if (CandidateMatch(OtherST, Ptr, PtrDiff)) + if (CandidateMatch(OtherST, Ptr, PtrDiff) && + !OverLimitInDependenceCheck(OtherST, RootNode)) StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); } } @@ -15522,13 +15435,24 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( // Search through DAG. We can stop early if we find a store node. for (unsigned i = 0; i < NumStores; ++i) if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist, - Max)) + Max)) { + // If the searching bail out, record the StoreNode and RootNode in the + // StoreRootCountMap. If we have seen the pair many times over a limit, + // we won't add the StoreNode into StoreNodes set again. + if (Visited.size() >= Max) { + auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode]; + if (RootCount.first == RootNode) + RootCount.second++; + else + RootCount = {RootNode, 1}; + } return false; + } return true; } bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { - if (OptLevel == CodeGenOpt::None) + if (OptLevel == CodeGenOpt::None || !EnableStoreMerging) return false; EVT MemVT = St->getMemoryVT(); @@ -15588,7 +15512,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { bool RV = false; while (StoreNodes.size() > 1) { - unsigned StartIdx = 0; + size_t StartIdx = 0; while ((StartIdx + 1 < StoreNodes.size()) && StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes != StoreNodes[StartIdx + 1].OffsetFromBase) @@ -16113,7 +16037,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { case MVT::ppcf128: return SDValue(); case MVT::f32: - if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || + if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { ; Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). @@ -16125,7 +16049,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { return SDValue(); case MVT::f64: if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && - !ST->isVolatile()) || + ST->isSimple()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { ; Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). @@ -16134,7 +16058,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { Ptr, ST->getMemOperand()); } - if (!ST->isVolatile() && + if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { // Many FP stores are not made apparent until after legalize, e.g. for // argument passing. Since this is so common, custom legalize the @@ -16181,7 +16105,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // memory accesses. We don't care if the original type was legal or not // as we assume software couldn't rely on the number of accesses of an // illegal type. 
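[Editor's note] `replaceStoreOfFPConstant` (touched above only to swap `isVolatile` checks for `isSimple`) relies on the fact that storing an f32/f64 constant is byte-for-byte identical to storing its IEEE-754 bit pattern as i32/i64, which avoids materializing an FP constant at all. A quick host-side illustration of that bitcast equivalence:

```c++
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  float F = 1.5f;              // the FP constant being stored
  uint32_t Bits;
  std::memcpy(&Bits, &F, 4);   // analogue of bitcastToAPInt()
  assert(Bits == 0x3FC00000u); // IEEE-754 single-precision 1.5

  unsigned char AsFP[4], AsInt[4];
  std::memcpy(AsFP, &F, 4);     // store the f32
  std::memcpy(AsInt, &Bits, 4); // store the i32 bit pattern instead
  assert(std::memcmp(AsFP, AsInt, 4) == 0); // identical memory image
}
```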
- if (((!LegalOperations && !ST->isVolatile()) || + // TODO: May be able to relax for unordered atomics (see D66309) + if (((!LegalOperations && ST->isSimple()) || TLI.isOperationLegal(ISD::STORE, SVT)) && TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT, DAG, *ST->getMemOperand())) { @@ -16242,9 +16167,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // See if we can simplify the input to this truncstore with knowledge that // only the low bits are being used. For example: // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" - SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits); AddToWorklist(Value.getNode()); - if (Shorter) + if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits)) return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(), ST->getMemOperand()); @@ -16263,9 +16187,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // If this is a load followed by a store to the same location, then the store // is dead/noop. + // TODO: Can relax for unordered atomics (see D66309) if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) { if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && - ST->isUnindexed() && !ST->isVolatile() && + ST->isUnindexed() && ST->isSimple() && // There can't be any side effects between the load and store, such as // a call or store. Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) { @@ -16274,9 +16199,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } } + // TODO: Can relax for unordered atomics (see D66309) if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) { - if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() && - !ST1->isVolatile()) { + if (ST->isUnindexed() && ST->isSimple() && + ST1->isUnindexed() && ST1->isSimple()) { if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT()) { // If this is a store followed by a store with the same value to the @@ -16405,7 +16331,8 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { break; case ISD::STORE: { StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain); - if (ST->isVolatile() || ST->isIndexed()) + // TODO: Can relax for unordered atomics (see D66309) + if (!ST->isSimple() || ST->isIndexed()) continue; const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG); // If we store purely within object bounds just before its lifetime ends, @@ -16456,6 +16383,11 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { if (OptLevel == CodeGenOpt::None) return SDValue(); + // Can't change the number of memory accesses for a volatile store or break + // atomicity for an atomic one. + if (!ST->isSimple()) + return SDValue(); + SDValue Val = ST->getValue(); SDLoc DL(ST); @@ -16531,12 +16463,52 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { } /// Convert a disguised subvector insertion into a shuffle: -/// insert_vector_elt V, (bitcast X from vector type), IdxC --> -/// bitcast(shuffle (bitcast V), (extended X), Mask) -/// Note: We do not use an insert_subvector node because that requires a legal -/// subvector type. 
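[Editor's note] The reshuffled `GetDemandedBits` call above drives folds like the one named in the comment, `truncstore (or (shl x, 8), y), i8 --> truncstore y, i8`: an i8 truncating store keeps only the low 8 bits, and `(x << 8)` contributes nothing to them. A one-liner check of the underlying identity:

```c++
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x : {0u, 1u, 0xFFFFFFFFu, 0x12345678u})
    for (uint32_t y : {0u, 0x80u, 0xABu, 0xFFFFFF01u})
      // The low byte of ((x << 8) | y) never depends on x.
      assert(uint8_t((x << 8) | y) == uint8_t(y));
}
```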
SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { SDValue InsertVal = N->getOperand(1); + SDValue Vec = N->getOperand(0); + + // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex) + // --> (vector_shuffle X, Y) + if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() && + InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + isa<ConstantSDNode>(InsertVal.getOperand(1))) { + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode()); + ArrayRef<int> Mask = SVN->getMask(); + + SDValue X = Vec.getOperand(0); + SDValue Y = Vec.getOperand(1); + + // Vec's operand 0 is using indices from 0 to N-1 and + // operand 1 from N to 2N - 1, where N is the number of + // elements in the vectors. + int XOffset = -1; + if (InsertVal.getOperand(0) == X) { + XOffset = 0; + } else if (InsertVal.getOperand(0) == Y) { + XOffset = X.getValueType().getVectorNumElements(); + } + + if (XOffset != -1) { + SmallVector<int, 16> NewMask(Mask.begin(), Mask.end()); + + auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1)); + NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue(); + assert(NewMask[InsIndex] < + (int)(2 * Vec.getValueType().getVectorNumElements()) && + NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound"); + + SDValue LegalShuffle = + TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X, + Y, NewMask, DAG); + if (LegalShuffle) + return LegalShuffle; + } + } + + // insert_vector_elt V, (bitcast X from vector type), IdxC --> + // bitcast(shuffle (bitcast V), (extended X), Mask) + // Note: We do not use an insert_subvector node because that requires a + // legal subvector type. if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() || !InsertVal.getOperand(0).getValueType().isVector()) return SDValue(); @@ -16674,7 +16646,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { - assert(!OriginalLoad->isVolatile()); + assert(OriginalLoad->isSimple()); EVT ResultVT = EVE->getValueType(0); EVT VecEltVT = InVecVT.getVectorElementType(); @@ -16747,12 +16719,12 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; SDValue To[] = { Load, Chain }; DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + // Make sure to revisit this node to clean it up; it will usually be dead. + AddToWorklist(EVE); // Since we're explicitly calling ReplaceAllUses, add the new node to the // worklist explicitly as well. - AddToWorklist(Load.getNode()); AddUsersToWorklist(Load.getNode()); // Add users too - // Make sure to revisit this node to clean it up; it will usually be dead. - AddToWorklist(EVE); + AddToWorklist(Load.getNode()); ++OpsNarrowed; return SDValue(EVE, 0); } @@ -16982,7 +16954,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { ISD::isNormalLoad(VecOp.getNode()) && !Index->hasPredecessor(VecOp.getNode())) { auto *VecLoad = dyn_cast<LoadSDNode>(VecOp); - if (VecLoad && !VecLoad->isVolatile()) + if (VecLoad && VecLoad->isSimple()) return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad); } @@ -17041,7 +17013,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Make sure we found a non-volatile load and the extractelement is // the only use. 
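[Editor's note] The new `combineInsertEltToShuffle` case folds `insert_vector_elt (vector_shuffle X, Y, M), (extract_vector_elt X, n), i` into a single shuffle by rewriting mask slot `i` to point at lane `n` of the matching source (`n` as-is for X, `n + NumElts` for Y). A small scalar model of the mask rewrite and why it is equivalent:

```c++
#include <cassert>
#include <vector>

// Reference semantics of vector_shuffle over two 4-element sources:
// mask entries 0..3 select from X, 4..7 from Y, negative means undef.
static int shuf(const std::vector<int> &X, const std::vector<int> &Y, int m) {
  return m < 0 ? 0 : (m < 4 ? X[m] : Y[m - 4]);
}

int main() {
  std::vector<int> X = {10, 11, 12, 13}, Y = {20, 21, 22, 23};
  std::vector<int> Mask = {0, 5, 2, 7}; // shuffle(X, Y, Mask)

  int InsIndex = 1, ExtrIndex = 3;      // insert X[3] at lane 1
  // The fold: instead of shuffle-then-insert, patch the mask in place.
  std::vector<int> NewMask = Mask;
  NewMask[InsIndex] = 0 /*XOffset for source X*/ + ExtrIndex;

  for (int i = 0; i < 4; ++i) {
    int ShuffleThenInsert =
        (i == InsIndex) ? X[ExtrIndex] : shuf(X, Y, Mask[i]);
    assert(ShuffleThenInsert == shuf(X, Y, NewMask[i]));
  }
}
```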
- if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) + if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple()) return SDValue(); // If Idx was -1 above, Elt is going to be -1, so just return undef. @@ -17344,17 +17316,16 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) { // the shuffle mask with -1. } - // Turn this into a shuffle with zero if that's legal. - EVT VecVT = Extract.getOperand(0).getValueType(); - if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT)) - return SDValue(); - // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... --> // bitcast (shuffle V, ZeroVec, VectorMask) SDLoc DL(BV); + EVT VecVT = Extract.getOperand(0).getValueType(); SDValue ZeroVec = DAG.getConstant(0, DL, VecVT); - SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec, - ShufMask); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0), + ZeroVec, ShufMask, DAG); + if (!Shuf) + return SDValue(); return DAG.getBitcast(VT, Shuf); } @@ -17656,6 +17627,13 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } } + // A splat of a single element is a SPLAT_VECTOR if supported on the target. + if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand) + if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) { + assert(!V.isUndef() && "Splat of undef should have been handled earlier"); + return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V); + } + // Check if we can express BUILD VECTOR via subvector extract. if (!LegalTypes && (N->getNumOperands() > 1)) { SDValue Op0 = N->getOperand(0); @@ -17829,11 +17807,9 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { } } - if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT)) - return SDValue(); - - return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), - DAG.getBitcast(VT, SV1), Mask); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), + DAG.getBitcast(VT, SV1), Mask, DAG); } SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { @@ -17853,6 +17829,15 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { SDValue In = N->getOperand(0); assert(In.getValueType().isVector() && "Must concat vectors"); + // If the input is a concat_vectors, just make a larger concat by padding + // with smaller undefs. + if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) { + unsigned NumOps = N->getNumOperands() * In.getNumOperands(); + SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end()); + Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType())); + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); + } + SDValue Scalar = peekThroughOneUseBitcasts(In); // concat_vectors(scalar_to_vector(scalar), undef) -> @@ -18002,6 +17987,23 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return SDValue(); } +// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find +// if the subvector can be sourced for free. 
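[Editor's note] The new `visitBUILD_VECTOR` fold above canonicalizes a build_vector whose defined operands are all identical into `ISD::SPLAT_VECTOR` when the target does not expand it. The detection is what `BuildVectorSDNode::getSplatValue` performs; a minimal stand-alone version of the same idea, with an integer sentinel standing in for an undef operand:

```c++
#include <cassert>
#include <optional>
#include <vector>

constexpr int Undef = -999; // stand-in for an undef operand

// Return the splat value if every defined element is identical.
std::optional<int> getSplatValue(const std::vector<int> &Ops) {
  std::optional<int> Splat;
  for (int Op : Ops) {
    if (Op == Undef)
      continue; // undef lanes match anything
    if (Splat && *Splat != Op)
      return std::nullopt;
    Splat = Op;
  }
  return Splat; // also nullopt when every element was undef
}

int main() {
  assert(getSplatValue({7, Undef, 7, 7}) == 7);     // splat -> SPLAT_VECTOR
  assert(!getSplatValue({7, 6, 7, 7}).has_value()); // not a splat
}
```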
+static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) { + if (V.getOpcode() == ISD::INSERT_SUBVECTOR && + V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) { + return V.getOperand(1); + } + auto *IndexC = dyn_cast<ConstantSDNode>(Index); + if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS && + V.getOperand(0).getValueType() == SubVT && + (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) { + uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements(); + return V.getOperand(SubIdx); + } + return SDValue(); +} + static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -18010,39 +18012,29 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1) return SDValue(); + EVT VecVT = BinOp.getValueType(); SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1); - SDValue Index = Extract->getOperand(1); - EVT VT = Extract->getValueType(0); + if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType()) + return SDValue(); - // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find - // if the source subvector is the same type as the one being extracted. - auto GetSubVector = [VT, Index](SDValue V) -> SDValue { - if (V.getOpcode() == ISD::INSERT_SUBVECTOR && - V.getOperand(1).getValueType() == VT && V.getOperand(2) == Index) { - return V.getOperand(1); - } - auto *IndexC = dyn_cast<ConstantSDNode>(Index); - if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS && - V.getOperand(0).getValueType() == VT && - (IndexC->getZExtValue() % VT.getVectorNumElements()) == 0) { - uint64_t SubIdx = IndexC->getZExtValue() / VT.getVectorNumElements(); - return V.getOperand(SubIdx); - } + SDValue Index = Extract->getOperand(1); + EVT SubVT = Extract->getValueType(0); + if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT)) return SDValue(); - }; - SDValue Sub0 = GetSubVector(Bop0); - SDValue Sub1 = GetSubVector(Bop1); + + SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT); + SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT); // TODO: We could handle the case where only 1 operand is being inserted by // creating an extract of the other operand, but that requires checking // number of uses and/or costs. - if (!Sub0 || !Sub1 || !TLI.isOperationLegalOrCustom(BinOpcode, VT)) + if (!Sub0 || !Sub1) return SDValue(); // We are inserting both operands of the wide binop only to extract back // to the narrow vector size. Eliminate all of the insert/extract: // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y - return DAG.getNode(BinOpcode, SDLoc(Extract), VT, Sub0, Sub1, + return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1, BinOp->getFlags()); } @@ -18174,7 +18166,8 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0)); auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); - if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx) + if (!Ld || Ld->getExtensionType() || !Ld->isSimple() || + !ExtIdx) return SDValue(); // Allow targets to opt-out. @@ -18878,7 +18871,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // build_vector. 
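[Editor's note] The refactored `getSubVectorSrc` helper enables the transform stated in the comment below it: `ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y`. If both wide operands were built only to carry the subvectors at `Index`, the wide op and the insert/extract pair are dead weight. A scalar model with 2-element subvectors inside 4-element vectors:

```c++
#include <array>
#include <cassert>

using V4 = std::array<int, 4>;
using V2 = std::array<int, 2>;

static V4 insert(V4 Base, V2 Sub, int Idx) {
  Base[Idx] = Sub[0];
  Base[Idx + 1] = Sub[1];
  return Base;
}
static V2 extract(const V4 &V, int Idx) { return {V[Idx], V[Idx + 1]}; }

int main() {
  V2 X = {1, 2}, Y = {10, 20};
  V4 Junk = {-7, -8, -9, -5}; // the '?' operands: their values never matter
  int Idx = 2;

  // Wide path: insert both subvectors, add the wide vectors, extract back.
  V4 A = insert(Junk, X, Idx), B = insert(Junk, Y, Idx);
  V4 Wide = {A[0] + B[0], A[1] + B[1], A[2] + B[2], A[3] + B[3]};
  V2 WidePath = extract(Wide, Idx);

  // Narrow path the combine produces: binop directly on the subvectors.
  V2 NarrowPath = {X[0] + Y[0], X[1] + Y[1]};
  assert(WidePath == NarrowPath);
}
```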
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { int SplatIndex = SVN->getSplatIndex(); - if (TLI.isExtractVecEltCheap(VT, SplatIndex) && + if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) && TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) { // splat (vector_bo L, R), Index --> // splat (scalar_bo (extelt L, Index), (extelt R, Index)) @@ -19153,22 +19146,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SV1 = DAG.getUNDEF(VT); // Avoid introducing shuffles with illegal mask. - if (!TLI.isShuffleMaskLegal(Mask, VT)) { - ShuffleVectorSDNode::commuteMask(Mask); - - if (!TLI.isShuffleMaskLegal(Mask, VT)) - return SDValue(); - - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) - std::swap(SV0, SV1); - } - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) - return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask); + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) + return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG); } if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG)) @@ -19191,35 +19175,35 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1); int Elt = C0->getZExtValue(); NewMask[0] = Elt; - SDValue Val; // If we have an implict truncate do truncate here as long as it's legal. // if it's not legal, this should if (VT.getScalarType() != InVal.getValueType() && InVal.getValueType().isScalarInteger() && isTypeLegal(VT.getScalarType())) { - Val = + SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal); return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val); } if (VT.getScalarType() == InVecT.getScalarType() && - VT.getVectorNumElements() <= InVecT.getVectorNumElements() && - TLI.isShuffleMaskLegal(NewMask, VT)) { - Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec, - DAG.getUNDEF(InVecT), NewMask); - // If the initial vector is the correct size this shuffle is a - // valid result. - if (VT == InVecT) - return Val; - // If not we must truncate the vector. - if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) { - MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); - SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy); - EVT SubVT = - EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(), - VT.getVectorNumElements()); - Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val, - ZeroIdx); - return Val; + VT.getVectorNumElements() <= InVecT.getVectorNumElements()) { + SDValue LegalShuffle = + TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec, + DAG.getUNDEF(InVecT), NewMask, DAG); + if (LegalShuffle) { + // If the initial vector is the correct size this shuffle is a + // valid result. + if (VT == InVecT) + return LegalShuffle; + // If not we must truncate the vector. 
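[Editor's note] The one-use restriction added above guards the fold `splat (vector_bo L, R), Index --> splat (scalar_bo (extelt L, Index), (extelt R, Index))`: when the vector binop has other users it must be computed anyway, so scalarizing it would only duplicate work. The equivalence being exploited is that a splat of the result observes just one lane:

```c++
#include <array>
#include <cassert>

int main() {
  std::array<int, 4> L = {1, 2, 3, 4}, R = {5, 6, 7, 8};
  int Index = 2;

  // splat (vector_bo L, R), Index
  std::array<int, 4> Sum;
  for (int i = 0; i < 4; ++i)
    Sum[i] = L[i] + R[i];
  std::array<int, 4> SplatOfVec;
  SplatOfVec.fill(Sum[Index]);

  // splat (scalar_bo (extelt L, Index), (extelt R, Index))
  std::array<int, 4> SplatOfScalar;
  SplatOfScalar.fill(L[Index] + R[Index]);

  assert(SplatOfVec == SplatOfScalar); // only lane 'Index' ever mattered
}
```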
+ if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) { + MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy); + EVT SubVT = + EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(), + VT.getVectorNumElements()); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, + LegalShuffle, ZeroIdx); + } } } } @@ -19627,6 +19611,39 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } } + // Make sure all but the first op are undef or constant. + auto ConcatWithConstantOrUndef = [](SDValue Concat) { + return Concat.getOpcode() == ISD::CONCAT_VECTORS && + std::all_of(std::next(Concat->op_begin()), Concat->op_end(), + [](const SDValue &Op) { + return Op.isUndef() || + ISD::isBuildVectorOfConstantSDNodes(Op.getNode()); + }); + }; + + // The following pattern is likely to emerge with vector reduction ops. Moving + // the binary operation ahead of the concat may allow using a narrower vector + // instruction that has better performance than the wide version of the op: + // VBinOp (concat X, undef/constant), (concat Y, undef/constant) --> + // concat (VBinOp X, Y), VecC + if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) && + (LHS.hasOneUse() || RHS.hasOneUse())) { + EVT NarrowVT = LHS.getOperand(0).getValueType(); + if (NarrowVT == RHS.getOperand(0).getValueType() && + TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) { + SDLoc DL(N); + unsigned NumOperands = LHS.getNumOperands(); + SmallVector<SDValue, 4> ConcatOps; + for (unsigned i = 0; i != NumOperands; ++i) { + // This constant fold for operands 1 and up. + ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i), + RHS.getOperand(i))); + } + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); + } + } + if (SDValue V = scalarizeBinOpOfSplats(N, DAG)) return V; @@ -19723,7 +19740,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // Token chains must be identical. if (LHS.getOperand(0) != RHS.getOperand(0) || // Do not let this transformation reduce the number of volatile loads. - LLD->isVolatile() || RLD->isVolatile() || + // Be conservative for atomics for the moment + // TODO: This does appear to be legal for unordered atomics (see D66309) + !LLD->isSimple() || !RLD->isSimple() || // FIXME: If either is a pre/post inc/dec load, // we'd need to split out the address adjustment. LLD->isIndexed() || RLD->isIndexed() || @@ -19928,7 +19947,7 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset( const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC) { - if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint())) + if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType())) return SDValue(); // If we are before legalize types, we want the other legalization to happen @@ -20016,8 +20035,13 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, // when the condition can be materialized as an all-ones register. Any // single bit-test can be materialized as an all-ones register with // shift-left and shift-right-arith. + // TODO: The operation legality checks could be loosened to include "custom", + // but that may cause regressions for targets that do not have shift + // instructions. 
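[Editor's note] The new `SimplifyVBinOp` pattern above hoists the operation over a concat whose trailing operands are undef or constants: `binop (concat X, C1), (concat Y, C2) --> concat (binop X, Y), (binop C1, C2)`, where the tail constant-folds away and the live half can use a narrower vector op. Element-wise this is just distributing the op across the halves:

```c++
#include <array>
#include <cassert>

using V2 = std::array<int, 2>;
using V4 = std::array<int, 4>;

static V4 concat(V2 A, V2 B) { return {A[0], A[1], B[0], B[1]}; }

int main() {
  V2 X = {1, 2}, Y = {30, 40};
  V2 C1 = {0, 0}, C2 = {7, 7}; // constant tails (could also be undef)

  V4 L = concat(X, C1), R = concat(Y, C2);
  V4 Wide;
  for (int i = 0; i < 4; ++i)
    Wide[i] = L[i] + R[i]; // the wide binop

  V2 NarrowLo = {X[0] + Y[0], X[1] + Y[1]};     // the real work, half width
  V2 FoldedHi = {C1[0] + C2[0], C1[1] + C2[1]}; // folds at combine time
  assert(Wide == concat(NarrowLo, FoldedHi));
}
```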
if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && - N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) { + N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2) && + TLI.isOperationLegal(ISD::SHL, VT) && + TLI.isOperationLegal(ISD::SRA, VT)) { SDValue AndLHS = N0->getOperand(0); auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { @@ -20209,7 +20233,10 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { /// => /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form /// does not require additional intermediate precision] -SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) { +/// For the last iteration, put numerator N into it to gain more precision: +/// Result = N X_i + X_i (N - N A X_i) +SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, + SDNodeFlags Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -20230,25 +20257,39 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) { if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) { AddToWorklist(Est.getNode()); + SDLoc DL(Op); if (Iterations) { - SDLoc DL(Op); SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); - // Newton iterations: Est = Est + Est (1 - Arg * Est) + // Newton iterations: Est = Est + Est (N - Arg * Est) + // If this is the last iteration, also multiply by the numerator. for (int i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); + SDValue MulEst = Est; + + if (i == Iterations - 1) { + MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags); + AddToWorklist(MulEst.getNode()); + } + + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, + (i == Iterations - 1 ? N : FPOne), NewEst, Flags); AddToWorklist(NewEst.getNode()); NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags); + Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags); AddToWorklist(Est.getNode()); } + } else { + // If no iterations are available, multiply with N. + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags); + AddToWorklist(Est.getNode()); } + return Est; } @@ -20271,31 +20312,19 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); - AddToWorklist(HalfArg.getNode()); - HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); - AddToWorklist(HalfArg.getNode()); // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); - AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); - AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); - AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); - AddToWorklist(Est.getNode()); } // If non-reciprocal square root is requested, multiply the result by Arg. 
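[Editor's note] The rename from `BuildReciprocalEstimate` to `BuildDivEstimate` above reflects folding the numerator into the final Newton step. The loop comes from applying Newton's method to f(X) = 1/X - A, and the last-iteration fusion is just the whole update scaled by N, so no separate final multiply (and its extra rounding) is needed:

```latex
% Newton on f(X) = 1/X - A, whose root is X = 1/A:
%   X_{i+1} = X_i - f(X_i)/f'(X_i) = X_i + X_i (1 - A X_i) = X_i (2 - A X_i)
% Folding the numerator N into the last step computes N/A directly:
\[
X_{i+1} = X_i\,(2 - A X_i), \qquad
\frac{N}{A} \approx N X_{i+1} = N X_i + X_i\bigl(N - N A X_i\bigr),
\]
% which is exactly Est = MulEst + Est * (N - Op * MulEst) with MulEst = N * Est
% in the code above.
```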
- if (!Reciprocal) { + if (!Reciprocal) Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); - AddToWorklist(Est.getNode()); - } return Est; } @@ -20321,13 +20350,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, // E = (E * -0.5) * ((A * E) * E + -3.0) for (unsigned i = 0; i < Iterations; ++i) { SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); - AddToWorklist(AE.getNode()); - SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); - AddToWorklist(AEE.getNode()); - SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); - AddToWorklist(RHS.getNode()); // When calculating a square root at the last iteration build: // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) @@ -20340,10 +20364,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, // SQRT: LHS = (A * E) * -0.5 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); } - AddToWorklist(LHS.getNode()); Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); - AddToWorklist(Est.getNode()); } return Est; @@ -20400,16 +20422,11 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op); SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT); Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est); - AddToWorklist(Fabs.getNode()); - AddToWorklist(IsDenorm.getNode()); - AddToWorklist(Est.getNode()); } else { // X == 0.0 ? 0.0 : Est SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est); - AddToWorklist(IsZero.getNode()); - AddToWorklist(Est.getNode()); } } } @@ -20432,6 +20449,7 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { struct MemUseCharacteristics { bool IsVolatile; + bool IsAtomic; SDValue BasePtr; int64_t Offset; Optional<int64_t> NumBytes; @@ -20447,18 +20465,20 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { : (LSN->getAddressingMode() == ISD::PRE_DEC) ? -1 * C->getSExtValue() : 0; - return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/, + return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(), + Offset /*base offset*/, Optional<int64_t>(LSN->getMemoryVT().getStoreSize()), LSN->getMemOperand()}; } if (const auto *LN = cast<LifetimeSDNode>(N)) - return {false /*isVolatile*/, LN->getOperand(1), + return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1), (LN->hasOffset()) ? LN->getOffset() : 0, (LN->hasOffset()) ? Optional<int64_t>(LN->getSize()) : Optional<int64_t>(), (MachineMemOperand *)nullptr}; // Default. - return {false /*isvolatile*/, SDValue(), (int64_t)0 /*offset*/, + return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(), + (int64_t)0 /*offset*/, Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr}; }; @@ -20474,6 +20494,11 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { if (MUC0.IsVolatile && MUC1.IsVolatile) return true; + // Be conservative about atomics for the moment + // TODO: This is way overconservative for unordered atomics (see D66309) + if (MUC0.IsAtomic && MUC1.IsAtomic) + return true; + if (MUC0.MMO && MUC1.MMO) { if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) || (MUC1.MMO->isInvariant() && MUC0.MMO->isStore())) @@ -20555,7 +20580,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallPtrSet<SDNode *, 16> Visited; // Visited node set. // Get alias information for node. 
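[Editor's note] The worklist churn removed above does not change the math of `buildSqrtNRTwoConst`; the loop still implements the reciprocal-square-root refinement obtained from Newton's method on f(E) = E^-2 - A, which is where the two constants -0.5 and -3.0 come from:

```latex
% Newton on f(E) = E^{-2} - A (root at E = 1/\sqrt{A}), with f'(E) = -2E^{-3}:
%   E_{i+1} = E_i + (E_i^{-2} - A) E_i^3 / 2 = (E_i \cdot -0.5)((A E_i) E_i - 3)
\[
E_{i+1} = -\tfrac{1}{2}\,E_i\,\bigl(A E_i^2 - 3\bigr), \qquad
\sqrt{A} \approx -\tfrac{1}{2}\,(A E_i)\,\bigl(A E_i^2 - 3\bigr),
\]
% the second form being the non-reciprocal case: since sqrt(A) = A / sqrt(A),
% the last iteration substitutes S = A * E, matching
% "S = ((A * E) * -0.5) * ((A * E) * E + -3.0)" in the comment above.
```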
- const bool IsLoad = isa<LoadSDNode>(N) && !cast<LoadSDNode>(N)->isVolatile(); + // TODO: relax aliasing for unordered atomics (see D66309) + const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple(); // Starting off. Chains.push_back(OriginalChain); @@ -20571,8 +20597,9 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, case ISD::LOAD: case ISD::STORE: { // Get alias information for C. + // TODO: Relax aliasing for unordered atomics (see D66309) bool IsOpLoad = isa<LoadSDNode>(C.getNode()) && - !cast<LSBaseSDNode>(C.getNode())->isVolatile(); + cast<LSBaseSDNode>(C.getNode())->isSimple(); if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) { // Look further up the chain. C = C.getOperand(0); @@ -20727,7 +20754,8 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { // If the chain has more than one use, then we can't reorder the mem ops. if (!SDValue(Chain, 0)->hasOneUse()) break; - if (Chain->isVolatile() || Chain->isIndexed()) + // TODO: Relax for unordered atomics (see D66309) + if (!Chain->isSimple() || Chain->isIndexed()) break; // Find the base pointer and offset for this memory node. @@ -20795,11 +20823,11 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps); CombineTo(St, TF); - AddToWorklist(STChain); - // Add TF operands worklist in reverse order. - for (auto I = TF->getNumOperands(); I;) - AddToWorklist(TF->getOperand(--I).getNode()); + // Add TF and its operands to the worklist. AddToWorklist(TF.getNode()); + for (const SDValue &Op : TF->ops()) + AddToWorklist(Op.getNode()); + AddToWorklist(STChain); return true; } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 22c23ba877e8..6d7260d7aee5 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -174,7 +174,7 @@ static unsigned findSinkableLocalRegDef(MachineInstr &MI) { if (RegDef) return 0; RegDef = MO.getReg(); - } else if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + } else if (Register::isVirtualRegister(MO.getReg())) { // This is another use of a vreg. Don't try to sink it. return 0; } @@ -1213,14 +1213,13 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { if (!FrameAlign) FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL); Flags.setByValSize(FrameSize); - Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Arg.IsNest) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); - unsigned OriginalAlignment = DL.getABITypeAlignment(Arg.Ty); - Flags.setOrigAlign(OriginalAlignment); + Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty))); CLI.OutVals.push_back(Arg.Val); CLI.OutFlags.push_back(Flags); @@ -1237,8 +1236,8 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs); // Set labels for heapallocsite call. 
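[Editor's note] The sweep from `TargetRegisterInfo::isVirtualRegister(unsigned)` to `Register::isVirtualRegister` (and the member form `Reg.isPhysical()` in later hunks) moves the vreg/physreg distinction onto a dedicated value type instead of a bare `unsigned`. A stripped-down sketch of the idea, assuming LLVM's convention of tagging virtual register numbers with the top bit; the real class carries more (stack-slot encoding, conversions to MCRegister):

```c++
#include <cassert>
#include <cstdint>

// Minimal model of llvm::Register: an index whose top bit marks "virtual".
class Register {
  uint32_t Reg = 0;

public:
  static constexpr uint32_t FirstVirtual = 1u << 31;
  constexpr Register(uint32_t R) : Reg(R) {}

  constexpr bool isVirtual() const { return (Reg & FirstVirtual) != 0; }
  // 0 is reserved as "no register"; small positive numbers are physical.
  constexpr bool isPhysical() const { return Reg != 0 && !isVirtual(); }
  constexpr operator uint32_t() const { return Reg; } // interop with unsigned
};

int main() {
  Register NoReg(0), Phys(5), Virt(Register::FirstVirtual + 3);
  assert(!NoReg.isPhysical() && !NoReg.isVirtual());
  assert(Phys.isPhysical() && !Phys.isVirtual());
  assert(Virt.isVirtual() && !Virt.isPhysical());
}
```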
- if (CLI.CS && CLI.CS->getInstruction()->getMetadata("heapallocsite")) { - MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite"); + if (CLI.CS && CLI.CS->getInstruction()->hasMetadata("heapallocsite")) { + const MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite"); MF->addCodeViewHeapAllocSite(CLI.Call, MD); } @@ -1303,6 +1302,7 @@ bool FastISel::selectCall(const User *I) { ExtraInfo |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) ExtraInfo |= InlineAsm::Extra_IsAlignStack; + ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::INLINEASM)) @@ -1388,9 +1388,11 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { "Expected inlined-at fields to agree"); // A dbg.declare describes the address of a source variable, so lower it // into an indirect DBG_VALUE. + auto *Expr = DI->getExpression(); + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, - *Op, DI->getVariable(), DI->getExpression()); + TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ false, + *Op, DI->getVariable(), Expr); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. @@ -1414,19 +1416,19 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { if (CI->getBitWidth() > 64) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addCImm(CI) - .addImm(0U) + .addReg(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addImm(CI->getZExtValue()) - .addImm(0U) + .addReg(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); } else if (const auto *CF = dyn_cast<ConstantFP>(V)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addFPImm(CF) - .addImm(0U) + .addReg(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); } else if (unsigned Reg = lookUpRegForValue(V)) { @@ -1453,24 +1455,12 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel()); return true; } - case Intrinsic::objectsize: { - ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1)); - unsigned long long Res = CI->isZero() ? -1ULL : 0; - Constant *ResCI = ConstantInt::get(II->getType(), Res); - unsigned ResultReg = getRegForValue(ResCI); - if (!ResultReg) - return false; - updateValueMap(II, ResultReg); - return true; - } - case Intrinsic::is_constant: { - Constant *ResCI = ConstantInt::get(II->getType(), 0); - unsigned ResultReg = getRegForValue(ResCI); - if (!ResultReg) - return false; - updateValueMap(II, ResultReg); - return true; - } + case Intrinsic::objectsize: + llvm_unreachable("llvm.objectsize.* should have been lowered already"); + + case Intrinsic::is_constant: + llvm_unreachable("llvm.is.constant.* should have been lowered already"); + case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: case Intrinsic::expect: { @@ -1677,11 +1667,11 @@ bool FastISel::selectInstruction(const Instruction *I) { /// (fall-through) successor, and update the CFG. 
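[Editor's note] The dbg.declare change above switches from the DBG_VALUE `IsIndirect` flag to encoding the extra load in the expression itself via `DIExpression::append(Expr, {dwarf::DW_OP_deref})`: the variable's value lives behind the address in the operand, and spelling that out as a DWARF operation keeps a single canonical representation. A toy model of appending to an expression's op list (the real API also re-canonicalizes fragments, which this sketch ignores):

```c++
#include <cassert>
#include <initializer_list>
#include <vector>

// Toy DWARF expression: just a list of opcodes.
enum Op { DW_OP_deref = 0x06, DW_OP_plus_uconst = 0x23 };

std::vector<Op> append(std::vector<Op> Expr, std::initializer_list<Op> Ops) {
  Expr.insert(Expr.end(), Ops.begin(), Ops.end());
  return Expr;
}

int main() {
  // dbg.declare provides an address; the value is one dereference away.
  std::vector<Op> Expr;               // empty expression: "the location itself"
  Expr = append(Expr, {DW_OP_deref}); // now: "the memory at that location"
  assert(Expr.size() == 1 && Expr[0] == DW_OP_deref);
}
```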
void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, const DebugLoc &DbgLoc) { - if (FuncInfo.MBB->getBasicBlock()->size() > 1 && + if (FuncInfo.MBB->getBasicBlock()->sizeWithoutDebug() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { - // For more accurate line information if this is the only instruction - // in the block then emit it, otherwise we have the unconditional - // fall-through case, which needs no instructions. + // For more accurate line information if this is the only non-debug + // instruction in the block then emit it, otherwise we have the + // unconditional fall-through case, which needs no instructions. } else { // The unconditional branch case. TII.insertBranch(*FuncInfo.MBB, MSucc, nullptr, @@ -2028,7 +2018,7 @@ unsigned FastISel::createResultReg(const TargetRegisterClass *RC) { unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op, unsigned OpNum) { - if (TargetRegisterInfo::isVirtualRegister(Op)) { + if (Register::isVirtualRegister(Op)) { const TargetRegisterClass *RegClass = TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); if (!MRI.constrainRegClass(Op, RegClass)) { @@ -2236,7 +2226,7 @@ unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode, unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx) { unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); - assert(TargetRegisterInfo::isVirtualRegister(Op0) && + assert(Register::isVirtualRegister(Op0) && "Cannot yet extract from physregs"); const TargetRegisterClass *RC = MRI.getRegClass(Op0); MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); @@ -2417,10 +2407,9 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { } else return nullptr; - bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal) != nullptr; - bool IsInvariant = I->getMetadata(LLVMContext::MD_invariant_load) != nullptr; - bool IsDereferenceable = - I->getMetadata(LLVMContext::MD_dereferenceable) != nullptr; + bool IsNonTemporal = I->hasMetadata(LLVMContext::MD_nontemporal); + bool IsInvariant = I->hasMetadata(LLVMContext::MD_invariant_load); + bool IsDereferenceable = I->hasMetadata(LLVMContext::MD_dereferenceable); const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range); AAMDNodes AAInfo; diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 8b1759246b76..cf6711adad48 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -424,7 +425,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { unsigned BitWidth = IntVT.getSizeInBits(); unsigned DestReg = ValueMap[PN]; - if (!TargetRegisterInfo::isVirtualRegister(DestReg)) + if (!Register::isVirtualRegister(DestReg)) return; LiveOutRegInfo.grow(DestReg); LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg]; @@ -445,7 +446,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { assert(ValueMap.count(V) && "V should have been placed in ValueMap when its" "CopyToReg node was created."); unsigned SrcReg = ValueMap[V]; - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if 
(!Register::isVirtualRegister(SrcReg)) { DestLOI.IsValid = false; return; } @@ -480,7 +481,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { assert(ValueMap.count(V) && "V should have been placed in ValueMap when " "its CopyToReg node was created."); unsigned SrcReg = ValueMap[V]; - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if (!Register::isVirtualRegister(SrcReg)) { DestLOI.IsValid = false; return; } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 9bc07d35dfc5..c5095995ec2e 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -71,7 +71,7 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses, if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1))) continue; if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1))) - if (TargetRegisterInfo::isPhysicalRegister(RN->getReg())) + if (Register::isPhysicalRegister(RN->getReg())) continue; NumImpUses = N - I; break; @@ -86,7 +86,7 @@ void InstrEmitter:: EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) { unsigned VRBase = 0; - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if (Register::isVirtualRegister(SrcReg)) { // Just use the input register directly! SDValue Op(Node, ResNo); if (IsClone) @@ -114,7 +114,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == ResNo) { unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + if (Register::isVirtualRegister(DestReg)) { VRBase = DestReg; Match = false; } else if (DestReg != SrcReg) @@ -139,7 +139,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, UseRC = RC; else if (RC) { const TargetRegisterClass *ComRC = - TRI->getCommonSubClass(UseRC, RC, VT.SimpleTy); + TRI->getCommonSubClass(UseRC, RC); // If multiple uses expect disjoint register classes, we emit // copies in AddRegisterOperand. if (ComRC) @@ -219,7 +219,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(VRBase)); + assert(Register::isPhysicalRegister(VRBase)); MIB.addReg(VRBase, RegState::Define); } @@ -229,7 +229,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == i) { unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { const TargetRegisterClass *RegRC = MRI->getRegClass(Reg); if (RegRC == RC) { VRBase = Reg; @@ -272,7 +272,7 @@ unsigned InstrEmitter::getVR(SDValue Op, // does not include operand register class info. 
const TargetRegisterClass *RC = TLI->getRegClassFor( Op.getSimpleValueType(), Op.getNode()->isDivergent()); - unsigned VReg = MRI->createVirtualRegister(RC); + Register VReg = MRI->createVirtualRegister(RC); BuildMI(*MBB, InsertPos, Op.getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), VReg); return VReg; @@ -319,7 +319,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, if (!ConstrainedRC) { OpRC = TRI->getAllocatableClass(OpRC); assert(OpRC && "Constraints cannot be fulfilled for allocation"); - unsigned NewVReg = MRI->createVirtualRegister(OpRC); + Register NewVReg = MRI->createVirtualRegister(OpRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); VReg = NewVReg; @@ -385,9 +385,8 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, (IIRC && TRI->isDivergentRegClass(IIRC))) : nullptr; - if (OpRC && IIRC && OpRC != IIRC && - TargetRegisterInfo::isVirtualRegister(VReg)) { - unsigned NewVReg = MRI->createVirtualRegister(IIRC); + if (OpRC && IIRC && OpRC != IIRC && Register::isVirtualRegister(VReg)) { + Register NewVReg = MRI->createVirtualRegister(IIRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); VReg = NewVReg; @@ -465,7 +464,7 @@ unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx, // register instead. RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT, isDivergent), SubIdx); assert(RC && "No legal register class for VT supports that SubIdx"); - unsigned NewReg = MRI->createVirtualRegister(RC); + Register NewReg = MRI->createVirtualRegister(RC); BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg) .addReg(VReg); return NewReg; @@ -485,7 +484,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node) { unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + if (Register::isVirtualRegister(DestReg)) { VRBase = DestReg; break; } @@ -503,7 +502,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, unsigned Reg; MachineInstr *DefMI; RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0)); - if (R && TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + if (R && Register::isPhysicalRegister(R->getReg())) { Reg = R->getReg(); DefMI = nullptr; } else { @@ -529,7 +528,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // Reg may not support a SubIdx sub-register, and we may need to // constrain its register class or issue a COPY to a compatible register // class. 
- if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) Reg = ConstrainForSubReg(Reg, SubIdx, Node->getOperand(0).getSimpleValueType(), Node->isDivergent(), Node->getDebugLoc()); @@ -541,7 +540,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, MachineInstrBuilder CopyMI = BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), VRBase); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) CopyMI.addReg(Reg, 0, SubIdx); else CopyMI.addReg(TRI->getSubReg(Reg, SubIdx)); @@ -614,7 +613,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); const TargetRegisterClass *DstRC = TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx)); - unsigned NewVReg = MRI->createVirtualRegister(DstRC); + Register NewVReg = MRI->createVirtualRegister(DstRC); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); @@ -631,7 +630,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, bool IsClone, bool IsCloned) { unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); - unsigned NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); + Register NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg); unsigned NumOps = Node->getNumOperands(); @@ -649,7 +648,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1)); // Skip physical registers as they don't have a vreg to get and we'll // insert copies for them in TwoAddressInstructionPass anyway. - if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + if (!R || !Register::isPhysicalRegister(R->getReg())) { unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); @@ -678,7 +677,7 @@ MachineInstr * InstrEmitter::EmitDbgValue(SDDbgValue *SD, DenseMap<SDValue, unsigned> &VRBaseMap) { MDNode *Var = SD->getVariable(); - MDNode *Expr = SD->getExpression(); + const DIExpression *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); @@ -702,12 +701,11 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, // EmitTargetCodeForFrameDebugValue is responsible for allocation. auto FrameMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) .addFrameIndex(SD->getFrameIx()); + if (SD->isIndirect()) - // Push [fi + 0] onto the DIExpression stack. - FrameMI.addImm(0); - else - // Push fi onto the DIExpression stack. - FrameMI.addReg(0); + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); + + FrameMI.addReg(0); return FrameMI.addMetadata(Var).addMetadata(Expr); } // Otherwise, we're going to create an instruction here. @@ -753,9 +751,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, // Indirect addressing is indicated by an Imm as the second parameter. 
if (SD->isIndirect()) - MIB.addImm(0U); - else - MIB.addReg(0U, RegState::Debug); + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); + + MIB.addReg(0U, RegState::Debug); MIB.addMetadata(Var); MIB.addMetadata(Expr); @@ -928,12 +926,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // // Collect all the used physreg defs, and make sure that any unused physreg // defs are marked as dead. - SmallVector<unsigned, 8> UsedRegs; + SmallVector<Register, 8> UsedRegs; // Additional results must be physical register defs. if (HasPhysRegOuts) { for (unsigned i = NumDefs; i < NumResults; ++i) { - unsigned Reg = II.getImplicitDefs()[i - NumDefs]; + Register Reg = II.getImplicitDefs()[i - NumDefs]; if (!Node->hasAnyUseOfValue(i)) continue; // This implicitly defined physreg has a use. @@ -960,8 +958,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // direct RegisterSDNode operands. for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) { - unsigned Reg = R->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = R->getReg(); + if (Reg.isPhysical()) UsedRegs.push_back(Reg); } } @@ -995,8 +993,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, case ISD::CopyToReg: { unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); SDValue SrcVal = Node->getOperand(2); - if (TargetRegisterInfo::isVirtualRegister(DestReg) && - SrcVal.isMachineOpcode() && + if (Register::isVirtualRegister(DestReg) && SrcVal.isMachineOpcode() && SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Instead building a COPY to that vreg destination, build an // IMPLICIT_DEF instruction instead. @@ -1093,16 +1090,18 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // FIXME: Add dead flags for physical and virtual registers defined. // For now, mark physical register defs as implicit to help fast // regalloc. This makes inline asm look a lot like calls. - MIB.addReg(Reg, RegState::Define | - getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); + MIB.addReg(Reg, + RegState::Define | + getImplRegState(Register::isPhysicalRegister(Reg))); } break; case InlineAsm::Kind_RegDefEarlyClobber: case InlineAsm::Kind_Clobber: for (unsigned j = 0; j != NumVals; ++j, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); - MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber | - getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); + MIB.addReg(Reg, + RegState::Define | RegState::EarlyClobber | + getImplRegState(Register::isPhysicalRegister(Reg))); ECRegs.push_back(Reg); } break; @@ -1136,7 +1135,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // then remove the early-clobber flag. 
for (unsigned Reg : ECRegs) { if (MIB->readsRegister(Reg, TRI)) { - MachineOperand *MO = + MachineOperand *MO = MIB->findRegisterDefOperand(Reg, false, false, TRI); assert(MO && "No def operand for clobbered register?"); MO->setIsEarlyClobber(false); diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index bf817f00f83d..f9fdf525240f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -161,6 +162,7 @@ private: SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, const SDLoc &dl, SDValue ChainIn); SDValue ExpandBUILD_VECTOR(SDNode *Node); + SDValue ExpandSPLAT_VECTOR(SDNode *Node); SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); void ExpandDYNAMIC_STACKALLOC(SDNode *Node, SmallVectorImpl<SDValue> &Results); @@ -236,6 +238,16 @@ public: } ReplacedNode(Old); } + + void ReplaceNodeWithValue(SDValue Old, SDValue New) { + LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); + dbgs() << " with: "; New->dump(&DAG)); + + DAG.ReplaceAllUsesOfValueWith(Old, New); + if (UpdatedNodes) + UpdatedNodes->insert(New.getNode()); + ReplacedNode(Old.getNode()); + } }; } // end anonymous namespace @@ -493,8 +505,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // expand it. EVT MemVT = ST->getMemoryVT(); const DataLayout &DL = DAG.getDataLayout(); - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, - *ST->getMemOperand())) { + if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT, + *ST->getMemOperand())) { LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n"); SDValue Result = TLI.expandUnalignedStore(ST, DAG); ReplaceNode(SDValue(ST, 0), Result); @@ -608,8 +620,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { EVT MemVT = ST->getMemoryVT(); // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, - *ST->getMemOperand())) { + if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT, + *ST->getMemOperand())) { SDValue Result = TLI.expandUnalignedStore(ST, DAG); ReplaceNode(SDValue(ST, 0), Result); } @@ -669,8 +681,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { const DataLayout &DL = DAG.getDataLayout(); // If this is an unaligned load and the target doesn't support it, // expand it. 
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, - *LD->getMemOperand())) { + if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT, + *LD->getMemOperand())) { std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG); } break; @@ -894,11 +906,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { if (SrcVT.getScalarType() == MVT::f16) { EVT ISrcVT = SrcVT.changeTypeToInteger(); EVT IDestVT = DestVT.changeTypeToInteger(); - EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT()); + EVT ILoadVT = TLI.getRegisterType(IDestVT.getSimpleVT()); - SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT, - Chain, Ptr, ISrcVT, - LD->getMemOperand()); + SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, ILoadVT, Chain, + Ptr, ISrcVT, LD->getMemOperand()); Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result); Chain = Result.getValue(1); break; @@ -959,15 +970,13 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { #ifndef NDEBUG for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == - TargetLowering::TypeLegal || - TLI.isTypeLegal(Node->getValueType(i))) && + assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == + TargetLowering::TypeLegal && "Unexpected illegal type!"); for (const SDValue &Op : Node->op_values()) assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == TargetLowering::TypeLegal || - TLI.isTypeLegal(Op.getValueType()) || Op.getOpcode() == ISD::TargetConstant || Op.getOpcode() == ISD::Register) && "Unexpected illegal type!"); @@ -1004,7 +1013,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(0).getValueType()); break; - case ISD::FP_ROUND_INREG: case ISD::SIGN_EXTEND_INREG: { EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT(); Action = TLI.getOperationAction(Node->getOpcode(), InnerType); @@ -1097,38 +1105,15 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; } break; - case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: - case ISD::STRICT_FMUL: - case ISD::STRICT_FDIV: - case ISD::STRICT_FREM: - case ISD::STRICT_FSQRT: - case ISD::STRICT_FMA: - case ISD::STRICT_FPOW: - case ISD::STRICT_FPOWI: - case ISD::STRICT_FSIN: - case ISD::STRICT_FCOS: - case ISD::STRICT_FEXP: - case ISD::STRICT_FEXP2: - case ISD::STRICT_FLOG: - case ISD::STRICT_FLOG10: - case ISD::STRICT_FLOG2: - case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: - case ISD::STRICT_FMAXNUM: - case ISD::STRICT_FMINNUM: - case ISD::STRICT_FCEIL: - case ISD::STRICT_FFLOOR: - case ISD::STRICT_FROUND: - case ISD::STRICT_FTRUNC: - case ISD::STRICT_FP_ROUND: - case ISD::STRICT_FP_EXTEND: - // These pseudo-ops get legalized as if they were their non-strict - // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT - // is also legal, but if ISD::FSQRT requires expansion then so does - // ISD::STRICT_FSQRT. + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + // These pseudo-ops are the same as the other STRICT_ ops except + // they are registered with setOperationAction() using the input type + // instead of the output type. 
Action = TLI.getStrictFPOperationAction(Node->getOpcode(), - Node->getValueType(0)); + Node->getOperand(1).getValueType()); break; case ISD::SADDSAT: case ISD::UADDSAT: @@ -1139,7 +1124,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: { + case ISD::UMULFIX: + case ISD::UMULFIXSAT: { unsigned Scale = Node->getConstantOperandVal(2); Action = TLI.getFixedPointOperationAction(Node->getOpcode(), Node->getValueType(0), Scale); @@ -1650,7 +1636,6 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); NeedInvert = false; - bool NeedSwap = false; switch (TLI.getCondCodeAction(CCCode, OpVT)) { default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: @@ -1664,6 +1649,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, return true; } // Swapping operands didn't work. Try inverting the condition. + bool NeedSwap = false; InvCC = getSetCCInverse(CCCode, OpVT.isInteger()); if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { // If inverting the condition is not enough, try swapping operands @@ -2021,6 +2007,14 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { return ExpandVectorBuildThroughStack(Node); } +SDValue SelectionDAGLegalize::ExpandSPLAT_VECTOR(SDNode *Node) { + SDLoc DL(Node); + EVT VT = Node->getValueType(0); + SDValue SplatVal = Node->getOperand(0); + + return DAG.getSplatBuildVector(VT, DL, SplatVal); +} + // Expand a node into a call to a libcall. If the result value // does not fit into a register, return the lo part and set the hi part to the // by-reg argument. If it does fit into a single register, return the result @@ -2074,12 +2068,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); if (!CallInfo.second.getNode()) { - LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump()); + LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump(&DAG)); // It's a tailcall, return the chain (which is the DAG root). return DAG.getRoot(); } - LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump()); + LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump(&DAG)); return CallInfo.first; } @@ -2167,6 +2161,9 @@ SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128) { + if (Node->isStrictFPOpcode()) + Node = DAG.mutateStrictFPToFP(Node); + RTLIB::Libcall LC; switch (Node->getOperand(0).getValueType().getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); @@ -2815,6 +2812,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::STRICT_FP_ROUND: + // This expansion does not honor the "strict" properties anyway, + // so prefer falling back to the non-strict operation if legal. + if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) + break; Tmp1 = EmitStackConvert(Node->getOperand(1), Node->getValueType(0), Node->getValueType(0), dl, Node->getOperand(0)); @@ -2829,6 +2832,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; case ISD::STRICT_FP_EXTEND: + // This expansion does not honor the "strict" properties anyway, + // so prefer falling back to the non-strict operation if legal. 
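// Aside: the strict nodes handled here fall into two groups for legality
// queries. Most are keyed on the result type, but the
// STRICT_L(L)RINT/L(L)ROUND family is registered with the FP *operand*
// type. A hypothetical helper capturing the rule ('strictActionVT' is an
// invented name for illustration):
//
//   static EVT strictActionVT(const SDNode *N) {
//     switch (N->getOpcode()) {
//     case ISD::STRICT_LRINT:  case ISD::STRICT_LLRINT:
//     case ISD::STRICT_LROUND: case ISD::STRICT_LLROUND:
//       return N->getOperand(1).getValueType(); // keyed on the FP input
//     default:
//       return N->getValueType(0);              // keyed on the result
//     }
//   }
//
// The same split shows up again in the strict-FP fallback at the end of
// ExpandNode below.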
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) + break; Tmp1 = EmitStackConvert(Node->getOperand(1), Node->getOperand(1).getValueType(), Node->getValueType(0), dl, Node->getOperand(0)); @@ -2873,19 +2882,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } - case ISD::FP_ROUND_INREG: { - // The only way we can lower this is to turn it into a TRUNCSTORE, - // EXTLOAD pair, targeting a temporary location (a stack slot). - - // NOTE: there is a choice here between constantly creating new stack - // slots and always reusing the same one. We currently always create - // new ones, as reuse may inhibit scheduling. - EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); - Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT, - Node->getValueType(0), dl); - Results.push_back(Tmp1); - break; - } case ISD::UINT_TO_FP: if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) { Results.push_back(Tmp1); @@ -2901,33 +2897,26 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) Results.push_back(Tmp1); break; + case ISD::STRICT_FP_TO_SINT: + if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) { + ReplaceNode(Node, Tmp1.getNode()); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_SINT node\n"); + return true; + } + break; case ISD::FP_TO_UINT: - if (TLI.expandFP_TO_UINT(Node, Tmp1, DAG)) + if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG)) Results.push_back(Tmp1); break; - case ISD::LROUND: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, - RTLIB::LROUND_F64, RTLIB::LROUND_F80, - RTLIB::LROUND_F128, - RTLIB::LROUND_PPCF128)); - break; - case ISD::LLROUND: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32, - RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, - RTLIB::LLROUND_F128, - RTLIB::LLROUND_PPCF128)); - break; - case ISD::LRINT: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32, - RTLIB::LRINT_F64, RTLIB::LRINT_F80, - RTLIB::LRINT_F128, - RTLIB::LRINT_PPCF128)); - break; - case ISD::LLRINT: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32, - RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, - RTLIB::LLRINT_F128, - RTLIB::LLRINT_PPCF128)); + case ISD::STRICT_FP_TO_UINT: + if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG)) { + // Relink the chain. + DAG.ReplaceAllUsesOfValueWith(SDValue(Node,1), Tmp2); + // Replace the new UINT result. + ReplaceNodeWithValue(SDValue(Node, 0), Tmp1); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_UINT node\n"); + return true; + } break; case ISD::VAARG: Results.push_back(DAG.expandVAArg(Node)); @@ -3348,6 +3337,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: + case ISD::UMULFIXSAT: Results.push_back(TLI.expandFixedPointMul(Node, DAG)); break; case ISD::ADDCARRY: @@ -3662,6 +3652,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::BUILD_VECTOR: Results.push_back(ExpandBUILD_VECTOR(Node)); break; + case ISD::SPLAT_VECTOR: + Results.push_back(ExpandSPLAT_VECTOR(Node)); + break; case ISD::SRA: case ISD::SRL: case ISD::SHL: { @@ -3715,6 +3708,33 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } + if (Results.empty() && Node->isStrictFPOpcode()) { + // FIXME: We were asked to expand a strict floating-point operation, + // but there is currently no expansion implemented that would preserve + // the "strict" properties. 
For now, we just fall back to the non-strict + // version if that is legal on the target. The actual mutation of the + // operation will happen in SelectionDAGISel::DoInstructionSelection. + switch (Node->getOpcode()) { + default: + if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) + return true; + break; + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + // These are registered by the operand type instead of the value + // type. Reflect that here. + if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getOperand(1).getValueType()) + == TargetLowering::Legal) + return true; + break; + } + } + // Replace the original node with the legalized result. if (Results.empty()) { LLVM_DEBUG(dbgs() << "Cannot expand node\n"); @@ -3956,6 +3976,34 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128)); break; + case ISD::LROUND: + case ISD::STRICT_LROUND: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, + RTLIB::LROUND_F64, RTLIB::LROUND_F80, + RTLIB::LROUND_F128, + RTLIB::LROUND_PPCF128)); + break; + case ISD::LLROUND: + case ISD::STRICT_LLROUND: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32, + RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, + RTLIB::LLROUND_F128, + RTLIB::LLROUND_PPCF128)); + break; + case ISD::LRINT: + case ISD::STRICT_LRINT: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32, + RTLIB::LRINT_F64, RTLIB::LRINT_F80, + RTLIB::LRINT_F128, + RTLIB::LRINT_PPCF128)); + break; + case ISD::LLRINT: + case ISD::STRICT_LLRINT: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32, + RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, + RTLIB::LLRINT_F128, + RTLIB::LLRINT_PPCF128)); + break; case ISD::FDIV: Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80, RTLIB::DIV_F128, diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index b4849b2881e6..72d052473f11 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -42,10 +42,10 @@ static RTLIB::Libcall GetFPLibCall(EVT VT, } //===----------------------------------------------------------------------===// -// Convert Float Results to Integer for Non-HW-supported Operations. +// Convert Float Results to Integer //===----------------------------------------------------------------------===// -bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { +void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { LLVM_DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); dbgs() << "\n"); SDValue R = SDValue(); @@ -58,26 +58,18 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to soften the result of this operator!"); - case ISD::Register: - case ISD::CopyFromReg: - case ISD::CopyToReg: - assert(isLegalInHWReg(N->getValueType(ResNo)) && - "Unsupported SoftenFloatRes opcode!"); - // Only when isLegalInHWReg, we can skip check of the operands. 
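// Aside: with the HW-register escape hatches deleted (this case block and
// the ones removed below), SoftenFloatResult's contract is simpler: it now
// returns void, and each SoftenFloatRes_* helper either hands back the
// softened integer value, which the dispatcher records, or returns a null
// SDValue after registering the replacement itself. In sketch form:
//
//   SDValue R = SoftenFloatRes_FADD(N);  // or any other sub-method
//   if (R.getNode()) {                   // null means "already handled"
//     assert(R.getNode() != N);
//     SetSoftenedFloat(SDValue(N, ResNo), R);
//   }
//
// There is no longer a bool return asking the caller to re-scan operands.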
- R = SDValue(N, ResNo); - break; case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break; - case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N, ResNo); break; + case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; - case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break; + case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N); break; case ISD::EXTRACT_VECTOR_ELT: R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break; - case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break; + case ISD::FABS: R = SoftenFloatRes_FABS(N); break; case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; case ISD::FADD: R = SoftenFloatRes_FADD(N); break; case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; - case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N, ResNo); break; + case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; @@ -89,7 +81,7 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMA: R = SoftenFloatRes_FMA(N); break; case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; - case ISD::FNEG: R = SoftenFloatRes_FNEG(N, ResNo); break; + case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break; @@ -102,30 +94,24 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; - case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break; + case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; - case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break; - case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break; + case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; + case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break; } - if (R.getNode() && R.getNode() != N) { + // If R is null, the sub-method took care of registering the result. + if (R.getNode()) { + assert(R.getNode() != N); SetSoftenedFloat(SDValue(N, ResNo), R); - // Return true only if the node is changed, assuming that the operands - // are also converted when necessary. - return true; } - - // Otherwise, return false to tell caller to scan operands. - return false; } -SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) { - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { return BitConvertToInteger(N->getOperand(0)); } @@ -144,10 +130,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { BitConvertToInteger(N->getOperand(1))); } -SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, we can load better from the constant pool. 
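// Aside (illustration only): softening a ConstantFP, which this hunk now
// does unconditionally, amounts to re-emitting the IEEE bit pattern as an
// integer constant of the same width. Conceptually, for the simple
// (non-ppcf128) case:
//
//   ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
//   SDValue IntC = DAG.getConstant(CN->getValueAPF().bitcastToAPInt(),
//                                  SDLoc(CN), NVT); // NVT: equal-width integer
//
// ppcf128 needs the word-order care described in the comment that follows.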
- if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N) { ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N); // In ppcf128, the high 64 bits are always first in memory regardless // of Endianness. LLVM's APFloat representation is not Endian sensitive, @@ -172,19 +155,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { } SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, keep the extracted value in register. - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NewOp.getValueType().getVectorElementType(), NewOp, N->getOperand(1)); } -SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, FABS can be implemented as native bitwise operations. - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned Size = NVT.getSizeInBits(); @@ -200,57 +177,69 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMIN_F32, RTLIB::FMIN_F64, RTLIB::FMIN_F80, RTLIB::FMIN_F128, RTLIB::FMIN_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMAX_F32, RTLIB::FMAX_F64, RTLIB::FMAX_F80, RTLIB::FMAX_F128, RTLIB::FMAX_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::ADD_F32, RTLIB::ADD_F64, RTLIB::ADD_F80, RTLIB::ADD_F128, RTLIB::ADD_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { 
N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::CEIL_F32, RTLIB::CEIL_F64, RTLIB::CEIL_F80, RTLIB::CEIL_F128, RTLIB::CEIL_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, FCOPYSIGN can be implemented as native bitwise operations. - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(0)); SDValue RHS = BitConvertToInteger(N->getOperand(1)); SDLoc dl(N); @@ -301,98 +290,123 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) { SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::COS_F32, RTLIB::COS_F64, RTLIB::COS_F80, RTLIB::COS_F128, RTLIB::COS_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::EXP_F32, RTLIB::EXP_F64, RTLIB::EXP_F80, RTLIB::EXP_F128, RTLIB::EXP_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::EXP2_F32, RTLIB::EXP2_F64, RTLIB::EXP2_F80, RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { 
N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, RTLIB::FLOOR_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::LOG_F32, RTLIB::LOG_F64, RTLIB::LOG_F80, RTLIB::LOG_F128, RTLIB::LOG_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::LOG2_F32, RTLIB::LOG2_F64, RTLIB::LOG2_F80, RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::LOG10_F32, RTLIB::LOG10_F64, RTLIB::LOG10_F80, RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { @@ -400,48 +414,57 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)), GetSoftenedFloat(N->getOperand(2)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[3] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType(), + N->getOperand(2).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMA_F32, RTLIB::FMA_F64, RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::MUL_F32, RTLIB::MUL_F64, RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue 
DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, RTLIB::NEARBYINT_F80, RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, FNEG can be implemented as native bitwise operations. - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); - EVT FloatVT = N->getValueType(ResNo); + EVT FloatVT = N->getValueType(0); if (FloatVT == MVT::f32 || FloatVT == MVT::f64 || FloatVT == MVT::f128) { // Expand Y = FNEG(X) -> Y = X ^ sign mask APInt SignMask = APInt::getSignMask(NVT.getSizeInBits()); @@ -452,13 +475,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) { // Expand Y = FNEG(X) -> Y = SUB -0.0, X SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)), GetSoftenedFloat(N->getOperand(0)) }; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SUB_F32, RTLIB::SUB_F64, RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, false, dl).first; + NVT, Ops, CallOptions, dl).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { @@ -485,7 +509,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; } // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special @@ -493,15 +520,18 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32); SDValue Op = N->getOperand(0); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op, - false, SDLoc(N)).first; + CallOptions, SDLoc(N)).first; if (N->getValueType(0) == MVT::f32) return Res32; EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, Res32, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, NVT, Res32, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { @@ -515,20 +545,27 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { 
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); - return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::POW_F32, RTLIB::POW_F64, RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { @@ -536,87 +573,111 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { "Unsupported power type!"); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::POWI_F32, RTLIB::POWI_F64, RTLIB::POWI_F80, RTLIB::POWI_F128, RTLIB::POWI_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::REM_F32, RTLIB::REM_F64, RTLIB::REM_F80, RTLIB::REM_F128, RTLIB::REM_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::RINT_F32, RTLIB::RINT_F64, RTLIB::RINT_F80, RTLIB::RINT_F128, RTLIB::RINT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, 
GetFPLibCall(N->getValueType(0), RTLIB::ROUND_F32, RTLIB::ROUND_F64, RTLIB::ROUND_F80, RTLIB::ROUND_F128, RTLIB::ROUND_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SIN_F32, RTLIB::SIN_F64, RTLIB::SIN_F80, RTLIB::SIN_F128, RTLIB::SIN_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SQRT_F32, RTLIB::SQRT_F64, RTLIB::SQRT_F80, RTLIB::SQRT_F128, RTLIB::SQRT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SUB_F32, RTLIB::SUB_F64, RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { @@ -625,17 +686,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, RTLIB::TRUNC_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { - bool LegalInHWReg = isLegalInHWReg(N->getValueType(ResNo)); +SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { LoadSDNode *L = cast<LoadSDNode>(N); EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); @@ -666,23 +729,17 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { // use the new one. 
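// Aside, summarizing the pattern repeated throughout these hunks: every
// makeLibCall site moves from the old trailing 'bool isSigned' argument to
// a TargetLowering::MakeLibCallOptions bag that also records the operand
// and result types as they were *before* softening. The recurring shape:
//
//   TargetLowering::MakeLibCallOptions CallOptions;
//   EVT OpsVT[1] = { N->getOperand(0).getValueType() }; // pre-softening type
//   CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
//   return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first;
//
// Sites that previously passed 'true' for signedness now call
// CallOptions.setSExt(true) instead, as in SoftenFloatRes_XINT_TO_FP below.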
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL); - if (LegalInHWReg) - return ExtendNode; return BitConvertToInteger(ExtendNode); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo) { - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(1)); SDValue RHS = GetSoftenedFloat(N->getOperand(2)); return DAG.getSelect(SDLoc(N), LHS.getValueType(), N->getOperand(0), LHS, RHS); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo) { - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(2)); SDValue RHS = GetSoftenedFloat(N->getOperand(3)); return DAG.getNode(ISD::SELECT_CC, SDLoc(N), @@ -736,14 +793,18 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { // Sign/zero extend the argument if the libcall takes a larger type. SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(Signed); + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - Op, Signed, dl).first; + Op, CallOptions, dl).first; } //===----------------------------------------------------------------------===// -// Convert Float Operand to Integer for Non-HW-supported Operations. +// Convert Float Operand to Integer //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { @@ -753,8 +814,6 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: - if (CanSkipSoftenFloatOperand(N, OpNo)) - return false; #ifndef NDEBUG dbgs() << "SoftenFloatOperand Op #" << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"; @@ -762,11 +821,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { llvm_unreachable("Do not know how to soften this operator's operand!"); case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; - case ISD::CopyToReg: Res = SoftenFloatOp_COPY_TO_REG(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; - case ISD::FABS: Res = SoftenFloatOp_FABS(N); break; - case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break; - case ISD::FNEG: Res = SoftenFloatOp_FNEG(N); break; case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; @@ -776,19 +831,9 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::LLROUND: Res = SoftenFloatOp_LLROUND(N); break; case ISD::LRINT: Res = SoftenFloatOp_LRINT(N); break; case ISD::LLRINT: Res = SoftenFloatOp_LLRINT(N); break; - case ISD::SELECT: Res = SoftenFloatOp_SELECT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; - case ISD::STORE: - Res = SoftenFloatOp_STORE(N, OpNo); - // Do not try to analyze or soften this node again if the value is - // or can be held in a register. In that case, Res.getNode() should - // be equal to N. 
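// Aside, a sketch of the operand-softening contract this cleanup converges
// on (illustration; the return values follow the dispatcher code just
// below): each SoftenFloatOp_* helper returns either a null SDValue (it
// already registered the replacement itself), N itself (updated in place),
// or a fresh replacement node:
//
//   SDValue Res = SoftenFloatOp_STORE(N, OpNo);
//   if (!Res.getNode()) return false;     // sub-method handled everything
//   if (Res.getNode() == N) return true;  // node updated in place
//   ReplaceValueWith(SDValue(N, 0), Res); // splice in the replacement
//   return false;
//
// The CanSkipSoftenFloatOperand machinery removed just below existed only
// to serve the isLegalInHWReg special cases and goes away with them.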
- if (Res.getNode() == N && - isLegalInHWReg(N->getOperand(OpNo).getValueType())) - return false; - // Otherwise, we need to reanalyze and lower the new Res nodes. - break; + case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; } // If the result is null, the sub-method took care of registering results etc. @@ -800,60 +845,16 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { return true; assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && - "Invalid operand expansion"); + "Invalid operand promotion"); ReplaceValueWith(SDValue(N, 0), Res); return false; } -bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { - if (!isLegalInHWReg(N->getOperand(OpNo).getValueType())) - return false; - - // When the operand type can be kept in registers there is nothing to do for - // the following opcodes. - switch (N->getOperand(OpNo).getOpcode()) { - case ISD::BITCAST: - case ISD::ConstantFP: - case ISD::CopyFromReg: - case ISD::CopyToReg: - case ISD::FABS: - case ISD::FCOPYSIGN: - case ISD::FNEG: - case ISD::Register: - case ISD::SELECT: - case ISD::SELECT_CC: - return true; - } - - switch (N->getOpcode()) { - case ISD::ConstantFP: // Leaf node. - case ISD::CopyFromReg: // Operand is a register that we know to be left - // unchanged by SoftenFloatResult(). - case ISD::Register: // Leaf node. - return true; - } - return false; -} - SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { - return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), - GetSoftenedFloat(N->getOperand(0))); -} - -SDValue DAGTypeLegalizer::SoftenFloatOp_COPY_TO_REG(SDNode *N) { - SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); - SDValue Op2 = GetSoftenedFloat(N->getOperand(2)); - - if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2)) - return SDValue(); - - if (N->getNumOperands() == 3) - return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), 0); + SDValue Op0 = GetSoftenedFloat(N->getOperand(0)); - return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2, - N->getOperand(3)), - 0); + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0); } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { @@ -868,7 +869,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall"); - return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first; } @@ -885,7 +889,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { @@ -895,7 +902,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, 
NewLHS, NewRHS, CCCode, SDLoc(N)); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), + N->getOperand(2), N->getOperand(3)); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -911,34 +919,6 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { 0); } -SDValue DAGTypeLegalizer::SoftenFloatOp_FABS(SDNode *N) { - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - - if (Op == N->getOperand(0)) - return SDValue(); - - return SDValue(DAG.UpdateNodeOperands(N, Op), 0); -} - -SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) { - SDValue Op0 = GetSoftenedFloat(N->getOperand(0)); - SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); - - if (Op0 == N->getOperand(0) && Op1 == N->getOperand(1)) - return SDValue(); - - return SDValue(DAG.UpdateNodeOperands(N, Op0, Op1), 0); -} - -SDValue DAGTypeLegalizer::SoftenFloatOp_FNEG(SDNode *N) { - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - - if (Op == N->getOperand(0)) - return SDValue(); - - return SDValue(DAG.UpdateNodeOperands(N, Op), 0); -} - SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { bool Signed = N->getOpcode() == ISD::FP_TO_SINT; EVT SVT = N->getOperand(0).getValueType(); @@ -962,23 +942,15 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, false, dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl).first; // Truncate the result if the libcall returns a larger type. return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res); } -SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT(SDNode *N) { - SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); - SDValue Op2 = GetSoftenedFloat(N->getOperand(2)); - - if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2)) - return SDValue(); - - return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), - 0); -} - SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); @@ -986,7 +958,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), + N->getOperand(0), N->getOperand(1)); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -1009,7 +982,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), + N->getOperand(0), N->getOperand(1)); // If softenSetCCOperands returned a scalar, use it. 
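// Aside (illustration; the exact libcall depends on the target's RTLIB
// configuration): softenSetCCOperands rewrites an FP comparison into an
// integer one around a comparison libcall. For an f64 SETOEQ, for example:
//
//   NewLHS = <call to __eqdf2>(a, b);  // returns 0 iff a == b
//   NewRHS = DAG.getConstant(0, dl, NewLHS.getValueType());
//   CCCode = ISD::SETEQ;               // i.e. 'call result == 0'
//
// The extra operands now passed to softenSetCCOperands (this hunk and the
// BR_CC one above) hand through the original, pre-softening values,
// presumably so the libcall setup can see the unsoftened types, consistent
// with the setTypeListBeforeSoften changes elsewhere in this file.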
if (!NewRHS.getNode()) { @@ -1047,13 +1021,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LROUND_F32, RTLIB::LROUND_F64, RTLIB::LROUND_F80, RTLIB::LROUND_F128, RTLIB::LROUND_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { @@ -1061,13 +1038,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLROUND_F32, RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, RTLIB::LLROUND_F128, RTLIB::LLROUND_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) { @@ -1075,13 +1055,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LRINT_F32, RTLIB::LRINT_F64, RTLIB::LRINT_F80, RTLIB::LRINT_F128, RTLIB::LRINT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) { @@ -1089,13 +1072,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLRINT_F32, RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, RTLIB::LLRINT_F128, RTLIB::LLRINT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } //===----------------------------------------------------------------------===// @@ -1267,13 +1253,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - N->getValueType(0), Ops, false, + N->getValueType(0), Ops, CallOptions, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1341,13 +1328,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + TargetLowering::MakeLibCallOptions CallOptions; SDValue Call = 
TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMA_F32, RTLIB::FMA_F64, RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - N->getValueType(0), Ops, false, + N->getValueType(0), Ops, CallOptions, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1355,13 +1343,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::MUL_F32, RTLIB::MUL_F64, RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - N->getValueType(0), Ops, false, + N->getValueType(0), Ops, CallOptions, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1470,13 +1459,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SUB_F32, RTLIB::SUB_F64, RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - N->getValueType(0), Ops, false, + N->getValueType(0), Ops, CallOptions, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1555,7 +1545,9 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); - Hi = TLI.makeLibCall(DAG, LC, VT, Src, true, dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + Hi = TLI.makeLibCall(DAG, LC, VT, Src, CallOptions, dl).first; GetPairElements(Hi, Lo, Hi); } @@ -1732,7 +1724,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), false, dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), CallOptions, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { @@ -1741,8 +1734,9 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0), - false, dl).first; + CallOptions, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { @@ -1807,49 +1801,53 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) { EVT RVT = N->getValueType(0); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LROUND_F32, RTLIB::LROUND_F64, RTLIB::LROUND_F80, RTLIB::LROUND_F128, RTLIB::LROUND_PPCF128), - RVT, N->getOperand(0), false, SDLoc(N)).first; + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) { EVT RVT = N->getValueType(0); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, 
RTLIB::LLROUND_F32, RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, RTLIB::LLROUND_F128, RTLIB::LLROUND_PPCF128), - RVT, N->getOperand(0), false, SDLoc(N)).first; + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) { EVT RVT = N->getValueType(0); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LRINT_F32, RTLIB::LRINT_F64, RTLIB::LRINT_F80, RTLIB::LRINT_F128, RTLIB::LRINT_PPCF128), - RVT, N->getOperand(0), false, SDLoc(N)).first; + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) { EVT RVT = N->getValueType(0); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLRINT_F32, RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, RTLIB::LLRINT_F128, RTLIB::LLRINT_PPCF128), - RVT, N->getOperand(0), false, SDLoc(N)).first; + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; } //===----------------------------------------------------------------------===// @@ -2002,6 +2000,12 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { dbgs() << "\n"); SDValue R = SDValue(); + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) { + LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n"); + return; + } + switch (N->getOpcode()) { // These opcodes cannot appear if promotion of FP16 is done in the backend // instead of Clang diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 15ac45c37c66..d5c1b539adbd 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -100,6 +100,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { Res = PromoteIntRes_BUILD_VECTOR(N); break; case ISD::SCALAR_TO_VECTOR: Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break; + case ISD::SPLAT_VECTOR: + Res = PromoteIntRes_SPLAT_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = PromoteIntRes_CONCAT_VECTORS(N); break; @@ -112,6 +114,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break; + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break; @@ -148,9 +152,12 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UADDSAT: case ISD::SSUBSAT: case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break; + case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: Res = PromoteIntRes_MULFIX(N); break; + case ISD::UMULFIX: + case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break; + case ISD::ABS: Res = PromoteIntRes_ABS(N); break; case ISD::ATOMIC_LOAD: @@ -494,7 +501,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) NewOpc = ISD::FP_TO_SINT; - SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); + if (N->getOpcode() == ISD::STRICT_FP_TO_UINT && + !TLI.isOperationLegal(ISD::STRICT_FP_TO_UINT, NVT) && + TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT)) + NewOpc = ISD::STRICT_FP_TO_SINT; + + SDValue Res; + if (N->isStrictFPOpcode()) { + Res = 
DAG.getNode(NewOpc, dl, { NVT, MVT::Other }, + { N->getOperand(0), N->getOperand(1) }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + } else + Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); // Assert that the converted value fits in the original type. If it doesn't // (eg: because the value being converted is too big), then the result of the @@ -503,7 +523,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // NOTE: fp-to-uint to fp-to-sint promotion guarantees zero extend. For example: // before legalization: fp-to-uint16, 65534. -> 0xfffe // after legalization: fp-to-sint32, 65534. -> 0x0000fffe - return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? + return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT || + N->getOpcode() == ISD::STRICT_FP_TO_UINT) ? ISD::AssertZext : ISD::AssertSext, dl, NVT, Res, DAG.getValueType(N->getValueType(0).getScalarType())); } @@ -590,7 +611,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { N->getIndex(), N->getScale() }; SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other), N->getMemoryVT(), dl, Ops, - N->getMemOperand()); + N->getMemOperand(), N->getIndexType()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -623,48 +644,84 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) { - // For promoting iN -> iM, this can be expanded by - // 1. ANY_EXTEND iN to iM - // 2. SHL by M-N - // 3. [US][ADD|SUB]SAT - // 4. L/ASHR by M-N + // If the promoted type is legal, we can convert this to: + // 1. ANY_EXTEND iN to iM + // 2. SHL by M-N + // 3. [US][ADD|SUB]SAT + // 4. L/ASHR by M-N + // Else it is more efficient to convert this to a min and a max + // operation in the higher precision arithmetic. 
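// [Editor's illustration, not part of this commit] A scalar sketch of the two
// strategies described above, for SADDSAT on i8 promoted to i32 (two's
// complement assumed). Shift-based form, usable when the wide saturating op
// is legal: sat_i32((x << 24) + (y << 24)) >> 24, which moves the i8
// saturation points up to INT32_MAX/INT32_MIN. Min/max form otherwise:
//   int32_t Wide = (int32_t)x + (int32_t)y;            // cannot wrap in i32
//   int32_t Res  = std::max(std::min(Wide, 127), -128); // clamp to i8 range
// e.g. x = 100, y = 100 gives Wide = 200, clamped to 127 (INT8_MAX).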
SDLoc dl(N); SDValue Op1 = N->getOperand(0); SDValue Op2 = N->getOperand(1); unsigned OldBits = Op1.getScalarValueSizeInBits(); unsigned Opcode = N->getOpcode(); - unsigned ShiftOp; - switch (Opcode) { - case ISD::SADDSAT: - case ISD::SSUBSAT: - ShiftOp = ISD::SRA; - break; - case ISD::UADDSAT: - case ISD::USUBSAT: - ShiftOp = ISD::SRL; - break; - default: - llvm_unreachable("Expected opcode to be signed or unsigned saturation " - "addition or subtraction"); - } - - SDValue Op1Promoted = GetPromotedInteger(Op1); - SDValue Op2Promoted = GetPromotedInteger(Op2); + SDValue Op1Promoted, Op2Promoted; + if (Opcode == ISD::UADDSAT || Opcode == ISD::USUBSAT) { + Op1Promoted = ZExtPromotedInteger(Op1); + Op2Promoted = ZExtPromotedInteger(Op2); + } else { + Op1Promoted = SExtPromotedInteger(Op1); + Op2Promoted = SExtPromotedInteger(Op2); + } EVT PromotedType = Op1Promoted.getValueType(); unsigned NewBits = PromotedType.getScalarSizeInBits(); - unsigned SHLAmount = NewBits - OldBits; - EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); - SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT); - Op1Promoted = - DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount); - Op2Promoted = - DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount); - SDValue Result = - DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted); - return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount); + if (TLI.isOperationLegalOrCustom(Opcode, PromotedType)) { + unsigned ShiftOp; + switch (Opcode) { + case ISD::SADDSAT: + case ISD::SSUBSAT: + ShiftOp = ISD::SRA; + break; + case ISD::UADDSAT: + case ISD::USUBSAT: + ShiftOp = ISD::SRL; + break; + default: + llvm_unreachable("Expected opcode to be signed or unsigned saturation " + "addition or subtraction"); + } + + unsigned SHLAmount = NewBits - OldBits; + EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); + SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT); + Op1Promoted = + DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount); + Op2Promoted = + DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount); + + SDValue Result = + DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount); + } else { + if (Opcode == ISD::USUBSAT) { + SDValue Max = + DAG.getNode(ISD::UMAX, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ISD::SUB, dl, PromotedType, Max, Op2Promoted); + } + + if (Opcode == ISD::UADDSAT) { + APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits); + SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType); + SDValue Add = + DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax); + } + + unsigned AddOp = Opcode == ISD::SADDSAT ? 
ISD::ADD : ISD::SUB; + APInt MinVal = APInt::getSignedMinValue(OldBits).sext(NewBits); + APInt MaxVal = APInt::getSignedMaxValue(OldBits).sext(NewBits); + SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType); + SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType); + SDValue Result = + DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted); + Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax); + Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin); + return Result; + } } SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { @@ -673,6 +730,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { SDValue Op1Promoted, Op2Promoted; bool Signed = N->getOpcode() == ISD::SMULFIX || N->getOpcode() == ISD::SMULFIXSAT; + bool Saturating = + N->getOpcode() == ISD::SMULFIXSAT || N->getOpcode() == ISD::UMULFIXSAT; if (Signed) { Op1Promoted = SExtPromotedInteger(N->getOperand(0)); Op2Promoted = SExtPromotedInteger(N->getOperand(1)); @@ -685,7 +744,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { unsigned DiffSize = PromotedType.getScalarSizeInBits() - OldType.getScalarSizeInBits(); - bool Saturating = N->getOpcode() == ISD::SMULFIXSAT; if (Saturating) { // Promoting the operand and result values changes the saturation width, // which extends the values that we clamp to on saturation. This could be @@ -1110,6 +1168,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break; case ISD::SCALAR_TO_VECTOR: Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break; + case ISD::SPLAT_VECTOR: + Res = PromoteIntOp_SPLAT_VECTOR(N); break; case ISD::VSELECT: case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break; case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break; @@ -1148,7 +1208,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: Res = PromoteIntOp_MULFIX(N); break; + case ISD::UMULFIX: + case ISD::UMULFIXSAT: Res = PromoteIntOp_MULFIX(N); break; case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break; @@ -1339,6 +1400,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) { GetPromotedInteger(N->getOperand(0))), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_SPLAT_VECTOR(SDNode *N) { + // Integer SPLAT_VECTOR operands are implicitly truncated, so just promote the + // operand in place. + return SDValue( + DAG.UpdateNodeOperands(N, GetPromotedInteger(N->getOperand(0))), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { assert(OpNo == 0 && "Only know how to promote the condition!"); SDValue Cond = N->getOperand(0); @@ -1454,8 +1522,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, EVT DataVT = N->getValueType(0); NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); } else if (OpNo == 4) { - // Need to sign extend the index since the bits will likely be used. - NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + // The Index + if (N->isIndexSigned()) + // Need to sign extend the index since the bits will likely be used.
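// [Editor's note, not part of this commit] Why the signedness of the index
// matters once it is promoted: an i16 index with bit pattern 0xFFFF means -1
// when signed and must widen to the i32 value 0xFFFFFFFF (sign extension),
// but means 65535 when unsigned and must widen to 0x0000FFFF (zero
// extension). That is the isIndexSigned() split introduced here.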
+ NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + else + NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo)); } else NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); @@ -1470,8 +1542,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, EVT DataVT = N->getValue().getValueType(); NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); } else if (OpNo == 4) { - // Need to sign extend the index since the bits will likely be used. - NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + // The Index + if (N->isIndexSigned()) + // Need to sign extend the index since the bits will likely be used. + NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + else + NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo)); } else NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); @@ -1715,7 +1791,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: ExpandIntRes_MULFIX(N, Lo, Hi); break; + case ISD::UMULFIX: + case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break; case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: @@ -2473,7 +2550,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, true/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first, Lo, Hi); } @@ -2488,7 +2567,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, false/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first, Lo, Hi); } @@ -2514,7 +2594,9 @@ void DAGTypeLegalizer::ExpandIntRes_LLROUND(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT RetVT = N->getValueType(0); - SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first, Lo, Hi); } @@ -2540,7 +2622,9 @@ void DAGTypeLegalizer::ExpandIntRes_LLRINT(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT RetVT = N->getValueType(0); - SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first, Lo, Hi); } @@ -2743,7 +2827,9 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, } SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } @@ -2777,38 +2863,53 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); uint64_t Scale = N->getConstantOperandVal(2); - bool Saturating = N->getOpcode() == ISD::SMULFIXSAT; - EVT 
BoolVT = getSetCCResultType(VT); - SDValue Zero = DAG.getConstant(0, dl, VT); + bool Saturating = (N->getOpcode() == ISD::SMULFIXSAT || + N->getOpcode() == ISD::UMULFIXSAT); + bool Signed = (N->getOpcode() == ISD::SMULFIX || + N->getOpcode() == ISD::SMULFIXSAT); + + // Handle special case when scale is equal to zero. if (!Scale) { SDValue Result; if (!Saturating) { Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); } else { - Result = DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS); + EVT BoolVT = getSetCCResultType(VT); + unsigned MulOp = Signed ? ISD::SMULO : ISD::UMULO; + Result = DAG.getNode(MulOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS); SDValue Product = Result.getValue(0); SDValue Overflow = Result.getValue(1); - - APInt MinVal = APInt::getSignedMinValue(VTSize); - APInt MaxVal = APInt::getSignedMaxValue(VTSize); - SDValue SatMin = DAG.getConstant(MinVal, dl, VT); - SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); - SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT); - Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin); - Result = DAG.getSelect(dl, VT, Overflow, Result, Product); + if (Signed) { + APInt MinVal = APInt::getSignedMinValue(VTSize); + APInt MaxVal = APInt::getSignedMaxValue(VTSize); + SDValue SatMin = DAG.getConstant(MinVal, dl, VT); + SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT); + Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin); + Result = DAG.getSelect(dl, VT, Overflow, Result, Product); + } else { + // For unsigned multiplication, we only need to check the max since we + // can't really overflow towards zero. + APInt MaxVal = APInt::getMaxValue(VTSize); + SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); + Result = DAG.getSelect(dl, VT, Overflow, SatMax, Product); + } } SplitInteger(Result, Lo, Hi); return; } + // For SMULFIX[SAT] we only expect to find Scale<VTSize, but this assert will + // cover for unhandled cases below, while still being valid for UMULFIX[SAT]. + assert(Scale <= VTSize && "Scale can't be larger than the value type size."); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue LL, LH, RL, RH; GetExpandedInteger(LHS, LL, LH); GetExpandedInteger(RHS, RL, RH); SmallVector<SDValue, 4> Result; - bool Signed = (N->getOpcode() == ISD::SMULFIX || - N->getOpcode() == ISD::SMULFIXSAT); unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI; if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG, TargetLowering::MulExpansionKind::OnlyLegalOrCustom, @@ -2822,19 +2923,9 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, "the size of the current value type"); EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); - // Shift whole amount by scale. - SDValue ResultLL = Result[0]; - SDValue ResultLH = Result[1]; - SDValue ResultHL = Result[2]; - SDValue ResultHH = Result[3]; - - SDValue SatMax, SatMin; - SDValue NVTZero = DAG.getConstant(0, dl, NVT); - SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT); - EVT BoolNVT = getSetCCResultType(NVT); - - // After getting the multplication result in 4 parts, we need to perform a + // After getting the multiplication result in 4 parts, we need to perform a // shift right by the amount of the scale to get the result in that scale. + // // Let's say we multiply 2 64 bit numbers. 
The resulting value can be held in // 128 bits that are cut into 4 32-bit parts: // // |NVTSize-| // - // The resulting Lo and Hi will only need to be one of these 32-bit parts - // after shifting. + // The resulting Lo and Hi would normally be in LL and LH after the shift. But + // to avoid unnecessary shifting of all 4 parts, we can adjust the shift + // amount and get Lo and Hi using two funnel shifts. Or for the special case + // when Scale is a multiple of NVTSize we can just pick the result without + // shifting. + uint64_t Part0 = Scale / NVTSize; // Part holding lowest bit needed. + if (Scale % NVTSize) { + SDValue ShiftAmount = DAG.getConstant(Scale % NVTSize, dl, ShiftTy); + Lo = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 1], Result[Part0], + ShiftAmount); + Hi = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 2], Result[Part0 + 1], + ShiftAmount); + } else { + Lo = Result[Part0]; + Hi = Result[Part0 + 1]; + } + + // Unless saturation is requested we are done. The result is in <Hi,Lo>. + if (!Saturating) + return; + + // Cannot overflow when there is no integer part. + if (Scale == VTSize) + return; + + // To handle saturation we must check for overflow in the multiplication. + // + // Unsigned overflow happened if the upper (VTSize - Scale) bits (of Result) + // aren't all zeroes. + // + // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of Result) + // aren't all ones or all zeroes. + // + // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the + // highest bit of HH determines saturation direction in the event of signed + // saturation. + + SDValue ResultHL = Result[2]; + SDValue ResultHH = Result[3]; + + SDValue SatMax, SatMin; + SDValue NVTZero = DAG.getConstant(0, dl, NVT); + SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT); + EVT BoolNVT = getSetCCResultType(NVT); + + if (!Signed) { + if (Scale < NVTSize) { + // Overflow happened if ((HH | (HL >> Scale)) != 0). + SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, + DAG.getConstant(Scale, dl, ShiftTy)); + SDValue Tmp = DAG.getNode(ISD::OR, dl, NVT, HLAdjusted, ResultHH); + SatMax = DAG.getSetCC(dl, BoolNVT, Tmp, NVTZero, ISD::SETNE); + } else if (Scale == NVTSize) { + // Overflow happened if (HH != 0). + SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETNE); + } else if (Scale < VTSize) { + // Overflow happened if ((HH >> (Scale - NVTSize)) != 0). + SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, + DAG.getConstant(Scale - NVTSize, dl, + ShiftTy)); + SatMax = DAG.getSetCC(dl, BoolNVT, HLAdjusted, NVTZero, ISD::SETNE); + } else + llvm_unreachable("Scale must be less than or equal to VTSize for UMULFIXSAT " "(and saturation can't happen with Scale==VTSize)."); + + Hi = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Hi); + Lo = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Lo); + return; + } + if (Scale < NVTSize) { - // If the scale is less than the size of the VT we expand to, the Hi and - // Lo of the result will be in the first 2 parts of the result after - // shifting right. This only requires shifting by the scale as far as the - // third part in the result (ResultHL).
- SDValue SRLAmnt = DAG.getConstant(Scale, dl, ShiftTy); - SDValue SHLAmnt = DAG.getConstant(NVTSize - Scale, dl, ShiftTy); - Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLL, SRLAmnt); - Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, - DAG.getNode(ISD::SHL, dl, NVT, ResultLH, SHLAmnt)); - Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt); - Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, - DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt)); - - // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the - // highest bit of HH determines saturation direction in the event of - // saturation. // The number of overflow bits we can check are VTSize - Scale + 1 (we // include the sign bit). If these top bits are > 0, then we overflowed past // the max value. If these top bits are < -1, then we overflowed past the // min value. Otherwise, we did not overflow. - if (Saturating) { - unsigned OverflowBits = VTSize - Scale + 1; - assert(OverflowBits <= VTSize && OverflowBits > NVTSize && - "Extent of overflow bits must start within HL"); - SDValue HLHiMask = DAG.getConstant( - APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT); - SDValue HLLoMask = DAG.getConstant( - APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT); - - // HH > 0 or HH == 0 && HL > HLLoMask - SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT); - SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ); - SDValue HLPos = - DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT); - SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos, - DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLPos)); - - // HH < -1 or HH == -1 && HL < HLHiMask - SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); - SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); - SDValue HLNeg = - DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT); - SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg, - DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLNeg)); - } + unsigned OverflowBits = VTSize - Scale + 1; + assert(OverflowBits <= VTSize && OverflowBits > NVTSize && + "Extent of overflow bits must start within HL"); + SDValue HLHiMask = DAG.getConstant( + APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT); + SDValue HLLoMask = DAG.getConstant( + APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT); + // We overflow max if HH > 0 or (HH == 0 && HL > HLLoMask). + SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT); + SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ); + SDValue HLUGT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT); + SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0, + DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLUGT)); + // We overflow min if HH < -1 or (HH == -1 && HL < HLHiMask). + SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); + SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); + SDValue HLULT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT); + SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT, + DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLULT)); } else if (Scale == NVTSize) { - // If the scales are equal, Lo and Hi are ResultLH and Result HL, - // respectively. Avoid shifting to prevent undefined behavior. - Lo = ResultLH; - Hi = ResultHL; - - // We overflow max if HH > 0 or HH == 0 && HL sign bit is 1. - // We overflow min if HH < -1 or HH == -1 && HL sign bit is 0. 
- if (Saturating) { - SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT); - SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ); - SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT); - SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos, - DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLNeg)); - - SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); - SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); - SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE); - SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg, - DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLPos)); - } + // We overflow max if HH > 0 or (HH == 0 && HL sign bit is 1). + SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT); + SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ); + SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT); + SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0, + DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLNeg)); + // We overflow min if HH < -1 or (HH == -1 && HL sign bit is 0). + SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); + SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); + SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE); + SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT, + DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLPos)); } else if (Scale < VTSize) { - // If the scale is instead less than the old VT size, but greater than or - // equal to the expanded VT size, the first part of the result (ResultLL) is - // no longer a part of Lo because it would be scaled out anyway. Instead we - // can start shifting right from the fourth part (ResultHH) to the second - // part (ResultLH), and Result LH will be the new Lo. - SDValue SRLAmnt = DAG.getConstant(Scale - NVTSize, dl, ShiftTy); - SDValue SHLAmnt = DAG.getConstant(VTSize - Scale, dl, ShiftTy); - Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt); - Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, - DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt)); - Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, SRLAmnt); - Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, - DAG.getNode(ISD::SHL, dl, NVT, ResultHH, SHLAmnt)); - // This is similar to the case when we saturate if Scale < NVTSize, but we - // only need to chech HH. - if (Saturating) { - unsigned OverflowBits = VTSize - Scale + 1; - SDValue HHHiMask = DAG.getConstant( - APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT); - SDValue HHLoMask = DAG.getConstant( - APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT); - - SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT); - SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT); - } - } else if (Scale == VTSize) { - assert( - !Signed && - "Only unsigned types can have a scale equal to the operand bit width"); - - Lo = ResultHL; - Hi = ResultHH; - } else { - llvm_unreachable("Expected the scale to be less than or equal to the width " - "of the operands"); - } + // only need to check HH. 
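// [Editor's illustration, not part of this commit] Worked example for the
// check built below, assuming VTSize = 64, NVTSize = 32 and Scale = 40:
// OverflowBits = 64 - 40 + 1 = 25. Signed overflow occurred unless the top
// 25 bits of the 128-bit product (bits 103..127) are all zeroes or all ones,
// and because Scale > NVTSize those bits sit entirely inside HH (bits
// 96..127), so comparing HH against the two masks is sufficient.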
+ unsigned OverflowBits = VTSize - Scale + 1; + SDValue HHHiMask = DAG.getConstant( + APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT); + SDValue HHLoMask = DAG.getConstant( + APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT); + SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT); + SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT); + } else + llvm_unreachable("Illegal scale for signed fixed point mul."); - if (Saturating) { - APInt LHMax = APInt::getSignedMaxValue(NVTSize); - APInt LLMax = APInt::getAllOnesValue(NVTSize); - APInt LHMin = APInt::getSignedMinValue(NVTSize); - Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LHMax, dl, NVT), Hi); - Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(LHMin, dl, NVT), Hi); - Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LLMax, dl, NVT), Lo); - Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo); - } + // Saturate to signed maximum. + APInt MaxHi = APInt::getSignedMaxValue(NVTSize); + APInt MaxLo = APInt::getAllOnesValue(NVTSize); + Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxHi, dl, NVT), Hi); + Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxLo, dl, NVT), Lo); + // Saturate to signed minimum. + APInt MinHi = APInt::getSignedMinValue(NVTSize); + Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(MinHi, dl, NVT), Hi); + Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo); } void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, @@ -3030,7 +3133,9 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, LC = RTLIB::SDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, @@ -3129,7 +3234,9 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, isSigned, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(isSigned); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); return; } @@ -3217,7 +3324,9 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, LC = RTLIB::SREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, @@ -3373,7 +3482,8 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, LC = RTLIB::UDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, @@ -3399,7 +3509,8 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, LC = RTLIB::UREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + 
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, @@ -3759,7 +3870,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, Op, true, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -3924,7 +4037,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this UINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, Op, true, dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, dl).first; } SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { @@ -4033,6 +4148,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op); } +SDValue DAGTypeLegalizer::PromoteIntRes_SPLAT_VECTOR(SDNode *N) { + SDLoc dl(N); + + SDValue SplatVal = N->getOperand(0); + + assert(!SplatVal.getValueType().isVector() && "Input must be a scalar"); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "Type must be promoted to a vector type"); + EVT NOutElemVT = NOutVT.getVectorElementType(); + + SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, SplatVal); + + return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op); +} + SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { SDLoc dl(N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 14fd5be23ccb..b596c174a287 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -81,7 +81,6 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) { SDValue Res(&Node, i); - EVT VT = Res.getValueType(); bool Failed = false; // Don't create a value in map. auto ResId = (ValueToIdMap.count(Res)) ? ValueToIdMap[Res] : 0; @@ -135,17 +134,13 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { dbgs() << "Unprocessed value in a map!"; Failed = true; } - } else if (isTypeLegal(VT) || IgnoreNodeResults(&Node)) { + } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) { if (Mapped > 1) { dbgs() << "Value with legal type was transformed!"; Failed = true; } } else { - // If the value can be kept in HW registers, softening machinery can - // leave it unchanged and don't put it to any map. - if (Mapped == 0 && - !(getTypeAction(VT) == TargetLowering::TypeSoftenFloat && - isLegalInHWReg(VT))) { + if (Mapped == 0) { dbgs() << "Processed value not in any map!"; Failed = true; } else if (Mapped & (Mapped - 1)) { @@ -257,13 +252,9 @@ bool DAGTypeLegalizer::run() { Changed = true; goto NodeDone; case TargetLowering::TypeSoftenFloat: - Changed = SoftenFloatResult(N, i); - if (Changed) - goto NodeDone; - // If not changed, the result type should be legally in register. 
- assert(isLegalInHWReg(ResultVT) && - "Unchanged SoftenFloatResult should be legal in register!"); - goto ScanOperands; + SoftenFloatResult(N, i); + Changed = true; + goto NodeDone; case TargetLowering::TypeExpandFloat: ExpandFloatResult(N, i); Changed = true; @@ -439,15 +430,9 @@ NodeDone: bool Failed = false; // Check that all result types are legal. - // A value type is illegal if its TypeAction is not TypeLegal, - // and TLI.RegClassForVT does not have a register class for this type. - // For example, the x86_64 target has f128 that is not TypeLegal, - // to have softened operators, but it also has FR128 register class to - // pass and return f128 values. Hence a legalized node can have f128 type. if (!IgnoreNodeResults(&Node)) for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i) - if (!isTypeLegal(Node.getValueType(i)) && - !TLI.isTypeLegal(Node.getValueType(i))) { + if (!isTypeLegal(Node.getValueType(i))) { dbgs() << "Result type " << i << " illegal: "; Node.dump(&DAG); Failed = true; @@ -456,8 +441,7 @@ NodeDone: // Check that all operand types are legal. for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i) if (!IgnoreNodeResults(Node.getOperand(i).getNode()) && - !isTypeLegal(Node.getOperand(i).getValueType()) && - !TLI.isTypeLegal(Node.getOperand(i).getValueType())) { + !isTypeLegal(Node.getOperand(i).getValueType())) { dbgs() << "Operand type " << i << " illegal: "; Node.getOperand(i).dump(&DAG); Failed = true; @@ -713,23 +697,13 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { - // f128 of x86_64 could be kept in SSE registers, - // but sometimes softened to i128. - assert((Result.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) || - Op.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && "Invalid type for softened float"); AnalyzeNewValue(Result); auto &OpIdEntry = SoftenedFloats[getTableId(Op)]; - // Allow repeated calls to save f128 type nodes - // or any node with type that transforms to itself. - // Many operations on these types are not softened. - assert(((OpIdEntry == 0) || - Op.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && - "Node is already converted to integer!"); + assert((OpIdEntry == 0) && "Node is already converted to integer!"); OpIdEntry = getTableId(Result); } @@ -1003,25 +977,27 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { /// Convert the node into a libcall with the same prototype. 
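// [Editor's note, not part of this commit] The mechanical change repeated
// throughout this diff replaces makeLibCall's old bool isSigned argument
// with a TargetLowering::MakeLibCallOptions struct; a call such as
//   TLI.makeLibCall(DAG, LC, VT, Ops, /*isSigned=*/true, dl)
// becomes
//   TargetLowering::MakeLibCallOptions CallOptions;
//   CallOptions.setSExt(true);
//   TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl);
// LibCallify below forwards its isSigned flag the same way.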
SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned) { + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(isSigned); unsigned NumOps = N->getNumOperands(); SDLoc dl(N); if (NumOps == 0) { - return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, CallOptions, dl).first; } else if (NumOps == 1) { SDValue Op = N->getOperand(0); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, CallOptions, dl).first; } else if (NumOps == 2) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, dl).first; } SmallVector<SDValue, 8> Ops(NumOps); for (unsigned i = 0; i < NumOps; ++i) Ops[i] = N->getOperand(i); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first; + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, dl).first; } /// Expand a node into a call to a libcall. Similar to ExpandLibCall except that diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 1d489b1b3a33..4afbae69128a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -73,15 +73,6 @@ private: return VT.isSimple() && TLI.isTypeLegal(VT); } - /// Return true if this type can be passed in registers. - /// For example, x86_64's f128, should to be legally in registers - /// and only some operations converted to library calls or integer - /// bitwise operations. - bool isLegalInHWReg(EVT VT) const { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - return VT == NVT && isSimpleLegalType(VT); - } - EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } @@ -306,6 +297,7 @@ private: SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N); SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N); SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntRes_SPLAT_VECTOR(SDNode *N); SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N); SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N); @@ -363,6 +355,7 @@ private: SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N); SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); @@ -472,14 +465,11 @@ private: // Float to Integer Conversion Support: LegalizeFloatTypes.cpp //===--------------------------------------------------------------------===// - /// Given an operand Op of Float type, returns the integer if the Op is not - /// supported in target HW and converted to the integer. - /// The integer contains exactly the same bits as Op - only the type changed. - /// For example, if Op is an f32 which was softened to an i32, then this - /// method returns an i32, the bits of which coincide with those of Op. - /// If the Op can be efficiently supported in target HW or the operand must - /// stay in a register, the Op is not converted to an integer. - /// In that case, the given op is returned. 
+ /// GetSoftenedFloat - Given a processed operand Op which was converted to an + /// integer of the same size, this returns the integer. The integer contains + /// exactly the same bits as Op - only the type changed. For example, if Op + /// is an f32 which was softened to an i32, then this method returns an i32, + /// the bits of which coincide with those of Op SDValue GetSoftenedFloat(SDValue Op) { TableId Id = getTableId(Op); auto Iter = SoftenedFloats.find(Id); @@ -494,19 +484,19 @@ private: } void SetSoftenedFloat(SDValue Op, SDValue Result); - // Convert Float Results to Integer for Non-HW-supported Operations. - bool SoftenFloatResult(SDNode *N, unsigned ResNo); + // Convert Float Results to Integer. + void SoftenFloatResult(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_BITCAST(SDNode *N); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); - SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_ConstantFP(SDNode *N); SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_FABS(SDNode *N); SDValue SoftenFloatRes_FMINNUM(SDNode *N); SDValue SoftenFloatRes_FMAXNUM(SDNode *N); SDValue SoftenFloatRes_FADD(SDNode *N); SDValue SoftenFloatRes_FCEIL(SDNode *N); - SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); SDValue SoftenFloatRes_FCOS(SDNode *N); SDValue SoftenFloatRes_FDIV(SDNode *N); SDValue SoftenFloatRes_FEXP(SDNode *N); @@ -518,7 +508,7 @@ private: SDValue SoftenFloatRes_FMA(SDNode *N); SDValue SoftenFloatRes_FMUL(SDNode *N); SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); - SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_FNEG(SDNode *N); SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N); SDValue SoftenFloatRes_FP_ROUND(SDNode *N); @@ -531,27 +521,17 @@ private: SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); SDValue SoftenFloatRes_FTRUNC(SDNode *N); - SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_LOAD(SDNode *N); + SDValue SoftenFloatRes_SELECT(SDNode *N); + SDValue SoftenFloatRes_SELECT_CC(SDNode *N); SDValue SoftenFloatRes_UNDEF(SDNode *N); SDValue SoftenFloatRes_VAARG(SDNode *N); SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); - // Return true if we can skip softening the given operand or SDNode because - // either it was soften before by SoftenFloatResult and references to the - // operand were replaced by ReplaceValueWith or it's value type is legal in HW - // registers and the operand can be left unchanged. - bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo); - - // Convert Float Operand to Integer for Non-HW-supported Operations. + // Convert Float Operand to Integer. 
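// [Editor's illustration, not part of this commit] "Softening" only retypes
// a value: an f32 holding 1.0f becomes the i32 constant 0x3F800000 (its
// IEEE-754 bit pattern), an f64 becomes the i64 with the same 64 bits, and
// operations on the value are then lowered to integer ops and libcalls.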
bool SoftenFloatOperand(SDNode *N, unsigned OpNo); SDValue SoftenFloatOp_BITCAST(SDNode *N); - SDValue SoftenFloatOp_COPY_TO_REG(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); - SDValue SoftenFloatOp_FABS(SDNode *N); - SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N); - SDValue SoftenFloatOp_FNEG(SDNode *N); SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N); @@ -559,7 +539,6 @@ private: SDValue SoftenFloatOp_LLROUND(SDNode *N); SDValue SoftenFloatOp_LRINT(SDNode *N); SDValue SoftenFloatOp_LLRINT(SDNode *N); - SDValue SoftenFloatOp_SELECT(SDNode *N); SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); @@ -715,6 +694,7 @@ private: bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_BITCAST(SDNode *N); SDValue ScalarizeVecOp_UnaryOp(SDNode *N); + SDValue ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N); SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecOp_VSELECT(SDNode *N); @@ -830,6 +810,7 @@ private: SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); SDValue WidenVecRes_BinaryCanTrap(SDNode *N); + SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N); SDValue WidenVecRes_StrictFP(SDNode *N); SDValue WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo); SDValue WidenVecRes_Convert(SDNode *N); @@ -933,6 +914,8 @@ private: void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVSETCC(const SDNode *N); + //===--------------------------------------------------------------------===// // Generic Expansion: LegalizeTypesGeneric.cpp //===--------------------------------------------------------------------===// diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 943f63f46c47..5562f400b6e1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -52,17 +52,11 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypePromoteFloat: llvm_unreachable("Bitcast of a promotion-needing float should never need" "expansion"); - case TargetLowering::TypeSoftenFloat: { - // Expand the floating point operand only if it was converted to integers. - // Otherwise, it is a legal type like f128 that can be saved in a register. - auto SoftenedOp = GetSoftenedFloat(InOp); - if (isLegalInHWReg(SoftenedOp.getValueType())) - break; - SplitInteger(SoftenedOp, Lo, Hi); + case TargetLowering::TypeSoftenFloat: + SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; - } case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: { auto &DL = DAG.getDataLayout(); @@ -509,23 +503,6 @@ void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo, GetSplitOp(Op, Lo, Hi); } -static std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, - SelectionDAG &DAG) { - SDLoc DL(N); - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - - // Split the inputs. 
- SDValue Lo, Hi, LL, LH, RL, RH; - std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); - std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); - - Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); - Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); - - return std::make_pair(Lo, Hi); -} - void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LL, LH, RL, RH, CL, CH; SDLoc dl(N); @@ -537,16 +514,25 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) { if (Cond.getValueType().isVector()) { if (SDValue Res = WidenVSELECTAndMask(N)) std::tie(CL, CH) = DAG.SplitVector(Res->getOperand(0), dl); - // It seems to improve code to generate two narrow SETCCs as opposed to - // splitting a wide result vector. - else if (Cond.getOpcode() == ISD::SETCC) - std::tie(CL, CH) = SplitVSETCC(Cond.getNode(), DAG); // Check if there are already splitted versions of the vector available and // use those instead of splitting the mask operand again. else if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector) GetSplitVector(Cond, CL, CH); - else + // It seems to improve code to generate two narrow SETCCs as opposed to + // splitting a wide result vector. + else if (Cond.getOpcode() == ISD::SETCC) { + // If the condition is a vXi1 vector, and the LHS of the setcc is a legal + // type and the setcc result type is the same vXi1, then leave the setcc + // alone. + EVT CondLHSVT = Cond.getOperand(0).getValueType(); + if (Cond.getValueType().getVectorElementType() == MVT::i1 && + isTypeLegal(CondLHSVT) && + getSetCCResultType(CondLHSVT) == Cond.getValueType()) + std::tie(CL, CH) = DAG.SplitVector(Cond, dl); + else + SplitVecRes_SETCC(Cond.getNode(), CL, CH); + } else std::tie(CL, CH) = DAG.SplitVector(Cond, dl); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 10b8b705869e..15c3a0b6cfad 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -38,6 +38,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" @@ -333,14 +334,27 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_ROUND: case ISD::STRICT_FP_EXTEND: - // These pseudo-ops get legalized as if they were their non-strict - // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT - // is also legal, but if ISD::FSQRT requires expansion then so does - // ISD::STRICT_FSQRT. - Action = TLI.getStrictFPOperationAction(Node->getOpcode(), - Node->getValueType(0)); + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + // If we're asked to expand a strict vector floating-point operation, + // by default we're going to simply unroll it. That is usually the + // best approach, except in the case where the resulting strict (scalar) + // operations would themselves use the fallback mutation to non-strict. + // In that specific case, just do the fallback on the vector op. 
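// [Editor's illustration, not part of this commit] Concrete case, assuming a
// target where FSQRT is Legal for both v4f32 and f32 but no strict form is
// handled specially: expanding STRICT_FSQRT v4f32 would unroll it into four
// scalar STRICT_FSQRT nodes that would each fall back to plain FSQRT anyway,
// so the code below instead marks the vector node Legal and lets the single
// vector op take that fallback.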
+ if (Action == TargetLowering::Expand && + TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) { + EVT EltVT = Node->getValueType(0).getVectorElementType(); + if (TLI.getOperationAction(Node->getOpcode(), EltVT) + == TargetLowering::Expand && + TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT) + == TargetLowering::Legal) + Action = TargetLowering::Legal; + } break; case ISD::ADD: case ISD::SUB: @@ -439,16 +453,13 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { break; case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: { + case ISD::UMULFIX: + case ISD::UMULFIXSAT: { unsigned Scale = Node->getConstantOperandVal(2); Action = TLI.getFixedPointOperationAction(Node->getOpcode(), Node->getValueType(0), Scale); break; } - case ISD::FP_ROUND_INREG: - Action = TLI.getOperationAction(Node->getOpcode(), - cast<VTSDNode>(Node->getOperand(1))->getVT()); - break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::VECREDUCE_ADD: @@ -820,6 +831,13 @@ SDValue VectorLegalizer::Expand(SDValue Op) { case ISD::SMULFIX: case ISD::UMULFIX: return ExpandFixedPointMul(Op); + case ISD::SMULFIXSAT: + case ISD::UMULFIXSAT: + // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly + // why. Maybe it results in worse codegen compared to the unroll for some + // targets? This should probably be investigated. And if we still prefer to + // unroll an explanation could be helpful. + return DAG.UnrollVectorOp(Op.getNode()); case ISD::STRICT_FADD: case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: @@ -844,6 +862,8 @@ SDValue VectorLegalizer::Expand(SDValue Op) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: return ExpandStrictFPOp(Op); case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: @@ -1168,9 +1188,13 @@ SDValue VectorLegalizer::ExpandABS(SDValue Op) { SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) { // Attempt to expand using TargetLowering. - SDValue Result; - if (TLI.expandFP_TO_UINT(Op.getNode(), Result, DAG)) + SDValue Result, Chain; + if (TLI.expandFP_TO_UINT(Op.getNode(), Result, Chain, DAG)) { + if (Op.getNode()->isStrictFPOpcode()) + // Relink the chain + DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Chain); return Result; + } // Otherwise go ahead and unroll. 
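// [Editor's sketch, not part of this commit] One classic expansion that
// TLI.expandFP_TO_UINT above can produce, written as scalar C++ for
// f64 -> u64 (IEEE-754 doubles assumed, input non-negative and in range):
//   uint64_t FPToUI(double X) {
//     if (X < 0x1p63)                         // fits in a signed convert
//       return (uint64_t)(int64_t)X;
//     return (uint64_t)(int64_t)(X - 0x1p63)  // rebase into signed range,
//            ^ (1ull << 63);                  // then restore the top bit
//   }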
return DAG.UnrollVectorOp(Op.getNode()); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 7e4d52617977..3763e886cef2 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -52,7 +52,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::STRICT_FP_ROUND: R = ScalarizeVecRes_STRICT_FP_ROUND(N); break; case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; - case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break; case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; @@ -171,6 +170,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_EXTEND: R = ScalarizeVecRes_StrictFPOp(N); break; @@ -185,6 +186,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: + case ISD::UMULFIXSAT: R = ScalarizeVecRes_MULFIX(N); break; } @@ -604,6 +606,10 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::UINT_TO_FP: Res = ScalarizeVecOp_UnaryOp(N); break; + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + Res = ScalarizeVecOp_UnaryOp_StrictFP(N); + break; case ISD::CONCAT_VECTORS: Res = ScalarizeVecOp_CONCAT_VECTORS(N); break; @@ -679,6 +685,23 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op); } +/// If the input is a vector that needs to be scalarized, it must be <1 x ty>. +/// Do the strict FP operation on the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N) { + assert(N->getValueType(0).getVectorNumElements() == 1 && + "Unexpected vector type!"); + SDValue Elt = GetScalarizedVector(N->getOperand(1)); + SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), + { N->getValueType(0).getScalarType(), MVT::Other }, + { N->getOperand(0), Elt }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + // Revectorize the result so the types line up with what the uses of this + // expression expect. + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); +} + /// The vectors to concatenate have length one - use a BUILD_VECTOR instead. 
SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { SmallVector<SDValue, 8> Ops(N->getNumOperands()); @@ -828,7 +851,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; - case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; @@ -883,7 +905,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_ROUND: case ISD::STRICT_FP_ROUND: case ISD::FP_TO_SINT: + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FRINT: case ISD::FROUND: case ISD::FSIN: @@ -977,6 +1001,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: + case ISD::UMULFIXSAT: SplitVecRes_MULFIX(N, Lo, Hi); break; } @@ -1560,10 +1585,14 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, // Split Mask operand SDValue MaskLo, MaskHi; - if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + if (Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + } EVT MemoryVT = MLD->getMemoryVT(); EVT LoMemVT, HiMemVT; @@ -1622,10 +1651,14 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, // Split Mask operand SDValue MaskLo, MaskHi; - if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + if (Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + } EVT MemoryVT = MGT->getMemoryVT(); EVT LoMemVT, HiMemVT; @@ -1651,11 +1684,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, - MMO); + MMO, MGT->getIndexType()); SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale}; Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, - MMO); + MMO, MGT->getIndexType()); // Build a factor node to remember that this load is independent of the // other one. 
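// [Editor's note, not part of this commit] Splitting a masked gather halves
// every vector operand in lock step: a v4i32 gather with mask m0..m3 and
// indices i0..i3 becomes two v2i32 gathers over (m0,m1; i0,i1) and
// (m2,m3; i2,i3). The getIndexType() argument threaded through this diff
// preserves whether those indices are signed or unsigned, scaled or not.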
@@ -1979,6 +2012,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::CTTZ: case ISD::CTLZ: case ISD::CTPOP: @@ -2293,7 +2328,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, - OpsLo, MMO); + OpsLo, MMO, MGT->getIndexType()); MMO = DAG.getMachineFunction(). getMachineMemOperand(MGT->getPointerInfo(), @@ -2303,7 +2338,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale}; SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, - OpsHi, MMO); + OpsHi, MMO, MGT->getIndexType()); // Build a factor node to remember that this load is independent of the // other one. @@ -2340,12 +2375,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, else std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + // Split Mask operand SDValue MaskLo, MaskHi; - if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - // Split Mask operand - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + } SDValue Lo, Hi; MachineMemOperand *MMO = DAG.getMachineFunction(). @@ -2397,12 +2436,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, else std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + // Split Mask operand SDValue MaskLo, MaskHi; - if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - // Split Mask operand - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + } SDValue IndexHi, IndexLo; if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) @@ -2418,7 +2461,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), - DL, OpsLo, MMO); + DL, OpsLo, MMO, N->getIndexType()); MMO = DAG.getMachineFunction(). getMachineMemOperand(N->getPointerInfo(), @@ -2430,7 +2473,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, // after another. 
SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), - DL, OpsHi, MMO); + DL, OpsHi, MMO, N->getIndexType()); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -2596,7 +2639,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2)); HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2)); SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes); - return PromoteTargetBoolean(Con, N->getValueType(0)); + + EVT OpVT = N->getOperand(0).getValueType(); + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); + return DAG.getNode(ExtendCode, DL, N->getValueType(0), Con); } @@ -2663,7 +2710,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; - case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; @@ -2719,6 +2765,15 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_BinaryCanTrap(N); break; + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: + case ISD::UMULFIXSAT: + // These are binary operations, but with an extra operand that shouldn't + // be widened (the scale). + Res = WidenVecRes_BinaryWithExtraScalarOp(N); + break; + case ISD::STRICT_FADD: case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: @@ -2790,6 +2845,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::STRICT_FP_EXTEND: case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: Res = WidenVecRes_Convert_StrictFP(N); break; @@ -2866,6 +2923,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags()); } +SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) { + // Binary op widening, but with an extra operand that shouldn't be widened. + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + SDValue InOp3 = N->getOperand(2); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3, + N->getFlags()); +} + // Given a vector of operations that have been broken up to widen, see // if we can collect them together into the next widest legal VT. This // implementation is trap-safe. @@ -3716,7 +3784,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { Scale }; SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), N->getMemoryVT(), dl, Ops, - N->getMemOperand()); + N->getMemOperand(), N->getIndexType()); // Legalize the chain result - switch anything that used the old chain to // use the new one. 
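The new WidenVecRes_BinaryWithExtraScalarOp above widens the two vector operands of the fixed-point multiplies but deliberately passes operand 2, the scale, through unchanged: the scale is a scalar property of the whole operation, shared by every lane. A sketch of the per-lane SMULFIX arithmetic this preserves (illustrative; rounding and saturation details are elided):

#include <cstddef>
#include <cstdint>
#include <vector>

// ISD::SMULFIX per lane (no saturation): multiply two Q(Scale)
// fixed-point numbers in a double-width temporary, then renormalize by
// shifting right by Scale.
static int32_t smulfix(int32_t A, int32_t B, unsigned Scale) {
  return static_cast<int32_t>((static_cast<int64_t>(A) * B) >> Scale);
}

static std::vector<int32_t> smulfixVec(const std::vector<int32_t> &A,
                                       const std::vector<int32_t> &B,
                                       unsigned Scale) {
  std::vector<int32_t> R(A.size());
  for (size_t I = 0; I < A.size(); ++I)
    R[I] = smulfix(A[I], B[I], Scale); // widened lanes reuse the same Scale
  return R;
}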
@@ -4094,7 +4162,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::FP_EXTEND: case ISD::STRICT_FP_EXTEND: case ISD::FP_TO_SINT: + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_UINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::TRUNCATE: @@ -4434,7 +4504,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MGATHER(SDNode *N, unsigned OpNo) { SDValue Ops[] = {MG->getChain(), DataOp, Mask, MG->getBasePtr(), Index, Scale}; SDValue Res = DAG.getMaskedGather(MG->getVTList(), MG->getMemoryVT(), dl, Ops, - MG->getMemOperand()); + MG->getMemOperand(), MG->getIndexType()); ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); ReplaceValueWith(SDValue(N, 0), Res.getValue(0)); return SDValue(); @@ -4472,7 +4542,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), SDLoc(N), Ops, - MSC->getMemOperand()); + MSC->getMemOperand(), MSC->getIndexType()); } SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { @@ -4504,7 +4574,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - return PromoteTargetBoolean(CC, VT); + EVT OpVT = N->getOperand(0).getValueType(); + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); + return DAG.getNode(ExtendCode, dl, VT, CC); } SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) { @@ -4706,7 +4779,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; - unsigned LdAlign = LD->isVolatile() ? 0 : Align; // Allow wider loads. + unsigned LdAlign = (!LD->isSimple()) ? 0 : Align; // Allow wider loads. // Find the vector type that can load from. EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 2cb850fa1a3d..7ee44c808fcb 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -498,7 +498,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, // Check for def of register or earlyclobber register. for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } else diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 34b4c8502353..ff806bdb822c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1188,6 +1188,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (!Pred.isArtificial()) AddPredQueued(NewSU, Pred); + // Make sure the clone comes after the original. (InstrEmitter assumes + // this ordering.) + AddPredQueued(NewSU, SDep(SU, SDep::Artificial)); + // Only copy scheduled successors. Cut them from old node's successor // list and move them over. SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; @@ -1374,7 +1378,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { // Check for def of register or earlyclobber register. 
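Both WidenVecOp_SETCC in the hunk below and SplitVecOp_VSETCC earlier now derive the final extension from the target's boolean contents for the operand type instead of calling PromoteTargetBoolean. The mapping that getExtendForContent applies is small enough to restate as a sketch (simplified local enums, not the LLVM declarations):

// How a target materializes "true" in a wide register decides how a
// narrow boolean result is legally widened.
enum class BooleanContent {
  Undefined,        // only bit 0 is meaningful -> any-extend is fine
  ZeroOrOne,        // true == 1                -> zero-extend
  ZeroOrNegativeOne // true == -1 (all ones)    -> sign-extend
};

enum class ExtendKind { AnyExtend, ZeroExtend, SignExtend };

static ExtendKind getExtendForContent(BooleanContent BC) {
  switch (BC) {
  case BooleanContent::Undefined:         return ExtendKind::AnyExtend;
  case BooleanContent::ZeroOrOne:         return ExtendKind::ZeroExtend;
  case BooleanContent::ZeroOrNegativeOne: return ExtendKind::SignExtend;
  }
  return ExtendKind::AnyExtend; // unreachable
}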
for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); } } else @@ -2358,7 +2362,7 @@ static bool hasOnlyLiveInOpers(const SUnit *SU) { PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { unsigned Reg = cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { RetVal = true; continue; } @@ -2379,7 +2383,7 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) { if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { unsigned Reg = cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { RetVal = true; continue; } @@ -2948,8 +2952,8 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // like other nodes from the perspective of scheduling heuristics. if (SDNode *N = SU.getNode()) if (N->getOpcode() == ISD::CopyToReg && - TargetRegisterInfo::isVirtualRegister - (cast<RegisterSDNode>(N->getOperand(1))->getReg())) + Register::isVirtualRegister( + cast<RegisterSDNode>(N->getOperand(1))->getReg())) continue; SDNode *PredFrameSetup = nullptr; @@ -2995,8 +2999,8 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // like other nodes from the perspective of scheduling heuristics. if (SDNode *N = SU.getNode()) if (N->getOpcode() == ISD::CopyFromReg && - TargetRegisterInfo::isVirtualRegister - (cast<RegisterSDNode>(N->getOperand(1))->getReg())) + Register::isVirtualRegister( + cast<RegisterSDNode>(N->getOperand(1))->getReg())) continue; // Perform checks on the successors of PredSU. diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 568c6191e512..d4c1fb36475e 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -115,7 +115,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, return; unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return; unsigned ResNo = User->getOperand(2).getResNo(); @@ -528,7 +528,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// glued together nodes with a single SUnit. -void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { +void ScheduleDAGSDNodes::BuildSchedGraph(AAResults *AA) { // Cluster certain nodes which should be scheduled together. ClusterNodes(); // Populate the SUnits array. @@ -656,7 +656,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg && !BB->succ_empty()) { unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) // This copy is a liveout value. It is likely coalesced, so reduce the // latency so not to penalize the def. // FIXME: need target specific adjustment here? @@ -808,7 +808,7 @@ EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, } else { // Copy from physical register. 
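The TargetRegisterInfo::is{Virtual,Physical}Register to Register::is* migration running through these scheduler files is mechanical because the predicates only inspect how the register number space is partitioned. A simplified model of that encoding (the real llvm::Register also reserves a range for stack slots, which is glossed over here):

#include <cstdint>

// Simplified model of llvm::Register's number space: 0 is "no register",
// small positive numbers are physical registers, and numbers with the
// top bit set are virtual registers.
struct Register {
  static constexpr uint32_t VirtualBit = 1u << 31;

  static bool isVirtualRegister(uint32_t R) { return (R & VirtualBit) != 0; }
  static bool isPhysicalRegister(uint32_t R) {
    return R != 0 && (R & VirtualBit) == 0;
  }
};

// Virtual registers are handed out as a tagged, ever-growing index.
static uint32_t createVirtualRegister(uint32_t &NextVirtReg) {
  return Register::VirtualBit | NextVirtReg++;
}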
assert(I->getReg() && "Unknown physical register!"); - unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); + Register VRBase = MRI.createVirtualRegister(SU->CopyDstRC); bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); @@ -909,6 +909,12 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Remember the source order of the inserted instruction. if (HasDbg) ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn); + + if (MDNode *MD = DAG->getHeapAllocSite(N)) { + if (NewInsn && NewInsn->isCall()) + MF.addCodeViewHeapAllocSite(NewInsn, MD); + } + GluedNodes.pop_back(); } auto NewInsn = @@ -917,6 +923,10 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { if (HasDbg) ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn); + if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) { + if (NewInsn && NewInsn->isCall()) + MF.addCodeViewHeapAllocSite(NewInsn, MD); + } } // Insert all the dbg_values which have not already been inserted in source diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 5163b4fa4fd3..183ce4b0652d 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -26,6 +26,7 @@ namespace llvm { +class AAResults; class InstrItineraryData; /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. @@ -93,7 +94,7 @@ class InstrItineraryData; /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. - void BuildSchedGraph(AliasAnalysis *AA); + void BuildSchedGraph(AAResults *AA); /// InitNumRegDefsLeft - Determine the # of regs defined by this node. /// diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index ab06b55b49fd..e7bac73678a7 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -63,14 +63,13 @@ private: /// HazardRec - The hazard recognizer to use. ScheduleHazardRecognizer *HazardRec; - /// AA - AliasAnalysis for making memory reference queries. - AliasAnalysis *AA; + /// AA - AAResults for making memory reference queries. 
+ AAResults *AA; public: - ScheduleDAGVLIW(MachineFunction &mf, - AliasAnalysis *aa, + ScheduleDAGVLIW(MachineFunction &mf, AAResults *aa, SchedulingPriorityQueue *availqueue) - : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { + : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { const TargetSubtargetInfo &STI = mf.getSubtarget(); HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 5852e693fa9f..52a71b91d93f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -859,9 +859,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { break; case ISD::TargetExternalSymbol: { ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N); - Erased = TargetExternalSymbols.erase( - std::pair<std::string,unsigned char>(ESN->getSymbol(), - ESN->getTargetFlags())); + Erased = TargetExternalSymbols.erase(std::pair<std::string, unsigned>( + ESN->getSymbol(), ESN->getTargetFlags())); break; } case ISD::MCSymbol: { @@ -1084,6 +1083,7 @@ void SelectionDAG::clear() { ExternalSymbols.clear(); TargetExternalSymbols.clear(); MCSymbols.clear(); + SDCallSiteDbgInfo.clear(); std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), static_cast<CondCodeSDNode*>(nullptr)); std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), @@ -1353,7 +1353,7 @@ SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT, SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t Offset, bool isTargetGA, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTargetGA) && "Cannot set target flags on target-independent globals"); @@ -1400,7 +1400,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { } SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent jump tables"); unsigned Opc = isTarget ? 
ISD::TargetJumpTable : ISD::JumpTable; @@ -1421,7 +1421,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, unsigned Alignment, int Offset, bool isTarget, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) @@ -1449,7 +1449,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, unsigned Alignment, int Offset, bool isTarget, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) @@ -1473,7 +1473,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, } SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, - unsigned char TargetFlags) { + unsigned TargetFlags) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None); ID.AddInteger(Index); @@ -1535,10 +1535,9 @@ SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) { } SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, - unsigned char TargetFlags) { + unsigned TargetFlags) { SDNode *&N = - TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym, - TargetFlags)]; + TargetExternalSymbols[std::pair<std::string, unsigned>(Sym, TargetFlags)]; if (N) return SDValue(N, 0); N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, VT); InsertNode(N); @@ -1802,9 +1801,8 @@ SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl, } SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, - int64_t Offset, - bool isTarget, - unsigned char TargetFlags) { + int64_t Offset, bool isTarget, + unsigned TargetFlags) { unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress; FoldingSetNodeID ID; @@ -1900,20 +1898,19 @@ SDValue SelectionDAG::expandVAArg(SDNode *Node) { EVT VT = Node->getValueType(0); SDValue Tmp1 = Node->getOperand(0); SDValue Tmp2 = Node->getOperand(1); - unsigned Align = Node->getConstantOperandVal(3); + const MaybeAlign MA(Node->getConstantOperandVal(3)); SDValue VAListLoad = getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, Tmp2, MachinePointerInfo(V)); SDValue VAList = VAListLoad; - if (Align > TLI.getMinStackArgumentAlignment()) { - assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); - + if (MA && *MA > TLI.getMinStackArgumentAlignment()) { VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, - getConstant(Align - 1, dl, VAList.getValueType())); + getConstant(MA->value() - 1, dl, VAList.getValueType())); - VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList, - getConstant(-(int64_t)Align, dl, VAList.getValueType())); + VAList = + getNode(ISD::AND, dl, VAList.getValueType(), VAList, + getConstant(-(int64_t)MA->value(), dl, VAList.getValueType())); } // Increment the pointer, VAList, to the next vaarg @@ -2154,12 +2151,9 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, } case ISD::OR: case ISD::XOR: - // If the LHS or RHS don't contribute bits to the or, drop them. 
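The MaybeAlign rewrite of expandVAArg above keeps the classic round-up sequence: add Align-1, then AND with -Align (equivalently ~(Align-1)). For example, with 8-byte alignment a va_list pointer of 13 becomes (13+7) & ~7 = 16. As a standalone check:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Round Ptr up to a power-of-two alignment, exactly as the expanded
// va_arg sequence does; the AND clears whatever low bits the ADD set.
static uint64_t alignUp(uint64_t Ptr, uint64_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "power of two");
  return (Ptr + Align - 1) & ~(Align - 1);
}

int main() {
  std::printf("%llu\n", (unsigned long long)alignUp(13, 8)); // prints 16
  std::printf("%llu\n", (unsigned long long)alignUp(16, 8)); // prints 16
}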
- if (MaskedValueIsZero(V.getOperand(0), DemandedBits)) - return V.getOperand(1); - if (MaskedValueIsZero(V.getOperand(1), DemandedBits)) - return V.getOperand(0); - break; + case ISD::SIGN_EXTEND_INREG: + return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts, + *this, 0); case ISD::SRL: // Only look at single-use SRLs. if (!V.getNode()->hasOneUse()) @@ -2203,15 +2197,6 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc); break; } - case ISD::SIGN_EXTEND_INREG: - EVT ExVT = cast<VTSDNode>(V.getOperand(1))->getVT(); - unsigned ExVTBits = ExVT.getScalarSizeInBits(); - - // If none of the extended bits are demanded, eliminate the sextinreg. - if (DemandedBits.getActiveBits() <= ExVTBits) - return V.getOperand(0); - - break; } return SDValue(); } @@ -2395,15 +2380,39 @@ SDValue SelectionDAG::getSplatValue(SDValue V) { /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that /// is less than the element bit-width of the shift node, return it. static const APInt *getValidShiftAmountConstant(SDValue V) { + unsigned BitWidth = V.getScalarValueSizeInBits(); if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) { // Shifting more than the bitwidth is not valid. const APInt &ShAmt = SA->getAPIntValue(); - if (ShAmt.ult(V.getScalarValueSizeInBits())) + if (ShAmt.ult(BitWidth)) return &ShAmt; } return nullptr; } +/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less +/// than the element bit-width of the shift node, return the minimum value. +static const APInt *getValidMinimumShiftAmountConstant(SDValue V) { + unsigned BitWidth = V.getScalarValueSizeInBits(); + auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1)); + if (!BV) + return nullptr; + const APInt *MinShAmt = nullptr; + for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i)); + if (!SA) + return nullptr; + // Shifting more than the bitwidth is not valid. + const APInt &ShAmt = SA->getAPIntValue(); + if (ShAmt.uge(BitWidth)) + return nullptr; + if (MinShAmt && MinShAmt->ule(ShAmt)) + continue; + MinShAmt = &ShAmt; + } + return MinShAmt; +} + /// Determine which bits of Op are known to be either zero or one and return /// them in Known. For vectors, the known bits are those that are shared by /// every vector element. @@ -2437,7 +2446,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, return Known; } - if (Depth == 6) + if (Depth >= MaxRecursionDepth) return Known; // Limit search depth. KnownBits Known2; @@ -2582,14 +2591,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, SDValue Src = Op.getOperand(0); ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts); if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { // Offset the demanded elts by the subvector index. 
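The new getValidMinimumShiftAmountConstant feeds the simpler SRL rule used shortly after it: if every lane of a vector shift moves right by at least k bits, then every lane of the result has at least k known-zero high bits, even when the per-lane amounts differ. A small demonstration:

#include <algorithm>
#include <cstdio>
#include <cstdint>
#include <vector>

int main() {
  // Per-lane logical right shifts by constant, possibly different, amounts.
  std::vector<uint32_t> Lanes = {0xffffffffu, 0x12345678u, 0x80000000u};
  std::vector<unsigned> ShAmts = {5, 7, 9}; // all < 32, so all "valid"

  unsigned MinShAmt = *std::min_element(ShAmts.begin(), ShAmts.end());
  uint32_t HighMask = ~0u << (32 - MinShAmt); // top MinShAmt bits
  for (size_t I = 0; I < Lanes.size(); ++I) {
    uint32_t R = Lanes[I] >> ShAmts[I];
    std::printf("lane %zu: %#010x, high bits zero: %d\n", I, (unsigned)R,
                (R & HighMask) == 0);
  }
}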
uint64_t Idx = SubIdx->getZExtValue(); - APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - Known = computeKnownBits(Src, DemandedSrc, Depth + 1); - } else { - Known = computeKnownBits(Src, Depth + 1); + DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); } + Known = computeKnownBits(Src, DemandedSrc, Depth + 1); break; } case ISD::SCALAR_TO_VECTOR: { @@ -2800,25 +2808,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.One.lshrInPlace(Shift); // High bits are known zero. Known.Zero.setHighBits(Shift); - } else if (auto *BV = dyn_cast<BuildVectorSDNode>(Op.getOperand(1))) { - // If the shift amount is a vector of constants see if we can bound - // the number of upper zero bits. - unsigned ShiftAmountMin = BitWidth; - for (unsigned i = 0; i != BV->getNumOperands(); ++i) { - if (auto *C = dyn_cast<ConstantSDNode>(BV->getOperand(i))) { - const APInt &ShAmt = C->getAPIntValue(); - if (ShAmt.ult(BitWidth)) { - ShiftAmountMin = std::min<unsigned>(ShiftAmountMin, - ShAmt.getZExtValue()); - continue; - } - } - // Don't know anything. - ShiftAmountMin = 0; - break; - } - - Known.Zero.setHighBits(ShiftAmountMin); + } else if (const APInt *ShMinAmt = getValidMinimumShiftAmountConstant(Op)) { + // Minimum shift high bits are known zero. + Known.Zero.setHighBits(ShMinAmt->getZExtValue()); } break; case ISD::SRA: @@ -3105,12 +3097,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, // If the first operand is non-negative or has all low bits zero, then // the upper bits are all zero. - if (Known2.Zero[BitWidth-1] || ((Known2.Zero & LowBits) == LowBits)) + if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero)) Known.Zero |= ~LowBits; // If the first operand is negative and not all low bits are zero, then // the upper bits are all one. - if (Known2.One[BitWidth-1] && ((Known2.One & LowBits) != 0)) + if (Known2.isNegative() && LowBits.intersects(Known2.One)) Known.One |= ~LowBits; assert((Known.Zero & Known.One) == 0&&"Bits known to be one AND zero?"); } @@ -3427,7 +3419,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return Val.getNumSignBits(); } - if (Depth == 6) + if (Depth >= MaxRecursionDepth) return 1; // Limit search depth. if (!DemandedElts) @@ -3729,6 +3721,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); if (Tmp == 1) return 1; // Early out. return std::min(Tmp, Tmp2)-1; + case ISD::MUL: { + // The output of the Mul can be at most twice the valid bits in the inputs. + unsigned SignBitsOp0 = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + if (SignBitsOp0 == 1) + break; + unsigned SignBitsOp1 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); + if (SignBitsOp1 == 1) + break; + unsigned OutValidBits = + (VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1); + return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1; + } case ISD::TRUNCATE: { // Check if the sign bits of source go down as far as the truncated value. 
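The new ISD::MUL case just above computes sign bits through "valid bits": a value with s sign bits occupies at most VTBits - s + 1 significant bits, a product occupies at most the sum of its operands' significant bits, and whatever is left over must be sign bits. A numeric spot-check of the (conservative) bound:

#include <cstdint>
#include <cstdio>

// Redundant sign bits of a 32-bit value, counting the sign bit itself:
// 0 and -1 have 32, 1 and -2 have 31, and so on.
static unsigned numSignBits(int32_t V) {
  unsigned N = 1;
  while (N < 32 && ((V >> (31 - N)) & 1) == ((V >> 31) & 1))
    ++N;
  return N;
}

int main() {
  const unsigned VTBits = 32;
  int32_t A = 100, B = -75; // 25 sign bits each (both fit in 8 bits)
  unsigned SA = numSignBits(A), SB = numSignBits(B);
  unsigned OutValidBits = (VTBits - SA + 1) + (VTBits - SB + 1);
  unsigned Bound = OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1;
  // The bound is conservative: Bound <= actual always holds.
  std::printf("bound=%u actual=%u\n", Bound, numSignBits(A * B)); // 17, 19
}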
unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits(); @@ -3817,13 +3821,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, SDValue Src = Op.getOperand(0); ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts); if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { // Offset the demanded elts by the subvector index. uint64_t Idx = SubIdx->getZExtValue(); - APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - return ComputeNumSignBits(Src, DemandedSrc, Depth + 1); + DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); } - return ComputeNumSignBits(Src, Depth + 1); + return ComputeNumSignBits(Src, DemandedSrc, Depth + 1); } case ISD::CONCAT_VECTORS: { // Determine the minimum number of sign bits across all demanded @@ -3976,7 +3980,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs()) return true; - if (Depth == 6) + if (Depth >= MaxRecursionDepth) return false; // Limit search depth. // TODO: Handle vectors. @@ -4645,7 +4649,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return getUNDEF(VT); // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) && + if ((getTarget().Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) && OpOpcode == ISD::FSUB) return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), Operand.getOperand(0), Flags); @@ -5156,22 +5160,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (N2C && N2C->isNullValue()) return N1; break; - case ISD::FP_ROUND_INREG: { - EVT EVT = cast<VTSDNode>(N2)->getVT(); - assert(VT == N1.getValueType() && "Not an inreg round!"); - assert(VT.isFloatingPoint() && EVT.isFloatingPoint() && - "Cannot FP_ROUND_INREG integer types"); - assert(EVT.isVector() == VT.isVector() && - "FP_ROUND_INREG type should be vector iff the operand " - "type is vector!"); - assert((!EVT.isVector() || - EVT.getVectorNumElements() == VT.getVectorNumElements()) && - "Vector element counts must match in FP_ROUND_INREG"); - assert(EVT.bitsLE(VT) && "Not rounding down!"); - (void)EVT; - if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding. 
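The tightened guard on the -(X-Y) -> (Y-X) fold above (NoSignedZerosFPMath instead of the broader UnsafeFPMath) protects exactly the corner named in the comment: when X == Y the two forms produce zeros of opposite sign. Demonstration:

#include <cmath>
#include <cstdio>

int main() {
  double X = 1.0, Y = 1.0;
  double A = -(X - Y); // -(+0.0) == -0.0
  double B = (Y - X);  // +0.0
  std::printf("signbit(-(X-Y))=%d signbit(Y-X)=%d\n",
              std::signbit(A), std::signbit(B)); // prints 1 then 0
  // So the rewrite is only legal under no-signed-zeros semantics.
}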
- break; - } case ISD::FP_ROUND: assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() && @@ -5382,7 +5370,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, std::swap(N1, N2); } else { switch (Opcode) { - case ISD::FP_ROUND_INREG: case ISD::SIGN_EXTEND_INREG: case ISD::SUB: return getUNDEF(VT); // fold op(undef, arg2) -> undef @@ -5770,7 +5757,7 @@ static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - uint64_t Size, unsigned Align, + uint64_t Size, unsigned Alignment, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { @@ -5795,15 +5782,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; unsigned SrcAlign = DAG.InferPtrAlignment(Src); - if (Align > SrcAlign) - SrcAlign = Align; + if (Alignment > SrcAlign) + SrcAlign = Alignment; ConstantDataArraySlice Slice; bool CopyFromConstant = isMemSrcFromConstant(Src, Slice); bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); if (!TLI.findOptimalMemOpLowering( - MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), + MemOps, Limit, Size, (DstAlignCanChange ? 0 : Alignment), (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant, /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), @@ -5818,15 +5805,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, // realignment. const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) - while (NewAlign > Align && - DL.exceedsNaturalStackAlignment(NewAlign)) - NewAlign /= 2; + while (NewAlign > Alignment && + DL.exceedsNaturalStackAlignment(Align(NewAlign))) + NewAlign /= 2; - if (NewAlign > Align) { + if (NewAlign > Alignment) { // Give the stack frame object a larger alignment if needed. 
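For context on the Align -> Alignment rename running through getMemcpyLoadsAndStores: the routine materializes small copies as a short series of power-of-two-sized loads and stores picked by findOptimalMemOpLowering. A much-simplified model of the chunking only (the real routine also weighs alignment, vector types, and an overlapping-store trick):

#include <cstdint>
#include <cstdio>

// Greedy power-of-two chunking of an inline memcpy of Size bytes:
// largest chunks first, halving whenever the remainder is smaller.
static void planMemcpyChunks(uint64_t Size, uint64_t MaxChunk = 8) {
  uint64_t Off = 0, Chunk = MaxChunk;
  while (Size > 0) {
    while (Chunk > Size)
      Chunk /= 2;
    std::printf("  copy %llu bytes at offset %llu\n",
                (unsigned long long)Chunk, (unsigned long long)Off);
    Off += Chunk;
    Size -= Chunk;
  }
}

int main() { planMemcpyChunks(13); } // 8 + 4 + 1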
if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) MFI.setObjectAlignment(FI->getIndex(), NewAlign); - Align = NewAlign; + Alignment = NewAlign; } } @@ -5869,10 +5856,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, } Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice); if (Value.getNode()) { - Store = DAG.getStore(Chain, dl, Value, - DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), Align, - MMOFlags); + Store = DAG.getStore( + Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), + DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags); OutChains.push_back(Store); } } @@ -5900,7 +5886,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, Store = DAG.getTruncStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags); + DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags); OutStoreChains.push_back(Store); } SrcOff += VTSize; @@ -6567,7 +6553,7 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) { SDValue SelectionDAG::getMemIntrinsicNode( unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, - MachineMemOperand::Flags Flags, unsigned Size, const AAMDNodes &AAInfo) { + MachineMemOperand::Flags Flags, uint64_t Size, const AAMDNodes &AAInfo) { if (Align == 0) // Ensure that codegen never sees alignment 0 Align = getEVTAlignment(MemVT); @@ -6619,7 +6605,9 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, createOperands(N, Ops); } InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl, @@ -7022,14 +7010,15 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, - MachineMemOperand *MMO) { + MachineMemOperand *MMO, + ISD::MemIndexType IndexType) { assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); ID.AddInteger(VT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>( - dl.getIROrder(), VTs, VT, MMO)); + dl.getIROrder(), VTs, VT, MMO, IndexType)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -7038,7 +7027,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, } auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), - VTs, VT, MMO); + VTs, VT, MMO, IndexType); createOperands(N, Ops); assert(N->getPassThru().getValueType() == N->getValueType(0) && @@ -7062,14 +7051,15 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, - MachineMemOperand *MMO) { + MachineMemOperand *MMO, + ISD::MemIndexType IndexType) { assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); ID.AddInteger(VT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>( - dl.getIROrder(), VTs, VT, MMO)); + dl.getIROrder(), VTs, VT, MMO, IndexType)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = 
FindNodeOrInsertPos(ID, dl, IP)) { @@ -7077,7 +7067,7 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, return SDValue(E, 0); } auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), - VTs, VT, MMO); + VTs, VT, MMO, IndexType); createOperands(N, Ops); assert(N->getMask().getValueType().getVectorNumElements() == @@ -7766,16 +7756,22 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; break; case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; break; case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; break; + case ISD::STRICT_LRINT: NewOpc = ISD::LRINT; break; + case ISD::STRICT_LLRINT: NewOpc = ISD::LLRINT; break; case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; break; case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break; case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break; case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break; case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; break; case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; break; + case ISD::STRICT_LROUND: NewOpc = ISD::LROUND; break; + case ISD::STRICT_LLROUND: NewOpc = ISD::LLROUND; break; case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; break; case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break; case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break; case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break; + case ISD::STRICT_FP_TO_SINT: NewOpc = ISD::FP_TO_SINT; break; + case ISD::STRICT_FP_TO_UINT: NewOpc = ISD::FP_TO_UINT; break; } assert(Node->getNumValues() == 2 && "Unexpected number of results!"); @@ -7925,6 +7921,7 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL, CSEMap.InsertNode(N, IP); InsertNode(N); + NewSDValueDbgMsg(SDValue(N, 0), "Creating new machine node: ", this); return N; } @@ -8619,7 +8616,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, // TokenFactor. SDValue OldChain = SDValue(OldLoad, 1); SDValue NewChain = SDValue(NewMemOp.getNode(), 1); - if (!OldLoad->hasAnyUseOfValue(1)) + if (OldChain == NewChain || !OldLoad->hasAnyUseOfValue(1)) return NewChain; SDValue TokenFactor = @@ -8812,7 +8809,7 @@ HandleSDNode::~HandleSDNode() { GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, const GlobalValue *GA, EVT VT, - int64_t o, unsigned char TF) + int64_t o, unsigned TF) : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { TheGlobal = GA; } @@ -8986,7 +8983,7 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, // Loads don't have side effects, look through them. if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) { - if (!Ld->isVolatile()) + if (Ld->isUnordered()) return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1); } return false; @@ -9005,21 +9002,51 @@ void SDNode::intersectFlagsWith(const SDNodeFlags Flags) { SDValue SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, - ArrayRef<ISD::NodeType> CandidateBinOps) { + ArrayRef<ISD::NodeType> CandidateBinOps, + bool AllowPartials) { // The pattern must end in an extract from index 0. if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT || !isNullConstant(Extract->getOperand(1))) return SDValue(); - SDValue Op = Extract->getOperand(0); - unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements()); - // Match against one of the candidate binary ops. 
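Stepping back to the ISD::MemIndexType operand now threaded through getMaskedGather and getMaskedScatter above: it records how each lane's address is formed, so that information survives splitting and widening. A scalar model of the four variants (names mirror the enum; this is a sketch, not the LLVM API):

#include <cstdint>

enum class MemIndexType {
  SignedScaled,     // Base + sext(Index) * Scale
  SignedUnscaled,   // Base + sext(Index)
  UnsignedScaled,   // Base + zext(Index) * Scale
  UnsignedUnscaled  // Base + zext(Index)
};

// Effective address of one gather/scatter lane, for a 32-bit index
// widened to a 64-bit pointer width.
static uint64_t laneAddress(uint64_t Base, uint32_t Index, uint64_t Scale,
                            MemIndexType Ty) {
  switch (Ty) {
  case MemIndexType::SignedScaled:
    return Base + (uint64_t)(int64_t)(int32_t)Index * Scale;
  case MemIndexType::SignedUnscaled:
    return Base + (uint64_t)(int64_t)(int32_t)Index;
  case MemIndexType::UnsignedScaled:
    return Base + (uint64_t)Index * Scale;
  case MemIndexType::UnsignedUnscaled:
    return Base + (uint64_t)Index;
  }
  return Base; // unreachable
}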
+ SDValue Op = Extract->getOperand(0); if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) { return Op.getOpcode() == unsigned(BinOp); })) return SDValue(); + // Floating-point reductions may require relaxed constraints on the final step + // of the reduction because they may reorder intermediate operations. + unsigned CandidateBinOp = Op.getOpcode(); + if (Op.getValueType().isFloatingPoint()) { + SDNodeFlags Flags = Op->getFlags(); + switch (CandidateBinOp) { + case ISD::FADD: + if (!Flags.hasNoSignedZeros() || !Flags.hasAllowReassociation()) + return SDValue(); + break; + default: + llvm_unreachable("Unhandled FP opcode for binop reduction"); + } + } + + // Matching failed - attempt to see if we did enough stages that a partial + // reduction from a subvector is possible. + auto PartialReduction = [&](SDValue Op, unsigned NumSubElts) { + if (!AllowPartials || !Op) + return SDValue(); + EVT OpVT = Op.getValueType(); + EVT OpSVT = OpVT.getScalarType(); + EVT SubVT = EVT::getVectorVT(*getContext(), OpSVT, NumSubElts); + if (!TLI->isExtractSubvectorCheap(SubVT, OpVT, 0)) + return SDValue(); + BinOp = (ISD::NodeType)CandidateBinOp; + return getNode( + ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op, + getConstant(0, SDLoc(Op), TLI->getVectorIdxTy(getDataLayout()))); + }; + // At each stage, we're looking for something that looks like: // %s = shufflevector <8 x i32> %op, <8 x i32> undef, // <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, @@ -9030,10 +9057,16 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, // <4,5,6,7,u,u,u,u> // <2,3,u,u,u,u,u,u> // <1,u,u,u,u,u,u,u> - unsigned CandidateBinOp = Op.getOpcode(); + // While a partial reduction match would be: + // <2,3,u,u,u,u,u,u> + // <1,u,u,u,u,u,u,u> + unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements()); + SDValue PrevOp; for (unsigned i = 0; i < Stages; ++i) { + unsigned MaskEnd = (1 << i); + if (Op.getOpcode() != CandidateBinOp) - return SDValue(); + return PartialReduction(PrevOp, MaskEnd); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -9049,12 +9082,14 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, // The first operand of the shuffle should be the same as the other operand // of the binop. if (!Shuffle || Shuffle->getOperand(0) != Op) - return SDValue(); + return PartialReduction(PrevOp, MaskEnd); // Verify the shuffle has the expected (at this stage of the pyramid) mask. 
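The pyramid matchBinOpReduction recognizes is the standard log2-stage halving reduction, which is also why the new FADD guard demands reassociation and no-signed-zeros: the stages reorder the additions. A scalar model, with the new partial-reduction escape noted in a comment:

#include <cstdio>

int main() {
  // log2(N) reduction stages over 8 lanes: each stage adds the upper
  // half onto the lower half, which is what the <4,5,6,7,u,...>,
  // <2,3,u,...>, <1,u,...> shuffle masks express.
  float V[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  for (unsigned Width = 8 / 2; Width >= 1; Width /= 2)
    for (unsigned I = 0; I < Width; ++I)
      V[I] += V[I + Width]; // lane I of op(V, shuffle(V, I + Width))
  std::printf("%g\n", V[0]); // 36
  // A partial reduction stops after some of the stages and extracts the
  // surviving subvector instead of lane 0, if that extract is cheap.
}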
- for (int Index = 0, MaskEnd = 1 << i; Index < MaskEnd; ++Index) - if (Shuffle->getMaskElt(Index) != MaskEnd + Index) - return SDValue(); + for (int Index = 0; Index < (int)MaskEnd; ++Index) + if (Shuffle->getMaskElt(Index) != (int)(MaskEnd + Index)) + return PartialReduction(PrevOp, MaskEnd); + + PrevOp = Op; } BinOp = (ISD::NodeType)CandidateBinOp; @@ -9114,8 +9149,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { getShiftAmountOperand(Operands[0].getValueType(), Operands[1]))); break; - case ISD::SIGN_EXTEND_INREG: - case ISD::FP_ROUND_INREG: { + case ISD::SIGN_EXTEND_INREG: { EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType(); Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], @@ -9187,6 +9221,9 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, int Dist) const { if (LD->isVolatile() || Base->isVolatile()) return false; + // TODO: probably too restrictive for atomics, revisit + if (!LD->isSimple()) + return false; if (LD->isIndexed() || Base->isIndexed()) return false; if (LD->getChain() != Base->getChain()) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index 9592bc30a4e1..3a53ab9717a4 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" #include <cstdint> using namespace llvm; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e818dd27c05e..8c15563fcd23 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -833,7 +833,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // If the source register was virtual and if we know something about it, // add an assert node. - if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) || + if (!Register::isVirtualRegister(Regs[Part + i]) || !RegisterVT.isInteger()) continue; @@ -948,8 +948,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); if (HasMatching) Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); - else if (!Regs.empty() && - TargetRegisterInfo::isVirtualRegister(Regs.front())) { + else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) { // Put the register class of the virtual registers in the flag word. That // way, later passes can recompute register class constraints for inline // assembly as well as normal instructions. @@ -1810,7 +1809,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // offsets to its parts don't wrap either. 
SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]); - SDValue Val = RetOp.getValue(i); + SDValue Val = RetOp.getValue(RetOp.getResNo() + i); if (MemVTs[i] != ValueVTs[i]) Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]); Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val, @@ -2263,7 +2262,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { Instruction::BinaryOps Opcode = BOp->getOpcode(); if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && - !I.getMetadata(LLVMContext::MD_unpredictable) && + !I.hasMetadata(LLVMContext::MD_unpredictable) && (Opcode == Instruction::And || Opcode == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode, @@ -2600,9 +2599,11 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, void SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setDiscardResult(true); SDValue Chain = TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, - None, false, getCurSDLoc(), false, false).second; + None, CallOptions, getCurSDLoc()).second; // On PS4, the "return address" must still be within the calling function, // even if it's at the very end, so emit an explicit TRAP here. // Passing 'true' for doesNotReturn above won't generate the trap for us. @@ -2618,24 +2619,18 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB) { SDLoc dl = getCurSDLoc(); - // Subtract the minimum value + // Subtract the minimum value. SDValue SwitchOp = getValue(B.SValue); EVT VT = SwitchOp.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, - DAG.getConstant(B.First, dl, VT)); - - // Check range - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue RangeCmp = DAG.getSetCC( - dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT); + SDValue RangeSub = + DAG.getNode(ISD::SUB, dl, VT, SwitchOp, DAG.getConstant(B.First, dl, VT)); // Determine the type of the test operands. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); bool UsePtrType = false; - if (!TLI.isTypeLegal(VT)) + if (!TLI.isTypeLegal(VT)) { UsePtrType = true; - else { + } else { for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) { // Switch table case range are encoded into series of masks. @@ -2644,6 +2639,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, break; } } + SDValue Sub = RangeSub; if (UsePtrType) { VT = TLI.getPointerTy(DAG.getDataLayout()); Sub = DAG.getZExtOrTrunc(Sub, dl, VT); @@ -2655,20 +2651,29 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock* MBB = B.Cases[0].ThisBB; - addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); + if (!B.OmitRangeCheck) + addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); addSuccessorWithProb(SwitchBB, MBB, B.Prob); SwitchBB->normalizeSuccProbs(); - SDValue BrRange = DAG.getNode(ISD::BRCOND, dl, - MVT::Other, CopyTo, RangeCmp, - DAG.getBasicBlock(B.Default)); + SDValue Root = CopyTo; + if (!B.OmitRangeCheck) { + // Conditional branch to the default block. 
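The visitBitTestHeader restructuring here (RangeSub computed once; the range compare, continuing in the hunk just below, emitted only when !B.OmitRangeCheck) rests on the usual switch-lowering trick: one unsigned compare of x - First against Range covers both bounds, after which each cluster needs only an AND against a case bitmask. A standalone model:

#include <cstdint>
#include <cstdio>

// Switch over {2,3,5,7} lowered as a bit test: subtract the minimum,
// range-check with a single unsigned compare, then test a
// one-bit-per-case mask.
static bool isSmallPrime(uint32_t X) {
  uint32_t Sub = X - 2;                       // subtract B.First
  if (Sub > 5)                                // B.Range check (SETUGT);
    return false;                             // omitted when provably in range
  const uint32_t CaseMask = (1u << 0) | (1u << 1) | (1u << 3) | (1u << 5);
  return (CaseMask >> Sub) & 1;               // one bit test per cluster
}

int main() {
  for (uint32_t X = 0; X < 10; ++X)
    std::printf("%u -> %d\n", X, isSmallPrime(X));
}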
+ SDValue RangeCmp = DAG.getSetCC(dl, + TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + RangeSub.getValueType()), + RangeSub, DAG.getConstant(B.Range, dl, RangeSub.getValueType()), + ISD::SETUGT); + + Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp, + DAG.getBasicBlock(B.Default)); + } // Avoid emitting unnecessary branches to the next block. if (MBB != NextBlock(SwitchBB)) - BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange, - DAG.getBasicBlock(MBB)); + Root = DAG.getNode(ISD::BR, dl, MVT::Other, Root, DAG.getBasicBlock(MBB)); - DAG.setRoot(BrRange); + DAG.setRoot(Root); } /// visitBitTestCase - this function produces one "bit test" @@ -3266,8 +3271,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { // We care about the legality of the operation after it has been type // legalized. - while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal && - VT != TLI.getTypeToTransformTo(Ctx, VT)) + while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal) VT = TLI.getTypeToTransformTo(Ctx, VT); // If the vselect is legal, assume we want to leave this as a vector setcc + @@ -3534,17 +3538,32 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); + Constant *MaskV = cast<Constant>(I.getOperand(2)); SDLoc DL = getCurSDLoc(); - - SmallVector<int, 8> Mask; - ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); - unsigned MaskNumElts = Mask.size(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); + if (MaskV->isNullValue() && VT.isScalableVector()) { + // Canonical splat form of first element of first input vector. + SDValue FirstElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + SrcVT.getScalarType(), Src1, + DAG.getConstant(0, DL, + TLI.getVectorIdxTy(DAG.getDataLayout()))); + setValue(&I, DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, FirstElt)); + return; + } + + // For now, we only handle splats for scalable vectors. + // The DAGCombiner will perform a BUILD_VECTOR -> SPLAT_VECTOR transformation + // for targets that support a SPLAT_VECTOR for non-scalable vector types. + assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle"); + + SmallVector<int, 8> Mask; + ShuffleVectorInst::getShuffleMask(MaskV, Mask); + unsigned MaskNumElts = Mask.size(); + if (SrcNumElts == MaskNumElts) { setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask)); return; @@ -3825,7 +3844,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // Normalize Vector GEP - all scalar operands should be converted to the // splat vector. unsigned VectorWidth = I.getType()->isVectorTy() ? - cast<VectorType>(I.getType())->getVectorNumElements() : 0; + I.getType()->getVectorNumElements() : 0; if (VectorWidth && !N.getValueType().isVector()) { LLVMContext &Context = *DAG.getContext(); @@ -3858,12 +3877,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // If this is a scalar constant or a splat vector of constants, // handle it quickly. 
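On the visitShuffleVector change above: for scalable vectors a zeroinitializer shuffle mask can only mean "broadcast lane 0", since per-lane masks are meaningless when the lane count is unknown at compile time; hence the new EXTRACT_VECTOR_ELT plus SPLAT_VECTOR canonical form. A model of the semantics:

#include <cstdio>
#include <vector>

// Model of SPLAT_VECTOR for a scalable type: the runtime lane count N is
// unknown to the compiler, so the only representable "shuffle" is one
// that broadcasts a single scalar; a zero mask selects Src1's lane 0.
static std::vector<int> splatVector(const std::vector<int> &Src1, size_t N) {
  int FirstElt = Src1.at(0);            // EXTRACT_VECTOR_ELT ..., idx 0
  return std::vector<int>(N, FirstElt); // SPLAT_VECTOR
}

int main() {
  std::vector<int> Src = {7, 1, 2, 3};
  for (int Lane : splatVector(Src, 6)) // N only known at "runtime"
    std::printf("%d ", Lane);          // 7 7 7 7 7 7
  std::printf("\n");
}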
- const auto *CI = dyn_cast<ConstantInt>(Idx); - if (!CI && isa<ConstantDataVector>(Idx) && - cast<ConstantDataVector>(Idx)->getSplatValue()) - CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue()); + const auto *C = dyn_cast<Constant>(Idx); + if (C && isa<VectorType>(C->getType())) + C = C->getSplatValue(); - if (CI) { + if (const auto *CI = dyn_cast_or_null<ConstantInt>(C)) { if (CI->isZero()) continue; APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize); @@ -3872,7 +3890,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) : DAG.getConstant(Offs, dl, IdxTy); - // In an inbouds GEP with an offset that is nonnegative even when + // In an inbounds GEP with an offset that is nonnegative even when // interpreted as signed, assume there is no unsigned overflow. SDNodeFlags Flags; if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds()) @@ -4002,8 +4020,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Type *Ty = I.getType(); bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; - bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr; + bool isNonTemporal = I.hasMetadata(LLVMContext::MD_nontemporal); + bool isInvariant = I.hasMetadata(LLVMContext::MD_invariant_load); bool isDereferenceable = isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout()); unsigned Alignment = I.getAlignment(); @@ -4118,7 +4136,7 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { SDValue Src = getValue(SrcV); // Create a virtual register, then update the virtual register. - unsigned VReg = + Register VReg = SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand()); // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue // Chain can be getRoot or getControlRoot. @@ -4132,8 +4150,8 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { "call visitLoadFromSwiftError when backend supports swifterror"); assert(!I.isVolatile() && - I.getMetadata(LLVMContext::MD_nontemporal) == nullptr && - I.getMetadata(LLVMContext::MD_invariant_load) == nullptr && + !I.hasMetadata(LLVMContext::MD_nontemporal) && + !I.hasMetadata(LLVMContext::MD_invariant_load) && "Support volatile, non temporal, invariant for load_from_swift_error"); const Value *SV = I.getOperand(0); @@ -4209,7 +4227,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { auto MMOFlags = MachineMemOperand::MONone; if (I.isVolatile()) MMOFlags |= MachineMemOperand::MOVolatile; - if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr) + if (I.hasMetadata(LLVMContext::MD_nontemporal)) MMOFlags |= MachineMemOperand::MONonTemporal; MMOFlags |= TLI.getMMOFlags(I); @@ -4309,8 +4327,9 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, // are looking for. If first operand of the GEP is a splat vector - we // extract the splat value and use it as a uniform base. // In all other cases the function returns 'false'. 
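The switch from ConstantDataVector to the generic Constant::getSplatValue above lets more vector GEPs fold their per-lane offset computation down to a single scalar offset. The arithmetic being folded, per lane, is Base + Index[i] * ElementSize:

#include <cstdint>
#include <cstdio>

int main() {
  // Vector GEP: addrs[i] = Base + Index[i] * ElementSize. A splat index
  // collapses to one scalar offset applied to every lane.
  const uint64_t Base = 0x1000, ElementSize = 4; // e.g. a gep into i32*
  int64_t Index[4] = {3, 3, 3, 3};               // splat of 3

  bool IsSplat = true;
  for (int I = 1; I < 4; ++I)
    IsSplat = IsSplat && (Index[I] == Index[0]);

  if (IsSplat) {
    uint64_t Offs = (uint64_t)Index[0] * ElementSize; // one scalar offset
    std::printf("splat: every lane = %#llx\n",
                (unsigned long long)(Base + Offs));
  } else {
    for (int I = 0; I < 4; ++I)
      std::printf("lane %d = %#llx\n", I,
                  (unsigned long long)(Base + (uint64_t)Index[I] * ElementSize));
  }
}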
-static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, - SDValue &Scale, SelectionDAGBuilder* SDB) { +static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index, + ISD::MemIndexType &IndexType, SDValue &Scale, + SelectionDAGBuilder *SDB) { SelectionDAG& DAG = SDB->DAG; LLVMContext &Context = *DAG.getContext(); @@ -4330,8 +4349,13 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, // Ensure all the other indices are 0. for (unsigned i = 1; i < FinalIndex; ++i) { - auto *C = dyn_cast<ConstantInt>(GEP->getOperand(i)); - if (!C || !C->isZero()) + auto *C = dyn_cast<Constant>(GEP->getOperand(i)); + if (!C) + return false; + if (isa<VectorType>(C->getType())) + C = C->getSplatValue(); + auto *CI = dyn_cast_or_null<ConstantInt>(C); + if (!CI || !CI->isZero()) return false; } @@ -4346,6 +4370,7 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, SDB->getCurSDLoc(), TLI.getPointerTy(DL)); Base = SDB->getValue(Ptr); Index = SDB->getValue(IndexVal); + IndexType = ISD::SIGNED_SCALED; if (!Index.getValueType().isVector()) { unsigned GEPWidth = GEP->getType()->getVectorNumElements(); @@ -4373,9 +4398,11 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDValue Base; SDValue Index; + ISD::MemIndexType IndexType; SDValue Scale; const Value *BasePtr = Ptr; - bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this); + bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale, + this); const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; MachineMemOperand *MMO = DAG.getMachineFunction(). @@ -4385,11 +4412,12 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); + IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale }; SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, - Ops, MMO); + Ops, MMO, IndexType); DAG.setRoot(Scatter); setValue(&I, Scatter); } @@ -4476,9 +4504,11 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDValue Root = DAG.getRoot(); SDValue Base; SDValue Index; + ISD::MemIndexType IndexType; SDValue Scale; const Value *BasePtr = Ptr; - bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this); + bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale, + this); bool ConstantMemory = false; if (UniformBase && AA && AA->pointsToConstantMemory( @@ -4500,11 +4530,12 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); + IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale }; SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, - Ops, MMO); + Ops, MMO, IndexType); SDValue OutChain = Gather.getValue(1); if (!ConstantMemory) @@ -4628,7 +4659,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { auto Flags = MachineMemOperand::MOLoad; if (I.isVolatile()) Flags |= MachineMemOperand::MOVolatile; - if (I.getMetadata(LLVMContext::MD_invariant_load) != nullptr) + if (I.hasMetadata(LLVMContext::MD_invariant_load)) Flags |= MachineMemOperand::MOInvariant; if 
(isDereferenceablePointer(I.getPointerOperand(), I.getType(), DAG.getDataLayout())) @@ -4645,9 +4676,27 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { AAMDNodes(), nullptr, SSID, Order); InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); - SDValue L = - DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain, - getValue(I.getPointerOperand()), MMO); + + SDValue Ptr = getValue(I.getPointerOperand()); + + if (TLI.lowerAtomicLoadAsLoadSDNode(I)) { + // TODO: Once this is better exercised by tests, it should be merged with + // the normal path for loads to prevent future divergence. + SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO); + if (MemVT != VT) + L = DAG.getPtrExtOrTrunc(L, dl, VT); + + setValue(&I, L); + SDValue OutChain = L.getValue(1); + if (!I.isUnordered()) + DAG.setRoot(OutChain); + else + PendingLoads.push_back(OutChain); + return; + } + + SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain, + Ptr, MMO); SDValue OutChain = L.getValue(1); if (MemVT != VT) @@ -4686,9 +4735,17 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDValue Val = getValue(I.getValueOperand()); if (Val.getValueType() != MemVT) Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT); + SDValue Ptr = getValue(I.getPointerOperand()); + if (TLI.lowerAtomicStoreAsStoreSDNode(I)) { + // TODO: Once this is better exercised by tests, it should be merged with + // the normal path for stores to prevent future divergence. + SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO); + DAG.setRoot(S); + return; + } SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain, - getValue(I.getPointerOperand()), Val, MMO); + Ptr, Val, MMO); DAG.setRoot(OutChain); @@ -4731,8 +4788,22 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Add all operands of the call to the operand list. for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { - SDValue Op = getValue(I.getArgOperand(i)); - Ops.push_back(Op); + const Value *Arg = I.getArgOperand(i); + if (!I.paramHasAttr(i, Attribute::ImmArg)) { + Ops.push_back(getValue(Arg)); + continue; + } + + // Use TargetConstant instead of a regular constant for immarg. + EVT VT = TLI.getValueType(*DL, Arg->getType(), true); + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) { + assert(CI->getBitWidth() <= 64 && + "large intrinsic immediates not handled"); + Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT)); + } else { + Ops.push_back( + DAG.getTargetConstantFP(*cast<ConstantFP>(Arg), SDLoc(), VT)); + } } SmallVector<EVT, 4> ValueVTs; @@ -4749,10 +4820,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // This is target intrinsic that touches memory AAMDNodes AAInfo; I.getAAMetadata(AAInfo); - Result = - DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, - MachinePointerInfo(Info.ptrVal, Info.offset), - Info.align, Info.flags, Info.size, AAInfo); + Result = DAG.getMemIntrinsicNode( + Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, + MachinePointerInfo(Info.ptrVal, Info.offset), + Info.align ? 
Info.align->value() : 0, Info.flags, Info.size, AAInfo); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { @@ -4918,12 +4989,11 @@ static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, // Put the exponent in the right bit position for later addition to the // final result: // - // #define LOG2OFe 1.4426950f - // t0 = Op * LOG2OFe + // t0 = Op * log2(e) // TODO: What fast-math-flags should be set here? SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, - getF32Constant(DAG, 0x3fb8aa3b, dl)); + DAG.getConstantFP(numbers::log2ef, dl, MVT::f32)); return getLimitedPrecisionExp2(t0, dl, DAG); } @@ -4941,10 +5011,11 @@ static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); - // Scale the exponent by log(2) [0.69314718f]. + // Scale the exponent by log(2). SDValue Exp = GetExponent(DAG, Op1, TLI, dl); - SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, - getF32Constant(DAG, 0x3f317218, dl)); + SDValue LogOfExponent = + DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, + DAG.getConstantFP(numbers::ln2f, dl, MVT::f32)); // Get the significand and build it into a floating-point number with // exponent of 1. @@ -5311,19 +5382,32 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } -// getUnderlyingArgReg - Find underlying register used for a truncated or -// bitcasted argument. -static unsigned getUnderlyingArgReg(const SDValue &N) { +// getUnderlyingArgRegs - Find underlying registers used for a truncated, +// bitcasted, or split argument. 
Returns a list of <Register, size in bits> +static void +getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs, + const SDValue &N) { switch (N.getOpcode()) { - case ISD::CopyFromReg: - return cast<RegisterSDNode>(N.getOperand(1))->getReg(); + case ISD::CopyFromReg: { + SDValue Op = N.getOperand(1); + Regs.emplace_back(cast<RegisterSDNode>(Op)->getReg(), + Op.getValueType().getSizeInBits()); + return; + } case ISD::BITCAST: case ISD::AssertZext: case ISD::AssertSext: case ISD::TRUNCATE: - return getUnderlyingArgReg(N.getOperand(0)); + getUnderlyingArgRegs(Regs, N.getOperand(0)); + return; + case ISD::BUILD_PAIR: + case ISD::BUILD_VECTOR: + case ISD::CONCAT_VECTORS: + for (SDValue Op : N->op_values()) + getUnderlyingArgRegs(Regs, Op); + return; default: - return 0; + return; } } @@ -5412,11 +5496,16 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (FI != std::numeric_limits<int>::max()) Op = MachineOperand::CreateFI(FI); + SmallVector<std::pair<unsigned, unsigned>, 8> ArgRegsAndSizes; if (!Op && N.getNode()) { - unsigned Reg = getUnderlyingArgReg(N); - if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { + getUnderlyingArgRegs(ArgRegsAndSizes, N); + Register Reg; + if (ArgRegsAndSizes.size() == 1) + Reg = ArgRegsAndSizes.front().first; + + if (Reg && Reg.isVirtual()) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); - unsigned PR = RegInfo.getLiveInPhysReg(Reg); + Register PR = RegInfo.getLiveInPhysReg(Reg); if (PR) Reg = PR; } @@ -5436,29 +5525,42 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } if (!Op) { + // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg + auto splitMultiRegDbgValue + = [&](ArrayRef<std::pair<unsigned, unsigned>> SplitRegs) { + unsigned Offset = 0; + for (auto RegAndSize : SplitRegs) { + auto FragmentExpr = DIExpression::createFragmentExpression( + Expr, Offset, RegAndSize.second); + if (!FragmentExpr) + continue; + assert(!IsDbgDeclare && "DbgDeclare operand is not in memory?"); + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false, + RegAndSize.first, Variable, *FragmentExpr)); + Offset += RegAndSize.second; + } + }; + // Check if ValueMap has reg number. - DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); + DenseMap<const Value *, unsigned>::const_iterator + VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) { const auto &TLI = DAG.getTargetLoweringInfo(); RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second, V->getType(), getABIRegCopyCC(V)); if (RFV.occupiesMultipleRegs()) { - unsigned Offset = 0; - for (auto RegAndSize : RFV.getRegsAndSizes()) { - Op = MachineOperand::CreateReg(RegAndSize.first, false); - auto FragmentExpr = DIExpression::createFragmentExpression( - Expr, Offset, RegAndSize.second); - if (!FragmentExpr) - continue; - FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare, - Op->getReg(), Variable, *FragmentExpr)); - Offset += RegAndSize.second; - } + splitMultiRegDbgValue(RFV.getRegsAndSizes()); return true; } + Op = MachineOperand::CreateReg(VMI->second, false); IsIndirect = IsDbgDeclare; + } else if (ArgRegsAndSizes.size() > 1) { + // This was split due to the calling convention, and no virtual register + // mapping exists for the value. 
+ splitMultiRegDbgValue(ArgRegsAndSizes); + return true; } } @@ -5468,8 +5570,10 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( assert(Variable->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); IsIndirect = (Op->isReg()) ? IsIndirect : true; + if (IsIndirect) + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false, *Op, Variable, Expr)); return true; @@ -5554,11 +5658,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; case Intrinsic::sponentry: setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl, - TLI.getPointerTy(DAG.getDataLayout()))); + TLI.getFrameIndexTy(DAG.getDataLayout()))); return; case Intrinsic::frameaddress: setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, - TLI.getPointerTy(DAG.getDataLayout()), + TLI.getFrameIndexTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return; case Intrinsic::read_register: { @@ -5888,65 +5992,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::masked_compressstore: visitMaskedStore(I, true /* IsCompressing */); return; - case Intrinsic::x86_mmx_pslli_w: - case Intrinsic::x86_mmx_pslli_d: - case Intrinsic::x86_mmx_pslli_q: - case Intrinsic::x86_mmx_psrli_w: - case Intrinsic::x86_mmx_psrli_d: - case Intrinsic::x86_mmx_psrli_q: - case Intrinsic::x86_mmx_psrai_w: - case Intrinsic::x86_mmx_psrai_d: { - SDValue ShAmt = getValue(I.getArgOperand(1)); - if (isa<ConstantSDNode>(ShAmt)) { - visitTargetIntrinsic(I, Intrinsic); - return; - } - unsigned NewIntrinsic = 0; - EVT ShAmtVT = MVT::v2i32; - switch (Intrinsic) { - case Intrinsic::x86_mmx_pslli_w: - NewIntrinsic = Intrinsic::x86_mmx_psll_w; - break; - case Intrinsic::x86_mmx_pslli_d: - NewIntrinsic = Intrinsic::x86_mmx_psll_d; - break; - case Intrinsic::x86_mmx_pslli_q: - NewIntrinsic = Intrinsic::x86_mmx_psll_q; - break; - case Intrinsic::x86_mmx_psrli_w: - NewIntrinsic = Intrinsic::x86_mmx_psrl_w; - break; - case Intrinsic::x86_mmx_psrli_d: - NewIntrinsic = Intrinsic::x86_mmx_psrl_d; - break; - case Intrinsic::x86_mmx_psrli_q: - NewIntrinsic = Intrinsic::x86_mmx_psrl_q; - break; - case Intrinsic::x86_mmx_psrai_w: - NewIntrinsic = Intrinsic::x86_mmx_psra_w; - break; - case Intrinsic::x86_mmx_psrai_d: - NewIntrinsic = Intrinsic::x86_mmx_psra_d; - break; - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. - } - - // The vector shift intrinsics with scalars uses 32b shift amounts but - // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits - // to be zero. - // We must do this early because v2i32 is not a legal type. 
- SDValue ShOps[2]; - ShOps[0] = ShAmt; - ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); - ShAmt = DAG.getBuildVector(ShAmtVT, sdl, ShOps); - EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); - Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, - DAG.getConstant(NewIntrinsic, sdl, MVT::i32), - getValue(I.getArgOperand(0)), ShAmt); - setValue(&I, Res); - return; - } case Intrinsic::powi: setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); @@ -6063,6 +6108,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: case Intrinsic::experimental_constrained_fptrunc: case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: @@ -6075,12 +6122,16 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::experimental_constrained_log: case Intrinsic::experimental_constrained_log10: case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_lrint: + case Intrinsic::experimental_constrained_llrint: case Intrinsic::experimental_constrained_rint: case Intrinsic::experimental_constrained_nearbyint: case Intrinsic::experimental_constrained_maxnum: case Intrinsic::experimental_constrained_minnum: case Intrinsic::experimental_constrained_ceil: case Intrinsic::experimental_constrained_floor: + case Intrinsic::experimental_constrained_lround: + case Intrinsic::experimental_constrained_llround: case Intrinsic::experimental_constrained_round: case Intrinsic::experimental_constrained_trunc: visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I)); @@ -6272,6 +6323,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Op3)); return; } + case Intrinsic::umul_fix_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + setValue(&I, DAG.getNode(ISD::UMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2, + Op3)); + return; + } case Intrinsic::stacksave: { SDValue Op = getRoot(); Res = DAG.getNode( @@ -6347,29 +6406,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DAG.setRoot(Res); return; } - case Intrinsic::objectsize: { - // If we don't know by now, we're never going to know. - ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1)); - - assert(CI && "Non-constant type in __builtin_object_size?"); - - SDValue Arg = getValue(I.getCalledValue()); - EVT Ty = Arg.getValueType(); - - if (CI->isZero()) - Res = DAG.getConstant(-1ULL, sdl, Ty); - else - Res = DAG.getConstant(0, sdl, Ty); - - setValue(&I, Res); - return; - } + case Intrinsic::objectsize: + llvm_unreachable("llvm.objectsize.* should have been lowered already"); case Intrinsic::is_constant: - // If this wasn't constant-folded away by now, then it's not a - // constant. 
- setValue(&I, DAG.getConstant(0, sdl, MVT::i1)); - return; + llvm_unreachable("llvm.is.constant.* should have been lowered already"); case Intrinsic::annotation: case Intrinsic::ptr_annotation: @@ -6818,6 +6859,17 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, Val); return; } + case Intrinsic::ptrmask: { + SDValue Ptr = getValue(I.getOperand(0)); + SDValue Const = getValue(I.getOperand(1)); + + EVT DestVT = + EVT(DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); + + setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), DestVT, Ptr, + DAG.getZExtOrTrunc(Const, getCurSDLoc(), DestVT))); + return; + } } } @@ -6845,6 +6897,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case Intrinsic::experimental_constrained_fma: Opcode = ISD::STRICT_FMA; break; + case Intrinsic::experimental_constrained_fptosi: + Opcode = ISD::STRICT_FP_TO_SINT; + break; + case Intrinsic::experimental_constrained_fptoui: + Opcode = ISD::STRICT_FP_TO_UINT; + break; case Intrinsic::experimental_constrained_fptrunc: Opcode = ISD::STRICT_FP_ROUND; break; @@ -6881,6 +6939,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case Intrinsic::experimental_constrained_log2: Opcode = ISD::STRICT_FLOG2; break; + case Intrinsic::experimental_constrained_lrint: + Opcode = ISD::STRICT_LRINT; + break; + case Intrinsic::experimental_constrained_llrint: + Opcode = ISD::STRICT_LLRINT; + break; case Intrinsic::experimental_constrained_rint: Opcode = ISD::STRICT_FRINT; break; @@ -6899,6 +6963,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case Intrinsic::experimental_constrained_floor: Opcode = ISD::STRICT_FFLOOR; break; + case Intrinsic::experimental_constrained_lround: + Opcode = ISD::STRICT_LROUND; + break; + case Intrinsic::experimental_constrained_llround: + Opcode = ISD::STRICT_LLROUND; + break; case Intrinsic::experimental_constrained_round: Opcode = ISD::STRICT_FROUND; break; @@ -7102,7 +7172,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, if (SwiftErrorVal && TLI.supportSwiftError()) { // Get the last element of InVals. SDValue Src = CLI.InVals.back(); - unsigned VReg = SwiftError.getOrCreateVRegDefAt( + Register VReg = SwiftError.getOrCreateVRegDefAt( CS.getInstruction(), FuncInfo.MBB, SwiftErrorVal); SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); DAG.setRoot(CopyNode); @@ -8021,6 +8091,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Compute the constraint code and ConstraintType to use. TLI.ComputeConstraintToUse(T, SDValue()); + if (T.ConstraintType == TargetLowering::C_Immediate && + OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand)) + // We've delayed emitting a diagnostic like the "n" constraint because + // inlining could cause an integer constant to show up.
+ return emitInlineAsmError( + CS, "constraint '" + Twine(T.ConstraintCode) + "' expects an " + "integer constant expression"); + ExtraInfo.update(T); } @@ -8105,7 +8183,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { switch (OpInfo.Type) { case InlineAsm::isOutput: if (OpInfo.ConstraintType == TargetLowering::C_Memory || - (OpInfo.ConstraintType == TargetLowering::C_Other && + ((OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) && OpInfo.isIndirect)) { unsigned ConstraintID = TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); @@ -8119,13 +8198,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { MVT::i32)); AsmNodeOperands.push_back(OpInfo.CallOperand); break; - } else if ((OpInfo.ConstraintType == TargetLowering::C_Other && + } else if (((OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) && !OpInfo.isIndirect) || OpInfo.ConstraintType == TargetLowering::C_Register || OpInfo.ConstraintType == TargetLowering::C_RegisterClass) { // Otherwise, this outputs to a register (directly for C_Register / - // C_RegisterClass, and a target-defined fashion for C_Other). Find a - // register that we can use. + // C_RegisterClass, and a target-defined fashion for + // C_Immediate/C_Other). Find a register that we can use. if (OpInfo.AssignedRegs.Regs.empty()) { emitInlineAsmError( CS, "couldn't allocate output register for constraint '" + @@ -8205,15 +8285,24 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Treat indirect 'X' constraint as memory. - if (OpInfo.ConstraintType == TargetLowering::C_Other && + if ((OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) && OpInfo.isIndirect) OpInfo.ConstraintType = TargetLowering::C_Memory; - if (OpInfo.ConstraintType == TargetLowering::C_Other) { + if (OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector<SDValue> Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); if (Ops.empty()) { + if (OpInfo.ConstraintType == TargetLowering::C_Immediate) + if (isa<ConstantSDNode>(InOperandVal)) { + emitInlineAsmError(CS, "value out of range for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + return; + } + emitInlineAsmError(CS, "invalid operand for inline asm constraint '" + Twine(OpInfo.ConstraintCode) + "'"); return; @@ -8250,7 +8339,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || - OpInfo.ConstraintType == TargetLowering::C_Register) && + OpInfo.ConstraintType == TargetLowering::C_Register || + OpInfo.ConstraintType == TargetLowering::C_Immediate) && "Unknown constraint type!"); // TODO: Support this. @@ -8356,6 +8446,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { Val = OpInfo.AssignedRegs.getCopyFromRegs( DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction()); break; + case TargetLowering::C_Immediate: case TargetLowering::C_Other: Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(), OpInfo, DAG); @@ -9018,7 +9109,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // Certain targets (such as MIPS), may have a different ABI alignment // for a type depending on the context. Give the target a chance to // specify the alignment it wants. 
- unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL); + const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL)); if (Args[i].Ty->isPointerTy()) { Flags.setPointer(); @@ -9073,7 +9164,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { FrameAlign = Args[i].Alignment; else FrameAlign = getByValTypeAlignment(ElementTy, DL); - Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Args[i].IsNest) Flags.setNest(); @@ -9129,7 +9220,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) { - MyFlags.Flags.setOrigAlign(1); + MyFlags.Flags.setOrigAlign(Align::None()); if (j == NumParts - 1) MyFlags.Flags.setSplitEnd(); } @@ -9259,7 +9350,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { assert((Op.getOpcode() != ISD::CopyFromReg || cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) && "Copy from a reg to the same reg!"); - assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); + assert(!Register::isPhysicalRegister(Reg) && "Is a physreg"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // If this is an InlineAsm we have to match the registers required, not the @@ -9516,8 +9607,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Certain targets (such as MIPS), may have a different ABI alignment // for a type depending on the context. Give the target a chance to // specify the alignment it wants. - unsigned OriginalAlignment = - TLI->getABIAlignmentForCallingConv(ArgTy, DL); + const Align OriginalAlignment( + TLI->getABIAlignmentForCallingConv(ArgTy, DL)); if (Arg.getType()->isPointerTy()) { Flags.setPointer(); @@ -9577,7 +9668,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { FrameAlign = Arg.getParamAlignment(); else FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); - Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Arg.hasAttribute(Attribute::Nest)) Flags.setNest(); @@ -9586,6 +9677,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Flags.setOrigAlign(OriginalAlignment); if (ArgCopyElisionCandidates.count(&Arg)) Flags.setCopyElisionCandidate(); + if (Arg.hasAttribute(Attribute::Returned)) + Flags.setReturned(); MVT RegisterVT = TLI->getRegisterTypeForCallingConv( *CurDAG->getContext(), F.getCallingConv(), VT); @@ -9598,7 +9691,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 else if (i > 0) { - MyFlags.Flags.setOrigAlign(1); + MyFlags.Flags.setOrigAlign(Align::None()); if (i == NumRegs - 1) MyFlags.Flags.setSplitEnd(); } @@ -9650,7 +9743,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); - unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); + Register SRetReg = + RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); FuncInfo->DemoteRegister = SRetReg; NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue); @@ -9748,10 +9842,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) { FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); } + // Analyses past this point are naive and don't expect an assertion. + if (Res.getOpcode() == ISD::AssertZext) + Res = Res.getOperand(0); + // Update the SwiftErrorVRegDefMap. 
if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) { unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(), Reg); } @@ -9763,7 +9861,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // FIXME: This isn't very clean... it would be nice to make this more // general. unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { FuncInfo->ValueMap[&Arg] = Reg; continue; } @@ -10087,8 +10185,6 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, break; } case CC_BitTests: { - // FIXME: If Fallthrough is unreachable, skip the range check. - // FIXME: Optimize away range check based on pivot comparisons. BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex]; @@ -10109,6 +10205,11 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, BTB->DefaultProb -= DefaultProb / 2; } + if (FallthroughUnreachable) { + // Skip the range check if the fallthrough block is unreachable. + BTB->OmitRangeCheck = true; + } + // If we're in the right place, emit the bit test header right now. if (CurMBB == SwitchMBB) { visitBitTestHeader(*BTB, SwitchMBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 0072e33f23b7..bfcf30b430b6 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -426,7 +426,7 @@ public: SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, SwiftErrorValueTracking &swifterror, CodeGenOpt::Level ol) : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag), - SL(make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo), + SL(std::make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo), SwiftError(swifterror) {} void init(GCFunctionInfo *gfi, AliasAnalysis *AA, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index da3049881d31..bc10f7621239 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -280,6 +280,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; + case ISD::SPLAT_VECTOR: return "splat_vector"; case ISD::CARRY_FALSE: return "carry_false"; case ISD::ADDC: return "addc"; case ISD::ADDE: return "adde"; @@ -305,6 +306,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SMULFIX: return "smulfix"; case ISD::SMULFIXSAT: return "smulfixsat"; case ISD::UMULFIX: return "umulfix"; + case ISD::UMULFIXSAT: return "umulfixsat"; // Conversion operators. 
case ISD::SIGN_EXTEND: return "sign_extend"; @@ -318,22 +320,27 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FP_ROUND: return "fp_round"; case ISD::STRICT_FP_ROUND: return "strict_fp_round"; case ISD::FLT_ROUNDS_: return "flt_rounds"; - case ISD::FP_ROUND_INREG: return "fp_round_inreg"; case ISD::FP_EXTEND: return "fp_extend"; case ISD::STRICT_FP_EXTEND: return "strict_fp_extend"; case ISD::SINT_TO_FP: return "sint_to_fp"; case ISD::UINT_TO_FP: return "uint_to_fp"; case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::STRICT_FP_TO_UINT: return "strict_fp_to_uint"; case ISD::BITCAST: return "bitcast"; case ISD::ADDRSPACECAST: return "addrspacecast"; case ISD::FP16_TO_FP: return "fp16_to_fp"; case ISD::FP_TO_FP16: return "fp_to_fp16"; case ISD::LROUND: return "lround"; + case ISD::STRICT_LROUND: return "strict_lround"; case ISD::LLROUND: return "llround"; + case ISD::STRICT_LLROUND: return "strict_llround"; case ISD::LRINT: return "lrint"; + case ISD::STRICT_LRINT: return "strict_lrint"; case ISD::LLRINT: return "llrint"; + case ISD::STRICT_LLRINT: return "strict_llrint"; // Control flow instructions case ISD::BR: return "br"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index bdf9f2c166e1..1f07a241a824 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -27,6 +27,7 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -434,9 +435,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TII = MF->getSubtarget().getInstrInfo(); TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); - LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Fn); GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; - ORE = make_unique<OptimizationRemarkEmitter>(&Fn); + ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn); auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); @@ -524,8 +525,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { To = J->second; } // Make sure the new register has a sufficiently constrained register class. - if (TargetRegisterInfo::isVirtualRegister(From) && - TargetRegisterInfo::isVirtualRegister(To)) + if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To)) MRI.constrainRegClass(To, MRI.getRegClass(From)); // Replace it. @@ -572,7 +572,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { bool hasFI = MI->getOperand(0).isFI(); Register Reg = hasFI ? 
TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) EntryMBB->insert(EntryMBB->begin(), MI); else { MachineInstr *Def = RegInfo->getVRegDef(Reg); @@ -582,7 +582,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { Def->getParent()->insert(std::next(InsertPos), MI); } else LLVM_DEBUG(dbgs() << "Dropping debug info for dead vreg" - << TargetRegisterInfo::virtReg2Index(Reg) << "\n"); + << Register::virtReg2Index(Reg) << "\n"); } // If Reg is live-in then update debug info to track its copy in a vreg. @@ -671,8 +671,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { To = J->second; } // Make sure the new register has a sufficiently constrained register class. - if (TargetRegisterInfo::isVirtualRegister(From) && - TargetRegisterInfo::isVirtualRegister(To)) + if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To)) MRI.constrainRegClass(To, MRI.getRegClass(From)); // Replace it. @@ -760,7 +759,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { continue; unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DestReg)) + if (!Register::isVirtualRegister(DestReg)) continue; // Ignore non-integer values. @@ -1652,9 +1651,8 @@ static bool MIIsInTerminatorSequence(const MachineInstr &MI) { // Make sure that the copy dest is not a vreg when the copy source is a // physical register. - if (!OPI2->isReg() || - (!TargetRegisterInfo::isPhysicalRegister(OPI->getReg()) && - TargetRegisterInfo::isPhysicalRegister(OPI2->getReg()))) + if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) && + Register::isPhysicalRegister(OPI2->getReg()))) return false; return true; @@ -2234,9 +2232,9 @@ void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { SDLoc dl(Op); MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); - unsigned Reg = + Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0), - *CurDAG); + CurDAG->getMachineFunction()); SDValue New = CurDAG->getCopyFromReg( Op->getOperand(0), dl, Reg, Op->getValueType(0)); New->setNodeId(-1); @@ -2248,9 +2246,9 @@ void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) { SDLoc dl(Op); MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); - unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(), + Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Op->getOperand(2).getValueType(), - *CurDAG); + CurDAG->getMachineFunction()); SDValue New = CurDAG->getCopyToReg( Op->getOperand(0), dl, Reg, Op->getOperand(2)); New->setNodeId(-1); @@ -3323,10 +3321,13 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, continue; } - case OPC_EmitCopyToReg: { + case OPC_EmitCopyToReg: + case OPC_EmitCopyToReg2: { unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg"); unsigned DestPhysReg = MatcherTable[MatcherIndex++]; + if (Opcode == OPC_EmitCopyToReg2) + DestPhysReg |= MatcherTable[MatcherIndex++] << 8; if (!InputChain.getNode()) InputChain = CurDAG->getEntryNode(); diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 395e9a8a4fc5..fad98b6f50dc 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ 
b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -378,7 +378,6 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, // We use TargetFrameIndex so that isel will not select it into LEA Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy()); -#ifndef NDEBUG // Right now we always allocate spill slots that are of the same // size as the value we're about to spill (the size of spillee can // vary since we spill vectors of pointers too). At some point we @@ -387,12 +386,18 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo(); assert((MFI.getObjectSize(Index) * 8) == Incoming.getValueSizeInBits() && "Bad spill: stack slot does not match!"); -#endif + // Note: Using the alignment of the spill slot (rather than the ABI or + // preferred alignment) is required for correctness when dealing with spill + // slots with preferred alignments larger than frame alignment. auto &MF = Builder.DAG.getMachineFunction(); auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index); + auto *StoreMMO = + MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, + MFI.getObjectSize(Index), + MFI.getObjectAlignment(Index)); Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc, - PtrInfo); + StoreMMO); MMO = getMachineMemOperand(MF, *cast<FrameIndexSDNode>(Loc)); @@ -1011,20 +1016,27 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { return; } - SDValue SpillSlot = - DAG.getTargetFrameIndex(*DerivedPtrLocation, getFrameIndexTy()); + unsigned Index = *DerivedPtrLocation; + SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy()); // Note: We know all of these reloads are independent, but don't bother to // exploit that chain wise. DAGCombine will happily do so as needed, so // doing it here would be a small compile time win at most. SDValue Chain = getRoot(); - SDValue SpillLoad = - DAG.getLoad(DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), - Relocate.getType()), - getCurSDLoc(), Chain, SpillSlot, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), - *DerivedPtrLocation)); + auto &MF = DAG.getMachineFunction(); + auto &MFI = MF.getFrameInfo(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index); + auto *LoadMMO = + MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, + MFI.getObjectSize(Index), + MFI.getObjectAlignment(Index)); + + auto LoadVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + Relocate.getType()); + + SDValue SpillLoad = DAG.getLoad(LoadVT, getCurSDLoc(), Chain, + SpillSlot, LoadMMO); DAG.setRoot(SpillLoad.getValue(1)); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index b260cd91d468..9ab1324533f1 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/TargetLowering.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -37,7 +36,7 @@ using namespace llvm; /// NOTE: The TargetMachine owns TLOF. 
TargetLowering::TargetLowering(const TargetMachine &tm) - : TargetLoweringBase(tm) {} + : TargetLoweringBase(tm) {} const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; } @@ -80,7 +79,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, const CCValAssign &ArgLoc = ArgLocs[I]; if (!ArgLoc.isRegLoc()) continue; - unsigned Reg = ArgLoc.getLocReg(); + Register Reg = ArgLoc.getLocReg(); // Only look at callee saved registers. if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg)) continue; @@ -121,19 +120,25 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call, /// result of type RetVT. std::pair<SDValue, SDValue> TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, - ArrayRef<SDValue> Ops, bool isSigned, - const SDLoc &dl, bool doesNotReturn, - bool isReturnValueUsed, - bool isPostTypeLegalization) const { + ArrayRef<SDValue> Ops, + MakeLibCallOptions CallOptions, + const SDLoc &dl) const { TargetLowering::ArgListTy Args; Args.reserve(Ops.size()); TargetLowering::ArgListEntry Entry; - for (SDValue Op : Ops) { - Entry.Node = Op; + for (unsigned i = 0; i < Ops.size(); ++i) { + SDValue NewOp = Ops[i]; + Entry.Node = NewOp; Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); - Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); + Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(), + CallOptions.IsSExt); + Entry.IsZExt = !Entry.IsSExt; + + if (CallOptions.IsSoften && + !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) { + Entry.IsSExt = Entry.IsZExt = false; + } Args.push_back(Entry); } @@ -144,15 +149,22 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); - bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned); + bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt); + bool zeroExtend = !signExtend; + + if (CallOptions.IsSoften && + !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) { + signExtend = zeroExtend = false; + } + CLI.setDebugLoc(dl) .setChain(DAG.getEntryNode()) .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) - .setNoReturn(doesNotReturn) - .setDiscardResult(!isReturnValueUsed) - .setIsPostTypeLegalization(isPostTypeLegalization) + .setNoReturn(CallOptions.DoesNotReturn) + .setDiscardResult(!CallOptions.IsReturnValueUsed) + .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization) .setSExtResult(signExtend) - .setZExtResult(!signExtend); + .setZExtResult(zeroExtend); return LowerCallTo(CLI); } @@ -263,7 +275,8 @@ TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps, void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - const SDLoc &dl) const { + const SDLoc &dl, const SDValue OldLHS, + const SDValue OldRHS) const { assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128) && "Unsupported setcc type!"); @@ -365,8 +378,11 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, // Use the target-specific return value for comparison lib calls. 
EVT RetVT = getCmpLibcallReturnType(); SDValue Ops[2] = {NewLHS, NewRHS}; - NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/, - dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { OldLHS.getValueType(), + OldRHS.getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true); + NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first; NewRHS = DAG.getConstant(0, dl, RetVT); CCCode = getCmpLibcallCC(LC1); @@ -378,8 +394,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, ISD::SETCC, dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), NewLHS, NewRHS, DAG.getCondCode(CCCode)); - NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false/*sign irrelevant*/, - dl).first; + NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl).first; NewLHS = DAG.getNode( ISD::SETCC, dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), @@ -564,6 +579,170 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, AssumeSingleUse); } +// TODO: Can we merge SelectionDAG::GetDemandedBits into this? +// TODO: Under what circumstances can we create nodes? Constant folding? +SDValue TargetLowering::SimplifyMultipleUseDemandedBits( + SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, + SelectionDAG &DAG, unsigned Depth) const { + // Limit search depth. + if (Depth >= SelectionDAG::MaxRecursionDepth) + return SDValue(); + + // Ignore UNDEFs. + if (Op.isUndef()) + return SDValue(); + + // Not demanding any bits/elts from Op. + if (DemandedBits == 0 || DemandedElts == 0) + return DAG.getUNDEF(Op.getValueType()); + + unsigned NumElts = DemandedElts.getBitWidth(); + KnownBits LHSKnown, RHSKnown; + switch (Op.getOpcode()) { + case ISD::BITCAST: { + SDValue Src = peekThroughBitcasts(Op.getOperand(0)); + EVT SrcVT = Src.getValueType(); + EVT DstVT = Op.getValueType(); + unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits(); + unsigned NumDstEltBits = DstVT.getScalarSizeInBits(); + + if (NumSrcEltBits == NumDstEltBits) + if (SDValue V = SimplifyMultipleUseDemandedBits( + Src, DemandedBits, DemandedElts, DAG, Depth + 1)) + return DAG.getBitcast(DstVT, V); + + // TODO - bigendian once we have test coverage. + if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 && + DAG.getDataLayout().isLittleEndian()) { + unsigned Scale = NumDstEltBits / NumSrcEltBits; + unsigned NumSrcElts = SrcVT.getVectorNumElements(); + APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + for (unsigned i = 0; i != Scale; ++i) { + unsigned Offset = i * NumSrcEltBits; + APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); + if (!Sub.isNullValue()) { + DemandedSrcBits |= Sub; + for (unsigned j = 0; j != NumElts; ++j) + if (DemandedElts[j]) + DemandedSrcElts.setBit((j * Scale) + i); + } + } + + if (SDValue V = SimplifyMultipleUseDemandedBits( + Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1)) + return DAG.getBitcast(DstVT, V); + } + + // TODO - bigendian once we have test coverage. + if ((NumSrcEltBits % NumDstEltBits) == 0 && + DAG.getDataLayout().isLittleEndian()) { + unsigned Scale = NumSrcEltBits / NumDstEltBits; + unsigned NumSrcElts = SrcVT.isVector() ? 
SrcVT.getVectorNumElements() : 1; + APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) { + unsigned Offset = (i % Scale) * NumDstEltBits; + DemandedSrcBits.insertBits(DemandedBits, Offset); + DemandedSrcElts.setBit(i / Scale); + } + + if (SDValue V = SimplifyMultipleUseDemandedBits( + Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1)) + return DAG.getBitcast(DstVT, V); + } + + break; + } + case ISD::AND: { + LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + + // If all of the demanded bits are known 1 on one side, return the other. + // These bits cannot contribute to the result of the 'and' in this + // context. + if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One)) + return Op.getOperand(0); + if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One)) + return Op.getOperand(1); + break; + } + case ISD::OR: { + LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + + // If all of the demanded bits are known zero on one side, return the + // other. These bits cannot contribute to the result of the 'or' in this + // context. + if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero)) + return Op.getOperand(0); + if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero)) + return Op.getOperand(1); + break; + } + case ISD::XOR: { + LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + + // If all of the demanded bits are known zero on one side, return the + // other. + if (DemandedBits.isSubsetOf(RHSKnown.Zero)) + return Op.getOperand(0); + if (DemandedBits.isSubsetOf(LHSKnown.Zero)) + return Op.getOperand(1); + break; + } + case ISD::SIGN_EXTEND_INREG: { + // If none of the extended bits are demanded, eliminate the sextinreg. + EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + if (DemandedBits.getActiveBits() <= ExVT.getScalarSizeInBits()) + return Op.getOperand(0); + break; + } + case ISD::INSERT_VECTOR_ELT: { + // If we don't demand the inserted element, return the base vector. + SDValue Vec = Op.getOperand(0); + auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + EVT VecVT = Vec.getValueType(); + if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) && + !DemandedElts[CIdx->getZExtValue()]) + return Vec; + break; + } + case ISD::VECTOR_SHUFFLE: { + ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask(); + + // If all the demanded elts are from one operand and are inline, + // then we can use the operand directly. + bool AllUndef = true, IdentityLHS = true, IdentityRHS = true; + for (unsigned i = 0; i != NumElts; ++i) { + int M = ShuffleMask[i]; + if (M < 0 || !DemandedElts[i]) + continue; + AllUndef = false; + IdentityLHS &= (M == (int)i); + IdentityRHS &= ((M - NumElts) == i); + } + + if (AllUndef) + return DAG.getUNDEF(Op.getValueType()); + if (IdentityLHS) + return Op.getOperand(0); + if (IdentityRHS) + return Op.getOperand(1); + break; + } + default: + if (Op.getOpcode() >= ISD::BUILTIN_OP_END) + if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode( + Op, DemandedBits, DemandedElts, DAG, Depth)) + return V; + break; + } + return SDValue(); +} + /// Look at Op. 
At this point, we know that only the OriginalDemandedBits of the /// result of Op are ever used downstream. If we can use this information to /// simplify Op, create a new simplified DAG node and return true, returning the @@ -619,12 +798,15 @@ bool TargetLowering::SimplifyDemandedBits( } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) { // Not demanding any bits/elts from Op. return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); - } else if (Depth == 6) { // Limit search depth. + } else if (Depth >= SelectionDAG::MaxRecursionDepth) { + // Limit search depth. return false; } KnownBits Known2, KnownOut; switch (Op.getOpcode()) { + case ISD::TargetConstant: + llvm_unreachable("Can't simplify this node"); case ISD::SCALAR_TO_VECTOR: { if (!DemandedElts[0]) return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); @@ -728,6 +910,21 @@ bool TargetLowering::SimplifyDemandedBits( } break; } + case ISD::EXTRACT_SUBVECTOR: { + // If index isn't constant, assume we need all the source vector elements. + SDValue Src = Op.getOperand(0); + ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt SrcElts = APInt::getAllOnesValue(NumSrcElts); + if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { + // Offset the demanded elts by the subvector index. + uint64_t Idx = SubIdx->getZExtValue(); + SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + } + if (SimplifyDemandedBits(Src, DemandedBits, SrcElts, Known, TLO, Depth + 1)) + return true; + break; + } case ISD::CONCAT_VECTORS: { Known.Zero.setAllBits(); Known.One.setAllBits(); @@ -773,22 +970,37 @@ bool TargetLowering::SimplifyDemandedBits( } if (!!DemandedLHS || !!DemandedRHS) { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + Known.Zero.setAllBits(); Known.One.setAllBits(); if (!!DemandedLHS) { - if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits, DemandedLHS, - Known2, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO, + Depth + 1)) return true; Known.One &= Known2.One; Known.Zero &= Known2.Zero; } if (!!DemandedRHS) { - if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedRHS, - Known2, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO, + Depth + 1)) return true; Known.One &= Known2.One; Known.Zero &= Known2.Zero; } + + // Attempt to avoid multi-use ops if we don't need anything from them. + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask); + return TLO.CombineTo(Op, NewOp); + } } break; } @@ -834,6 +1046,20 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? 
DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + // If all of the demanded bits are known one on one side, return the other. // These bits cannot contribute to the result of the 'and'. if (DemandedBits.isSubsetOf(Known2.Zero | Known.One)) @@ -869,6 +1095,20 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. if (DemandedBits.isSubsetOf(Known2.One | Known.Zero)) @@ -901,6 +1141,20 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. if (DemandedBits.isSubsetOf(Known.Zero)) @@ -1034,7 +1288,7 @@ bool TargetLowering::SimplifyDemandedBits( // out) are never demanded. // TODO - support non-uniform vector amounts. if (Op0.getOpcode() == ISD::SRL) { - if ((DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) { + if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) { if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) { if (SA2->getAPIntValue().ult(BitWidth)) { @@ -1141,7 +1395,8 @@ bool TargetLowering::SimplifyDemandedBits( if (Op0.getOpcode() == ISD::SHL) { if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) { - if ((DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) { + if (!DemandedBits.intersects( + APInt::getHighBitsSet(BitWidth, ShAmt))) { if (SA2->getAPIntValue().ult(BitWidth)) { unsigned C1 = SA2->getZExtValue(); unsigned Opc = ISD::SRL; @@ -1479,6 +1734,11 @@ bool TargetLowering::SimplifyDemandedBits( return true; Known = Known.trunc(BitWidth); + // Attempt to avoid multi-use ops if we don't need anything from them. 
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedBits( + Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1)) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc)); + // If the input is only used by this truncate, see if we can shrink it based // on the known demanded bits. if (Src.getNode()->hasOneUse()) { @@ -1595,9 +1855,7 @@ bool TargetLowering::SimplifyDemandedBits( // Bitcast from a vector using SimplifyDemanded Bits/VectorElts. // Demand the elt/bit if any of the original elts/bits are demanded. // TODO - bigendian once we have test coverage. - // TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support. - if (SrcVT.isVector() && NumSrcEltBits > 1 && - (BitWidth % NumSrcEltBits) == 0 && + if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 && TLO.DAG.getDataLayout().isLittleEndian()) { unsigned Scale = BitWidth / NumSrcEltBits; unsigned NumSrcElts = SrcVT.getVectorNumElements(); @@ -1663,6 +1921,7 @@ bool TargetLowering::SimplifyDemandedBits( // Add, Sub, and Mul don't demand any bits in positions beyond that // of the highest bit demanded of them. SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1); + SDNodeFlags Flags = Op.getNode()->getFlags(); unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros(); APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ); if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO, @@ -1671,7 +1930,6 @@ bool TargetLowering::SimplifyDemandedBits( Depth + 1) || // See if the operation should be performed at a smaller bit width. ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) { - SDNodeFlags Flags = Op.getNode()->getFlags(); if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) { // Disable the nsw and nuw flags. We can no longer guarantee that we // won't wrap after simplification. @@ -1684,6 +1942,23 @@ bool TargetLowering::SimplifyDemandedBits( return true; } + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Flags.setNoSignedWrap(false); + Flags.setNoUnsignedWrap(false); + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = + TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags); + return TLO.CombineTo(Op, NewOp); + } + } + // If we have a constant operand, we may be able to turn it into -1 if we // do not demand the high bits. This can make the constant smaller to // encode, allow more general folding, or match specialized instruction @@ -1694,10 +1969,8 @@ bool TargetLowering::SimplifyDemandedBits( if (C && !C->isAllOnesValue() && !C->isOne() && (C->getAPIntValue() | HighMask).isAllOnesValue()) { SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT); - // We can't guarantee that the new math op doesn't wrap, so explicitly - // clear those flags to prevent folding with a potential existing node - // that has those flags set. - SDNodeFlags Flags; + // Disable the nsw and nuw flags. We can no longer guarantee that we + // won't wrap after simplification. Flags.setNoSignedWrap(false); Flags.setNoUnsignedWrap(false); SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags); @@ -1837,7 +2110,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( } // Limit search depth. 
- if (Depth >= 6) + if (Depth >= SelectionDAG::MaxRecursionDepth) return false; SDLoc DL(Op); @@ -2001,6 +2274,15 @@ bool TargetLowering::SimplifyDemandedVectorElts( return true; APInt BaseElts = DemandedElts; BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx); + + // If none of the base operand elements are demanded, replace it with undef. + if (!BaseElts && !Base.isUndef()) + return TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, + TLO.DAG.getUNDEF(VT), + Op.getOperand(1), + Op.getOperand(2))); + if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO, Depth + 1)) return true; @@ -2134,11 +2416,13 @@ bool TargetLowering::SimplifyDemandedVectorElts( // Update legal shuffle masks based on demanded elements if it won't reduce // to Identity which can cause premature removal of the shuffle mask. - if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps && - isShuffleMaskLegal(NewMask, VT)) - return TLO.CombineTo(Op, - TLO.DAG.getVectorShuffle(VT, DL, Op.getOperand(0), - Op.getOperand(1), NewMask)); + if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) { + SDValue LegalShuffle = + buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1), + NewMask, TLO.DAG); + if (LegalShuffle) + return TLO.CombineTo(Op, LegalShuffle); + } // Propagate undef/zero elements from LHS/RHS. for (unsigned i = 0; i != NumElts; ++i) { @@ -2304,6 +2588,13 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known.resetAll(); } +void TargetLowering::computeKnownBitsForTargetInstr( + GISelKnownBits &Analysis, Register R, KnownBits &Known, + const APInt &DemandedElts, const MachineRegisterInfo &MRI, + unsigned Depth) const { + Known.resetAll(); +} + void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, @@ -2357,6 +2648,36 @@ bool TargetLowering::SimplifyDemandedBitsForTargetNode( return false; } +SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode( + SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, + SelectionDAG &DAG, unsigned Depth) const { + assert( + (Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op" + " is a target node!"); + return SDValue(); +} + +SDValue +TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, + SDValue N1, MutableArrayRef<int> Mask, + SelectionDAG &DAG) const { + bool LegalMask = isShuffleMaskLegal(Mask, VT); + if (!LegalMask) { + std::swap(N0, N1); + ShuffleVectorSDNode::commuteMask(Mask); + LegalMask = isShuffleMaskLegal(Mask, VT); + } + + if (!LegalMask) + return SDValue(); + + return DAG.getVectorShuffle(VT, DL, N0, N1, Mask); +} + const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const { return nullptr; } @@ -2610,6 +2931,77 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck( return T2; } +// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 +SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift( + EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL) const { + assert(isConstOrConstSplat(N1C) && + isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() && + "Should be a comparison with 0."); + assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + "Valid only for [in]equality 
comparisons."); + + unsigned NewShiftOpcode; + SDValue X, C, Y; + + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Look for '(C l>>/<< Y)'. + auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) { + // The shift should be one-use. + if (!V.hasOneUse()) + return false; + unsigned OldShiftOpcode = V.getOpcode(); + switch (OldShiftOpcode) { + case ISD::SHL: + NewShiftOpcode = ISD::SRL; + break; + case ISD::SRL: + NewShiftOpcode = ISD::SHL; + break; + default: + return false; // must be a logical shift. + } + // We should be shifting a constant. + // FIXME: best to use isConstantOrConstantVector(). + C = V.getOperand(0); + ConstantSDNode *CC = + isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true); + if (!CC) + return false; + Y = V.getOperand(1); + + ConstantSDNode *XC = + isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true); + return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG); + }; + + // LHS of comparison should be an one-use 'and'. + if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) + return SDValue(); + + X = N0.getOperand(0); + SDValue Mask = N0.getOperand(1); + + // 'and' is commutative! + if (!Match(Mask)) { + std::swap(X, Mask); + if (!Match(Mask)) + return SDValue(); + } + + EVT VT = X.getValueType(); + + // Produce: + // ((X 'OppositeShiftOpcode' Y) & C) Cond 0 + SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y); + SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C); + SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond); + return T2; +} + /// Try to fold an equality comparison with a {add/sub/xor} binary operation as /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to /// handle the commuted versions of these patterns. @@ -2726,9 +3118,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // (ctpop x) u< 2 -> (x & x-1) == 0 // (ctpop x) u> 1 -> (x & x-1) != 0 if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){ - SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp, - DAG.getConstant(1, dl, CTVT)); - SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub); + SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); + SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); + SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE; return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC); } @@ -2852,7 +3244,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0)); APInt bestMask; unsigned bestWidth = 0, bestOffset = 0; - if (!Lod->isVolatile() && Lod->isUnindexed()) { + if (Lod->isSimple() && Lod->isUnindexed()) { unsigned origWidth = N0.getValueSizeInBits(); unsigned maskWidth = origWidth; // We can narrow (e.g.) 16-bit extending loads on 32-bit target to @@ -3178,6 +3570,14 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } + if (Cond == ISD::SETEQ || Cond == ISD::SETNE) { + // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 + if (C1.isNullValue()) + if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift( + VT, N0, N1, Cond, DCI, dl)) + return CC; + } + // If we have "setcc X, C0", check to see if we can shrink the immediate // by changing cc. // TODO: Support this for vectors after legalize ops. 
@@ -3203,33 +3603,35 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Back to non-vector simplifications. // TODO: Can we do these for vector splats? if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const APInt &C1 = N1C->getAPIntValue(); + EVT ShValTy = N0.getValueType(); // Fold bit comparisons when we can. if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && - (VT == N0.getValueType() || - (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) && + (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) && N0.getOpcode() == ISD::AND) { auto &DL = DAG.getDataLayout(); if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, - !DCI.isBeforeLegalize()); + EVT ShiftTy = getShiftAmountTy(ShValTy, DL, !DCI.isBeforeLegalize()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. - if (AndRHS->getAPIntValue().isPowerOf2()) { + unsigned ShCt = AndRHS->getAPIntValue().logBase2(); + if (AndRHS->getAPIntValue().isPowerOf2() && + ShCt <= TLI.getShiftAmountThreshold(ShValTy)) { return DAG.getNode(ISD::TRUNCATE, dl, VT, - DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, - DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl, - ShiftTy))); + DAG.getNode(ISD::SRL, dl, ShValTy, N0, + DAG.getConstant(ShCt, dl, ShiftTy))); } } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) { // (X & 8) == 8 --> (X & 8) >> 3 // Perform the xform if C1 is a single bit. - if (C1.isPowerOf2()) { + unsigned ShCt = C1.logBase2(); + if (C1.isPowerOf2() && + ShCt <= TLI.getShiftAmountThreshold(ShValTy)) { return DAG.getNode(ISD::TRUNCATE, dl, VT, - DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, - DAG.getConstant(C1.logBase2(), dl, - ShiftTy))); + DAG.getNode(ISD::SRL, dl, ShValTy, N0, + DAG.getConstant(ShCt, dl, ShiftTy))); } } } @@ -3452,15 +3854,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } // Fold remainder of division by a constant. - if (N0.getOpcode() == ISD::UREM && N0.hasOneUse() && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) && + N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); // When division is cheap or optimizing for minimum size, // fall through to DIVREM creation by skipping this fold. - if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) - if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl)) - return Folded; + if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) { + if (N0.getOpcode() == ISD::UREM) { + if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl)) + return Folded; + } else if (N0.getOpcode() == ISD::SREM) { + if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl)) + return Folded; + } + } } // Fold away ALL boolean setcc's. 
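[Editor's aside, not part of the imported diff] The hunk above gates the classic single-bit-test rewrite behind the new getShiftAmountThreshold hook. In plain C++, the two shapes it chooses between look like this (a sketch assuming a 32-bit value and the mask 8 == 1 << 3):

#include <cstdint>

bool ViaCompare(uint32_t X) { return (X & 8) != 0; }      // setcc form
bool ViaShift(uint32_t X) { return ((X & 8) >> 3) & 1; }  // truncate((X & 8) srl 3)

Both agree for every X; the shift form saves the compare, but the patch now emits it only when the shift amount (log2 of the single-bit mask) stays within the target's threshold.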
@@ -3567,15 +3975,17 @@ TargetLowering::getConstraintType(StringRef Constraint) const { if (S == 1) { switch (Constraint[0]) { default: break; - case 'r': return C_RegisterClass; + case 'r': + return C_RegisterClass; case 'm': // memory case 'o': // offsetable case 'V': // not offsetable return C_Memory; - case 'i': // Simple Integer or Relocatable Constant case 'n': // Simple Integer case 'E': // Floating Point Constant case 'F': // Floating Point Constant + return C_Immediate; + case 'i': // Simple Integer or Relocatable Constant case 's': // Relocatable Constant case 'p': // Address. case 'X': // Allow ANY value. @@ -3950,6 +4360,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL, /// Return an integer indicating how general CT is. static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { switch (CT) { + case TargetLowering::C_Immediate: case TargetLowering::C_Other: case TargetLowering::C_Unknown: return 0; @@ -4069,11 +4480,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[i]); - // If this is an 'other' constraint, see if the operand is valid for it. - // For example, on X86 we might have an 'rI' constraint. If the operand - // is an integer in the range [0..31] we want to use I (saving a load - // of a register), otherwise we must use 'r'. - if (CType == TargetLowering::C_Other && Op.getNode()) { + // If this is an 'other' or 'immediate' constraint, see if the operand is + // valid for it. For example, on X86 we might have an 'rI' constraint. If + // the operand is an integer in the range [0..31] we want to use I (saving a + // load of a register), otherwise we must use 'r'. + if ((CType == TargetLowering::C_Other || + CType == TargetLowering::C_Immediate) && Op.getNode()) { assert(OpInfo.Codes[i].size() == 1 && "Unhandled multi-letter 'other' constraint"); std::vector<SDValue> ResultOps; @@ -4455,6 +4867,34 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, return DAG.getSelect(dl, VT, IsOne, N0, Q); } +/// If all values in Values that *don't* match the predicate are same 'splat' +/// value, then replace all values with that splat value. +/// Else, if AlternativeReplacement was provided, then replace all values that +/// do match predicate with AlternativeReplacement value. +static void +turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values, + std::function<bool(SDValue)> Predicate, + SDValue AlternativeReplacement = SDValue()) { + SDValue Replacement; + // Is there a value for which the Predicate does *NOT* match? What is it? + auto SplatValue = llvm::find_if_not(Values, Predicate); + if (SplatValue != Values.end()) { + // Does Values consist only of SplatValue's and values matching Predicate? + if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) { + return Value == *SplatValue || Predicate(Value); + })) // Then we shall replace values matching predicate with SplatValue. + Replacement = *SplatValue; + } + if (!Replacement) { + // Oops, we did not find the "baseline" splat value. + if (!AlternativeReplacement) + return; // Nothing to do. + // Let's replace with provided value then. 
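+ // (Editor's illustration, not in the original patch: with Values = <42, 0, 42> + // and Predicate = isNullConstant the result is <42, 42, 42>; with + // Values = <42, 0, 7> no single splat exists, so the matching '0' becomes + // AlternativeReplacement instead.)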
+ Replacement = AlternativeReplacement; + } + std::replace_if(Values.begin(), Values.end(), Predicate, Replacement); +} + /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE /// where the divisor is constant and the comparison target is zero, /// return a DAG expression that will generate the same comparison result /// using only multiplications, additions and shifts/rotations. @@ -4482,77 +4922,409 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, DAGCombinerInfo &DCI, const SDLoc &DL, SmallVectorImpl<SDNode *> &Created) const { // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q) - // - D must be constant with D = D0 * 2^K where D0 is odd and D0 != 1 + // - D must be constant, with D = D0 * 2^K where D0 is odd // - P is the multiplicative inverse of D0 modulo 2^W - // - Q = floor((2^W - 1) / D0) + // - Q = floor(((2^W) - 1) / D) // where W is the width of the common type of N and D. assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Only applicable for (in)equality comparisons."); + SelectionDAG &DAG = DCI.DAG; + EVT VT = REMNode.getValueType(); + EVT SVT = VT.getScalarType(); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShSVT = ShVT.getScalarType(); // If MUL is unavailable, we cannot proceed in any case. if (!isOperationLegalOrCustom(ISD::MUL, VT)) return SDValue(); - // TODO: Add non-uniform constant support. - ConstantSDNode *Divisor = isConstOrConstSplat(REMNode->getOperand(1)); + // TODO: Could support comparing with non-zero too. ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); - if (!Divisor || !CompTarget || Divisor->isNullValue() || - !CompTarget->isNullValue()) + if (!CompTarget || !CompTarget->isNullValue()) return SDValue(); - const APInt &D = Divisor->getAPIntValue(); + bool HadOneDivisor = false; + bool AllDivisorsAreOnes = true; + bool HadEvenDivisor = false; + bool AllDivisorsArePowerOfTwo = true; + SmallVector<SDValue, 16> PAmts, KAmts, QAmts; + + auto BuildUREMPattern = [&](ConstantSDNode *C) { + // Division by 0 is UB. Leave it to be constant-folded elsewhere. + if (C->isNullValue()) + return false; + + const APInt &D = C->getAPIntValue(); + // If all divisors are ones, we will prefer to avoid the fold. + HadOneDivisor |= D.isOneValue(); + AllDivisorsAreOnes &= D.isOneValue(); + + // Decompose D into D0 * 2^K + unsigned K = D.countTrailingZeros(); + assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate."); + APInt D0 = D.lshr(K); + + // D is even if it has trailing zeros. + HadEvenDivisor |= (K != 0); + // D is a power-of-two if D0 is one. + // If all divisors are power-of-two, we will prefer to avoid the fold. + AllDivisorsArePowerOfTwo &= D0.isOneValue(); + + // P = inv(D0, 2^W) + // 2^W requires W + 1 bits, so we have to extend and then truncate. + unsigned W = D.getBitWidth(); + APInt P = D0.zext(W + 1) + .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) + .trunc(W); + assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable + assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + + // Q = floor((2^W - 1) / D) + APInt Q = APInt::getAllOnesValue(W).udiv(D); + + assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && + "We are expecting that K is always less than all-ones for ShSVT"); + + // If the divisor is 1 the result can be constant-folded. + if (D.isOneValue()) { + // Set P and K to bogus values so we can try to splat them.
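+ // (Editor's note, not in the original patch: e.g. for a <6, 1> vector divisor + // the D == 1 lane records these bogus P/K values, and the + // turnVectorIntoSplatVector() calls further down overwrite them with the real + // P and K computed for the D == 6 lane.)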
+ P = 0; + K = -1; + assert(Q.isAllOnesValue() && + "Expecting all-ones comparison for one divisor"); + } + + PAmts.push_back(DAG.getConstant(P, DL, SVT)); + KAmts.push_back( + DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT)); + QAmts.push_back(DAG.getConstant(Q, DL, SVT)); + return true; + }; + + SDValue N = REMNode.getOperand(0); + SDValue D = REMNode.getOperand(1); - // Decompose D into D0 * 2^K - unsigned K = D.countTrailingZeros(); - bool DivisorIsEven = (K != 0); - APInt D0 = D.lshr(K); + // Collect the values from each element. + if (!ISD::matchUnaryPredicate(D, BuildUREMPattern)) + return SDValue(); - // The fold is invalid when D0 == 1. - // This is reachable because visitSetCC happens before visitREM. - if (D0.isOneValue()) + // If this is a urem by one, avoid the fold since it can be constant-folded. + if (AllDivisorsAreOnes) return SDValue(); - // P = inv(D0, 2^W) - // 2^W requires W + 1 bits, so we have to extend and then truncate. - unsigned W = D.getBitWidth(); - APInt P = D0.zext(W + 1) - .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) - .trunc(W); - assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable - assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + // If this is a urem by a power-of-two, avoid the fold since it can be + // best implemented as a bit test. + if (AllDivisorsArePowerOfTwo) + return SDValue(); - // Q = floor((2^W - 1) / D) - APInt Q = APInt::getAllOnesValue(W).udiv(D); + SDValue PVal, KVal, QVal; + if (VT.isVector()) { + if (HadOneDivisor) { + // Try to turn PAmts into a splat, since we don't care about the values + // that are currently '0'. If we can't, just keep '0's. + turnVectorIntoSplatVector(PAmts, isNullConstant); + // Try to turn KAmts into a splat, since we don't care about the values + // that are currently '-1'. If we can't, change them to '0's. + turnVectorIntoSplatVector(KAmts, isAllOnesConstant, + DAG.getConstant(0, DL, ShSVT)); + } - SelectionDAG &DAG = DCI.DAG; + PVal = DAG.getBuildVector(VT, DL, PAmts); + KVal = DAG.getBuildVector(ShVT, DL, KAmts); + QVal = DAG.getBuildVector(VT, DL, QAmts); + } else { + PVal = PAmts[0]; + KVal = KAmts[0]; + QVal = QAmts[0]; + } - SDValue PVal = DAG.getConstant(P, DL, VT); - SDValue QVal = DAG.getConstant(Q, DL, VT); // (mul N, P) - SDValue Op1 = DAG.getNode(ISD::MUL, DL, VT, REMNode->getOperand(0), PVal); - Created.push_back(Op1.getNode()); + SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal); + Created.push_back(Op0.getNode()); - // Rotate right only if D was even. - if (DivisorIsEven) { + // Rotate right only if any divisor was even. We avoid rotates for all-odd + // divisors as a performance improvement, since rotating by 0 is a no-op. + if (HadEvenDivisor) { // We need ROTR to do this. if (!isOperationLegalOrCustom(ISD::ROTR, VT)) return SDValue(); - SDValue ShAmt = - DAG.getConstant(K, DL, getShiftAmountTy(VT, DAG.getDataLayout())); SDNodeFlags Flags; Flags.setExact(true); // UREM: (rotr (mul N, P), K) - Op1 = DAG.getNode(ISD::ROTR, DL, VT, Op1, ShAmt, Flags); - Created.push_back(Op1.getNode()); + Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags); + Created.push_back(Op0.getNode()); } // UREM: (setule/setugt (rotr (mul N, P), K), Q) - return DAG.getSetCC(DL, SETCCVT, Op1, QVal, + return DAG.getSetCC(DL, SETCCVT, Op0, QVal, ((Cond == ISD::SETEQ) ?
ISD::SETULE : ISD::SETUGT)); } +/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE +/// where the divisor is constant and the comparison target is zero, +/// return a DAG expression that will generate the same comparison result +/// using only multiplications, additions and shifts/rotations. +/// Ref: "Hacker's Delight" 10-17. +SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode, + SDValue CompTargetNode, + ISD::CondCode Cond, + DAGCombinerInfo &DCI, + const SDLoc &DL) const { + SmallVector<SDNode *, 7> Built; + if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond, + DCI, DL, Built)) { + assert(Built.size() <= 7 && "Max size prediction failed."); + for (SDNode *N : Built) + DCI.AddToWorklist(N); + return Folded; + } + + return SDValue(); +} + +SDValue +TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, + SDValue CompTargetNode, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL, + SmallVectorImpl<SDNode *> &Created) const { + // Fold: + // (seteq/ne (srem N, D), 0) + // To: + // (setule/ugt (rotr (add (mul N, P), A), K), Q) + // + // - D must be constant, with D = D0 * 2^K where D0 is odd + // - P is the multiplicative inverse of D0 modulo 2^W + // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k))) + // - Q = floor((2 * A) / (2^K)) + // where W is the width of the common type of N and D. + assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + "Only applicable for (in)equality comparisons."); + + SelectionDAG &DAG = DCI.DAG; + + EVT VT = REMNode.getValueType(); + EVT SVT = VT.getScalarType(); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShSVT = ShVT.getScalarType(); + + // If MUL is unavailable, we cannot proceed in any case. + if (!isOperationLegalOrCustom(ISD::MUL, VT)) + return SDValue(); + + // TODO: Could support comparing with non-zero too. + ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); + if (!CompTarget || !CompTarget->isNullValue()) + return SDValue(); + + bool HadIntMinDivisor = false; + bool HadOneDivisor = false; + bool AllDivisorsAreOnes = true; + bool HadEvenDivisor = false; + bool NeedToApplyOffset = false; + bool AllDivisorsArePowerOfTwo = true; + SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts; + + auto BuildSREMPattern = [&](ConstantSDNode *C) { + // Division by 0 is UB. Leave it to be constant-folded elsewhere. + if (C->isNullValue()) + return false; + + // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine. + + // WARNING: this fold is only valid for positive divisors! + APInt D = C->getAPIntValue(); + if (D.isNegative()) + D.negate(); // `rem %X, -C` is equivalent to `rem %X, C` + + HadIntMinDivisor |= D.isMinSignedValue(); + + // If all divisors are ones, we will prefer to avoid the fold. + HadOneDivisor |= D.isOneValue(); + AllDivisorsAreOnes &= D.isOneValue(); + + // Decompose D into D0 * 2^K + unsigned K = D.countTrailingZeros(); + assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate."); + APInt D0 = D.lshr(K); + + if (!D.isMinSignedValue()) { + // D is even if it has trailing zeros; unless it's INT_MIN, in which case + // we don't care about this lane in this fold, we'll special-handle it. + HadEvenDivisor |= (K != 0); + } + + // D is a power-of-two if D0 is one. This includes INT_MIN. + // If all divisors are power-of-two, we will prefer to avoid the fold. + AllDivisorsArePowerOfTwo &= D0.isOneValue(); + + // P = inv(D0, 2^W) + // 2^W requires W + 1 bits, so we have to extend and then truncate. 
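+ // (Editor's worked example, not in the original patch: for W = 8 and D = 6 + // we get K = 1 and D0 = 3, so P = 171 because 3 * 171 = 513 == 1 (mod 2^8); + // further down, A = (127 u/ 3) & ~1 = 42 and Q = (2 * 42) u/ 2^1 = 42.)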
+ unsigned W = D.getBitWidth(); + APInt P = D0.zext(W + 1) + .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) + .trunc(W); + assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable + assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + + // A = floor((2^(W - 1) - 1) / D0) & -2^K + APInt A = APInt::getSignedMaxValue(W).udiv(D0); + A.clearLowBits(K); + + if (!D.isMinSignedValue()) { + // If the divisor is INT_MIN we don't care about this lane in this fold; + // it is special-handled below. + NeedToApplyOffset |= A != 0; + } + + // Q = floor((2 * A) / (2^K)) + APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K)); + + assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) && + "We are expecting that A is always less than all-ones for SVT"); + assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && + "We are expecting that K is always less than all-ones for ShSVT"); + + // If the divisor is 1 the result can be constant-folded. Likewise, we + // don't care about INT_MIN lanes, those can be set to undef if appropriate. + if (D.isOneValue()) { + // Set P, A and K to bogus values so we can try to splat them. + P = 0; + A = -1; + K = -1; + + // x ?% 1 == 0 <--> true <--> x u<= -1 + Q = -1; + } + + PAmts.push_back(DAG.getConstant(P, DL, SVT)); + AAmts.push_back(DAG.getConstant(A, DL, SVT)); + KAmts.push_back( + DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT)); + QAmts.push_back(DAG.getConstant(Q, DL, SVT)); + return true; + }; + + SDValue N = REMNode.getOperand(0); + SDValue D = REMNode.getOperand(1); + + // Collect the values from each element. + if (!ISD::matchUnaryPredicate(D, BuildSREMPattern)) + return SDValue(); + + // If this is a srem by one, avoid the fold since it can be constant-folded. + if (AllDivisorsAreOnes) + return SDValue(); + + // If this is a srem by a power-of-two (including INT_MIN), avoid the fold + // since it can be best implemented as a bit test. + if (AllDivisorsArePowerOfTwo) + return SDValue(); + + SDValue PVal, AVal, KVal, QVal; + if (VT.isVector()) { + if (HadOneDivisor) { + // Try to turn PAmts into a splat, since we don't care about the values + // that are currently '0'. If we can't, just keep '0's. + turnVectorIntoSplatVector(PAmts, isNullConstant); + // Try to turn AAmts into a splat, since we don't care about the + // values that are currently '-1'. If we can't, change them to '0's. + turnVectorIntoSplatVector(AAmts, isAllOnesConstant, + DAG.getConstant(0, DL, SVT)); + // Try to turn KAmts into a splat, since we don't care about the values + // that are currently '-1'. If we can't, change them to '0's. + turnVectorIntoSplatVector(KAmts, isAllOnesConstant, + DAG.getConstant(0, DL, ShSVT)); + } + + PVal = DAG.getBuildVector(VT, DL, PAmts); + AVal = DAG.getBuildVector(VT, DL, AAmts); + KVal = DAG.getBuildVector(ShVT, DL, KAmts); + QVal = DAG.getBuildVector(VT, DL, QAmts); + } else { + PVal = PAmts[0]; + AVal = AAmts[0]; + KVal = KAmts[0]; + QVal = QAmts[0]; + } + + // (mul N, P) + SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal); + Created.push_back(Op0.getNode()); + + if (NeedToApplyOffset) { + // We need ADD to do this. + if (!isOperationLegalOrCustom(ISD::ADD, VT)) + return SDValue(); + + // (add (mul N, P), A) + Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal); + Created.push_back(Op0.getNode()); + } + + // Rotate right only if any divisor was even. We avoid rotates for all-odd + // divisors as a performance improvement, since rotating by 0 is a no-op.
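+ // (Editor's note, not in the original patch: when N is a multiple of D, + // (add (mul N, P), A) has its low K bits clear -- A was built with + // clearLowBits(K) -- which is why the rotate below can carry the 'exact' + // flag and the unsigned comparison against Q survives the rotation; see + // Hacker's Delight 10-17.)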
+ if (HadEvenDivisor) { + // We need ROTR to do this. + if (!isOperationLegalOrCustom(ISD::ROTR, VT)) + return SDValue(); + SDNodeFlags Flags; + Flags.setExact(true); + // SREM: (rotr (add (mul N, P), A), K) + Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags); + Created.push_back(Op0.getNode()); + } + + // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q) + SDValue Fold = + DAG.getSetCC(DL, SETCCVT, Op0, QVal, + ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT)); + + // If we didn't have lanes with INT_MIN divisor, then we're done. + if (!HadIntMinDivisor) + return Fold; + + // That fold is only valid for positive divisors. Which effectively means, + // it is invalid for INT_MIN divisors. So if we have such a lane, + // we must fix-up results for said lanes. + assert(VT.isVector() && "Can/should only get here for vectors."); + + if (!isOperationLegalOrCustom(ISD::SETEQ, VT) || + !isOperationLegalOrCustom(ISD::AND, VT) || + !isOperationLegalOrCustom(Cond, VT) || + !isOperationLegalOrCustom(ISD::VSELECT, VT)) + return SDValue(); + + Created.push_back(Fold.getNode()); + + SDValue IntMin = DAG.getConstant( + APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT); + SDValue IntMax = DAG.getConstant( + APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT); + SDValue Zero = + DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT); + + // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded. + SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ); + Created.push_back(DivisorIsIntMin.getNode()); + + // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0 + SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax); + Created.push_back(Masked.getNode()); + SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond); + Created.push_back(MaskedIsZero.getNode()); + + // To produce final result we need to blend 2 vectors: 'SetCC' and + // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick + // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is + // constant-folded, select can get lowered to a shuffle with constant mask. + SDValue Blended = + DAG.getNode(ISD::VSELECT, DL, VT, DivisorIsIntMin, MaskedIsZero, Fold); + + return Blended; +} + bool TargetLowering:: verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { if (!isa<ConstantSDNode>(Op.getOperand(0))) { @@ -4564,6 +5336,246 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { return false; } +char TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, bool ForCodeSize, + unsigned Depth) const { + // fneg is removable even if it has multiple uses. + if (Op.getOpcode() == ISD::FNEG) + return 2; + + // Don't allow anything with multiple uses unless we know it is free. + EVT VT = Op.getValueType(); + const SDNodeFlags Flags = Op->getFlags(); + const TargetOptions &Options = DAG.getTarget().Options; + if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND && + isFPExtFree(VT, Op.getOperand(0).getValueType()))) + return 0; + + // Don't recurse exponentially. + if (Depth > SelectionDAG::MaxRecursionDepth) + return 0; + + switch (Op.getOpcode()) { + case ISD::ConstantFP: { + if (!LegalOperations) + return 1; + + // Don't invert constant FP values after legalization unless the target says + // the negated constant is legal. 
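+ // (Editor's note: the bool expression below converts to the 0/1 cost scale + // isNegatibleForFree uses; only an existing FNEG, handled at the top, + // scores 2.)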
+ return isOperationLegal(ISD::ConstantFP, VT) || + isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT, + ForCodeSize); + } + case ISD::BUILD_VECTOR: { + // Only permit BUILD_VECTOR of constants. + if (llvm::any_of(Op->op_values(), [&](SDValue N) { + return !N.isUndef() && !isa<ConstantFPSDNode>(N); + })) + return 0; + if (!LegalOperations) + return 1; + if (isOperationLegal(ISD::ConstantFP, VT) && + isOperationLegal(ISD::BUILD_VECTOR, VT)) + return 1; + return llvm::all_of(Op->op_values(), [&](SDValue N) { + return N.isUndef() || + isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT, + ForCodeSize); + }); + } + case ISD::FADD: + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + return 0; + + // After operation legalization, it might not be legal to create new FSUBs. + if (LegalOperations && !isOperationLegalOrCustom(ISD::FSUB, VT)) + return 0; + + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) + if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1)) + return V; + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + case ISD::FSUB: + // We can't turn -(A-B) into B-A when we honor signed zeros. + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + return 0; + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return 1; + + case ISD::FMUL: + case ISD::FDIV: + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) + if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1)) + return V; + + // Ignore X * 2.0 because that is expected to be canonicalized to X + X. + if (auto *C = isConstOrConstSplatFP(Op.getOperand(1))) + if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL) + return 0; + + return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + + case ISD::FMA: + case ISD::FMAD: { + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + return 0; + + // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) + // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) + char V2 = isNegatibleForFree(Op.getOperand(2), DAG, LegalOperations, + ForCodeSize, Depth + 1); + if (!V2) + return 0; + + // One of Op0/Op1 must be cheaply negatible, then select the cheapest. + char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1); + char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + char V01 = std::max(V0, V1); + return V01 ? std::max(V01, V2) : 0; + } + + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FSIN: + return isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1); + } + + return 0; +} + +SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, + bool ForCodeSize, + unsigned Depth) const { + // fneg is removable even if it has multiple uses. 
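+ // (Editor's note: negating an existing (fneg X) simply returns X here, + // mirroring the unconditional score of 2 that isNegatibleForFree gives + // FNEG above.)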
+ if (Op.getOpcode() == ISD::FNEG) + return Op.getOperand(0); + + assert(Depth <= SelectionDAG::MaxRecursionDepth && + "getNegatedExpression doesn't match isNegatibleForFree"); + const SDNodeFlags Flags = Op->getFlags(); + + switch (Op.getOpcode()) { + case ISD::ConstantFP: { + APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); + V.changeSign(); + return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); + } + case ISD::BUILD_VECTOR: { + SmallVector<SDValue, 4> Ops; + for (SDValue C : Op->op_values()) { + if (C.isUndef()) { + Ops.push_back(C); + continue; + } + APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF(); + V.changeSign(); + Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType())); + } + return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops); + } + case ISD::FADD: + assert((DAG.getTarget().Options.NoSignedZerosFPMath || + Flags.hasNoSignedZeros()) && + "Expected NSZ fp-flag"); + + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) + if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, + Depth + 1)) + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(1), Flags); + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(1), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(0), Flags); + case ISD::FSUB: + // fold (fneg (fsub 0, B)) -> B + if (ConstantFPSDNode *N0CFP = + isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true)) + if (N0CFP->isZero()) + return Op.getOperand(1); + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(0), Flags); + + case ISD::FMUL: + case ISD::FDIV: + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) + if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, + Depth + 1)) + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(1), Flags); + + // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) + return DAG.getNode( + Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0), + getNegatedExpression(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1), + Flags); + + case ISD::FMA: + case ISD::FMAD: { + assert((DAG.getTarget().Options.NoSignedZerosFPMath || + Flags.hasNoSignedZeros()) && + "Expected NSZ fp-flag"); + + SDValue Neg2 = getNegatedExpression(Op.getOperand(2), DAG, LegalOperations, + ForCodeSize, Depth + 1); + + char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1); + char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + if (V0 >= V1) { + // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) + SDValue Neg0 = getNegatedExpression( + Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1); + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0, + Op.getOperand(1), Neg2, Flags); + } + + // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) + SDValue Neg1 = getNegatedExpression(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + Op.getOperand(0), Neg1, Neg2, Flags); + } + + case ISD::FP_EXTEND: + case ISD::FSIN: + return 
DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1)); + case ISD::FP_ROUND: + return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(1)); + } + + llvm_unreachable("Unknown code"); +} + //===----------------------------------------------------------------------===// // Legalization Utilities //===----------------------------------------------------------------------===// @@ -4862,7 +5874,8 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result, bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const { - SDValue Src = Node->getOperand(0); + unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; + SDValue Src = Node->getOperand(OpNo); EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); SDLoc dl(SDValue(Node, 0)); @@ -4871,6 +5884,13 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, if (SrcVT != MVT::f32 || DstVT != MVT::i64) return false; + if (Node->isStrictFPOpcode()) + // When a NaN is converted to an integer a trap is allowed. We can't + // use this expansion here because it would eliminate that trap. Other + // traps are also allowed and cannot be eliminated. See + // IEEE 754-2008 sec 5.8. + return false; + // Expand f32 -> i64 conversion // This algorithm comes from compiler-rt's implementation of fixsfdi: // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c @@ -4924,9 +5944,11 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, } bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, + SDValue &Chain, SelectionDAG &DAG) const { SDLoc dl(SDValue(Node, 0)); - SDValue Src = Node->getOperand(0); + unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; + SDValue Src = Node->getOperand(OpNo); EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); @@ -4934,7 +5956,9 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); // Only expand vector types if we have the appropriate vector bit operations. - if (DstVT.isVector() && (!isOperationLegalOrCustom(ISD::FP_TO_SINT, DstVT) || + unsigned SIntOpcode = Node->isStrictFPOpcode() ? 
ISD::STRICT_FP_TO_SINT : + ISD::FP_TO_SINT; + if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) || !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT))) return false; @@ -4946,14 +5970,21 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits()); if (APFloat::opOverflow & APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) { - Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src); + if (Node->isStrictFPOpcode()) { + Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, + { Node->getOperand(0), Src }); + Chain = Result.getValue(1); + } else + Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src); return true; } SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT); SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT); - bool Strict = shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false); + bool Strict = Node->isStrictFPOpcode() || + shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false); + if (Strict) { // Expand based on maximum range of FP_TO_SINT, if the value exceeds the // signmask then offset (the result of which should be fully representable). @@ -4963,12 +5994,23 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, // Result = fp_to_sint(Val) ^ Ofs // TODO: Should any fast-math-flags be set for the FSUB? - SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, - DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst)); + SDValue SrcBiased; + if (Node->isStrictFPOpcode()) + SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, + { Node->getOperand(0), Src, Cst }); + else + SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst); + SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased); SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), DAG.getConstant(SignMask, dl, DstVT)); - Result = DAG.getNode(ISD::XOR, dl, DstVT, - DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val), Ofs); + SDValue SInt; + if (Node->isStrictFPOpcode()) { + SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, + { SrcBiased.getValue(1), Val }); + Chain = SInt.getValue(1); + } else + SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val); + Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs); } else { // Expand based on maximum range of FP_TO_SINT: // True = fp_to_sint(Src) @@ -5918,7 +6960,8 @@ SDValue TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { assert((Node->getOpcode() == ISD::SMULFIX || Node->getOpcode() == ISD::UMULFIX || - Node->getOpcode() == ISD::SMULFIXSAT) && + Node->getOpcode() == ISD::SMULFIXSAT || + Node->getOpcode() == ISD::UMULFIXSAT) && "Expected a fixed point multiplication opcode"); SDLoc dl(Node); @@ -5926,15 +6969,19 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { SDValue RHS = Node->getOperand(1); EVT VT = LHS.getValueType(); unsigned Scale = Node->getConstantOperandVal(2); - bool Saturating = Node->getOpcode() == ISD::SMULFIXSAT; + bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT || + Node->getOpcode() == ISD::UMULFIXSAT); + bool Signed = (Node->getOpcode() == ISD::SMULFIX || + Node->getOpcode() == ISD::SMULFIXSAT); EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); unsigned VTSize = VT.getScalarSizeInBits(); if (!Scale) { // [us]mul.fix(a, b, 0) -> mul(a, b) - if (!Saturating && isOperationLegalOrCustom(ISD::MUL, VT)) { - return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); - } else if (Saturating && 
isOperationLegalOrCustom(ISD::SMULO, VT)) { + if (!Saturating) { + if (isOperationLegalOrCustom(ISD::MUL, VT)) + return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); + } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) { SDValue Result = DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS); SDValue Product = Result.getValue(0); @@ -5948,11 +6995,18 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT); Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin); return DAG.getSelect(dl, VT, Overflow, Result, Product); + } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) { + SDValue Result = + DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS); + SDValue Product = Result.getValue(0); + SDValue Overflow = Result.getValue(1); + + APInt MaxVal = APInt::getMaxValue(VTSize); + SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); + return DAG.getSelect(dl, VT, Overflow, SatMax, Product); } } - bool Signed = - Node->getOpcode() == ISD::SMULFIX || Node->getOpcode() == ISD::SMULFIXSAT; assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) && "Expected scale to be less than the number of bits if signed or at " "most the number of bits if unsigned."); @@ -5978,7 +7032,8 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { if (Scale == VTSize) // Result is just the top half since we'd be shifting by the width of the - // operand. + // operand. Overflow impossible so this works for both UMULFIX and + // UMULFIXSAT. return Hi; // The result will need to be shifted right by the scale since both operands @@ -5990,20 +7045,55 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { if (!Saturating) return Result; - unsigned OverflowBits = VTSize - Scale + 1; // +1 for the sign - SDValue HiMask = - DAG.getConstant(APInt::getHighBitsSet(VTSize, OverflowBits), dl, VT); - SDValue LoMask = DAG.getConstant( - APInt::getLowBitsSet(VTSize, VTSize - OverflowBits), dl, VT); - APInt MaxVal = APInt::getSignedMaxValue(VTSize); - APInt MinVal = APInt::getSignedMinValue(VTSize); - - Result = DAG.getSelectCC(dl, Hi, LoMask, - DAG.getConstant(MaxVal, dl, VT), Result, - ISD::SETGT); - return DAG.getSelectCC(dl, Hi, HiMask, - DAG.getConstant(MinVal, dl, VT), Result, - ISD::SETLT); + if (!Signed) { + // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the + // widened multiplication) aren't all zeroes. + + // Saturate to max if ((Hi >> Scale) != 0), + // which is the same as if (Hi > ((1 << Scale) - 1)) + APInt MaxVal = APInt::getMaxValue(VTSize); + SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale), + dl, VT); + Result = DAG.getSelectCC(dl, Hi, LowMask, + DAG.getConstant(MaxVal, dl, VT), Result, + ISD::SETUGT); + + return Result; + } + + // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the + // widened multiplication) aren't all ones or all zeroes. + + SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT); + SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT); + + if (Scale == 0) { + SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo, + DAG.getConstant(VTSize - 1, dl, ShiftTy)); + SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE); + // Saturated to SatMin if wide product is negative, and SatMax if wide + // product is positive ... 
+ SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax, + ISD::SETLT); + // ... but only if we overflowed. + return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result); + } + + // We handled Scale==0 above so all the bits to examine is in Hi. + + // Saturate to max if ((Hi >> (Scale - 1)) > 0), + // which is the same as if (Hi > (1 << (Scale - 1)) - 1) + SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1), + dl, VT); + Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT); + // Saturate to min if (Hi >> (Scale - 1)) < -1), + // which is the same as if (HI < (-1 << (Scale - 1)) + SDValue HighMask = + DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1), + dl, VT); + Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT); + return Result; } void TargetLowering::expandUADDSUBO( @@ -6060,24 +7150,19 @@ void TargetLowering::expandSADDSUBO( SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); - // LHSSign -> LHS >= 0 - // RHSSign -> RHS >= 0 - // SumSign -> Result >= 0 - // - // Add: - // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) - // Sub: - // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) - SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); - SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); - SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, - IsAdd ? ISD::SETEQ : ISD::SETNE); - - SDValue SumSign = DAG.getSetCC(dl, OType, Result, Zero, ISD::SETGE); - SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); - - SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); - Overflow = DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType); + // For an addition, the result should be less than one of the operands (LHS) + // if and only if the other operand (RHS) is negative, otherwise there will + // be overflow. + // For a subtraction, the result should be less than one of the operands + // (LHS) if and only if the other operand (RHS) is (non-zero) positive, + // otherwise there will be overflow. + SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT); + SDValue ConditionRHS = + DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT); + + Overflow = DAG.getBoolExtOrTrunc( + DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl, + ResultType, ResultType); } bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result, @@ -6176,20 +7261,19 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result, // being a legal type for the architecture and thus has to be split to // two arguments. SDValue Ret; + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(isSigned); + CallOptions.setIsPostTypeLegalization(true); if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) { // Halves of WideVT are packed into registers in different order // depending on platform endianness. This is usually handled by // the C calling convention, but we can't defer to it in // the legalizer. 
SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; - Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl, - /* doesNotReturn */ false, /* isReturnValueUsed */ true, - /* isPostTypeLegalization */ true).first; + Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first; } else { SDValue Args[] = { HiLHS, LHS, HiRHS, RHS }; - Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl, - /* doesNotReturn */ false, /* isReturnValueUsed */ true, - /* isPostTypeLegalization */ true).first; + Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first; } assert(Ret.getOpcode() == ISD::MERGE_VALUES && "Ret value is a collection of constituent nodes holding result."); diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp index 2db0ea570598..412a00095b9b 100644 --- a/lib/CodeGen/ShrinkWrap.cpp +++ b/lib/CodeGen/ShrinkWrap.cpp @@ -278,11 +278,10 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, // Ignore instructions like DBG_VALUE which don't read/def the register. if (!MO.isDef() && !MO.readsReg()) continue; - unsigned PhysReg = MO.getReg(); + Register PhysReg = MO.getReg(); if (!PhysReg) continue; - assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && - "Unallocated register?!"); + assert(Register::isPhysicalRegister(PhysReg) && "Unallocated register?!"); // The stack pointer is not normally described as a callee-saved register // in calling convention definitions, so we need to watch for it // separately. An SP mentioned by a call instruction, we can ignore, diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 23e5ce0acae8..db520d4e6403 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -477,7 +477,10 @@ bool SjLjEHPrepare::runOnFunction(Function &F) { UnregisterFn = M.getOrInsertFunction( "_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()), PointerType::getUnqual(FunctionContextTy)); - FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); + FrameAddrFn = Intrinsic::getDeclaration( + &M, Intrinsic::frameaddress, + {Type::getInt8PtrTy(M.getContext(), + M.getDataLayout().getAllocaAddrSpace())}); StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); BuiltinSetupDispatchFn = diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 5c944fe3f6b3..0c1f1220c421 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "SplitKit.h" -#include "LiveRangeCalc.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/None.h" @@ -22,6 +21,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -437,7 +437,7 @@ void SplitEditor::addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original) { assert(DefMI != nullptr); LaneBitmask LM; for (const MachineOperand &DefOp : DefMI->defs()) { - unsigned R = DefOp.getReg(); + Register R = DefOp.getReg(); if (R != LI.reg) continue; if (unsigned SR = DefOp.getSubReg()) @@ -1373,7 +1373,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { assert(LI.hasSubRanges()); LiveRangeCalc SubLRC; - unsigned Reg = EP.MO.getReg(), Sub = EP.MO.getSubReg(); + Register Reg = EP.MO.getReg(), Sub = 
EP.MO.getSubReg(); LaneBitmask LM = Sub != 0 ? TRI.getSubRegIndexLaneMask(Sub) : MRI.getMaxLaneMaskForVReg(Reg); for (LiveInterval::SubRange &S : LI.subranges()) { diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index 86ad3811e3ad..78f0bbd24db5 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -14,7 +14,6 @@ #ifndef LLVM_LIB_CODEGEN_SPLITKIT_H #define LLVM_LIB_CODEGEN_SPLITKIT_H -#include "LiveRangeCalc.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" @@ -25,6 +24,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SlotIndexes.h" diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index ae9401b89700..383c91259ffc 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -113,7 +113,7 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, unsigned Size = DL.getPointerSizeInBits(); assert((Size % 8) == 0 && "Need pointer size in bytes."); Size /= 8; - unsigned Reg = (++MOI)->getReg(); + Register Reg = (++MOI)->getReg(); int64_t Imm = (++MOI)->getImm(); Locs.emplace_back(StackMaps::Location::Direct, Size, getDwarfRegNum(Reg, TRI), Imm); @@ -122,7 +122,7 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, case StackMaps::IndirectMemRefOp: { int64_t Size = (++MOI)->getImm(); assert(Size > 0 && "Need a valid size for indirect memory locations."); - unsigned Reg = (++MOI)->getReg(); + Register Reg = (++MOI)->getReg(); int64_t Imm = (++MOI)->getImm(); Locs.emplace_back(StackMaps::Location::Indirect, Size, getDwarfRegNum(Reg, TRI), Imm); @@ -148,14 +148,14 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, if (MOI->isImplicit()) return ++MOI; - assert(TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) && + assert(Register::isPhysicalRegister(MOI->getReg()) && "Virtreg operands should have been rewritten before now."); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MOI->getReg()); assert(!MOI->getSubReg() && "Physical subreg still around."); unsigned Offset = 0; unsigned DwarfRegNum = getDwarfRegNum(MOI->getReg(), TRI); - unsigned LLVMRegNum = TRI->getLLVMRegNum(DwarfRegNum, false); + unsigned LLVMRegNum = *TRI->getLLVMRegNum(DwarfRegNum, false); unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNum, MOI->getReg()); if (SubRegIdx) Offset = TRI->getSubRegIdxOffset(SubRegIdx); diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 809960c7fdf9..5683d1db473c 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -17,7 +17,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/Passes.h" @@ -157,6 +156,68 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge, return NeedsProtector; } +bool StackProtector::HasAddressTaken(const Instruction *AI) { + for (const User *U : AI->users()) { + const auto *I = cast<Instruction>(U); + switch (I->getOpcode()) { + case Instruction::Store: + if (AI == cast<StoreInst>(I)->getValueOperand()) + return true; + break; + case Instruction::AtomicCmpXchg: + // cmpxchg conceptually includes both a load and 
store from the same + // location. So, like store, the value being stored is what matters. + if (AI == cast<AtomicCmpXchgInst>(I)->getNewValOperand()) + return true; + break; + case Instruction::PtrToInt: + if (AI == cast<PtrToIntInst>(I)->getOperand(0)) + return true; + break; + case Instruction::Call: { + // Ignore intrinsics that do not become real instructions. + // TODO: Narrow this to intrinsics that have store-like effects. + const auto *CI = cast<CallInst>(I); + if (!isa<DbgInfoIntrinsic>(CI) && !CI->isLifetimeStartOrEnd()) + return true; + break; + } + case Instruction::Invoke: + return true; + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::Select: + case Instruction::AddrSpaceCast: + if (HasAddressTaken(I)) + return true; + break; + case Instruction::PHI: { + // Keep track of what PHI nodes we have already visited to ensure + // they are only visited once. + const auto *PN = cast<PHINode>(I); + if (VisitedPHIs.insert(PN).second) + if (HasAddressTaken(PN)) + return true; + break; + } + case Instruction::Load: + case Instruction::AtomicRMW: + case Instruction::Ret: + // These instructions take an address operand, but have load-like or + // other innocuous behavior that should not trigger a stack protector. + // atomicrmw conceptually has both load and store semantics, but the + // value being stored must be integer; so if a pointer is being stored, + // we'll catch it in the PtrToInt case above. + break; + default: + // Conservatively return true for any instruction that takes an address + // operand, but is not handled above. + return true; + } + } + return false; +} + /// Search for the first call to the llvm.stackprotector intrinsic and return it /// if present. static const CallInst *findStackProtectorIntrinsic(Function &F) { @@ -264,9 +325,7 @@ bool StackProtector::RequiresStackProtector() { continue; } - if (Strong && PointerMayBeCaptured(AI, - /* ReturnCaptures */ false, - /* StoreCaptures */ true)) { + if (Strong && HasAddressTaken(AI)) { ++NumAddrTaken; Layout.insert(std::make_pair(AI, MachineFrameInfo::SSPLK_AddrOf)); ORE.emit([&]() { diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 99b533e10b87..9c8143c55dc2 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -221,7 +221,7 @@ void StackSlotColoring::InitializeSlots() { for (auto *I : Intervals) { LiveInterval &li = I->second; LLVM_DEBUG(li.dump()); - int FI = TargetRegisterInfo::stackSlot2Index(li.reg); + int FI = Register::stackSlot2Index(li.reg); if (MFI->isDeadObjectIndex(FI)) continue; @@ -268,7 +268,7 @@ StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const { int StackSlotColoring::ColorSlot(LiveInterval *li) { int Color = -1; bool Share = false; - int FI = TargetRegisterInfo::stackSlot2Index(li->reg); + int FI = Register::stackSlot2Index(li->reg); uint8_t StackID = MFI->getStackID(FI); if (!DisableSharing) { @@ -330,7 +330,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { bool Changed = false; for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; - int SS = TargetRegisterInfo::stackSlot2Index(li->reg); + int SS = Register::stackSlot2Index(li->reg); int NewSS = ColorSlot(li); assert(NewSS >= 0 && "Stack coloring failed?"); SlotMapping[SS] = NewSS; @@ -343,7 +343,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "\nSpill slots after coloring:\n"); for (unsigned i = 0, e = SSIntervals.size(); i != 
e; ++i) { LiveInterval *li = SSIntervals[i]; - int SS = TargetRegisterInfo::stackSlot2Index(li->reg); + int SS = Register::stackSlot2Index(li->reg); li->weight = SlotWeights[SS]; } // Sort them by new weight. diff --git a/lib/CodeGen/SwiftErrorValueTracking.cpp b/lib/CodeGen/SwiftErrorValueTracking.cpp index 96821cadb1b6..c72a04276a4f 100644 --- a/lib/CodeGen/SwiftErrorValueTracking.cpp +++ b/lib/CodeGen/SwiftErrorValueTracking.cpp @@ -13,9 +13,10 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SwiftErrorValueTracking.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/Value.h" diff --git a/lib/CodeGen/TailDuplicator.cpp b/lib/CodeGen/TailDuplicator.cpp index a0590a8a6cc6..03c68a37e459 100644 --- a/lib/CodeGen/TailDuplicator.cpp +++ b/lib/CodeGen/TailDuplicator.cpp @@ -235,8 +235,8 @@ bool TailDuplicator::tailDuplicateAndUpdate( MachineInstr *Copy = Copies[i]; if (!Copy->isCopy()) continue; - unsigned Dst = Copy->getOperand(0).getReg(); - unsigned Src = Copy->getOperand(1).getReg(); + Register Dst = Copy->getOperand(0).getReg(); + Register Src = Copy->getOperand(1).getReg(); if (MRI->hasOneNonDBGUse(Src) && MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) { // Copy is the only use. Do trivial copy propagation here. @@ -312,7 +312,7 @@ static void getRegsUsedByPHIs(const MachineBasicBlock &BB, if (!MI.isPHI()) break; for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { - unsigned SrcReg = MI.getOperand(i).getReg(); + Register SrcReg = MI.getOperand(i).getReg(); UsedByPhi->insert(SrcReg); } } @@ -340,17 +340,17 @@ void TailDuplicator::processPHI( DenseMap<unsigned, RegSubRegPair> &LocalVRMap, SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies, const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) { - unsigned DefReg = MI->getOperand(0).getReg(); + Register DefReg = MI->getOperand(0).getReg(); unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); assert(SrcOpIdx && "Unable to find matching PHI source?"); - unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg(); + Register SrcReg = MI->getOperand(SrcOpIdx).getReg(); unsigned SrcSubReg = MI->getOperand(SrcOpIdx).getSubReg(); const TargetRegisterClass *RC = MRI->getRegClass(DefReg); LocalVRMap.insert(std::make_pair(DefReg, RegSubRegPair(SrcReg, SrcSubReg))); // Insert a copy from source to the end of the block. The def register is the // available value liveout of the block. 
- unsigned NewDef = MRI->createVirtualRegister(RC); + Register NewDef = MRI->createVirtualRegister(RC); Copies.push_back(std::make_pair(NewDef, RegSubRegPair(SrcReg, SrcSubReg))); if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg)) addSSAUpdateEntry(DefReg, NewDef, PredBB); @@ -384,12 +384,12 @@ void TailDuplicator::duplicateInstruction( MachineOperand &MO = NewMI.getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; if (MO.isDef()) { const TargetRegisterClass *RC = MRI->getRegClass(Reg); - unsigned NewReg = MRI->createVirtualRegister(RC); + Register NewReg = MRI->createVirtualRegister(RC); MO.setReg(NewReg); LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0))); if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg)) @@ -433,7 +433,7 @@ void TailDuplicator::duplicateInstruction( auto *NewRC = MI->getRegClassConstraint(i, TII, TRI); if (NewRC == nullptr) NewRC = OrigRC; - unsigned NewReg = MRI->createVirtualRegister(NewRC); + Register NewReg = MRI->createVirtualRegister(NewRC); BuildMI(*PredBB, NewMI, NewMI.getDebugLoc(), TII->get(TargetOpcode::COPY), NewReg) .addReg(VI->second.Reg, 0, VI->second.SubReg); @@ -477,7 +477,7 @@ void TailDuplicator::updateSuccessorsPHIs( assert(Idx != 0); MachineOperand &MO0 = MI.getOperand(Idx); - unsigned Reg = MO0.getReg(); + Register Reg = MO0.getReg(); if (isDead) { // Folded into the previous BB. // There could be duplicate phi source entries. FIXME: Should sdisel diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp index 9c4483cb240d..9eeacc2584cb 100644 --- a/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCRegisterInfo.h" @@ -71,7 +72,9 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, // When interprocedural register allocation is enabled caller saved registers // are preferred over callee saved registers. - if (MF.getTarget().Options.EnableIPRA && isSafeForNoCSROpt(MF.getFunction())) + if (MF.getTarget().Options.EnableIPRA && + isSafeForNoCSROpt(MF.getFunction()) && + isProfitableForNoCSROpt(MF.getFunction())) return; // Get the callee saved register list... @@ -118,6 +121,18 @@ unsigned TargetFrameLowering::getStackAlignmentSkew( return 0; } +bool TargetFrameLowering::isSafeForNoCSROpt(const Function &F) { + if (!F.hasLocalLinkage() || F.hasAddressTaken() || + !F.hasFnAttribute(Attribute::NoRecurse)) + return false; + // Function should not be optimized as tail call. + for (const User *U : F.users()) + if (auto CS = ImmutableCallSite(U)) + if (CS.isTailCall()) + return false; + return true; +} + int TargetFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const { llvm_unreachable("getInitialCFAOffset() not implemented!"); } @@ -125,4 +140,4 @@ int TargetFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const { unsigned TargetFrameLowering::getInitialCFARegister(const MachineFunction &MF) const { llvm_unreachable("getInitialCFARegister() not implemented!"); -}
\ No newline at end of file +} diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index 868617ffe14d..6cae3b869501 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" @@ -142,7 +143,7 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, while (Tail != MBB->end()) { auto MI = Tail++; if (MI->isCall()) - MBB->getParent()->updateCallSiteInfo(&*MI); + MBB->getParent()->eraseCallSiteInfo(&*MI); MBB->erase(MI); } @@ -183,10 +184,10 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool Reg2IsInternal = MI.getOperand(Idx2).isInternalRead(); // Avoid calling isRenamable for virtual registers since we assert that // renamable property is only queried/set for physical registers. - bool Reg1IsRenamable = TargetRegisterInfo::isPhysicalRegister(Reg1) + bool Reg1IsRenamable = Register::isPhysicalRegister(Reg1) ? MI.getOperand(Idx1).isRenamable() : false; - bool Reg2IsRenamable = TargetRegisterInfo::isPhysicalRegister(Reg2) + bool Reg2IsRenamable = Register::isPhysicalRegister(Reg2) ? MI.getOperand(Idx2).isRenamable() : false; // If destination is tied to either of the commuted source register, then @@ -228,9 +229,9 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI, CommutedMI->getOperand(Idx1).setIsInternalRead(Reg2IsInternal); // Avoid calling setIsRenamable for virtual registers since we assert that // renamable property is only queried/set for physical registers. - if (TargetRegisterInfo::isPhysicalRegister(Reg1)) + if (Register::isPhysicalRegister(Reg1)) CommutedMI->getOperand(Idx2).setIsRenamable(Reg1IsRenamable); - if (TargetRegisterInfo::isPhysicalRegister(Reg2)) + if (Register::isPhysicalRegister(Reg2)) CommutedMI->getOperand(Idx1).setIsRenamable(Reg2IsRenamable); return CommutedMI; } @@ -281,7 +282,7 @@ bool TargetInstrInfo::fixCommutedOpIndices(unsigned &ResultIdx1, return true; } -bool TargetInstrInfo::findCommutedOpIndices(MachineInstr &MI, +bool TargetInstrInfo::findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { assert(!MI.isBundle() && @@ -393,7 +394,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, if (BitOffset < 0 || BitOffset % 8) return false; - Size = BitSize /= 8; + Size = BitSize / 8; Offset = (unsigned)BitOffset / 8; assert(TRI->getSpillSize(*RC) >= (Offset + Size) && "bad subregister range"); @@ -442,16 +443,15 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI, if (FoldOp.getSubReg() || LiveOp.getSubReg()) return nullptr; - unsigned FoldReg = FoldOp.getReg(); - unsigned LiveReg = LiveOp.getReg(); + Register FoldReg = FoldOp.getReg(); + Register LiveReg = LiveOp.getReg(); - assert(TargetRegisterInfo::isVirtualRegister(FoldReg) && - "Cannot fold physregs"); + assert(Register::isVirtualRegister(FoldReg) && "Cannot fold physregs"); const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); const TargetRegisterClass *RC = MRI.getRegClass(FoldReg); - if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg())) + if (Register::isPhysicalRegister(LiveOp.getReg())) return RC->contains(LiveOp.getReg()) ? 
RC : nullptr; if (RC->hasSubClassEq(MRI.getRegClass(LiveReg))) @@ -674,9 +674,9 @@ bool TargetInstrInfo::hasReassociableOperands( // reassociate. MachineInstr *MI1 = nullptr; MachineInstr *MI2 = nullptr; - if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg())) + if (Op1.isReg() && Register::isVirtualRegister(Op1.getReg())) MI1 = MRI.getUniqueVRegDef(Op1.getReg()); - if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg())) + if (Op2.isReg() && Register::isVirtualRegister(Op2.getReg())) MI2 = MRI.getUniqueVRegDef(Op2.getReg()); // And they need to be in the trace (otherwise, they won't have a depth). @@ -805,27 +805,27 @@ void TargetInstrInfo::reassociateOps( MachineOperand &OpY = Root.getOperand(OpIdx[Row][3]); MachineOperand &OpC = Root.getOperand(0); - unsigned RegA = OpA.getReg(); - unsigned RegB = OpB.getReg(); - unsigned RegX = OpX.getReg(); - unsigned RegY = OpY.getReg(); - unsigned RegC = OpC.getReg(); + Register RegA = OpA.getReg(); + Register RegB = OpB.getReg(); + Register RegX = OpX.getReg(); + Register RegY = OpY.getReg(); + Register RegC = OpC.getReg(); - if (TargetRegisterInfo::isVirtualRegister(RegA)) + if (Register::isVirtualRegister(RegA)) MRI.constrainRegClass(RegA, RC); - if (TargetRegisterInfo::isVirtualRegister(RegB)) + if (Register::isVirtualRegister(RegB)) MRI.constrainRegClass(RegB, RC); - if (TargetRegisterInfo::isVirtualRegister(RegX)) + if (Register::isVirtualRegister(RegX)) MRI.constrainRegClass(RegX, RC); - if (TargetRegisterInfo::isVirtualRegister(RegY)) + if (Register::isVirtualRegister(RegY)) MRI.constrainRegClass(RegY, RC); - if (TargetRegisterInfo::isVirtualRegister(RegC)) + if (Register::isVirtualRegister(RegC)) MRI.constrainRegClass(RegC, RC); // Create a new virtual register for the result of (X op Y) instead of // recycling RegB because the MachineCombiner's computation of the critical // path requires a new register definition rather than an existing one. - unsigned NewVR = MRI.createVirtualRegister(RC); + Register NewVR = MRI.createVirtualRegister(RC); InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); unsigned Opcode = Root.getOpcode(); @@ -880,21 +880,21 @@ void TargetInstrInfo::genAlternativeCodeSequence( } bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( - const MachineInstr &MI, AliasAnalysis *AA) const { + const MachineInstr &MI, AAResults *AA) const { const MachineFunction &MF = *MI.getMF(); const MachineRegisterInfo &MRI = MF.getRegInfo(); // Remat clients assume operand 0 is the defined register. if (!MI.getNumOperands() || !MI.getOperand(0).isReg()) return false; - unsigned DefReg = MI.getOperand(0).getReg(); + Register DefReg = MI.getOperand(0).getReg(); // A sub-register definition can only be rematerialized if the instruction // doesn't read the other parts of the register. Otherwise it is really a // read-modify-write operation on the full virtual register which cannot be // moved safely. - if (TargetRegisterInfo::isVirtualRegister(DefReg) && - MI.getOperand(0).getSubReg() && MI.readsVirtualRegister(DefReg)) + if (Register::isVirtualRegister(DefReg) && MI.getOperand(0).getSubReg() && + MI.readsVirtualRegister(DefReg)) return false; // A load from a fixed stack slot can be rematerialized. 
This may be @@ -924,12 +924,12 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; // Check for a well-behaved physical register. - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { if (MO.isUse()) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, @@ -1120,6 +1120,24 @@ bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel, return (DefCycle != -1 && DefCycle <= 1); } +Optional<ParamLoadedValue> +TargetInstrInfo::describeLoadedValue(const MachineInstr &MI) const { + const MachineFunction *MF = MI.getMF(); + const MachineOperand *Op = nullptr; + DIExpression *Expr = DIExpression::get(MF->getFunction().getContext(), {}); + const MachineOperand *SrcRegOp, *DestRegOp; + + if (isCopyInstr(MI, SrcRegOp, DestRegOp)) { + Op = SrcRegOp; + return ParamLoadedValue(*Op, Expr); + } else if (MI.isMoveImmediate()) { + Op = &MI.getOperand(1); + return ParamLoadedValue(*Op, Expr); + } + + return None; +} + /// Both DefMI and UseMI must be valid. By default, call directly to the /// itinerary. This may be overridden by the target. int TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, @@ -1227,3 +1245,5 @@ bool TargetInstrInfo::getInsertSubregInputs( InsertedReg.SubIdx = (unsigned)MOSubIdx.getImm(); return true; } + +TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {} diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 9b28c1a6c450..9b23012f47e3 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -167,6 +167,7 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) { setLibcallName(RTLIB::BZERO, "__bzero"); break; case Triple::aarch64: + case Triple::aarch64_32: setLibcallName(RTLIB::BZERO, "bzero"); break; default: @@ -197,6 +198,11 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) { setLibcallName(RTLIB::SINCOS_PPCF128, "sincosl"); } + if (TT.isPS4CPU()) { + setLibcallName(RTLIB::SINCOS_F32, "sincosf"); + setLibcallName(RTLIB::SINCOS_F64, "sincos"); + } + if (TT.isOSOpenBSD()) { setLibcallName(RTLIB::STACKPROTECTOR_CHECK_FAIL, nullptr); } @@ -578,13 +584,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { BooleanFloatContents = UndefinedBooleanContent; BooleanVectorContents = UndefinedBooleanContent; SchedPreferenceInfo = Sched::ILP; - JumpBufSize = 0; - JumpBufAlignment = 0; - MinFunctionAlignment = 0; - PrefFunctionAlignment = 0; - PrefLoopAlignment = 0; GatherAllAliasesMaxDepth = 18; - MinStackArgumentAlignment = 1; // TODO: the default will be switched to 0 in the next commit, along // with the Target-specific changes necessary.
MaxAtomicSizeInBitsSupported = 1024; @@ -653,6 +653,7 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::SMULFIX, VT, Expand); setOperationAction(ISD::SMULFIXSAT, VT, Expand); setOperationAction(ISD::UMULFIX, VT, Expand); + setOperationAction(ISD::UMULFIXSAT, VT, Expand); // Overflow operations default to expand setOperationAction(ISD::SADDO, VT, Expand); @@ -689,6 +690,7 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand); setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand); + setOperationAction(ISD::SPLAT_VECTOR, VT, Expand); } // Constrained floating-point operations default to expand. @@ -708,16 +710,22 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::STRICT_FLOG, VT, Expand); setOperationAction(ISD::STRICT_FLOG10, VT, Expand); setOperationAction(ISD::STRICT_FLOG2, VT, Expand); + setOperationAction(ISD::STRICT_LRINT, VT, Expand); + setOperationAction(ISD::STRICT_LLRINT, VT, Expand); setOperationAction(ISD::STRICT_FRINT, VT, Expand); setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand); setOperationAction(ISD::STRICT_FCEIL, VT, Expand); setOperationAction(ISD::STRICT_FFLOOR, VT, Expand); + setOperationAction(ISD::STRICT_LROUND, VT, Expand); + setOperationAction(ISD::STRICT_LLROUND, VT, Expand); setOperationAction(ISD::STRICT_FROUND, VT, Expand); setOperationAction(ISD::STRICT_FTRUNC, VT, Expand); setOperationAction(ISD::STRICT_FMAXNUM, VT, Expand); setOperationAction(ISD::STRICT_FMINNUM, VT, Expand); setOperationAction(ISD::STRICT_FP_ROUND, VT, Expand); setOperationAction(ISD::STRICT_FP_EXTEND, VT, Expand); + setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Expand); + setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Expand); // For most targets @llvm.get.dynamic.area.offset just returns 0. setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand); @@ -824,7 +832,8 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT); assert((LA == TypeLegal || LA == TypeSoftenFloat || - ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger) && + (NVT.isVector() || + ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger)) && "Promote may not follow Expand or Promote"); if (LA == TypeSplitVector) @@ -1257,17 +1266,23 @@ void TargetLoweringBase::computeRegisterProperties( MVT EltVT = VT.getVectorElementType(); unsigned NElts = VT.getVectorNumElements(); bool IsLegalWiderType = false; + bool IsScalable = VT.isScalableVector(); LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT); switch (PreferredAction) { - case TypePromoteInteger: + case TypePromoteInteger: { + MVT::SimpleValueType EndVT = IsScalable ? + MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE : + MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE; // Try to promote the elements of integer vectors. If no legal // promotion was found, fall through to the widen-vector method. - for (unsigned nVT = i + 1; nVT <= MVT::LAST_INTEGER_VECTOR_VALUETYPE; ++nVT) { + for (unsigned nVT = i + 1; + (MVT::SimpleValueType)nVT <= EndVT; ++nVT) { MVT SVT = (MVT::SimpleValueType) nVT; // Promote vectors of integers to vectors with the same number // of elements, with a wider element type. 
if (SVT.getScalarSizeInBits() > EltVT.getSizeInBits() && - SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)) { + SVT.getVectorNumElements() == NElts && + SVT.isScalableVector() == IsScalable && isTypeLegal(SVT)) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; @@ -1279,23 +1294,37 @@ void TargetLoweringBase::computeRegisterProperties( if (IsLegalWiderType) break; LLVM_FALLTHROUGH; + } case TypeWidenVector: - // Try to widen the vector. - for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - MVT SVT = (MVT::SimpleValueType) nVT; - if (SVT.getVectorElementType() == EltVT - && SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) { - TransformToType[i] = SVT; - RegisterTypeForVT[i] = SVT; - NumRegistersForVT[i] = 1; + if (isPowerOf2_32(NElts)) { + // Try to widen the vector. + for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType) nVT; + if (SVT.getVectorElementType() == EltVT + && SVT.getVectorNumElements() > NElts + && SVT.isScalableVector() == IsScalable && isTypeLegal(SVT)) { + TransformToType[i] = SVT; + RegisterTypeForVT[i] = SVT; + NumRegistersForVT[i] = 1; + ValueTypeActions.setTypeAction(VT, TypeWidenVector); + IsLegalWiderType = true; + break; + } + } + if (IsLegalWiderType) + break; + } else { + // Only widen to the next power of 2 to keep consistency with EVT. + MVT NVT = VT.getPow2VectorType(); + if (isTypeLegal(NVT)) { + TransformToType[i] = NVT; ValueTypeActions.setTypeAction(VT, TypeWidenVector); - IsLegalWiderType = true; + RegisterTypeForVT[i] = NVT; + NumRegistersForVT[i] = 1; break; } } - if (IsLegalWiderType) - break; LLVM_FALLTHROUGH; case TypeSplitVector: @@ -1488,12 +1517,9 @@ unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty, return DL.getABITypeAlignment(Ty); } -bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, - const DataLayout &DL, EVT VT, - unsigned AddrSpace, - unsigned Alignment, - MachineMemOperand::Flags Flags, - bool *Fast) const { +bool TargetLoweringBase::allowsMemoryAccessForAlignment( + LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, + unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast) const { // Check if the specified alignment is sufficient based on the data layout. // TODO: While using the data layout works in practice, a better solution // would be to implement this check directly (make this a virtual function). 
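For context on the hunk above: the old allowsMemoryAccess entry point is being split so that the pure alignment legality check lives in allowsMemoryAccessForAlignment, while allowsMemoryAccess (see the next hunk) simply forwards to it. A minimal caller-side sketch follows; the helper name canUseWideLoad is hypothetical, and the value type, address space, and alignment are illustrative values, not taken from the patch.

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"

using namespace llvm;

// Sketch: ask the target whether an unaligned v4i32 load is both legal and
// fast, using the alignment-only hook introduced in the hunk above.
static bool canUseWideLoad(const TargetLoweringBase &TLI, LLVMContext &Ctx,
                           const DataLayout &DL) {
  bool Fast = false;
  return TLI.allowsMemoryAccessForAlignment(Ctx, DL, MVT::v4i32,
                                            /*AddrSpace=*/0, /*Alignment=*/4,
                                            MachineMemOperand::MOLoad,
                                            &Fast) &&
         Fast;
}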
@@ -1511,6 +1537,21 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast); } +bool TargetLoweringBase::allowsMemoryAccessForAlignment( + LLVMContext &Context, const DataLayout &DL, EVT VT, + const MachineMemOperand &MMO, bool *Fast) const { + return allowsMemoryAccessForAlignment(Context, DL, VT, MMO.getAddrSpace(), + MMO.getAlignment(), MMO.getFlags(), + Fast); +} + +bool TargetLoweringBase::allowsMemoryAccess( + LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, + unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast) const { + return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace, Alignment, + Flags, Fast); +} + bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, const MachineMemOperand &MMO, diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 4c8f75b237aa..4978f4b9500b 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -43,6 +43,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSectionWasm.h" +#include "llvm/MC/MCSectionXCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" @@ -154,6 +155,7 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, break; case Triple::aarch64: case Triple::aarch64_be: + case Triple::aarch64_32: // The small model guarantees static code/data size < 4GB, but not where it // will be in memory. Most of these could end up >2GB away so even a signed // pc-relative 32-bit address is insufficient, theoretically. @@ -375,7 +377,7 @@ void TargetLoweringObjectFileELF::emitPersonalityValue( ELF::SHT_PROGBITS, Flags, 0); unsigned Size = DL.getPointerSize(); Streamer.SwitchSection(Sec); - Streamer.EmitValueToAlignment(DL.getPointerABIAlignment(0)); + Streamer.EmitValueToAlignment(DL.getPointerABIAlignment(0).value()); Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); const MCExpr *E = MCConstantExpr::create(Size, getContext()); Streamer.emitELFSize(Label, E); @@ -524,8 +526,8 @@ static const MCSymbolELF *getAssociatedSymbol(const GlobalObject *GO, if (!VM) report_fatal_error("MD_associated operand is not ValueAsMetadata"); - GlobalObject *OtherGO = dyn_cast<GlobalObject>(VM->getValue()); - return OtherGO ? dyn_cast<MCSymbolELF>(TM.getSymbol(OtherGO)) : nullptr; + auto *OtherGV = dyn_cast<GlobalValue>(VM->getValue()); + return OtherGV ? 
dyn_cast<MCSymbolELF>(TM.getSymbol(OtherGV)) : nullptr; } static unsigned getEntrySizeForKind(SectionKind Kind) { @@ -566,6 +568,8 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( SectionName = Attrs.getAttribute("bss-section").getValueAsString(); } else if (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly()) { SectionName = Attrs.getAttribute("rodata-section").getValueAsString(); + } else if (Attrs.hasAttribute("relro-section") && Kind.isReadOnlyWithRel()) { + SectionName = Attrs.getAttribute("relro-section").getValueAsString(); } else if (Attrs.hasAttribute("data-section") && Kind.isData()) { SectionName = Attrs.getAttribute("data-section").getValueAsString(); } @@ -1107,8 +1111,8 @@ MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol( } const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( - const MCSymbol *Sym, const MCValue &MV, int64_t Offset, - MachineModuleInfo *MMI, MCStreamer &Streamer) const { + const GlobalValue *GV, const MCSymbol *Sym, const MCValue &MV, + int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const { // Although MachO 32-bit targets do not explicitly have a GOTPCREL relocation // as 64-bit do, we replace the GOT equivalent by accessing the final symbol // through a non_lazy_ptr stub instead. One advantage is that it allows the @@ -1165,12 +1169,10 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( MCSymbol *Stub = Ctx.getOrCreateSymbol(Name); MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(Stub); - if (!StubSym.getPointer()) { - bool IsIndirectLocal = Sym->isDefined() && !Sym->isExternal(); - // With the assumption that IsIndirectLocal == GV->hasLocalLinkage(). + + if (!StubSym.getPointer()) StubSym = MachineModuleInfoImpl::StubValueTy(const_cast<MCSymbol *>(Sym), - !IsIndirectLocal); - } + !GV->hasLocalLinkage()); const MCExpr *BSymExpr = MCSymbolRefExpr::create(BaseSym, MCSymbolRefExpr::VK_None, Ctx); @@ -1519,7 +1521,8 @@ static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx, // internally, so we use ".CRT$XCA00001" for them. SmallString<24> Name; raw_svector_ostream OS(Name); - OS << ".CRT$XC" << (Priority < 200 ? 'A' : 'T') << format("%05u", Priority); + OS << ".CRT$X" << (IsCtor ? "C" : "T") << + (Priority < 200 ? 
'A' : 'T') << format("%05u", Priority); MCSectionCOFF *Sec = Ctx.getCOFFSection( Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getReadOnly()); @@ -1595,7 +1598,8 @@ const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference( static std::string APIntToHexString(const APInt &AI) { unsigned Width = (AI.getBitWidth() / 8) * 2; - std::string HexString = utohexstr(AI.getLimitedValue(), /*LowerCase=*/true); + std::string HexString = AI.toString(16, /*Signed=*/false); + transform(HexString.begin(), HexString.end(), HexString.begin(), tolower); unsigned Size = HexString.size(); assert(Width >= Size && "hex string is too large!"); HexString.insert(HexString.begin(), Width - Size, '0'); @@ -1819,3 +1823,82 @@ MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection( llvm_unreachable("@llvm.global_dtors should have been lowered already"); return nullptr; } + +//===----------------------------------------------------------------------===// +// XCOFF +//===----------------------------------------------------------------------===// +MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal( + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + report_fatal_error("XCOFF explicit sections not yet implemented."); +} + +MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal( + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + assert(!TM.getFunctionSections() && !TM.getDataSections() && + "XCOFF unique sections not yet implemented."); + + // Common symbols go into a csect with matching name which will get mapped + // into the .bss section. + if (Kind.isBSSLocal() || Kind.isCommon()) { + SmallString<128> Name; + getNameWithPrefix(Name, GO, TM); + XCOFF::StorageClass SC = + TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO); + return getContext().getXCOFFSection( + Name, Kind.isBSSLocal() ? 
XCOFF::XMC_BS : XCOFF::XMC_RW, XCOFF::XTY_CM, + SC, Kind, /* BeginSymbolName */ nullptr); + } + + if (Kind.isText()) + return TextSection; + + if (Kind.isData()) + return DataSection; + + report_fatal_error("XCOFF other section types not yet implemented."); +} + +bool TargetLoweringObjectFileXCOFF::shouldPutJumpTableInFunctionSection( + bool UsesLabelDifference, const Function &F) const { + report_fatal_error("TLOF XCOFF not yet implemented."); +} + +void TargetLoweringObjectFileXCOFF::Initialize(MCContext &Ctx, + const TargetMachine &TgtM) { + TargetLoweringObjectFile::Initialize(Ctx, TgtM); + TTypeEncoding = 0; + PersonalityEncoding = 0; + LSDAEncoding = 0; +} + +MCSection *TargetLoweringObjectFileXCOFF::getStaticCtorSection( + unsigned Priority, const MCSymbol *KeySym) const { + report_fatal_error("XCOFF ctor section not yet implemented."); +} + +MCSection *TargetLoweringObjectFileXCOFF::getStaticDtorSection( + unsigned Priority, const MCSymbol *KeySym) const { + report_fatal_error("XCOFF dtor section not yet implemented."); +} + +const MCExpr *TargetLoweringObjectFileXCOFF::lowerRelativeReference( + const GlobalValue *LHS, const GlobalValue *RHS, + const TargetMachine &TM) const { + report_fatal_error("XCOFF not yet implemented."); +} + +XCOFF::StorageClass TargetLoweringObjectFileXCOFF::getStorageClassForGlobal( + const GlobalObject *GO) { + switch (GO->getLinkage()) { + case GlobalValue::InternalLinkage: + return XCOFF::C_HIDEXT; + case GlobalValue::ExternalLinkage: + case GlobalValue::CommonLinkage: + return XCOFF::C_EXT; + case GlobalValue::ExternalWeakLinkage: + return XCOFF::C_WEAKEXT; + default: + report_fatal_error( + "Unhandled linkage when mapping linkage to StorageClass."); + } +} diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp index 36df02692f86..f1f4f65adf7c 100644 --- a/lib/CodeGen/TargetPassConfig.cpp +++ b/lib/CodeGen/TargetPassConfig.cpp @@ -49,9 +49,10 @@ using namespace llvm; -cl::opt<bool> EnableIPRA("enable-ipra", cl::init(false), cl::Hidden, - cl::desc("Enable interprocedural register allocation " - "to reduce load/store at procedure calls.")); +static cl::opt<bool> + EnableIPRA("enable-ipra", cl::init(false), cl::Hidden, + cl::desc("Enable interprocedural register allocation " + "to reduce load/store at procedure calls.")); static cl::opt<bool> DisablePostRASched("disable-post-ra", cl::Hidden, cl::desc("Disable Post Regalloc Scheduler")); static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden, @@ -152,8 +153,10 @@ static cl::opt<GlobalISelAbortMode> EnableGlobalISelAbort( // substitutePass(&PostRASchedulerID, &PostMachineSchedulerID). // Targets can return true in targetSchedulesPostRAScheduling() and // insert a PostRA scheduling pass wherever it wants. -cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden, - cl::desc("Run MachineScheduler post regalloc (independent of preRA sched)")); +static cl::opt<bool> MISchedPostRA( + "misched-postra", cl::Hidden, + cl::desc( + "Run MachineScheduler post regalloc (independent of preRA sched)")); // Experimental option to run live interval analysis early. static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden, @@ -175,10 +178,10 @@ static cl::opt<CFLAAType> UseCFLAA( /// Option names for limiting the codegen pipeline. /// Those are used in error reporting and we didn't want /// to duplicate their names all over the place. 
-const char *StartAfterOptName = "start-after"; -const char *StartBeforeOptName = "start-before"; -const char *StopAfterOptName = "stop-after"; -const char *StopBeforeOptName = "stop-before"; +static const char *StartAfterOptName = "start-after"; +static const char *StartBeforeOptName = "start-before"; +static const char *StopAfterOptName = "stop-after"; +static const char *StopBeforeOptName = "stop-before"; static cl::opt<std::string> StartAfterOpt(StringRef(StartAfterOptName), @@ -654,6 +657,7 @@ void TargetPassConfig::addIRPasses() { // TODO: add a pass insertion point here addPass(createGCLoweringPass()); addPass(createShadowStackGCLoweringPass()); + addPass(createLowerConstantIntrinsicsPass()); // Make sure that no unreachable blocks are instruction selected. addPass(createUnreachableBlockEliminationPass()); @@ -1231,5 +1235,5 @@ bool TargetPassConfig::isGISelCSEEnabled() const { } std::unique_ptr<CSEConfigBase> TargetPassConfig::getCSEConfig() const { - return make_unique<CSEConfigBase>(); + return std::make_unique<CSEConfigBase>(); } diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index f1b2ecf3243b..e5592c31098a 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -86,22 +86,21 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet, namespace llvm { -Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI, +Printable printReg(Register Reg, const TargetRegisterInfo *TRI, unsigned SubIdx, const MachineRegisterInfo *MRI) { return Printable([Reg, TRI, SubIdx, MRI](raw_ostream &OS) { if (!Reg) OS << "$noreg"; - else if (TargetRegisterInfo::isStackSlot(Reg)) - OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); - else if (TargetRegisterInfo::isVirtualRegister(Reg)) { + else if (Register::isStackSlot(Reg)) + OS << "SS#" << Register::stackSlot2Index(Reg); + else if (Register::isVirtualRegister(Reg)) { StringRef Name = MRI ? MRI->getVRegName(Reg) : ""; if (Name != "") { OS << '%' << Name; } else { - OS << '%' << TargetRegisterInfo::virtReg2Index(Reg); + OS << '%' << Register::virtReg2Index(Reg); } - } - else if (!TRI) + } else if (!TRI) OS << '$' << "physreg" << Reg; else if (Reg < TRI->getNumRegs()) { OS << '$'; @@ -143,8 +142,8 @@ Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { Printable printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { return Printable([Unit, TRI](raw_ostream &OS) { - if (TRI && TRI->isVirtualRegister(Unit)) { - OS << '%' << TargetRegisterInfo::virtReg2Index(Unit); + if (Register::isVirtualRegister(Unit)) { + OS << '%' << Register::virtReg2Index(Unit); } else { OS << printRegUnit(Unit, TRI); } @@ -189,7 +188,8 @@ TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const { /// the right type that contains this physreg. const TargetRegisterClass * TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, MVT VT) const { - assert(isPhysicalRegister(reg) && "reg must be a physical register"); + assert(Register::isPhysicalRegister(reg) && + "reg must be a physical register"); // Pick the most sub register class of the right type that contains // this physreg. 
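The printReg change above is one instance of the unsigned-to-Register migration running through this import: Register keeps the old unsigned encoding (0 for no register, then the physical, virtual, and stack-slot ranges) and exposes the former TargetRegisterInfo static predicates as its own members and statics. A rough sketch of the discrimination the rewritten printReg performs; the helper name describeReg is hypothetical, and only APIs visible in the diff itself are used.

#include "llvm/CodeGen/Register.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Sketch: classify a Register the way the rewritten printReg does.
static void describeReg(Register Reg) {
  if (!Reg)
    outs() << "$noreg\n";                                // encoding 0
  else if (Register::isStackSlot(Reg))
    outs() << "SS#" << Register::stackSlot2Index(Reg) << '\n';
  else if (Reg.isVirtual())
    outs() << '%' << Register::virtReg2Index(Reg) << '\n';
  else
    outs() << "$physreg" << Reg.id() << '\n';            // physical register
}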
@@ -238,24 +238,16 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, static inline const TargetRegisterClass *firstCommonClass(const uint32_t *A, const uint32_t *B, - const TargetRegisterInfo *TRI, - const MVT::SimpleValueType SVT = - MVT::SimpleValueType::Any) { - const MVT VT(SVT); + const TargetRegisterInfo *TRI) { for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32) - if (unsigned Common = *A++ & *B++) { - const TargetRegisterClass *RC = - TRI->getRegClass(I + countTrailingZeros(Common)); - if (SVT == MVT::SimpleValueType::Any || TRI->isTypeLegalForClass(*RC, VT)) - return RC; - } + if (unsigned Common = *A++ & *B++) + return TRI->getRegClass(I + countTrailingZeros(Common)); return nullptr; } const TargetRegisterClass * TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, - const TargetRegisterClass *B, - const MVT::SimpleValueType SVT) const { + const TargetRegisterClass *B) const { // First take care of the trivial cases. if (A == B) return A; @@ -264,7 +256,7 @@ TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, // Register classes are ordered topologically, so the largest common // sub-class is the common sub-class with the smallest ID. - return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this, SVT); + return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this); } const TargetRegisterClass * @@ -409,7 +401,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, // Target-independent hints are either a physical or a virtual register. unsigned Phys = Reg; - if (VRM && isVirtualRegister(Phys)) + if (VRM && Register::isVirtualRegister(Phys)) Phys = VRM->getPhys(Phys); // Don't add the same reg twice (Hints_MRI may contain multiple virtual @@ -417,7 +409,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, if (!HintedRegs.insert(Phys).second) continue; // Check that Phys is a valid hint in VirtReg's register class. - if (!isPhysicalRegister(Phys)) + if (!Register::isPhysicalRegister(Phys)) continue; if (MRI.isReserved(Phys)) continue; @@ -433,6 +425,20 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, return false; } +bool TargetRegisterInfo::isCalleeSavedPhysReg( + unsigned PhysReg, const MachineFunction &MF) const { + if (PhysReg == 0) + return false; + const uint32_t *callerPreservedRegs = + getCallPreservedMask(MF, MF.getFunction().getCallingConv()); + if (callerPreservedRegs) { + assert(Register::isPhysicalRegister(PhysReg) && + "Expected physical register"); + return (callerPreservedRegs[PhysReg / 32] >> PhysReg % 32) & 1; + } + return false; +} + bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const { return !MF.getFunction().hasFnAttribute("no-realign-stack"); } @@ -466,7 +472,7 @@ bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0, unsigned TargetRegisterInfo::getRegSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI) const { const TargetRegisterClass *RC{}; - if (isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { // The size is not directly available for physical registers. // Instead, we need to access a register class that contains Reg and // get the size of that register class.
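The new isCalleeSavedPhysReg above reads the call-preserved register mask, which packs one preserved/clobbered bit per physical register into 32-bit words, and returns whether the bit for PhysReg is set. The bit arithmetic in isolation, as a standalone sketch mirroring the expression in the hunk (the helper name isPreservedAcrossCall is hypothetical):

#include <cstdint>

// Bit N of word N/32 corresponds to physical register N: set means the
// register is preserved across calls with this calling convention.
static bool isPreservedAcrossCall(const uint32_t *Mask, unsigned PhysReg) {
  return (Mask[PhysReg / 32] >> (PhysReg % 32)) & 1;
}

For example, a first mask word of 0x5 marks physregs 0 and 2 as preserved and physreg 1 as clobbered.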
@@ -501,7 +507,7 @@ TargetRegisterInfo::lookThruCopyLike(unsigned SrcReg, CopySrcReg = MI->getOperand(2).getReg(); } - if (!isVirtualRegister(CopySrcReg)) + if (!Register::isVirtualRegister(CopySrcReg)) return CopySrcReg; SrcReg = CopySrcReg; diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp index 195279719ad4..ce59452fd1b8 100644 --- a/lib/CodeGen/TargetSchedule.cpp +++ b/lib/CodeGen/TargetSchedule.cpp @@ -300,7 +300,7 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, // TODO: The following hack exists because predication passes do not // correctly append imp-use operands, and readsReg() strangely returns false // for predicated defs. - unsigned Reg = DefMI->getOperand(DefOperIdx).getReg(); + Register Reg = DefMI->getOperand(DefOperIdx).getReg(); const MachineFunction &MF = *DefMI->getMF(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(*DepMI)) diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 43d876646967..ea971809d4e4 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -230,7 +230,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; if (MO.isUse() && MOReg != SavedReg) @@ -299,7 +299,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, MachineOperand &MO = OtherMI.getOperand(i); if (!MO.isReg()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; if (DefReg == MOReg) @@ -418,8 +418,8 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, } else return false; - IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); - IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + IsSrcPhys = Register::isPhysicalRegister(SrcReg); + IsDstPhys = Register::isPhysicalRegister(DstReg); return true; } @@ -427,8 +427,7 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, /// given instruction, is killed by the given instruction. static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS) { - if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) && - !LIS->isNotInMIMap(*MI)) { + if (LIS && Register::isVirtualRegister(Reg) && !LIS->isNotInMIMap(*MI)) { // FIXME: Sometimes tryInstructionTransform() will add instructions and // test whether they can be folded before keeping them. In this case it // sets a kill before recursively calling tryInstructionTransform() again. @@ -475,12 +474,12 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, MachineInstr *DefMI = &MI; while (true) { // All uses of physical registers are likely to be kills. 
- if (TargetRegisterInfo::isPhysicalRegister(Reg) && + if (Register::isPhysicalRegister(Reg) && (allowFalsePositives || MRI->hasOneUse(Reg))) return true; if (!isPlainlyKilled(DefMI, Reg, LIS)) return false; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return true; MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg); // If there are multiple defs, we can't do a simple analysis, so just @@ -536,7 +535,7 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, } IsDstPhys = false; if (isTwoAddrUse(UseMI, Reg, DstReg)) { - IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + IsDstPhys = Register::isPhysicalRegister(DstReg); return &UseMI; } return nullptr; @@ -546,13 +545,13 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, /// to. static unsigned getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) { - while (TargetRegisterInfo::isVirtualRegister(Reg)) { + while (Register::isVirtualRegister(Reg)) { DenseMap<unsigned, unsigned>::iterator SI = RegMap.find(Reg); if (SI == RegMap.end()) return 0; Reg = SI->second; } - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return Reg; return 0; } @@ -683,7 +682,7 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI, unsigned RegBIdx, unsigned RegCIdx, unsigned Dist) { - unsigned RegC = MI->getOperand(RegCIdx).getReg(); + Register RegC = MI->getOperand(RegCIdx).getReg(); LLVM_DEBUG(dbgs() << "2addr: COMMUTING : " << *MI); MachineInstr *NewMI = TII->commuteInstruction(*MI, false, RegBIdx, RegCIdx); @@ -700,7 +699,7 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI, // Update source register map. unsigned FromRegC = getMappedReg(RegC, SrcRegMap); if (FromRegC) { - unsigned RegA = MI->getOperand(DstIdx).getReg(); + Register RegA = MI->getOperand(DstIdx).getReg(); SrcRegMap[RegA] = FromRegC; } @@ -911,7 +910,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; if (MO.isDef()) @@ -955,7 +954,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, for (const MachineOperand &MO : OtherMI.operands()) { if (!MO.isReg()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; if (MO.isDef()) { @@ -1093,7 +1092,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, for (const MachineOperand &MO : KillMI->operands()) { if (!MO.isReg()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (MO.isUse()) { if (!MOReg) continue; @@ -1105,7 +1104,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, Uses.insert(MOReg); if (isKill && MOReg != Reg) Kills.insert(MOReg); - } else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) { + } else if (Register::isPhysicalRegister(MOReg)) { Defs.insert(MOReg); if (!MO.isDead()) LiveDefs.insert(MOReg); @@ -1130,7 +1129,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, for (const MachineOperand &MO : OtherMI.operands()) { if (!MO.isReg()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; if (MO.isUse()) { @@ -1154,8 +1153,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, unsigned MOReg = OtherDefs[i]; if (Uses.count(MOReg)) return false; - if (TargetRegisterInfo::isPhysicalRegister(MOReg) && - LiveDefs.count(MOReg)) + if 
(Register::isPhysicalRegister(MOReg) && LiveDefs.count(MOReg)) return false; // Physical register def is seen. Defs.erase(MOReg); @@ -1208,8 +1206,8 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, return false; bool MadeChange = false; - unsigned DstOpReg = MI->getOperand(DstOpIdx).getReg(); - unsigned BaseOpReg = MI->getOperand(BaseOpIdx).getReg(); + Register DstOpReg = MI->getOperand(DstOpIdx).getReg(); + Register BaseOpReg = MI->getOperand(BaseOpIdx).getReg(); unsigned OpsNum = MI->getDesc().getNumOperands(); unsigned OtherOpIdx = MI->getDesc().getNumDefs(); for (; OtherOpIdx < OpsNum; OtherOpIdx++) { @@ -1221,7 +1219,7 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, !TII->findCommutedOpIndices(*MI, BaseOpIdx, OtherOpIdx)) continue; - unsigned OtherOpReg = MI->getOperand(OtherOpIdx).getReg(); + Register OtherOpReg = MI->getOperand(OtherOpIdx).getReg(); bool AggressiveCommute = false; // If OtherOp dies but BaseOp does not, swap the OtherOp and BaseOp @@ -1276,14 +1274,14 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, return false; MachineInstr &MI = *mi; - unsigned regA = MI.getOperand(DstIdx).getReg(); - unsigned regB = MI.getOperand(SrcIdx).getReg(); + Register regA = MI.getOperand(DstIdx).getReg(); + Register regB = MI.getOperand(SrcIdx).getReg(); - assert(TargetRegisterInfo::isVirtualRegister(regB) && + assert(Register::isVirtualRegister(regB) && "cannot make instruction into two-address form"); bool regBKilled = isKilled(MI, regB, MRI, TII, LIS, true); - if (TargetRegisterInfo::isVirtualRegister(regA)) + if (Register::isVirtualRegister(regA)) scanUses(regA); bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist); @@ -1363,7 +1361,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, const TargetRegisterClass *RC = TRI->getAllocatableClass( TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF)); - unsigned Reg = MRI->createVirtualRegister(RC); + Register Reg = MRI->createVirtualRegister(RC); SmallVector<MachineInstr *, 2> NewMIs; if (!TII->unfoldMemoryOperand(*MF, MI, Reg, /*UnfoldLoad=*/true, @@ -1399,8 +1397,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, if (LV) { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); - if (MO.isReg() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) { if (MO.isUse()) { if (MO.isKill()) { if (NewMIs[0]->killsRegister(MO.getReg())) @@ -1474,8 +1471,8 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { AnyOps = true; MachineOperand &SrcMO = MI->getOperand(SrcIdx); MachineOperand &DstMO = MI->getOperand(DstIdx); - unsigned SrcReg = SrcMO.getReg(); - unsigned DstReg = DstMO.getReg(); + Register SrcReg = SrcMO.getReg(); + Register DstReg = DstMO.getReg(); // Tied constraint already satisfied? if (SrcReg == DstReg) continue; @@ -1485,7 +1482,7 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { // Deal with undef uses immediately - simply rewrite the src operand. if (SrcMO.isUndef() && !DstMO.getSubReg()) { // Constrain the DstReg register class if required. 
- if (TargetRegisterInfo::isVirtualRegister(DstReg)) + if (Register::isVirtualRegister(DstReg)) if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx, TRI, *MF)) MRI->constrainRegClass(DstReg, RC); @@ -1522,7 +1519,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, unsigned DstIdx = TiedPairs[tpi].second; const MachineOperand &DstMO = MI->getOperand(DstIdx); - unsigned RegA = DstMO.getReg(); + Register RegA = DstMO.getReg(); // Grab RegB from the instruction because it may have changed if the // instruction was commuted. @@ -1538,7 +1535,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, } LastCopiedReg = RegA; - assert(TargetRegisterInfo::isVirtualRegister(RegB) && + assert(Register::isVirtualRegister(RegB) && "cannot make instruction into two-address form"); #ifndef NDEBUG @@ -1559,14 +1556,13 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, MIB.addReg(RegB, 0, SubRegB); const TargetRegisterClass *RC = MRI->getRegClass(RegB); if (SubRegB) { - if (TargetRegisterInfo::isVirtualRegister(RegA)) { + if (Register::isVirtualRegister(RegA)) { assert(TRI->getMatchingSuperRegClass(RC, MRI->getRegClass(RegA), SubRegB) && "tied subregister must be a truncation"); // The superreg class will not be used to constrain the subreg class. RC = nullptr; - } - else { + } else { assert(TRI->getMatchingSuperReg(RegA, SubRegB, MRI->getRegClass(RegB)) && "tied subregister must be a truncation"); } @@ -1581,7 +1577,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, if (LIS) { LastCopyIdx = LIS->InsertMachineInstrInMaps(*PrevMI).getRegSlot(); - if (TargetRegisterInfo::isVirtualRegister(RegA)) { + if (Register::isVirtualRegister(RegA)) { LiveInterval &LI = LIS->getInterval(RegA); VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator()); SlotIndex endIdx = @@ -1601,8 +1597,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, } // Make sure regA is a legal regclass for the SrcIdx operand. 
- if (TargetRegisterInfo::isVirtualRegister(RegA) && - TargetRegisterInfo::isVirtualRegister(RegB)) + if (Register::isVirtualRegister(RegA) && Register::isVirtualRegister(RegB)) MRI->constrainRegClass(RegA, RC); MO.setReg(RegA); // The getMatchingSuper asserts guarantee that the register class projected @@ -1744,8 +1739,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { if (TiedPairs.size() == 1) { unsigned SrcIdx = TiedPairs[0].first; unsigned DstIdx = TiedPairs[0].second; - unsigned SrcReg = mi->getOperand(SrcIdx).getReg(); - unsigned DstReg = mi->getOperand(DstIdx).getReg(); + Register SrcReg = mi->getOperand(SrcIdx).getReg(); + Register DstReg = mi->getOperand(DstIdx).getReg(); if (SrcReg != DstReg && tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist, false)) { // The tied operands have been eliminated or shifted further down @@ -1803,9 +1798,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { void TwoAddressInstructionPass:: eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; - unsigned DstReg = MI.getOperand(0).getReg(); - if (MI.getOperand(0).getSubReg() || - TargetRegisterInfo::isPhysicalRegister(DstReg) || + Register DstReg = MI.getOperand(0).getReg(); + if (MI.getOperand(0).getSubReg() || Register::isPhysicalRegister(DstReg) || !(MI.getNumOperands() & 1)) { LLVM_DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI); llvm_unreachable(nullptr); @@ -1821,7 +1815,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { bool DefEmitted = false; for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) { MachineOperand &UseMO = MI.getOperand(i); - unsigned SrcReg = UseMO.getReg(); + Register SrcReg = UseMO.getReg(); unsigned SubIdx = MI.getOperand(i+1).getImm(); // Nothing needs to be inserted for undef operands. if (UseMO.isUndef()) @@ -1855,7 +1849,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { DefEmitted = true; // Update LiveVariables' kill info. - if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg)) + if (LV && isKill && !Register::isPhysicalRegister(SrcReg)) LV->replaceKillInstruction(SrcReg, MI, *CopyMI); LLVM_DEBUG(dbgs() << "Inserted: " << *CopyMI); diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index 177bab32bccc..3289eff71336 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -103,7 +103,8 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { df_iterator_default_set<MachineBasicBlock*> Reachable; bool ModifiedPHI = false; - MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); + MMI = MMIWP ? &MMIWP->getMMI() : nullptr; MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>(); MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); @@ -146,8 +147,14 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { } // Actually remove the blocks now. - for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) + for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) { + // Remove any call site information for calls in the block. + for (auto &I : DeadBlocks[i]->instrs()) + if (I.isCall(MachineInstr::IgnoreBundle)) + DeadBlocks[i]->getParent()->eraseCallSiteInfo(&I); + DeadBlocks[i]->eraseFromParent(); + } // Cleanup PHI nodes. 
for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { @@ -167,8 +174,8 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { if (phi->getNumOperands() == 3) { const MachineOperand &Input = phi->getOperand(1); const MachineOperand &Output = phi->getOperand(0); - unsigned InputReg = Input.getReg(); - unsigned OutputReg = Output.getReg(); + Register InputReg = Input.getReg(); + Register OutputReg = Output.getReg(); assert(Output.getSubReg() == 0 && "Cannot have output subregister"); ModifiedPHI = true; diff --git a/lib/CodeGen/ValueTypes.cpp b/lib/CodeGen/ValueTypes.cpp index a911cdcbec9d..73b862d51c0f 100644 --- a/lib/CodeGen/ValueTypes.cpp +++ b/lib/CodeGen/ValueTypes.cpp @@ -115,8 +115,8 @@ std::string EVT::getEVTString() const { switch (V.SimpleTy) { default: if (isVector()) - return "v" + utostr(getVectorNumElements()) + - getVectorElementType().getEVTString(); + return (isScalableVector() ? "nxv" : "v") + utostr(getVectorNumElements()) + + getVectorElementType().getEVTString(); if (isInteger()) return "i" + utostr(getSizeInBits()); llvm_unreachable("Invalid EVT!"); @@ -144,6 +144,7 @@ std::string EVT::getEVTString() const { case MVT::v32i1: return "v32i1"; case MVT::v64i1: return "v64i1"; case MVT::v128i1: return "v128i1"; + case MVT::v256i1: return "v256i1"; case MVT::v512i1: return "v512i1"; case MVT::v1024i1: return "v1024i1"; case MVT::v1i8: return "v1i8"; @@ -157,6 +158,7 @@ std::string EVT::getEVTString() const { case MVT::v256i8: return "v256i8"; case MVT::v1i16: return "v1i16"; case MVT::v2i16: return "v2i16"; + case MVT::v3i16: return "v3i16"; case MVT::v4i16: return "v4i16"; case MVT::v8i16: return "v8i16"; case MVT::v16i16: return "v16i16"; @@ -187,8 +189,11 @@ std::string EVT::getEVTString() const { case MVT::v1f32: return "v1f32"; case MVT::v2f32: return "v2f32"; case MVT::v2f16: return "v2f16"; + case MVT::v3f16: return "v3f16"; case MVT::v4f16: return "v4f16"; case MVT::v8f16: return "v8f16"; + case MVT::v16f16: return "v16f16"; + case MVT::v32f16: return "v32f16"; case MVT::v3f32: return "v3f32"; case MVT::v4f32: return "v4f32"; case MVT::v5f32: return "v5f32"; @@ -205,6 +210,48 @@ std::string EVT::getEVTString() const { case MVT::v2f64: return "v2f64"; case MVT::v4f64: return "v4f64"; case MVT::v8f64: return "v8f64"; + case MVT::nxv1i1: return "nxv1i1"; + case MVT::nxv2i1: return "nxv2i1"; + case MVT::nxv4i1: return "nxv4i1"; + case MVT::nxv8i1: return "nxv8i1"; + case MVT::nxv16i1: return "nxv16i1"; + case MVT::nxv32i1: return "nxv32i1"; + case MVT::nxv1i8: return "nxv1i8"; + case MVT::nxv2i8: return "nxv2i8"; + case MVT::nxv4i8: return "nxv4i8"; + case MVT::nxv8i8: return "nxv8i8"; + case MVT::nxv16i8: return "nxv16i8"; + case MVT::nxv32i8: return "nxv32i8"; + case MVT::nxv1i16: return "nxv1i16"; + case MVT::nxv2i16: return "nxv2i16"; + case MVT::nxv4i16: return "nxv4i16"; + case MVT::nxv8i16: return "nxv8i16"; + case MVT::nxv16i16:return "nxv16i16"; + case MVT::nxv32i16:return "nxv32i16"; + case MVT::nxv1i32: return "nxv1i32"; + case MVT::nxv2i32: return "nxv2i32"; + case MVT::nxv4i32: return "nxv4i32"; + case MVT::nxv8i32: return "nxv8i32"; + case MVT::nxv16i32:return "nxv16i32"; + case MVT::nxv32i32:return "nxv32i32"; + case MVT::nxv1i64: return "nxv1i64"; + case MVT::nxv2i64: return "nxv2i64"; + case MVT::nxv4i64: return "nxv4i64"; + case MVT::nxv8i64: return "nxv8i64"; + case MVT::nxv16i64:return "nxv16i64"; + case MVT::nxv32i64:return "nxv32i64"; + case MVT::nxv2f16: return "nxv2f16"; + case MVT::nxv4f16: return 
"nxv4f16"; + case MVT::nxv8f16: return "nxv8f16"; + case MVT::nxv1f32: return "nxv1f32"; + case MVT::nxv2f32: return "nxv2f32"; + case MVT::nxv4f32: return "nxv4f32"; + case MVT::nxv8f32: return "nxv8f32"; + case MVT::nxv16f32:return "nxv16f32"; + case MVT::nxv1f64: return "nxv1f64"; + case MVT::nxv2f64: return "nxv2f64"; + case MVT::nxv4f64: return "nxv4f64"; + case MVT::nxv8f64: return "nxv8f64"; case MVT::Metadata:return "Metadata"; case MVT::Untyped: return "Untyped"; case MVT::exnref : return "exnref"; @@ -241,6 +288,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32); case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64); case MVT::v128i1: return VectorType::get(Type::getInt1Ty(Context), 128); + case MVT::v256i1: return VectorType::get(Type::getInt1Ty(Context), 256); case MVT::v512i1: return VectorType::get(Type::getInt1Ty(Context), 512); case MVT::v1024i1: return VectorType::get(Type::getInt1Ty(Context), 1024); case MVT::v1i8: return VectorType::get(Type::getInt8Ty(Context), 1); @@ -254,6 +302,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v256i8: return VectorType::get(Type::getInt8Ty(Context), 256); case MVT::v1i16: return VectorType::get(Type::getInt16Ty(Context), 1); case MVT::v2i16: return VectorType::get(Type::getInt16Ty(Context), 2); + case MVT::v3i16: return VectorType::get(Type::getInt16Ty(Context), 3); case MVT::v4i16: return VectorType::get(Type::getInt16Ty(Context), 4); case MVT::v8i16: return VectorType::get(Type::getInt16Ty(Context), 8); case MVT::v16i16: return VectorType::get(Type::getInt16Ty(Context), 16); @@ -282,8 +331,11 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v32i64: return VectorType::get(Type::getInt64Ty(Context), 32); case MVT::v1i128: return VectorType::get(Type::getInt128Ty(Context), 1); case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2); + case MVT::v3f16: return VectorType::get(Type::getHalfTy(Context), 3); case MVT::v4f16: return VectorType::get(Type::getHalfTy(Context), 4); case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8); + case MVT::v16f16: return VectorType::get(Type::getHalfTy(Context), 16); + case MVT::v32f16: return VectorType::get(Type::getHalfTy(Context), 32); case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1); case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2); case MVT::v3f32: return VectorType::get(Type::getFloatTy(Context), 3); @@ -302,8 +354,92 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2); case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4); case MVT::v8f64: return VectorType::get(Type::getDoubleTy(Context), 8); + case MVT::nxv1i1: + return VectorType::get(Type::getInt1Ty(Context), 1, /*Scalable=*/ true); + case MVT::nxv2i1: + return VectorType::get(Type::getInt1Ty(Context), 2, /*Scalable=*/ true); + case MVT::nxv4i1: + return VectorType::get(Type::getInt1Ty(Context), 4, /*Scalable=*/ true); + case MVT::nxv8i1: + return VectorType::get(Type::getInt1Ty(Context), 8, /*Scalable=*/ true); + case MVT::nxv16i1: + return VectorType::get(Type::getInt1Ty(Context), 16, /*Scalable=*/ true); + case MVT::nxv32i1: + return VectorType::get(Type::getInt1Ty(Context), 32, /*Scalable=*/ true); + case MVT::nxv1i8: + return VectorType::get(Type::getInt8Ty(Context), 1, /*Scalable=*/ true); + case MVT::nxv2i8: + return 
@@ -302,8 +354,92 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
   case MVT::v2f64:   return VectorType::get(Type::getDoubleTy(Context), 2);
   case MVT::v4f64:   return VectorType::get(Type::getDoubleTy(Context), 4);
   case MVT::v8f64:   return VectorType::get(Type::getDoubleTy(Context), 8);
+  case MVT::nxv1i1:
+    return VectorType::get(Type::getInt1Ty(Context), 1, /*Scalable=*/ true);
+  case MVT::nxv2i1:
+    return VectorType::get(Type::getInt1Ty(Context), 2, /*Scalable=*/ true);
+  case MVT::nxv4i1:
+    return VectorType::get(Type::getInt1Ty(Context), 4, /*Scalable=*/ true);
+  case MVT::nxv8i1:
+    return VectorType::get(Type::getInt1Ty(Context), 8, /*Scalable=*/ true);
+  case MVT::nxv16i1:
+    return VectorType::get(Type::getInt1Ty(Context), 16, /*Scalable=*/ true);
+  case MVT::nxv32i1:
+    return VectorType::get(Type::getInt1Ty(Context), 32, /*Scalable=*/ true);
+  case MVT::nxv1i8:
+    return VectorType::get(Type::getInt8Ty(Context), 1, /*Scalable=*/ true);
+  case MVT::nxv2i8:
+    return VectorType::get(Type::getInt8Ty(Context), 2, /*Scalable=*/ true);
+  case MVT::nxv4i8:
+    return VectorType::get(Type::getInt8Ty(Context), 4, /*Scalable=*/ true);
+  case MVT::nxv8i8:
+    return VectorType::get(Type::getInt8Ty(Context), 8, /*Scalable=*/ true);
+  case MVT::nxv16i8:
+    return VectorType::get(Type::getInt8Ty(Context), 16, /*Scalable=*/ true);
+  case MVT::nxv32i8:
+    return VectorType::get(Type::getInt8Ty(Context), 32, /*Scalable=*/ true);
+  case MVT::nxv1i16:
+    return VectorType::get(Type::getInt16Ty(Context), 1, /*Scalable=*/ true);
+  case MVT::nxv2i16:
+    return VectorType::get(Type::getInt16Ty(Context), 2, /*Scalable=*/ true);
+  case MVT::nxv4i16:
+    return VectorType::get(Type::getInt16Ty(Context), 4, /*Scalable=*/ true);
+  case MVT::nxv8i16:
+    return VectorType::get(Type::getInt16Ty(Context), 8, /*Scalable=*/ true);
+  case MVT::nxv16i16:
+    return VectorType::get(Type::getInt16Ty(Context), 16, /*Scalable=*/ true);
+  case MVT::nxv32i16:
+    return VectorType::get(Type::getInt16Ty(Context), 32, /*Scalable=*/ true);
+  case MVT::nxv1i32:
+    return VectorType::get(Type::getInt32Ty(Context), 1, /*Scalable=*/ true);
+  case MVT::nxv2i32:
+    return VectorType::get(Type::getInt32Ty(Context), 2, /*Scalable=*/ true);
+  case MVT::nxv4i32:
+    return VectorType::get(Type::getInt32Ty(Context), 4, /*Scalable=*/ true);
+  case MVT::nxv8i32:
+    return VectorType::get(Type::getInt32Ty(Context), 8, /*Scalable=*/ true);
+  case MVT::nxv16i32:
+    return VectorType::get(Type::getInt32Ty(Context), 16,/*Scalable=*/ true);
+  case MVT::nxv32i32:
+    return VectorType::get(Type::getInt32Ty(Context), 32,/*Scalable=*/ true);
+  case MVT::nxv1i64:
+    return VectorType::get(Type::getInt64Ty(Context), 1, /*Scalable=*/ true);
+  case MVT::nxv2i64:
+    return VectorType::get(Type::getInt64Ty(Context), 2, /*Scalable=*/ true);
+  case MVT::nxv4i64:
+    return VectorType::get(Type::getInt64Ty(Context), 4, /*Scalable=*/ true);
+  case MVT::nxv8i64:
+    return VectorType::get(Type::getInt64Ty(Context), 8, /*Scalable=*/ true);
+  case MVT::nxv16i64:
+    return VectorType::get(Type::getInt64Ty(Context), 16, /*Scalable=*/ true);
+  case MVT::nxv32i64:
+    return VectorType::get(Type::getInt64Ty(Context), 32, /*Scalable=*/ true);
+  case MVT::nxv2f16:
+    return VectorType::get(Type::getHalfTy(Context), 2, /*Scalable=*/ true);
+  case MVT::nxv4f16:
+    return VectorType::get(Type::getHalfTy(Context), 4, /*Scalable=*/ true);
+  case MVT::nxv8f16:
+    return VectorType::get(Type::getHalfTy(Context), 8, /*Scalable=*/ true);
+  case MVT::nxv1f32:
+    return VectorType::get(Type::getFloatTy(Context), 1, /*Scalable=*/ true);
+  case MVT::nxv2f32:
+    return VectorType::get(Type::getFloatTy(Context), 2, /*Scalable=*/ true);
+  case MVT::nxv4f32:
+    return VectorType::get(Type::getFloatTy(Context), 4, /*Scalable=*/ true);
+  case MVT::nxv8f32:
+    return VectorType::get(Type::getFloatTy(Context), 8, /*Scalable=*/ true);
+  case MVT::nxv16f32:
+    return VectorType::get(Type::getFloatTy(Context), 16, /*Scalable=*/ true);
+  case MVT::nxv1f64:
+    return VectorType::get(Type::getDoubleTy(Context), 1, /*Scalable=*/ true);
+  case MVT::nxv2f64:
+    return VectorType::get(Type::getDoubleTy(Context), 2, /*Scalable=*/ true);
+  case MVT::nxv4f64:
+    return VectorType::get(Type::getDoubleTy(Context), 4, /*Scalable=*/ true);
+  case MVT::nxv8f64:
+    return VectorType::get(Type::getDoubleTy(Context), 8, /*Scalable=*/ true);
   case MVT::Metadata: return Type::getMetadataTy(Context);
-  } 
+  }
 }
 
 /// Return the value type corresponding to the specified type.  This returns all
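Editor's note: the hunk above maps each new scalable MVT to an IR type through the VectorType::get overload that takes a Scalable flag, exactly as the added lines show. A minimal sketch of the two kinds of IR type this produces, assuming the LLVM headers as of this revision:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"

    void vectorTypes(llvm::LLVMContext &Context) {
      // Fixed-width vector: <4 x i32>
      llvm::Type *Fixed =
          llvm::VectorType::get(llvm::Type::getInt32Ty(Context), 4);
      // Scalable vector: <vscale x 4 x i32>, as built for MVT::nxv4i32 above
      llvm::Type *Scalable =
          llvm::VectorType::get(llvm::Type::getInt32Ty(Context), 4,
                                /*Scalable=*/true);
      (void)Fixed;
      (void)Scalable;
    }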
@@ -329,7 +465,8 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){
   case Type::VectorTyID: {
     VectorType *VTy = cast<VectorType>(Ty);
     return getVectorVT(
-        getVT(VTy->getElementType(), false), VTy->getNumElements());
+        getVT(VTy->getElementType(), /*HandleUnknown=*/ false),
+        VTy->getElementCount());
   }
   }
 }
@@ -345,8 +482,9 @@ EVT EVT::getEVT(Type *Ty, bool HandleUnknown){
     return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth());
   case Type::VectorTyID: {
     VectorType *VTy = cast<VectorType>(Ty);
-    return getVectorVT(Ty->getContext(), getEVT(VTy->getElementType(), false),
-                       VTy->getNumElements());
+    return getVectorVT(Ty->getContext(),
+                       getEVT(VTy->getElementType(), /*HandleUnknown=*/ false),
+                       VTy->getElementCount());
   }
   }
 }
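Editor's note: by passing VTy->getElementCount() instead of getNumElements(), the scalable bit of the element count now survives the Type to MVT/EVT translation. A hedged usage sketch of the intended effect; this is assumed behavior at this revision, not code from the patch:

    #include "llvm/CodeGen/ValueTypes.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"

    void roundTrip(llvm::LLVMContext &Context) {
      llvm::VectorType *VTy =
          llvm::VectorType::get(llvm::Type::getInt32Ty(Context), 4,
                                /*Scalable=*/true);
      // With getElementCount() threaded through, this should yield the
      // scalable MVT::nxv4i32 rather than collapsing to MVT::v4i32.
      llvm::MVT VT = llvm::MVT::getVT(VTy);
      (void)VT;
    }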
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 4a06704a8876..5312e2eea96b 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -80,15 +80,14 @@ void VirtRegMap::grow() {
   Virt2SplitMap.resize(NumRegs);
 }
 
-void VirtRegMap::assignVirt2Phys(unsigned virtReg, MCPhysReg physReg) {
-  assert(TargetRegisterInfo::isVirtualRegister(virtReg) &&
-         TargetRegisterInfo::isPhysicalRegister(physReg));
-  assert(Virt2PhysMap[virtReg] == NO_PHYS_REG &&
+void VirtRegMap::assignVirt2Phys(Register virtReg, MCPhysReg physReg) {
+  assert(virtReg.isVirtual() && Register::isPhysicalRegister(physReg));
+  assert(Virt2PhysMap[virtReg.id()] == NO_PHYS_REG &&
          "attempt to assign physical register to already mapped "
         "virtual register");
   assert(!getRegInfo().isReserved(physReg) &&
         "Attempt to map virtReg to a reserved physReg");
-  Virt2PhysMap[virtReg] = physReg;
+  Virt2PhysMap[virtReg.id()] = physReg;
 }
 
 unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
@@ -99,46 +98,46 @@ unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
   return SS;
 }
 
-bool VirtRegMap::hasPreferredPhys(unsigned VirtReg) {
-  unsigned Hint = MRI->getSimpleHint(VirtReg);
-  if (!Hint)
+bool VirtRegMap::hasPreferredPhys(Register VirtReg) {
+  Register Hint = MRI->getSimpleHint(VirtReg);
+  if (!Hint.isValid())
     return false;
-  if (TargetRegisterInfo::isVirtualRegister(Hint))
+  if (Hint.isVirtual())
     Hint = getPhys(Hint);
   return getPhys(VirtReg) == Hint;
 }
 
-bool VirtRegMap::hasKnownPreference(unsigned VirtReg) {
+bool VirtRegMap::hasKnownPreference(Register VirtReg) {
   std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(VirtReg);
-  if (TargetRegisterInfo::isPhysicalRegister(Hint.second))
+  if (Register::isPhysicalRegister(Hint.second))
     return true;
-  if (TargetRegisterInfo::isVirtualRegister(Hint.second))
+  if (Register::isVirtualRegister(Hint.second))
     return hasPhys(Hint.second);
   return false;
 }
 
-int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
-  assert(TargetRegisterInfo::isVirtualRegister(virtReg));
-  assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+int VirtRegMap::assignVirt2StackSlot(Register virtReg) {
+  assert(virtReg.isVirtual());
+  assert(Virt2StackSlotMap[virtReg.id()] == NO_STACK_SLOT &&
          "attempt to assign stack slot to already spilled register");
   const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg);
-  return Virt2StackSlotMap[virtReg] = createSpillSlot(RC);
+  return Virt2StackSlotMap[virtReg.id()] = createSpillSlot(RC);
 }
 
-void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
-  assert(TargetRegisterInfo::isVirtualRegister(virtReg));
-  assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+void VirtRegMap::assignVirt2StackSlot(Register virtReg, int SS) {
+  assert(virtReg.isVirtual());
+  assert(Virt2StackSlotMap[virtReg.id()] == NO_STACK_SLOT &&
          "attempt to assign stack slot to already spilled register");
   assert((SS >= 0 ||
           (SS >= MF->getFrameInfo().getObjectIndexBegin())) &&
          "illegal fixed frame index");
-  Virt2StackSlotMap[virtReg] = SS;
+  Virt2StackSlotMap[virtReg.id()] = SS;
 }
 
 void VirtRegMap::print(raw_ostream &OS, const Module*) const {
   OS << "********** REGISTER MAP **********\n";
   for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
-    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    unsigned Reg = Register::index2VirtReg(i);
     if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
       OS << '[' << printReg(Reg, TRI) << " -> "
          << printReg(Virt2PhysMap[Reg], TRI) << "] "
@@ -147,7 +146,7 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const {
   }
 
   for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
-    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    unsigned Reg = Register::index2VirtReg(i);
     if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
       OS << '[' << printReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
          << "] " << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n";
@@ -185,10 +184,10 @@ class VirtRegRewriter : public MachineFunctionPass {
   void rewrite();
   void addMBBLiveIns();
   bool readsUndefSubreg(const MachineOperand &MO) const;
-  void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const;
+  void addLiveInsForSubRanges(const LiveInterval &LI, Register PhysReg) const;
   void handleIdentityCopy(MachineInstr &MI) const;
   void expandCopyBundle(MachineInstr &MI) const;
-  bool subRegLiveThrough(const MachineInstr &MI, unsigned SuperPhysReg) const;
+  bool subRegLiveThrough(const MachineInstr &MI, Register SuperPhysReg) const;
 
 public:
   static char ID;
@@ -265,7 +264,7 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
 }
 
 void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI,
-                                             unsigned PhysReg) const {
+                                             Register PhysReg) const {
   assert(!LI.empty());
   assert(LI.hasSubRanges());
@@ -312,7 +311,7 @@ void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI,
 // assignments.
 void VirtRegRewriter::addMBBLiveIns() {
   for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) {
-    unsigned VirtReg = TargetRegisterInfo::index2VirtReg(Idx);
+    Register VirtReg = Register::index2VirtReg(Idx);
     if (MRI->reg_nodbg_empty(VirtReg))
       continue;
     LiveInterval &LI = LIS->getInterval(VirtReg);
@@ -320,7 +319,7 @@ void VirtRegRewriter::addMBBLiveIns() {
       continue;
     // This is a virtual register that is live across basic blocks. Its
     // assigned PhysReg must be marked as live-in to those blocks.
-    unsigned PhysReg = VRM->getPhys(VirtReg);
+    Register PhysReg = VRM->getPhys(VirtReg);
     assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register.");
 
     if (LI.hasSubRanges()) {
@@ -353,7 +352,7 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {
   if (MO.isUndef())
     return true;
 
-  unsigned Reg = MO.getReg();
+  Register Reg = MO.getReg();
   const LiveInterval &LI = LIS->getInterval(Reg);
   const MachineInstr &MI = *MO.getParent();
   SlotIndex BaseIndex = LIS->getInstructionIndex(MI);
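Editor's note: the VirtRegMap hunks are part of the wider unsigned-to-llvm::Register migration visible throughout this import. Register wraps the raw register number, carries the isVirtual()/isValid() queries that previously lived on TargetRegisterInfo as static helpers, and exposes id() to recover the raw value for indexing maps such as Virt2PhysMap. A small sketch using only calls that appear in the hunks above:

    #include "llvm/CodeGen/Register.h"
    #include <cassert>

    void registerBasics() {
      // First virtual register, as built by the print() loops above.
      llvm::Register R = llvm::Register::index2VirtReg(0);
      assert(R.isVirtual() && !llvm::Register::isPhysicalRegister(R));
      unsigned Raw = R.id(); // raw number, usable as an IndexedMap key
      (void)Raw;
    }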
@@ -469,7 +468,7 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
 /// \pre \p MI defines a subregister of a virtual register that
 /// has been assigned to \p SuperPhysReg.
 bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
-                                        unsigned SuperPhysReg) const {
+                                        Register SuperPhysReg) const {
   SlotIndex MIIndex = LIS->getInstructionIndex(MI);
   SlotIndex BeforeMIUses = MIIndex.getBaseIndex();
   SlotIndex AfterMIDefs = MIIndex.getBoundaryIndex();
@@ -493,9 +492,9 @@ bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
 
 void VirtRegRewriter::rewrite() {
   bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
-  SmallVector<unsigned, 8> SuperDeads;
-  SmallVector<unsigned, 8> SuperDefs;
-  SmallVector<unsigned, 8> SuperKills;
+  SmallVector<Register, 8> SuperDeads;
+  SmallVector<Register, 8> SuperDefs;
+  SmallVector<Register, 8> SuperKills;
 
   for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
        MBBI != MBBE; ++MBBI) {
@@ -513,10 +512,10 @@ void VirtRegRewriter::rewrite() {
       if (MO.isRegMask())
         MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
 
-      if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+      if (!MO.isReg() || !MO.getReg().isVirtual())
         continue;
-      unsigned VirtReg = MO.getReg();
-      unsigned PhysReg = VRM->getPhys(VirtReg);
+      Register VirtReg = MO.getReg();
+      Register PhysReg = VRM->getPhys(VirtReg);
       assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
              "Instruction uses unmapped VirtReg");
       assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");
@@ -562,7 +561,7 @@ void VirtRegRewriter::rewrite() {
           // PhysReg operands cannot have subregister indexes.
           PhysReg = TRI->getSubReg(PhysReg, SubReg);
-          assert(PhysReg && "Invalid SubReg for physical register");
+          assert(PhysReg.isValid() && "Invalid SubReg for physical register");
           MO.setSubReg(0);
         }
         // Rewrite. Note we could have used MachineOperand::substPhysReg(), but
diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp
index 19c59e9542b4..119c3fd1ec7f 100644
--- a/lib/CodeGen/XRayInstrumentation.cpp
+++ b/lib/CodeGen/XRayInstrumentation.cpp
@@ -111,7 +111,7 @@ void XRayInstrumentation::replaceRetWithPatchableRet(
         MIB.add(MO);
       Terminators.push_back(&T);
       if (T.isCall())
-        MF.updateCallSiteInfo(&T);
+        MF.eraseCallSiteInfo(&T);
     }
   }
 }